From da908a8734344f9356354be7a95b89ac50558404 Mon Sep 17 00:00:00 2001
From: Leo Fang <leof@nvidia.com>
Date: Fri, 11 Oct 2024 23:36:19 +0000
Subject: [PATCH] clean up gh-pages branch

---
 .gitattributes                                |     1 -
 .github/actions/build/action.yml.j2           |    90 -
 .github/actions/download-artifacts/action.yml |    59 -
 .github/actions/setup/action.yml              |    68 -
 .github/copy-pr-bot.yaml                      |     4 -
 .github/workflows/ci-gh.yml                   |    35 -
 .github/workflows/gh-build-and-test.yml       |    34 -
 .github/workflows/gh-build.yml                |    95 -
 .github/workflows/triagelabel.yml             |    28 -
 .gitignore                                    |   173 -
 CONTRIBUTING.md                               |    12 -
 LICENSE                                       |    48 -
 README.md                                     |    76 -
 continuous_integration/environment.yml        |    24 -
 continuous_integration/no_dependencies.json   |     1 -
 continuous_integration/scripts/build          |    30 -
 continuous_integration/scripts/conda-utils    |    16 -
 continuous_integration/scripts/entrypoint     |    20 -
 continuous_integration/scripts/make-conda-env |    35 -
 .../scripts/render-template.py                |    52 -
 continuous_integration/scripts/setup-utils    |   156 -
 cuda_bindings/LICENSE                         |    48 -
 cuda_bindings/MANIFEST.in                     |     4 -
 cuda_bindings/README.md                       |    76 -
 cuda_bindings/benchmarks/__init__.py          |     0
 cuda_bindings/benchmarks/kernels.py           |   163 -
 cuda_bindings/benchmarks/perf_test_utils.py   |    86 -
 cuda_bindings/benchmarks/pytest.ini           |     3 -
 cuda_bindings/benchmarks/test_cupy.py         |   193 -
 .../benchmarks/test_launch_latency.py         |   311 -
 cuda_bindings/benchmarks/test_numba.py        |    50 -
 .../benchmarks/test_pointer_attributes.py     |   106 -
 cuda_bindings/cuda/__init__.pxd               |     0
 cuda_bindings/cuda/__init__.py                |    10 -
 cuda_bindings/cuda/bindings/__init__.pxd      |     0
 cuda_bindings/cuda/bindings/__init__.py       |     3 -
 .../cuda/bindings/_bindings/__init__.py       |     0
 .../cuda/bindings/_bindings/cydriver.pxd.in   |  2289 -
 .../cuda/bindings/_bindings/cydriver.pyx.in   | 14081 -----
 .../cuda/bindings/_bindings/cynvrtc.pxd.in    |   114 -
 .../cuda/bindings/_bindings/cynvrtc.pyx.in    |   552 -
 .../cuda/bindings/_bindings/loader.cpp        |   354 -
 .../cuda/bindings/_bindings/loader.h          |     8 -
 .../cuda/bindings/_bindings/loader.pxd        |    10 -
 cuda_bindings/cuda/bindings/_lib/__init__.py  |     0
 .../cuda/bindings/_lib/cyruntime/__init__.py  |     0
 .../bindings/_lib/cyruntime/cyruntime.pxd.in  |   311 -
 .../bindings/_lib/cyruntime/cyruntime.pyx.in  |  4930 --
 .../cuda/bindings/_lib/cyruntime/utils.pxd.in |   121 -
 .../cuda/bindings/_lib/cyruntime/utils.pyx.in |  3548 --
 cuda_bindings/cuda/bindings/_lib/dlfcn.pxd    |    18 -
 .../cuda/bindings/_lib/param_packer.cpp       |   163 -
 .../cuda/bindings/_lib/param_packer.h         |    11 -
 .../cuda/bindings/_lib/param_packer.pxd       |     9 -
 cuda_bindings/cuda/bindings/_lib/utils.pxd.in |   127 -
 cuda_bindings/cuda/bindings/_lib/utils.pyx.in |   593 -
 cuda_bindings/cuda/bindings/_version.py       |   683 -
 cuda_bindings/cuda/bindings/cydriver.pxd.in   |  4837 --
 cuda_bindings/cuda/bindings/cydriver.pyx.in   |  2744 -
 cuda_bindings/cuda/bindings/cynvrtc.pxd.in    |   136 -
 cuda_bindings/cuda/bindings/cynvrtc.pyx.in    |   134 -
 cuda_bindings/cuda/bindings/cyruntime.pxd.in  |  3312 --
 cuda_bindings/cuda/bindings/cyruntime.pyx.in  |  2501 -
 cuda_bindings/cuda/bindings/driver.pxd.in     |  7587 ---
 cuda_bindings/cuda/bindings/driver.pyx.in     | 46867 ----------------
 cuda_bindings/cuda/bindings/nvrtc.pxd.in      |    26 -
 cuda_bindings/cuda/bindings/nvrtc.pyx.in      |   882 -
 cuda_bindings/cuda/bindings/runtime.pxd.in    |  3645 --
 cuda_bindings/cuda/bindings/runtime.pyx.in    | 32369 -----------
 cuda_bindings/cuda/ccuda.pxd                  |     7 -
 cuda_bindings/cuda/ccuda.pyx                  |     7 -
 cuda_bindings/cuda/ccudart.pxd                |     7 -
 cuda_bindings/cuda/ccudart.pyx                |     7 -
 cuda_bindings/cuda/cnvrtc.pxd                 |     7 -
 cuda_bindings/cuda/cnvrtc.pyx                 |     7 -
 cuda_bindings/cuda/cuda.pyx                   |    14 -
 cuda_bindings/cuda/cudart.pyx                 |    14 -
 cuda_bindings/cuda/nvrtc.pyx                  |    14 -
 .../0_Introduction/clock_nvrtc_test.py        |   101 -
 .../simpleCubemapTexture_test.py              |   201 -
 .../examples/0_Introduction/simpleP2P_test.py |   206 -
 .../0_Introduction/simpleZeroCopy_test.py     |   157 -
 .../0_Introduction/systemWideAtomics_test.py  |   236 -
 .../0_Introduction/vectorAddDrv_test.py       |   106 -
 .../0_Introduction/vectorAddMMAP_test.py      |   267 -
 .../streamOrderedAllocation_test.py           |   210 -
 .../globalToShmemAsyncCopy_test.py            |  1075 -
 .../3_CUDA_Features/simpleCudaGraphs_test.py  |   375 -
 .../conjugateGradientMultiBlockCG_test.py     |   330 -
 cuda_bindings/examples/common/common.py       |    58 -
 cuda_bindings/examples/common/helper_cuda.py  |    45 -
 .../examples/common/helper_string.py          |    24 -
 .../examples/extra/isoFDModelling_test.py     |   664 -
 .../examples/extra/jit_program_test.py        |   165 -
 .../examples/extra/numba_emm_plugin.py        |   161 -
 cuda_bindings/examples/pytest.ini             |     4 -
 cuda_bindings/pyproject.toml                  |    58 -
 cuda_bindings/requirements.txt                |    10 -
 cuda_bindings/setup.py                        |   270 -
 cuda_bindings/tests/test_ccuda.pyx            |    58 -
 cuda_bindings/tests/test_ccudart.pyx          |    86 -
 cuda_bindings/tests/test_cuda.py              |   875 -
 cuda_bindings/tests/test_cudart.py            |  1302 -
 cuda_bindings/tests/test_cython.py            |    42 -
 cuda_bindings/tests/test_interoperability.py  |   233 -
 .../tests/test_interoperability_cython.pyx    |   217 -
 cuda_bindings/tests/test_kernelParams.py      |   728 -
 cuda_bindings/tests/test_nvrtc.py             |    27 -
 cuda_core/MANIFEST.in                         |     1 -
 cuda_core/README.md                           |     9 -
 cuda_core/cuda/core/__init__.pxd              |     0
 cuda_core/cuda/core/__init__.py               |    10 -
 cuda_core/cuda/core/_context.py               |    29 -
 cuda_core/cuda/core/_device.py                |   187 -
 cuda_core/cuda/core/_dlpack.pxd               |    79 -
 cuda_core/cuda/core/_dlpack.pyx               |   108 -
 cuda_core/cuda/core/_event.py                 |    95 -
 cuda_core/cuda/core/_kernel_arg_handler.pyx   |   218 -
 cuda_core/cuda/core/_launcher.py              |    90 -
 cuda_core/cuda/core/_memory.py                |   241 -
 cuda_core/cuda/core/_memoryview.pyx           |   297 -
 cuda_core/cuda/core/_module.py                |    85 -
 cuda_core/cuda/core/_program.py               |    85 -
 cuda_core/cuda/core/_stream.py                |   243 -
 cuda_core/cuda/core/_utils.py                 |   131 -
 cuda_core/cuda/core/_version.py               |     5 -
 cuda_core/cuda/core/dlpack.h                  |   332 -
 cuda_core/cuda/core/utils.py                  |     5 -
 cuda_core/examples/saxpy.py                   |   104 -
 cuda_core/examples/vector_add.py              |    62 -
 cuda_core/pyproject.toml                      |    55 -
 cuda_core/setup.py                            |    49 -
 docs_src/Makefile                             |    20 -
 docs_src/environment-docs.yml                 |    19 -
 docs_src/make.bat                             |    35 -
 .../images/Nsigth-Compute-CLI-625x473.png     |   Bin 339615 -> 0 bytes
 docs_src/source/_static/logo-dark-mode.png    |   Bin 50546 -> 0 bytes
 docs_src/source/_static/logo-light-mode.png   |   Bin 48816 -> 0 bytes
 docs_src/source/api.rst                       |    11 -
 docs_src/source/conduct.md                    |    82 -
 docs_src/source/conf.py                       |    67 -
 docs_src/source/contribute.md                 |    12 -
 docs_src/source/index.rst                     |    28 -
 docs_src/source/install.md                    |    92 -
 docs_src/source/module/driver.rst             |  6792 ---
 docs_src/source/module/nvrtc.rst              |  1119 -
 docs_src/source/module/runtime.rst            |  5274 --
 docs_src/source/motivation.md                 |    41 -
 docs_src/source/overview.md                   |   321 -
 docs_src/source/release.md                    |    28 -
 docs_src/source/release/11.4.0-notes.md       |    42 -
 docs_src/source/release/11.5.0-notes.md       |   110 -
 docs_src/source/release/11.6.0-notes.md       |    73 -
 docs_src/source/release/11.6.1-notes.md       |    31 -
 docs_src/source/release/11.7.0-notes.md       |    31 -
 docs_src/source/release/11.7.1-notes.md       |    47 -
 docs_src/source/release/11.8.0-notes.md       |    40 -
 docs_src/source/release/11.8.1-notes.md       |    32 -
 docs_src/source/release/11.8.2-notes.md       |    31 -
 docs_src/source/release/11.8.3-notes.md       |    33 -
 docs_src/source/release/11.8.4-notes.md       |    32 -
 docs_src/source/release/12.0.0-notes.md       |    33 -
 docs_src/source/release/12.1.0-notes.md       |    34 -
 docs_src/source/release/12.2.0-notes.md       |    33 -
 docs_src/source/release/12.2.1-notes.md       |    31 -
 docs_src/source/release/12.3.0-notes.md       |    36 -
 docs_src/source/release/12.4.0-notes.md       |    34 -
 docs_src/source/release/12.5.0-notes.md       |    34 -
 docs_src/source/release/12.6.0-notes.md       |    36 -
 docs_src/source/release/12.6.1-notes.md       |    53 -
 170 files changed, 160734 deletions(-)
 delete mode 100644 .gitattributes
 delete mode 100644 .github/actions/build/action.yml.j2
 delete mode 100644 .github/actions/download-artifacts/action.yml
 delete mode 100644 .github/actions/setup/action.yml
 delete mode 100644 .github/copy-pr-bot.yaml
 delete mode 100644 .github/workflows/ci-gh.yml
 delete mode 100644 .github/workflows/gh-build-and-test.yml
 delete mode 100644 .github/workflows/gh-build.yml
 delete mode 100644 .github/workflows/triagelabel.yml
 delete mode 100644 .gitignore
 delete mode 100644 CONTRIBUTING.md
 delete mode 100644 LICENSE
 delete mode 100644 README.md
 delete mode 100644 continuous_integration/environment.yml
 delete mode 100644 continuous_integration/no_dependencies.json
 delete mode 100755 continuous_integration/scripts/build
 delete mode 100755 continuous_integration/scripts/conda-utils
 delete mode 100755 continuous_integration/scripts/entrypoint
 delete mode 100755 continuous_integration/scripts/make-conda-env
 delete mode 100755 continuous_integration/scripts/render-template.py
 delete mode 100755 continuous_integration/scripts/setup-utils
 delete mode 100644 cuda_bindings/LICENSE
 delete mode 100644 cuda_bindings/MANIFEST.in
 delete mode 100644 cuda_bindings/README.md
 delete mode 100644 cuda_bindings/benchmarks/__init__.py
 delete mode 100644 cuda_bindings/benchmarks/kernels.py
 delete mode 100644 cuda_bindings/benchmarks/perf_test_utils.py
 delete mode 100644 cuda_bindings/benchmarks/pytest.ini
 delete mode 100644 cuda_bindings/benchmarks/test_cupy.py
 delete mode 100755 cuda_bindings/benchmarks/test_launch_latency.py
 delete mode 100644 cuda_bindings/benchmarks/test_numba.py
 delete mode 100644 cuda_bindings/benchmarks/test_pointer_attributes.py
 delete mode 100644 cuda_bindings/cuda/__init__.pxd
 delete mode 100644 cuda_bindings/cuda/__init__.py
 delete mode 100644 cuda_bindings/cuda/bindings/__init__.pxd
 delete mode 100644 cuda_bindings/cuda/bindings/__init__.py
 delete mode 100644 cuda_bindings/cuda/bindings/_bindings/__init__.py
 delete mode 100644 cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in
 delete mode 100644 cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in
 delete mode 100644 cuda_bindings/cuda/bindings/_bindings/cynvrtc.pxd.in
 delete mode 100644 cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
 delete mode 100644 cuda_bindings/cuda/bindings/_bindings/loader.cpp
 delete mode 100644 cuda_bindings/cuda/bindings/_bindings/loader.h
 delete mode 100644 cuda_bindings/cuda/bindings/_bindings/loader.pxd
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/__init__.py
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/cyruntime/__init__.py
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pxd.in
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/cyruntime/utils.pxd.in
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/cyruntime/utils.pyx.in
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/dlfcn.pxd
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/param_packer.cpp
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/param_packer.h
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/param_packer.pxd
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/utils.pxd.in
 delete mode 100644 cuda_bindings/cuda/bindings/_lib/utils.pyx.in
 delete mode 100644 cuda_bindings/cuda/bindings/_version.py
 delete mode 100644 cuda_bindings/cuda/bindings/cydriver.pxd.in
 delete mode 100644 cuda_bindings/cuda/bindings/cydriver.pyx.in
 delete mode 100644 cuda_bindings/cuda/bindings/cynvrtc.pxd.in
 delete mode 100644 cuda_bindings/cuda/bindings/cynvrtc.pyx.in
 delete mode 100644 cuda_bindings/cuda/bindings/cyruntime.pxd.in
 delete mode 100644 cuda_bindings/cuda/bindings/cyruntime.pyx.in
 delete mode 100644 cuda_bindings/cuda/bindings/driver.pxd.in
 delete mode 100644 cuda_bindings/cuda/bindings/driver.pyx.in
 delete mode 100644 cuda_bindings/cuda/bindings/nvrtc.pxd.in
 delete mode 100644 cuda_bindings/cuda/bindings/nvrtc.pyx.in
 delete mode 100644 cuda_bindings/cuda/bindings/runtime.pxd.in
 delete mode 100644 cuda_bindings/cuda/bindings/runtime.pyx.in
 delete mode 100644 cuda_bindings/cuda/ccuda.pxd
 delete mode 100644 cuda_bindings/cuda/ccuda.pyx
 delete mode 100644 cuda_bindings/cuda/ccudart.pxd
 delete mode 100644 cuda_bindings/cuda/ccudart.pyx
 delete mode 100644 cuda_bindings/cuda/cnvrtc.pxd
 delete mode 100644 cuda_bindings/cuda/cnvrtc.pyx
 delete mode 100644 cuda_bindings/cuda/cuda.pyx
 delete mode 100644 cuda_bindings/cuda/cudart.pyx
 delete mode 100644 cuda_bindings/cuda/nvrtc.pyx
 delete mode 100644 cuda_bindings/examples/0_Introduction/clock_nvrtc_test.py
 delete mode 100644 cuda_bindings/examples/0_Introduction/simpleCubemapTexture_test.py
 delete mode 100644 cuda_bindings/examples/0_Introduction/simpleP2P_test.py
 delete mode 100644 cuda_bindings/examples/0_Introduction/simpleZeroCopy_test.py
 delete mode 100644 cuda_bindings/examples/0_Introduction/systemWideAtomics_test.py
 delete mode 100644 cuda_bindings/examples/0_Introduction/vectorAddDrv_test.py
 delete mode 100644 cuda_bindings/examples/0_Introduction/vectorAddMMAP_test.py
 delete mode 100644 cuda_bindings/examples/2_Concepts_and_Techniques/streamOrderedAllocation_test.py
 delete mode 100644 cuda_bindings/examples/3_CUDA_Features/globalToShmemAsyncCopy_test.py
 delete mode 100644 cuda_bindings/examples/3_CUDA_Features/simpleCudaGraphs_test.py
 delete mode 100644 cuda_bindings/examples/4_CUDA_Libraries/conjugateGradientMultiBlockCG_test.py
 delete mode 100644 cuda_bindings/examples/common/common.py
 delete mode 100644 cuda_bindings/examples/common/helper_cuda.py
 delete mode 100644 cuda_bindings/examples/common/helper_string.py
 delete mode 100644 cuda_bindings/examples/extra/isoFDModelling_test.py
 delete mode 100644 cuda_bindings/examples/extra/jit_program_test.py
 delete mode 100644 cuda_bindings/examples/extra/numba_emm_plugin.py
 delete mode 100644 cuda_bindings/examples/pytest.ini
 delete mode 100644 cuda_bindings/pyproject.toml
 delete mode 100644 cuda_bindings/requirements.txt
 delete mode 100644 cuda_bindings/setup.py
 delete mode 100644 cuda_bindings/tests/test_ccuda.pyx
 delete mode 100644 cuda_bindings/tests/test_ccudart.pyx
 delete mode 100644 cuda_bindings/tests/test_cuda.py
 delete mode 100644 cuda_bindings/tests/test_cudart.py
 delete mode 100644 cuda_bindings/tests/test_cython.py
 delete mode 100644 cuda_bindings/tests/test_interoperability.py
 delete mode 100644 cuda_bindings/tests/test_interoperability_cython.pyx
 delete mode 100644 cuda_bindings/tests/test_kernelParams.py
 delete mode 100644 cuda_bindings/tests/test_nvrtc.py
 delete mode 100644 cuda_core/MANIFEST.in
 delete mode 100644 cuda_core/README.md
 delete mode 100644 cuda_core/cuda/core/__init__.pxd
 delete mode 100644 cuda_core/cuda/core/__init__.py
 delete mode 100644 cuda_core/cuda/core/_context.py
 delete mode 100644 cuda_core/cuda/core/_device.py
 delete mode 100644 cuda_core/cuda/core/_dlpack.pxd
 delete mode 100644 cuda_core/cuda/core/_dlpack.pyx
 delete mode 100644 cuda_core/cuda/core/_event.py
 delete mode 100644 cuda_core/cuda/core/_kernel_arg_handler.pyx
 delete mode 100644 cuda_core/cuda/core/_launcher.py
 delete mode 100644 cuda_core/cuda/core/_memory.py
 delete mode 100644 cuda_core/cuda/core/_memoryview.pyx
 delete mode 100644 cuda_core/cuda/core/_module.py
 delete mode 100644 cuda_core/cuda/core/_program.py
 delete mode 100644 cuda_core/cuda/core/_stream.py
 delete mode 100644 cuda_core/cuda/core/_utils.py
 delete mode 100644 cuda_core/cuda/core/_version.py
 delete mode 100644 cuda_core/cuda/core/dlpack.h
 delete mode 100644 cuda_core/cuda/core/utils.py
 delete mode 100644 cuda_core/examples/saxpy.py
 delete mode 100644 cuda_core/examples/vector_add.py
 delete mode 100644 cuda_core/pyproject.toml
 delete mode 100644 cuda_core/setup.py
 delete mode 100644 docs_src/Makefile
 delete mode 100644 docs_src/environment-docs.yml
 delete mode 100644 docs_src/make.bat
 delete mode 100644 docs_src/source/_static/images/Nsigth-Compute-CLI-625x473.png
 delete mode 100644 docs_src/source/_static/logo-dark-mode.png
 delete mode 100644 docs_src/source/_static/logo-light-mode.png
 delete mode 100644 docs_src/source/api.rst
 delete mode 100644 docs_src/source/conduct.md
 delete mode 100644 docs_src/source/conf.py
 delete mode 100644 docs_src/source/contribute.md
 delete mode 100644 docs_src/source/index.rst
 delete mode 100644 docs_src/source/install.md
 delete mode 100644 docs_src/source/module/driver.rst
 delete mode 100644 docs_src/source/module/nvrtc.rst
 delete mode 100644 docs_src/source/module/runtime.rst
 delete mode 100644 docs_src/source/motivation.md
 delete mode 100644 docs_src/source/overview.md
 delete mode 100644 docs_src/source/release.md
 delete mode 100644 docs_src/source/release/11.4.0-notes.md
 delete mode 100644 docs_src/source/release/11.5.0-notes.md
 delete mode 100644 docs_src/source/release/11.6.0-notes.md
 delete mode 100644 docs_src/source/release/11.6.1-notes.md
 delete mode 100644 docs_src/source/release/11.7.0-notes.md
 delete mode 100644 docs_src/source/release/11.7.1-notes.md
 delete mode 100644 docs_src/source/release/11.8.0-notes.md
 delete mode 100644 docs_src/source/release/11.8.1-notes.md
 delete mode 100644 docs_src/source/release/11.8.2-notes.md
 delete mode 100644 docs_src/source/release/11.8.3-notes.md
 delete mode 100644 docs_src/source/release/11.8.4-notes.md
 delete mode 100644 docs_src/source/release/12.0.0-notes.md
 delete mode 100644 docs_src/source/release/12.1.0-notes.md
 delete mode 100644 docs_src/source/release/12.2.0-notes.md
 delete mode 100644 docs_src/source/release/12.2.1-notes.md
 delete mode 100644 docs_src/source/release/12.3.0-notes.md
 delete mode 100644 docs_src/source/release/12.4.0-notes.md
 delete mode 100644 docs_src/source/release/12.5.0-notes.md
 delete mode 100644 docs_src/source/release/12.6.0-notes.md
 delete mode 100644 docs_src/source/release/12.6.1-notes.md

diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index 8c8fc427..00000000
--- a/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-cuda/_version.py export-subst
diff --git a/.github/actions/build/action.yml.j2 b/.github/actions/build/action.yml.j2
deleted file mode 100644
index 59a6ba29..00000000
--- a/.github/actions/build/action.yml.j2
+++ /dev/null
@@ -1,90 +0,0 @@
-name: build
-
-description: Build specified project
-
-inputs:
-  build-type:
-    required: true
-    type: string
-    description: One of ci / release
-  target-device:
-    required: true
-    type: string
-  host-platform:
-    required: true
-    type: string
-  use-container:
-    required: true
-    type: boolean
-  docker-image:
-    type: string
-    required: true
-  upload-enabled:
-    required: true
-    type: boolean
-
-runs:
-  using: composite
-  steps:
-
-<% for package_id, package_info in packages.items() %>
-    - name: Download <<package_info.repo>> (artifacts)
-      uses: ./.github/actions/download-artifacts
-      with:
-        artifact-repo: "<<package_info.repo>>"
-        artifact-name: "<<package_info.artifact_name | replace_placeholder('repo', package_info.repo) | replace_placeholder('git_tag', package_info.git_tag) >>"
-        target-device: "${{ inputs.target-device }}"
-        git_sha: "<<package_info.git_tag>>"
-        host-platform: ${{ inputs.host-platform }}
-        dest-dir: ${{ env.ARTIFACTS_DIR }}
-        dependencies-workflow: <<package_info.artifact_workflow>>
-<% endfor %>
-
-<% if packages %>
-
-    - name: Display structure of downloaded artifacts
-      shell: bash --noprofile --norc -xeuo pipefail {0}
-      run: |
-        pwd
-        ls -lahR ${{ env.ARTIFACTS_DIR }}
-<% endif %>
-
-    - if: ${{ inputs.use-container }}
-      name: Build (in container)
-      shell: bash --noprofile --norc -xeuo pipefail {0}
-      run: |
-
-        docker run \
-          -e AWS_REGION \
-          -e AWS_SESSION_TOKEN \
-          -e AWS_ACCESS_KEY_ID \
-          -e AWS_SECRET_ACCESS_KEY \
-          -e GITHUB_TOKEN \
-          -e ARTIFACTS_DIR="$ARTIFACTS_DIR" \
-          -e UPLOAD_ENABLED="$UPLOAD_ENABLED" \
-          -e USE_CUDA="$USE_CUDA" \
-          -e REPO_DIR="$REPO_DIR" \
-          -e LEGATE_CORE_BUILD_MODE="$LEGATE_CORE_BUILD_MODE" \
-          -e PYTHON_VERSION="$PYTHON_VERSION" \
-          -v "${{ env.REPO_DIR }}:${{ env.REPO_DIR }}" \
-          -v "${{ env.ARTIFACTS_DIR }}:${{ env.ARTIFACTS_DIR }}" \
-          --rm "${{ inputs.docker-image }}" \
-          /bin/bash -c "${{ env.REPO_DIR }}/continuous_integration/scripts/entrypoint ${{ env.REPO_DIR }}/continuous_integration/scripts/build ${{ inputs.build-type}} ${{ inputs.target-device }}"
-
-    - if: ${{ !inputs.use-container }}
-      name: Build (without container)
-      shell: bash --noprofile --norc -xeuo pipefail {0}
-      run: |
-        "${{ env.REPO_DIR }}/continuous_integration/scripts/entrypoint" "${{ env.REPO_DIR }}/continuous_integration/scripts/build" "${{ inputs.build-type}}" "${{ inputs.target-device }}"
-
-    - name: Display structure of the artifacts folder (post build)
-      shell: bash --noprofile --norc -xeuo pipefail {0}
-      run: |
-        sudo chown -R $(whoami) ${{ env.ARTIFACTS_DIR }}
-        ls -lahR ${{ env.ARTIFACTS_DIR }}
-
-    - name: Upload build artifacts
-      uses: actions/upload-artifact@v4
-      with:
-        name: ${{ env.ARTIFACT_NAME }}
-        path: ${{ env.ARTIFACTS_DIR }}
diff --git a/.github/actions/download-artifacts/action.yml b/.github/actions/download-artifacts/action.yml
deleted file mode 100644
index c3dffa02..00000000
--- a/.github/actions/download-artifacts/action.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-name: download-artifacts
-
-description: Download dependencies (artifacts)
-
-inputs:
-  artifact-repo:
-    type: string
-    require: true
-  artifact-name:
-    type: string
-    require: true
-  target-device:
-    type: string
-    required: true
-  git_sha:
-    type: string
-    required: true
-  host-platform:
-    type: string
-    required: true
-  dest-dir:
-    type: string
-    required: true
-  dependencies-workflow:
-    required: true
-    type: string
-
-runs:
-  using: composite
-  steps:
-
-    - id: cache
-      name: Cache conda artifacts
-      uses: actions/cache@v4
-      with:
-        key: "nvidia/{ inputs.artifact-repo }}@${{ inputs.host-platform }}-${{ inputs.git_sha }}-${{ inputs.target-device }}"
-        path: ${{ inputs.dest-dir }}
-
-    - if: steps.cache.outputs.cache-hit != 'true'
-      name: Download ${{ inputs.artifact-repo }} artifacts
-      uses: dawidd6/action-download-artifact@v3
-      with:
-        path: ${{ inputs.dest-dir }}
-        repo: nvidia/${{ inputs.artifact-repo }}
-        check_artifacts: true
-        commit: ${{ inputs.git_sha }}
-        workflow_conclusion: ""
-        workflow: ${{ inputs.dependencies-workflow }}
-        name: ${{ inputs.artifact-name }}
-        skip_unpack: true
-        if_no_artifact_found: fail
-        allow_forks: false
-
-    - if: steps.cache.outputs.cache-hit != 'true'
-      name: Unpack artifact
-      shell: bash --noprofile --norc -xeuo pipefail {0}
-      run: |
-        cd ${{ inputs.dest-dir }}
-        unzip *.zip
diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml
deleted file mode 100644
index adeb48df..00000000
--- a/.github/actions/setup/action.yml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: Common setup
-
-inputs:
-  client-repo:
-    required: true
-    type: string
-  build-type:
-    required: true
-    type: string
-  target-device:
-    required: true
-    type: string
-  host-platform:
-    required: true
-    type: string
-  build-mode:
-    required: true
-    type: string
-  upload-enabled:
-    required: true
-    type: boolean
-  python-version:
-    required: false
-    type: string
-
-runs:
-  using: composite
-  steps:
-    - name: Set REPO_DIR and Dump environment
-      shell: bash --noprofile --norc -xeuo pipefail {0}
-      run: |
-        echo "REPO_DIR=$(pwd)" >> $GITHUB_ENV
-        env
-
-    - name: Set environment variables
-      shell: bash --noprofile --norc -xeuo pipefail {0}
-      run: |
-
-        WITH_TESTS_STR=''
-        if [[ ("${{ inputs.upload-enabled }}" == "false") && ("${{ inputs.build-type }}" != "ci") ]]; then
-          WITH_TESTS_STR='-with_tests'
-        fi
-
-        TARGET_PLATFORM='linux-64'
-        if [[ "${{ inputs.host-platform }}" == "linux-aarch64" ]]; then
-          TARGET_PLATFORM='linux-aarch64'
-        fi
-
-        BUILD_MODE="${{ inputs.build-mode }}"
-        BUILD_MODE_STR=""
-        [ -n "${BUILD_MODE}" ] && BUILD_MODE_STR="-${BUILD_MODE}"
-
-        if [[ ("${BUILD_MODE}" == "") || ("${BUILD_MODE}" == "release") ]]; then
-          # We upload release versions in the default folder.
-          PKG_DIR="${TARGET_PLATFORM}"
-        else
-          PKG_DIR="${BUILD_MODE}/${TARGET_PLATFORM}"
-        fi
-
-        echo "ARTIFACT_NAME=${{ inputs.host-platform }}-${{ inputs.build-type }}-${{ inputs.client-repo }}-python${{ inputs.python-version }}-${{ inputs.target-device }}${BUILD_MODE_STR}${WITH_TESTS_STR}-${{ github.sha }}" >> $GITHUB_ENV
-        echo "ARTIFACTS_DIR=$(realpath "$(pwd)/dist")" >> $GITHUB_ENV
-        echo "USE_CUDA=${{ (inputs.target-device == 'cpu' && 'OFF') || 'ON' }}" >> $GITHUB_ENV
-        echo "UPLOAD_ENABLED=${{ (inputs.upload-enabled  == 'true' && 'ON') || 'OFF' }}" >> $GITHUB_ENV
-        echo "LEGATE_CORE_BUILD_MODE=${BUILD_MODE}" >> $GITHUB_ENV
-        echo "BUILD_DATE=$(date +%Y%m%d)" >> $GITHUB_ENV
-        echo "TARGET_PLATFORM=${TARGET_PLATFORM}" >> $GITHUB_ENV
-        echo "PKG_DIR=${PKG_DIR}" >> $GITHUB_ENV
-        echo "PYTHON_VERSION=${{ inputs.python-version }}" >> $GITHUB_ENV
\ No newline at end of file
diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml
deleted file mode 100644
index 895ba83e..00000000
--- a/.github/copy-pr-bot.yaml
+++ /dev/null
@@ -1,4 +0,0 @@
-# Configuration file for `copy-pr-bot` GitHub App
-# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/
-
-enabled: true
diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml
deleted file mode 100644
index 2c43d03f..00000000
--- a/.github/workflows/ci-gh.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-name: Build and test
-
-concurrency:
-  group: ${{ startsWith(github.ref_name, 'main') && format('unique-{0}', github.run_id) || format('ci-build-and-test-on-{0}-from-{1}', github.event_name, github.ref_name) }}
-  cancel-in-progress: true
-
-on:
-  push:
-    branches:
-      - "pull-request/[0-9]+"
-      - "main"
-
-jobs:
-  build-and-test:
-    name: Build and test (${{ matrix.host-platform }}, ${{ matrix.target-device }}, ${{ matrix.build-mode }})
-    strategy:
-      fail-fast: false
-      matrix:
-        host-platform:
-          - linux-x64
-        target-device:
-          - gpu
-        build-mode:
-          - release
-        upload-enabled:
-          - false
-    uses:
-      ./.github/workflows/gh-build-and-test.yml
-    with:
-      host-platform: ${{ matrix.host-platform }}
-      target-device: ${{ matrix.target-device }}
-      build-mode: ${{ matrix.build-mode }}
-      build-type: ci
-      upload-enabled: ${{ matrix.upload-enabled }}
-    secrets: inherit
diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml
deleted file mode 100644
index 4376776d..00000000
--- a/.github/workflows/gh-build-and-test.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-on:
-  workflow_call:
-    inputs:
-      host-platform:
-        type: string
-        required: true
-      target-device:
-        type: string
-        required: true
-      build-mode:
-        type: string
-        required: true
-      build-type:
-        type: string
-        required: true
-      upload-enabled:
-        type: boolean
-        required: true
-jobs:
-  build:
-    if: ${{ github.repository_owner == 'nvidia' }}
-    uses:
-      ./.github/workflows/gh-build.yml
-    with:
-      client-repo: ${{ github.event.repository.name }}
-      target-device: ${{ inputs.target-device }}
-      runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-cpu16') || (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu16') || (inputs.host-platform == 'mac' && 'macos-latest') }}
-      build-type: ${{ inputs.build-type }}
-      use-container: ${{ inputs.host-platform == 'linux-x64' || inputs.host-platform == 'linux-aarch64'}}
-      host-platform: ${{ inputs.host-platform }}
-      dependencies-file: ""
-      build-mode: ${{ inputs.build-mode }}
-      upload-enabled: ${{ inputs.upload-enabled }}
-    secrets: inherit
diff --git a/.github/workflows/gh-build.yml b/.github/workflows/gh-build.yml
deleted file mode 100644
index 922e04ec..00000000
--- a/.github/workflows/gh-build.yml
+++ /dev/null
@@ -1,95 +0,0 @@
-name: Build
-
-on:
-  workflow_call:
-    inputs:
-      client-repo:
-        required: true
-        type: string
-      target-device:
-        required: true
-        type: string
-      runs-on:
-        required: true
-        type: string
-      build-type:
-        required: true
-        type: string
-        description: One of ci / release
-      use-container:
-        required: true
-        type: boolean
-      host-platform:
-        required: true
-        type: string
-      dependencies-file:
-        required: true
-        type: string
-        description: path to versions.json relative to the target repo dir
-      build-mode:
-        required: true
-        type: string
-      upload-enabled:
-        required: true
-        type: boolean
-      python-version:
-        required: false
-        type: string
-
-jobs:
-  build:
-    name: Build (${{ inputs.host-platform }}, ${{ inputs.target-device }}, ${{ inputs.build-type }}, CMake build-mode=${{ inputs.build-mode }}, Python "${{ inputs.python-version }}", Use container=${{ inputs.use-container }} )
-
-    permissions:
-      id-token: write # This is required for configure-aws-credentials
-      contents: read  # This is required for actions/checkout
-
-    runs-on: ${{ inputs.runs-on }}
-
-    steps:
-      - name: Checkout ${{ inputs.client-repo }}
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Setup
-        uses: ./.github/actions/setup
-        with:
-          client-repo: ${{ inputs.client-repo }}
-          build-type: ${{ inputs.build-type }}
-          target-device: "${{ inputs.target-device }}"
-          host-platform: ${{ inputs.host-platform }}
-          build-mode: ${{ inputs.build-mode }}
-          upload-enabled: ${{ inputs.upload-enabled }}
-          python-version: ${{ inputs.python-version }}
-
-      - name: Render templates
-        shell: bash --noprofile --norc -xeuo pipefail {0}
-        run: |
-          pip -q install jinja2
-
-          DEPENDENCIES_FILE=""
-
-          if [ -z "${{ inputs.dependencies-file }}" ]; then
-            DEPENDENCIES_FILE="${REPO_DIR}/continuous_integration/no_dependencies.json"
-          else
-            DEPENDENCIES_FILE="${REPO_DIR}/${{ inputs.dependencies-file }}"
-          fi
-
-          ${REPO_DIR}/continuous_integration/scripts/render-template.py .github/actions/build/action.yml.j2 "${DEPENDENCIES_FILE}" .github/actions/build/action.yml
-
-      - name: Dump templates
-        shell: bash --noprofile --norc -xeuo pipefail {0}
-        run: |
-          echo ${REPO_DIR}/.github/actions/build/action.yml
-          cat ${REPO_DIR}/.github/actions/build/action.yml
-
-      - name: Call build action
-        uses: ./.github/actions/build
-        with:
-          build-type: ${{ inputs.build-type }}
-          target-device: "${{ inputs.target-device }}"
-          host-platform: ${{ inputs.host-platform }}
-          use-container: ${{ inputs.use-container }}
-          docker-image: "condaforge/miniforge3:latest"
-          upload-enabled: ${{ inputs.upload-enabled }}
diff --git a/.github/workflows/triagelabel.yml b/.github/workflows/triagelabel.yml
deleted file mode 100644
index 74127b0c..00000000
--- a/.github/workflows/triagelabel.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: Add Triage Label
-
-on:
-  issues:
-    types: [opened]
-
-jobs:
-  triage:
-    runs-on: ubuntu-latest
-    steps:
-    - name: Check for existing labels
-      id: check_labels
-      uses: actions/github-script@v6
-      with:
-        script: |
-          const labels = await github.issues.listLabelsOnIssue({
-            owner: context.repo.owner,
-            repo: context.repo.repo,
-            issue_number: context.issue.number
-          });
-          return labels.data.length > 0;
-
-    - name: Add Triage Label
-      if: steps.check_labels.outputs.result == 'false'
-      uses: actions-ecosystem/action-add-labels@v1
-      with:
-        github_token: ${{ secrets.GITHUB_TOKEN }}
-        labels: triage
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 0f7ad5be..00000000
--- a/.gitignore
+++ /dev/null
@@ -1,173 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# CUDA Python specific
-.cache/
-.pytest_cache/
-.benchmarks/
-*.cpp
-!cuda_bindings/cuda/bindings/_lib/param_packer.cpp
-!cuda_bindings/cuda/bindings/_bindings/loader.cpp
-
-# CUDA Python specific (auto-generated)
-cuda_bindings/cuda/bindings/_bindings/cydriver.pxd
-cuda_bindings/cuda/bindings/_bindings/cydriver.pyx
-cuda_bindings/cuda/bindings/_bindings/cynvrtc.pxd
-cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx
-cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pxd
-cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx
-cuda_bindings/cuda/bindings/_lib/cyruntime/utils.pxd
-cuda_bindings/cuda/bindings/_lib/cyruntime/utils.pyx
-cuda_bindings/cuda/bindings/_lib/utils.pxd
-cuda_bindings/cuda/bindings/_lib/utils.pyx
-cuda_bindings/cuda/bindings/cydriver.pxd
-cuda_bindings/cuda/bindings/cydriver.pyx
-cuda_bindings/cuda/bindings/cyruntime.pxd
-cuda_bindings/cuda/bindings/cyruntime.pyx
-cuda_bindings/cuda/bindings/cynvrtc.pxd
-cuda_bindings/cuda/bindings/cynvrtc.pyx
-cuda_bindings/cuda/bindings/driver.pxd
-cuda_bindings/cuda/bindings/driver.pyx
-cuda_bindings/cuda/bindings/runtime.pxd
-cuda_bindings/cuda/bindings/runtime.pyx
-cuda_bindings/cuda/bindings/nvrtc.pxd
-cuda_bindings/cuda/bindings/nvrtc.pyx
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs_src/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
-
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# pytype static type analyzer
-.pytype/
-
-# Cython debug symbols
-cython_debug/
-
-# Dont ignore
-!.github/actions/build/
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
deleted file mode 100644
index 44f1d02b..00000000
--- a/CONTRIBUTING.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# Contributing to CUDA Python
-
-Thank you for your interest in contributing to CUDA Python! Based on the type of contribution, it will fall into two categories:
-
-1. You want to report a bug, feature request, or documentation issue
-    - File an [issue](https://github.com/NVIDIA/cuda-python/issues/new)
-    describing what you encountered or what you want to see changed.
-    - The NVIDIA team will evaluate the issues and triage them, scheduling
-    them for a release. If you believe the issue needs priority attention
-    comment on the issue to notify the team.
-2. You want to implement a feature or bug-fix
-    - At this time we do not accept code contributions.
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index b7d042fc..00000000
--- a/LICENSE
+++ /dev/null
@@ -1,48 +0,0 @@
-NVIDIA SOFTWARE LICENSE
-
-This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA CUDA Python software and materials provided hereunder ("SOFTWARE").
-
-This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users. 
-
-You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions.
-
-1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license.
-
-2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant: 
-a.  The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA's intellectual property rights. 
-b.  You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms of your agreements with respect to distributed SOFTWARE. 
-
-3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows:
-a.  The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs.
-b.  You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SOFTWARE or copies of the SOFTWARE. 
-c.  You may not modify or create derivative works of any portion of the SOFTWARE. 
-d.  You may not bypass, disable, or circumvent any technical measure, encryption, security, digital rights management or authentication mechanism in the SOFTWARE.
-e.  You may not use the SOFTWARE in any manner that would cause it to become subject to an open source software license. As examples, licenses that require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge.
-f.  Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. 
-g.  You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney's fees and costs incident to establishing the right of indemnification) arising out of or related to use of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. 
-
-4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in production or business-critical systems. 
-
-5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time without notice, but is not obligated to support or update the SOFTWARE.
- 
-6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is a conflict between the terms in this license and the license terms associated with a component, the license terms associated with the components control only to the extent necessary to resolve the conflict.  
-
-7. FEEDBACK. You may, but don't have to, provide to NVIDIA any Feedback. "Feedback" means any suggestions, bug fixes, enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA will use Feedback at its choice.
-
-8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. 
-
-9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA'S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. 
-
-10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail to comply with any term and condition of this license or if you commence or participate in any legal proceeding against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA's sole discretion, the continued use of it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this license are not affected by the termination of this license. All provisions of this license will survive termination, except for the license granted to you.  
-
-11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. 
-
-12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or operation of law without NVIDIA's permission. Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. 
- 
-13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury's Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the SOFTWARE.
-
-14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is "commercial items" consisting of "commercial computer software" and "commercial computer software documentation" provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. 
-
-15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party.
-
-(v. May 12, 2021)
diff --git a/README.md b/README.md
deleted file mode 100644
index 0072e062..00000000
--- a/README.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# CUDA-Python
-
-CUDA Python is a standard set of low-level interfaces, providing full coverage of and access to the CUDA host APIs from Python. Checkout the [Overview](https://nvidia.github.io/cuda-python/overview.html) for the workflow and performance results.
-
-## Installing
-
-CUDA Python can be installed from:
-
-* PYPI
-* Conda (nvidia channel)
-* Source builds
-
-There're differences in each of these options that are described further in [Installation](https://nvidia.github.io/cuda-python/install.html) documentation. Each package will guarantee minor version compatibility.
-
-## Runtime Dependencies
-
-CUDA Python is supported on all platforms that CUDA is supported. Specific dependencies are as follows:
-
-* Driver: Linux (450.80.02 or later) Windows (456.38 or later)
-* CUDA Toolkit 12.0 to 12.6
-
-Only the NVRTC redistributable component is required from the CUDA Toolkit. [CUDA Toolkit Documentation](https://docs.nvidia.com/cuda/index.html) Installation Guides can be used for guidance. Note that the NVRTC component in the Toolkit can be obtained via PYPI, Conda or Local Installer.
-
-### Supported Python Versions
-
-CUDA Python follows [NEP 29](https://numpy.org/neps/nep-0029-deprecation_policy.html) for supported Python version guarantee.
-
-Before dropping support, an issue will be raised to look for feedback.
-
-Source builds work for multiple Python versions, however pre-build PyPI and Conda packages are only provided for a subset:
-
-* Python 3.9 to 3.12
-
-## Testing
-
-### Requirements
-
-Latest dependencies can be found in [requirements.txt](https://github.com/NVIDIA/cuda-python/blob/main/requirements.txt).
-
-### Unit-tests
-
-You can run the included tests with:
-
-```
-python -m pytest tests/
-```
-
-### Benchmark
-
-You can run benchmark only tests with:
-
-```
-python -m pytest --benchmark-only benchmarks/
-```
-
-### Samples
-
-You can run the included tests with:
-
-```
-python -m pytest examples/
-```
-
-## Examples
-
-CUDA Samples rewriten using CUDA Python are found in `examples`.
-
-Custom extra included examples:
-
-- `examples/extra/jit_program_test.py`: Demonstrates the use of the API to compile and
-  launch a kernel on the device. Includes device memory allocation /
-  deallocation, transfers between host and device, creation and usage of
-  streams, and context management.
-- `examples/extra/numba_emm_plugin.py`: Implements a Numba External Memory Management
-  plugin, showing that this CUDA Python Driver API can coexist with other
-  wrappers of the driver API.
diff --git a/continuous_integration/environment.yml b/continuous_integration/environment.yml
deleted file mode 100644
index 6d922d43..00000000
--- a/continuous_integration/environment.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-name: cuda_python
-channels:
-  - defaults
-dependencies:
-  - python>=3.10
-  - cython>=3.0.0
-  - pytest>=6.2.4
-  - numpy>=1.21.1
-  - setuptools
-  - wheel
-  - pip
-  - cuda-version=12.6
-  - cuda-cudart-static
-  - cuda-driver-dev
-  - cuda-cudart-dev
-  - cuda-profiler-api
-  - cuda-nvrtc-dev
-  - cuda-nvcc
-  - pip:
-    - pytest-benchmark>=3.4.1
-    - pyclibrary>=0.1.7
-    - versioneer==0.29
-    - tomli; python_version < "3.11"
-    - pywin32; sys_platform == 'win32'
diff --git a/continuous_integration/no_dependencies.json b/continuous_integration/no_dependencies.json
deleted file mode 100644
index e2d7bd79..00000000
--- a/continuous_integration/no_dependencies.json
+++ /dev/null
@@ -1 +0,0 @@
-{ "packages" : {} }
diff --git a/continuous_integration/scripts/build b/continuous_integration/scripts/build
deleted file mode 100755
index 5db25e67..00000000
--- a/continuous_integration/scripts/build
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env bash
-
-build_ci() {
-    set -xeou pipefail
-
-    cd "${REPO_DIR}"
-
-    export CUDA_HOME="${CONDA_PREFIX}/targets/x86_64-linux"
-    export PARALLEL_LEVEL=$(nproc --ignore 1)
-
-    python setup.py bdist_wheel
-}
-
-build_project() {
-    set -xeou pipefail
-
-    export PYTHONUNBUFFERED=1
-
-    . setup-utils;
-    init_build_env "$@";
-
-    git config --global --add safe.directory "$REPO_DIR/.git"
-
-    case "${BUILD_TYPE}" in
-        ci) build_ci;;
-        *) return 1;;
-    esac
-}
-
-(build_project "$@");
diff --git a/continuous_integration/scripts/conda-utils b/continuous_integration/scripts/conda-utils
deleted file mode 100755
index e0dd32ca..00000000
--- a/continuous_integration/scripts/conda-utils
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env bash
-
-activate_conda_env() {
-    set +xu
-    eval "$(conda shell.bash hook)"
-    conda activate "${CONDA_ENV}";
-    set -xu
-    : ${PYTHON_VERSION:=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")}
-    export PYTHON_VERSION
-}
-
-conda_info() {
-    set +x
-    conda info
-    set -x
-}
diff --git a/continuous_integration/scripts/entrypoint b/continuous_integration/scripts/entrypoint
deleted file mode 100755
index fe4f5cea..00000000
--- a/continuous_integration/scripts/entrypoint
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-
-set_initial_env() {
-    set -xeuo pipefail
-
-    export PATH="${PATH}:${REPO_DIR}/continuous_integration/scripts"
-}
-
-entrypoint() {
-    set -xeuo pipefail
-    set_initial_env;
-
-    git config --global --add safe.directory "$REPO_DIR/.git"
-
-    cd "${REPO_DIR}"
-
-    exec "$@";
-}
-
-entrypoint "$@";
diff --git a/continuous_integration/scripts/make-conda-env b/continuous_integration/scripts/make-conda-env
deleted file mode 100755
index 1294f038..00000000
--- a/continuous_integration/scripts/make-conda-env
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env bash
-
-set -x
-
-make_ci_env() {
-    mamba env create -n "${CONDA_ENV}" -f "${REPO_DIR}/continuous_integration/environment.yml"
-}
-
-make_test_env() {
-    . conda-utils
-
-    mamba env create -n "${CONDA_ENV}" -f "${REPO_DIR}/continuous_integration/environment.yml"
-
-    activate_conda_env
-
-    pip install "${ARTIFACTS_DIR}"/*.whl
-
-}
-
-make_conda_env() {
-    set -xeuo pipefail
-
-    . setup-utils;
-    set_base_defs;
-
-    case "$1" in
-        ci) make_ci_env;;
-        test) make_test_env;;
-        *) return 1;;
-    esac
-
-    return 0;
-}
-
-(make_conda_env "$@");
diff --git a/continuous_integration/scripts/render-template.py b/continuous_integration/scripts/render-template.py
deleted file mode 100755
index b887e361..00000000
--- a/continuous_integration/scripts/render-template.py
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import json
-from jinja2 import Environment, FileSystemLoader
-import os
-import re
-
-# TODO: make this work for arbitrary context. ie. implement replace_using_context()
-def replace_placeholder(source_str, variable_name, variable_value):
-    # Escaping any regex special characters in variable_name
-    variable_name_escaped = re.escape(variable_name)
-
-    # Using regular expression to replace ${variable_name} with actual variable_value
-    # \s* means any amount of whitespace (including none)
-    # pattern = rf'\$\{{\s*\{{\s*{variable_name_escaped}\s*\}}\s*\}}'
-    pattern = rf'<<\s*{variable_name_escaped}\s*>>'
-    return re.sub(pattern, variable_value.strip(), source_str)
-
-# Setup command-line argument parsing
-parser = argparse.ArgumentParser(description='Render a Jinja2 template using a JSON context.')
-parser.add_argument('template_file', type=str, help='Path to the Jinja2 template file (with .j2 extension).')
-parser.add_argument('json_file', type=str, help='Path to the JSON file to use as the rendering context.')
-parser.add_argument('output_file', type=str, help='Path to the output file.')
-
-args = parser.parse_args()
-
-# Load JSON file as the rendering context
-with open(args.json_file, 'r') as file:
-    context = json.load(file)
-
-# Setup Jinja2 environment and load the template
-env = Environment(
-    loader=FileSystemLoader(searchpath='./'),
-    variable_start_string='<<',
-    variable_end_string='>>',
-    block_start_string='<%',
-    block_end_string='%>',
-    comment_start_string='<#',
-    comment_end_string='#>')
-env.filters['replace_placeholder'] = replace_placeholder
-
-template = env.get_template(args.template_file)
-
-# Render the template with the context
-rendered_content = template.render(context)
-# print(rendered_content)
-
-with open(args.output_file, 'w') as file:
-    file.write(rendered_content)
-
-print(f'Template rendered successfully. Output saved to {args.output_file}')
diff --git a/continuous_integration/scripts/setup-utils b/continuous_integration/scripts/setup-utils
deleted file mode 100755
index 62579e63..00000000
--- a/continuous_integration/scripts/setup-utils
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/usr/bin/env bash
-
-install_from_apt() {
-    set -xeuo pipefail
-
-    export DEBIAN_FRONTEND=non-interactive
-
-    # Run package updates and install packages
-    apt-get -q update
-    apt-get -q install -y wget curl jq sudo ninja-build vim numactl rsync
-}
-
-install_cmake() {
-    set -xeuo pipefail
-
-    wget -q https://github.com/Kitware/CMake/releases/download/v3.26.5/cmake-3.26.5-linux-x86_64.tar.gz
-
-    tar -xzf cmake-3.26.5-linux-x86_64.tar.gz
-}
-
-setup_linux_build_env() {
-    set -xeuo pipefail
-    export OS_SHORT_NAME=linux
-    export PATH="${PATH}:${PREBUILD_DIR}/cmake-3.26.5-linux-x86_64/bin"
-
-    mkdir -p /tmp/out /tmp/env_yaml
-}
-
-install_linux_tools() {
-    set -xeuo pipefail
-
-    export SED=sed
-    export READLINK=readlink
-
-    install_from_apt;
-    install_cmake;
-
-    mkdir -p /tmp/out /tmp/env_yaml
-}
-
-install_linux_test_tools() {
-    set -xeuo pipefail
-
-    export SED=sed
-    export READLINK=readlink
-
-    # Run package updates and install packages
-    apt-get -q update
-    apt-get -q install -y numactl
-}
-
-set_base_defs() {
-    set -xeuo pipefail
-
-    export CONDA_ENV=cuda_python
-
-    CONDA_PLATFORM=$(conda info | grep 'platform' | awk -F ' : ' '{print $2}')
-    export CONDA_PLATFORM
-
-    export PREBUILD_DIR=/tmp/prebuild
-    mkdir -p "$PREBUILD_DIR"
-
-    export BUILD_DIR="$REPO_DIR/build"
-
-    # Get the machine architecture
-    ARCH=$(uname -m)
-
-    if [ "$ARCH" == "aarch64" ]; then
-        # Use the gcc march value used by aarch64 Ubuntu.
-        BUILD_MARCH=armv8-a
-    else
-        # Use uname -m otherwise
-        BUILD_MARCH=$(uname -m | tr '_' '-')
-    fi
-
-    export BUILD_MARCH
-
-    export CUDA_VERSION=12.2.2
-
-    export MAX_LIBSANITIZER_VERSION=11.4
-
-    export USE_OPENMP=ON
-}
-
-# -----------------------------------------------------------------------------
-
-prep_git() {
-    # Temporarily disable exit on error
-    set +e
-    git config --global user.email > /dev/null
-    local email_exit_status=$?
-    git config --global user.name > /dev/null
-    local name_exit_status=$?
-    # Re-enable exit on error
-    set -e
-
-    if [ $email_exit_status -ne 0 ]; then
-        git config --global --add user.email "users.noreply.github.com"
-        echo "git user.email was not set. It's now set to users.noreply.github.com"
-    else
-        echo "Note: git user.email is already set."
-    fi
-
-    if [ $name_exit_status -ne 0 ]; then
-        git config --global --add user.name "anon"
-        echo "git user.name was not set. It's now set to anon"
-    else
-        echo "Note: git user.name is already set."
-    fi
-
-    # Fix "fatal: detected dubious ownership in repository at '/tmp/legate.core'"
-    # during local builds.
-    git config --global --add safe.directory "$REPO_DIR"
-}
-
-
-setup_build_env() {
-    set -xeuo pipefail
-
-    install_linux_tools;
-
-    setup_linux_build_env;
-
-    rm -rf "$PREBUILD_DIR"
-    mkdir -p "$PREBUILD_DIR"
-    cd $PREBUILD_DIR
-
-    prep_git;
-}
-
-init_build_env() {
-    set -x;
-
-    . conda-utils;
-
-    export BUILD_TYPE=$1
-
-    set -xeuo pipefail;
-
-    set_base_defs;
-
-    cd "$PREBUILD_DIR"
-
-    setup_build_env;
-
-    cd "$REPO_DIR";
-
-    if [[ -d "${BUILD_DIR}" ]]; then
-        rm -rf "${BUILD_DIR}"
-    fi
-
-    make-conda-env "$BUILD_TYPE";
-
-    activate_conda_env;
-    conda_info;
-}
\ No newline at end of file
diff --git a/cuda_bindings/LICENSE b/cuda_bindings/LICENSE
deleted file mode 100644
index b7d042fc..00000000
--- a/cuda_bindings/LICENSE
+++ /dev/null
@@ -1,48 +0,0 @@
-NVIDIA SOFTWARE LICENSE
-
-This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA CUDA Python software and materials provided hereunder ("SOFTWARE").
-
-This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users. 
-
-You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions.
-
-1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license.
-
-2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant: 
-a.  The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA's intellectual property rights. 
-b.  You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms of your agreements with respect to distributed SOFTWARE. 
-
-3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows:
-a.  The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs.
-b.  You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SOFTWARE or copies of the SOFTWARE. 
-c.  You may not modify or create derivative works of any portion of the SOFTWARE. 
-d.  You may not bypass, disable, or circumvent any technical measure, encryption, security, digital rights management or authentication mechanism in the SOFTWARE.
-e.  You may not use the SOFTWARE in any manner that would cause it to become subject to an open source software license. As examples, licenses that require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge.
-f.  Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. 
-g.  You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney's fees and costs incident to establishing the right of indemnification) arising out of or related to use of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. 
-
-4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in production or business-critical systems. 
-
-5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time without notice, but is not obligated to support or update the SOFTWARE.
- 
-6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is a conflict between the terms in this license and the license terms associated with a component, the license terms associated with the components control only to the extent necessary to resolve the conflict.  
-
-7. FEEDBACK. You may, but don't have to, provide to NVIDIA any Feedback. "Feedback" means any suggestions, bug fixes, enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA will use Feedback at its choice.
-
-8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. 
-
-9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA'S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. 
-
-10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail to comply with any term and condition of this license or if you commence or participate in any legal proceeding against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA's sole discretion, the continued use of it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this license are not affected by the termination of this license. All provisions of this license will survive termination, except for the license granted to you.  
-
-11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. 
-
-12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or operation of law without NVIDIA's permission. Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. 
- 
-13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury's Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the SOFTWARE.
-
-14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is "commercial items" consisting of "commercial computer software" and "commercial computer software documentation" provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. 
-
-15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party.
-
-(v. May 12, 2021)
diff --git a/cuda_bindings/MANIFEST.in b/cuda_bindings/MANIFEST.in
deleted file mode 100644
index ef9def92..00000000
--- a/cuda_bindings/MANIFEST.in
+++ /dev/null
@@ -1,4 +0,0 @@
-recursive-include cuda/ *.pyx *.pxd
-# at least with setuptools 75.0.0 this folder was added erroneously
-# to the payload, causing file copying to the build environment failed
-exclude cuda/bindings
diff --git a/cuda_bindings/README.md b/cuda_bindings/README.md
deleted file mode 100644
index 1cbafb56..00000000
--- a/cuda_bindings/README.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# CUDA-Python
-
-CUDA Python is a standard set of low-level interfaces, providing full coverage of and access to the CUDA host APIs from Python. Checkout the [Overview](https://nvidia.github.io/cuda-python/overview.html) for the workflow and performance results.
-
-## Installing
-
-CUDA Python can be installed from:
-
-* PYPI
-* Conda (nvidia channel)
-* Source builds
-
-There're differences in each of these options that are described further in [Installation](https://nvidia.github.io/cuda-python/install.html) documentation. Each package will guarantee minor version compatibility.
-
-## Runtime Dependencies
-
-CUDA Python is supported on all platforms that CUDA is supported. Specific dependencies are as follows:
-
-* Driver: Linux (450.80.02 or later) Windows (456.38 or later)
-* CUDA Toolkit 12.0 to 12.6
-
-Only the NVRTC redistributable component is required from the CUDA Toolkit. [CUDA Toolkit Documentation](https://docs.nvidia.com/cuda/index.html) Installation Guides can be used for guidance. Note that the NVRTC component in the Toolkit can be obtained via PYPI, Conda or Local Installer.
-
-### Supported Python Versions
-
-CUDA Python follows [NEP 29](https://numpy.org/neps/nep-0029-deprecation_policy.html) for supported Python version guarantee.
-
-Before dropping support, an issue will be raised to look for feedback.
-
-Source builds work for multiple Python versions, however pre-build PyPI and Conda packages are only provided for a subset:
-
-* Python 3.9 to 3.12
-
-## Testing
-
-### Requirements
-
-Latest dependencies can be found in [requirements.txt](https://github.com/NVIDIA/cuda-python/blob/main/requirements.txt).
-
-### Unit-tests
-
-You can run the included tests with:
-
-```
-python -m pytest
-```
-
-### Benchmark
-
-You can run benchmark only tests with:
-
-```
-python -m pytest --benchmark-only
-```
-
-### Samples
-
-You can run the included tests with:
-
-```
-python -m pytest examples
-```
-
-## Examples
-
-CUDA Samples rewriten using CUDA Python are found in `examples`.
-
-Custom extra included examples:
-
-- `examples/extra/jit_program_test.py`: Demonstrates the use of the API to compile and
-  launch a kernel on the device. Includes device memory allocation /
-  deallocation, transfers between host and device, creation and usage of
-  streams, and context management.
-- `examples/extra/numba_emm_plugin.py`: Implements a Numba External Memory Management
-  plugin, showing that this CUDA Python Driver API can coexist with other
-  wrappers of the driver API.
diff --git a/cuda_bindings/benchmarks/__init__.py b/cuda_bindings/benchmarks/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/cuda_bindings/benchmarks/kernels.py b/cuda_bindings/benchmarks/kernels.py
deleted file mode 100644
index d31cc58a..00000000
--- a/cuda_bindings/benchmarks/kernels.py
+++ /dev/null
@@ -1,163 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-kernel_string = '''\
-#define ITEM_PARAM(x, T) T x
-#define REP1(x, T)   , ITEM_PARAM(x, T)	
-#define REP2(x, T)   REP1(x##0, T)   REP1(x##1, T)
-#define REP4(x, T)   REP2(x##0, T)   REP2(x##1, T)
-#define REP8(x, T)   REP4(x##0, T)   REP4(x##1, T)
-#define REP16(x, T)  REP8(x##0, T)   REP8(x##1, T)
-#define REP32(x, T)  REP16(x##0, T)  REP16(x##1, T)
-#define REP64(x, T)  REP32(x##0, T)  REP32(x##1, T)
-#define REP128(x, T) REP64(x##0, T)  REP64(x##1, T)
-#define REP256(x, T) REP128(x##0, T) REP128(x##1, T)
-
-template<size_t maxBytes>
-struct KernelFunctionParam
-{
-   unsigned char p[maxBytes];
-};
-
-extern "C" __global__ void small_kernel(float *f)
-{
-   *f = 0.0f;
-}
-
-extern "C" __global__ void empty_kernel()
-{
-   return;
-}
-
-extern "C" __global__
-void small_kernel_512_args(
-	ITEM_PARAM(F, int*)
-	REP1(A, int*)
-	REP2(A, int*)
-	REP4(A, int*)
-	REP8(A, int*)
-	REP16(A, int*)
-	REP32(A, int*)
-	REP64(A, int*)
-	REP128(A, int*)
-	REP256(A, int*))
-{
-    *F = 0;
-}
-
-extern "C" __global__
-void small_kernel_512_bools(
-	ITEM_PARAM(F, bool)
-	REP1(A, bool)
-	REP2(A, bool)
-	REP4(A, bool)
-	REP8(A, bool)
-	REP16(A, bool)
-	REP32(A, bool)
-	REP64(A, bool)
-	REP128(A, bool)
-	REP256(A, bool))
-{
-    return;
-}
-
-extern "C" __global__
-void small_kernel_512_ints(
-	ITEM_PARAM(F, int)
-	REP1(A, int)
-	REP2(A, int)
-	REP4(A, int)
-	REP8(A, int)
-	REP16(A, int)
-	REP32(A, int)
-	REP64(A, int)
-	REP128(A, int)
-	REP256(A, int))
-{
-    return;
-}
-
-extern "C" __global__
-void small_kernel_512_doubles(
-	ITEM_PARAM(F, double)
-	REP1(A, double)
-	REP2(A, double)
-	REP4(A, double)
-	REP8(A, double)
-	REP16(A, double)
-	REP32(A, double)
-	REP64(A, double)
-	REP128(A, double)
-	REP256(A, double))
-{
-    return;
-}
-
-extern "C" __global__
-void small_kernel_512_chars(
-	ITEM_PARAM(F, char)
-	REP1(A, char)
-	REP2(A, char)
-	REP4(A, char)
-	REP8(A, char)
-	REP16(A, char)
-	REP32(A, char)
-	REP64(A, char)
-	REP128(A, char)
-	REP256(A, char))
-{
-    return;
-}
-
-extern "C" __global__
-void small_kernel_512_longlongs(
-	ITEM_PARAM(F, long long)
-	REP1(A, long long)
-	REP2(A, long long)
-	REP4(A, long long)
-	REP8(A, long long)
-	REP16(A, long long)
-	REP32(A, long long)
-	REP64(A, long long)
-	REP128(A, long long)
-	REP256(A, long long))
-{
-    return;
-}
-
-extern "C" __global__
-void small_kernel_256_args(
-	ITEM_PARAM(F, int*)
-	REP1(A, int*)
-	REP2(A, int*)
-	REP4(A, int*)
-	REP8(A, int*)
-	REP16(A, int*)
-	REP32(A, int*)
-	REP64(A, int*)
-	REP128(A, int*))
-{
-    *F = 0;
-}
-
-extern "C" __global__
-void small_kernel_16_args(
-	ITEM_PARAM(F, int*)
-	REP1(A, int*)
-	REP2(A, int*)
-	REP4(A, int*)
-	REP8(A, int*))
-{
-    *F = 0;
-}
-
-extern "C" __global__ void small_kernel_2048B(KernelFunctionParam<2048> param)
-{
-    // Do not touch param to prevent compiler from copying
-    // the whole structure from const bank to lmem.
-}
-'''
diff --git a/cuda_bindings/benchmarks/perf_test_utils.py b/cuda_bindings/benchmarks/perf_test_utils.py
deleted file mode 100644
index 11f5cc30..00000000
--- a/cuda_bindings/benchmarks/perf_test_utils.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import pytest
-from cuda import cuda, cudart, nvrtc
-import numpy as np
-
-def ASSERT_DRV(err):
-    if isinstance(err, cuda.CUresult):
-        if err != cuda.CUresult.CUDA_SUCCESS:
-            raise RuntimeError('Cuda Error: {}'.format(err))
-    elif isinstance(err, cudart.cudaError_t):
-        if err != cudart.cudaError_t.cudaSuccess:
-            raise RuntimeError('Cudart Error: {}'.format(err))
-    elif isinstance(err, nvrtc.nvrtcResult):
-        if err != nvrtc.nvrtcResult.NVRTC_SUCCESS:
-            raise RuntimeError('Nvrtc Error: {}'.format(err))
-    else:
-        raise RuntimeError('Unknown error type: {}'.format(err))
-
-@pytest.fixture
-def init_cuda():
-    # Initialize
-    err, = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, device = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    ASSERT_DRV(err)
-
-    # create stream
-    err, stream = cuda.cuStreamCreate(cuda.CUstream_flags.CU_STREAM_NON_BLOCKING.value)
-    ASSERT_DRV(err)
-
-    yield device, ctx, stream
-
-    err, = cuda.cuStreamDestroy(stream)
-    ASSERT_DRV(err)
-    err, = cuda.cuCtxDestroy(ctx)
-    ASSERT_DRV(err)
-
-@pytest.fixture
-def load_module():
-    module = None
-    def _load_module(kernel_string, device):
-        nonlocal module
-        # Get module
-        err, major = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device)
-        ASSERT_DRV(err)
-        err, minor = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device)
-        ASSERT_DRV(err)
-
-        err, prog = nvrtc.nvrtcCreateProgram(str.encode(kernel_string), b'kernelString.cu', 0, [], [])
-        ASSERT_DRV(err)
-        opts = [b'--fmad=false', bytes('--gpu-architecture=sm_' + str(major) + str(minor), 'ascii')]
-        err, = nvrtc.nvrtcCompileProgram(prog, 2, opts)
-
-        err_log, logSize = nvrtc.nvrtcGetProgramLogSize(prog)
-        ASSERT_DRV(err_log)
-        log = b' ' * logSize
-        err_log, = nvrtc.nvrtcGetProgramLog(prog, log)
-        ASSERT_DRV(err_log)
-        result = log.decode()
-        if len(result) > 1:
-            print(result)
-
-        ASSERT_DRV(err)
-        err, cubinSize = nvrtc.nvrtcGetCUBINSize(prog)
-        ASSERT_DRV(err)
-        cubin = b' ' * cubinSize
-        err, = nvrtc.nvrtcGetCUBIN(prog, cubin)
-        ASSERT_DRV(err)
-        cubin = np.char.array(cubin)
-        err, module = cuda.cuModuleLoadData(cubin)
-        ASSERT_DRV(err)
-
-        return module
-
-    yield _load_module
-
-    err, = cuda.cuModuleUnload(module)
-    ASSERT_DRV(err)
diff --git a/cuda_bindings/benchmarks/pytest.ini b/cuda_bindings/benchmarks/pytest.ini
deleted file mode 100644
index a74acd3e..00000000
--- a/cuda_bindings/benchmarks/pytest.ini
+++ /dev/null
@@ -1,3 +0,0 @@
-[pytest]
-required_plugins = pytest-benchmark
-addopts = --benchmark-skip
diff --git a/cuda_bindings/benchmarks/test_cupy.py b/cuda_bindings/benchmarks/test_cupy.py
deleted file mode 100644
index 43aaa5b1..00000000
--- a/cuda_bindings/benchmarks/test_cupy.py
+++ /dev/null
@@ -1,193 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import pytest
-import ctypes
-
-# Always skip since cupy is not CTK 12.x yet
-skip_tests = True
-if not skip_tests:
-    try:
-        import cupy
-        skip_tests = False
-    except ImportError:
-        skip_tests = True
-
-from .kernels import kernel_string
-
-def launch(kernel, args=()):
-    kernel((1,), (1,), args)
-
-# Measure launch latency with no parmaeters
-@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
-@pytest.mark.benchmark(group="cupy")
-def test_launch_latency_empty_kernel(benchmark):
-    module = cupy.RawModule(code=kernel_string)
-    kernel = module.get_function('empty_kernel')
-
-    stream = cupy.cuda.stream.Stream(non_blocking=True)
-
-    with stream:
-        benchmark(launch, kernel)
-        stream.synchronize()
-
-# Measure launch latency with a single parameter
-@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
-@pytest.mark.benchmark(group="cupy")
-def test_launch_latency_small_kernel(benchmark):
-    module = cupy.RawModule(code=kernel_string)
-    kernel = module.get_function('small_kernel')
-    cupy.cuda.set_allocator()
-    arg = cupy.cuda.alloc(ctypes.sizeof(ctypes.c_float))
-
-    stream = cupy.cuda.stream.Stream(non_blocking=True)
-
-    with stream:
-        benchmark(launch, kernel, (arg,))
-        stream.synchronize()
-
-# Measure launch latency with many parameters using builtin parameter packing
-@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
-@pytest.mark.benchmark(group="cupy")
-def test_launch_latency_small_kernel_512_args(benchmark):
-    module = cupy.RawModule(code=kernel_string)
-    kernel = module.get_function('small_kernel_512_args')
-    cupy.cuda.set_allocator()
-
-    args = []
-    for _ in range(512):
-        args.append(cupy.cuda.alloc(ctypes.sizeof(ctypes.c_int)))
-    args = tuple(args)
-
-    stream = cupy.cuda.stream.Stream(non_blocking=True)
-
-    with stream:
-        benchmark(launch, kernel, args)
-        stream.synchronize()
-
-# Measure launch latency with many parameters using builtin parameter packing
-@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
-@pytest.mark.benchmark(group="cupy")
-def test_launch_latency_small_kernel_512_bools(benchmark):
-    module = cupy.RawModule(code=kernel_string)
-    kernel = module.get_function('small_kernel_512_bools')
-    cupy.cuda.set_allocator()
-
-    args = [True] * 512
-    args = tuple(args)
-
-    stream = cupy.cuda.stream.Stream(non_blocking=True)
-
-    with stream:
-        benchmark(launch, kernel, args)
-        stream.synchronize()
-
-# Measure launch latency with many parameters using builtin parameter packing
-@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
-@pytest.mark.benchmark(group="cupy")
-def test_launch_latency_small_kernel_512_doubles(benchmark):
-    module = cupy.RawModule(code=kernel_string)
-    kernel = module.get_function('small_kernel_512_doubles')
-    cupy.cuda.set_allocator()
-
-    args = [1.2345] * 512
-    args = tuple(args)
-
-    stream = cupy.cuda.stream.Stream(non_blocking=True)
-
-    with stream:
-        benchmark(launch, kernel, args)
-        stream.synchronize()
-
-# Measure launch latency with many parameters using builtin parameter packing
-@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
-@pytest.mark.benchmark(group="cupy")
-def test_launch_latency_small_kernel_512_ints(benchmark):
-    module = cupy.RawModule(code=kernel_string)
-    kernel = module.get_function('small_kernel_512_ints')
-    cupy.cuda.set_allocator()
-
-    args = [123] * 512
-    args = tuple(args)
-
-    stream = cupy.cuda.stream.Stream(non_blocking=True)
-
-    with stream:
-        benchmark(launch, kernel, args)
-        stream.synchronize()
-
-# Measure launch latency with many parameters using builtin parameter packing
-@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
-@pytest.mark.benchmark(group="cupy")
-def test_launch_latency_small_kernel_512_bytes(benchmark):
-    module = cupy.RawModule(code=kernel_string)
-    kernel = module.get_function('small_kernel_512_chars')
-    cupy.cuda.set_allocator()
-
-    args = [127] * 512
-    args = tuple(args)
-
-    stream = cupy.cuda.stream.Stream(non_blocking=True)
-
-    with stream:
-        benchmark(launch, kernel, args)
-        stream.synchronize()
-
-# Measure launch latency with many parameters using builtin parameter packing
-@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
-@pytest.mark.benchmark(group="cupy")
-def test_launch_latency_small_kernel_512_longlongs(benchmark):
-    module = cupy.RawModule(code=kernel_string)
-    kernel = module.get_function('small_kernel_512_longlongs')
-    cupy.cuda.set_allocator()
-
-    args = [9223372036854775806] * 512
-    args = tuple(args)
-
-    stream = cupy.cuda.stream.Stream(non_blocking=True)
-
-    with stream:
-        benchmark(launch, kernel, args)
-        stream.synchronize()
-
-# Measure launch latency with many parameters using builtin parameter packing
-@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
-@pytest.mark.benchmark(group="cupy")
-def test_launch_latency_small_kernel_256_args(benchmark):
-    module = cupy.RawModule(code=kernel_string)
-    kernel = module.get_function('small_kernel_256_args')
-    cupy.cuda.set_allocator()
-
-    args = []
-    for _ in range(256):
-        args.append(cupy.cuda.alloc(ctypes.sizeof(ctypes.c_int)))
-    args = tuple(args)
-
-    stream = cupy.cuda.stream.Stream(non_blocking=True)
-
-    with stream:
-        benchmark(launch, kernel, args)
-        stream.synchronize()
-
-# Measure launch latency with many parameters using builtin parameter packing
-@pytest.mark.skipif(skip_tests, reason="cupy is not installed")
-@pytest.mark.benchmark(group="cupy")
-def test_launch_latency_small_kernel_16_args(benchmark):
-    module = cupy.RawModule(code=kernel_string)
-    kernel = module.get_function('small_kernel_16_args')
-    cupy.cuda.set_allocator()
-
-    args = []
-    for _ in range(16):
-        args.append(cupy.cuda.alloc(ctypes.sizeof(ctypes.c_int)))
-    args = tuple(args)
-
-    stream = cupy.cuda.stream.Stream(non_blocking=True)
-
-    with stream:
-        benchmark(launch, kernel, args)
-        stream.synchronize()
diff --git a/cuda_bindings/benchmarks/test_launch_latency.py b/cuda_bindings/benchmarks/test_launch_latency.py
deleted file mode 100755
index 79668a63..00000000
--- a/cuda_bindings/benchmarks/test_launch_latency.py
+++ /dev/null
@@ -1,311 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import pytest
-from cuda import cuda
-import ctypes
-
-from .perf_test_utils import ASSERT_DRV, init_cuda, load_module
-from .kernels import kernel_string
-
-def launch(kernel, stream, args=(), arg_types=()):
-    cuda.cuLaunchKernel(kernel,
-                        1, 1, 1,   # grid dim
-                        1, 1, 1,   # block dim
-                        0, stream, # shared mem and stream
-                        (args, arg_types), 0) # arguments
-
-def launch_packed(kernel, stream, params):
-    cuda.cuLaunchKernel(kernel,
-                        1, 1, 1,   # grid dim
-                        1, 1, 1,   # block dim
-                        0, stream, # shared mem and stream
-                        params, 0) # arguments
-
-# Measure launch latency with no parmaeters
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_empty_kernel(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'empty_kernel')
-    ASSERT_DRV(err)
-
-    benchmark(launch, func, stream)
-
-    cuda.cuCtxSynchronize()
-
-# Measure launch latency with a single parameter
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel')
-    ASSERT_DRV(err)
-
-    err, f = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_float))
-    ASSERT_DRV(err)
-
-    benchmark(launch, func, stream, args=(f,), arg_types=(None,))
-
-    cuda.cuCtxSynchronize()
-
-    err, = cuda.cuMemFree(f)
-    ASSERT_DRV(err)
-
-# Measure launch latency with many parameters using builtin parameter packing
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel_512_args(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel_512_args')
-    ASSERT_DRV(err)
-
-    args = []
-    arg_types = [None] * 512
-    for _ in arg_types:
-        err, p = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_int))
-        ASSERT_DRV(err)
-        args.append(p)
-
-    args = tuple(args)
-    arg_types = tuple(arg_types)
-
-    benchmark(launch, func, stream, args=args, arg_types=arg_types)
-
-    cuda.cuCtxSynchronize()
-
-    for p in args:
-        err, = cuda.cuMemFree(p)
-        ASSERT_DRV(err)
-
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel_512_bools(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel_512_bools')
-    ASSERT_DRV(err)
-
-    args = [True] * 512
-    arg_types = [ctypes.c_bool] * 512
-
-    args = tuple(args)
-    arg_types = tuple(arg_types)
-
-    benchmark(launch, func, stream, args=args, arg_types=arg_types)
-
-    cuda.cuCtxSynchronize()
-
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel_512_doubles(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel_512_doubles')
-    ASSERT_DRV(err)
-
-    args = [1.2345] * 512
-    arg_types = [ctypes.c_double] * 512
-
-    args = tuple(args)
-    arg_types = tuple(arg_types)
-
-    benchmark(launch, func, stream, args=args, arg_types=arg_types)
-
-    cuda.cuCtxSynchronize()
-
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel_512_ints(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel_512_ints')
-    ASSERT_DRV(err)
-
-    args = [123] * 512
-    arg_types = [ctypes.c_int] * 512
-
-    args = tuple(args)
-    arg_types = tuple(arg_types)
-
-    benchmark(launch, func, stream, args=args, arg_types=arg_types)
-
-    cuda.cuCtxSynchronize()
-
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel_512_bytes(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel_512_chars')
-    ASSERT_DRV(err)
-
-    args = [127] * 512
-    arg_types = [ctypes.c_byte] * 512
-
-    args = tuple(args)
-    arg_types = tuple(arg_types)
-
-    benchmark(launch, func, stream, args=args, arg_types=arg_types)
-
-    cuda.cuCtxSynchronize()
-
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel_512_longlongs(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel_512_longlongs')
-    ASSERT_DRV(err)
-
-    args = [9223372036854775806] * 512
-    arg_types = [ctypes.c_longlong] * 512
-
-    args = tuple(args)
-    arg_types = tuple(arg_types)
-
-    benchmark(launch, func, stream, args=args, arg_types=arg_types)
-
-    cuda.cuCtxSynchronize()
-
-# Measure launch latency with many parameters using builtin parameter packing
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel_256_args(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel_256_args')
-    ASSERT_DRV(err)
-
-    args = []
-    arg_types = [None] * 256
-    for _ in arg_types:
-        err, p = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_int))
-        ASSERT_DRV(err)
-        args.append(p)
-
-    args = tuple(args)
-    arg_types = tuple(arg_types)
-
-    benchmark(launch, func, stream, args=args, arg_types=arg_types)
-
-    cuda.cuCtxSynchronize()
-
-    for p in args:
-        err, = cuda.cuMemFree(p)
-        ASSERT_DRV(err)
-
-# Measure launch latency with many parameters using builtin parameter packing
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel_16_args(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel_16_args')
-    ASSERT_DRV(err)
-
-    args = []
-    arg_types = [None] * 16
-    for _ in arg_types:
-        err, p = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_int))
-        ASSERT_DRV(err)
-        args.append(p)
-
-    args = tuple(args)
-    arg_types = tuple(arg_types)
-
-    benchmark(launch, func, stream, args=args, arg_types=arg_types)
-
-    cuda.cuCtxSynchronize()
-
-    for p in args:
-        err, = cuda.cuMemFree(p)
-        ASSERT_DRV(err)
-
-# Measure launch latency with many parameters, excluding parameter packing
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel_512_args_ctypes(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel_512_args')
-    ASSERT_DRV(err)
-
-    vals = []
-    val_ps = []
-    for i in range(512):
-        err, p = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_int))
-        ASSERT_DRV(err)
-        vals.append(p)
-        val_ps.append(ctypes.c_void_p(int(vals[i])))
-
-    packagedParams = (ctypes.c_void_p * 512)()
-    for i in range(512):
-        packagedParams[i] = ctypes.addressof(val_ps[i])
-
-    benchmark(launch_packed, func, stream, packagedParams)
-
-    cuda.cuCtxSynchronize()
-
-    for p in vals:
-        err, = cuda.cuMemFree(p)
-        ASSERT_DRV(err)
-
-def pack_and_launch(kernel, stream, params):
-    packed_params = (ctypes.c_void_p * len(params))()
-    ptrs = [0] * len(params)
-    for i in range(len(params)):
-        ptrs[i] = ctypes.c_void_p(int(params[i]))
-        packed_params[i] = ctypes.addressof(ptrs[i])
-
-    cuda.cuLaunchKernel(kernel,
-                        1, 1, 1,
-                        1, 1, 1,
-                        0, stream,
-                        packed_params, 0)
-
-# Measure launch latency plus parameter packing using ctypes
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel_512_args_ctypes_with_packing(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel_512_args')
-    ASSERT_DRV(err)
-
-    vals = []
-    for i in range(512):
-        err, p = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_int))
-        ASSERT_DRV(err)
-        vals.append(p)
-
-    benchmark(pack_and_launch, func, stream, vals)
-
-    cuda.cuCtxSynchronize()
-
-    for p in vals:
-        err, = cuda.cuMemFree(p)
-        ASSERT_DRV(err)
-
-# Measure launch latency with a single large struct parameter
-@pytest.mark.benchmark(group="launch-latency")
-def test_launch_latency_small_kernel_2048B(benchmark, init_cuda, load_module):
-    device, ctx, stream = init_cuda
-    module = load_module(kernel_string, device)
-
-    err, func = cuda.cuModuleGetFunction(module, b'small_kernel_2048B')
-    ASSERT_DRV(err)
-
-    class struct_2048B(ctypes.Structure):
-        _fields_ = [('values',ctypes.c_uint8 * 2048)]
-
-    benchmark(launch, func, stream, args=(struct_2048B(),), arg_types=(None,))
-
-    cuda.cuCtxSynchronize()
diff --git a/cuda_bindings/benchmarks/test_numba.py b/cuda_bindings/benchmarks/test_numba.py
deleted file mode 100644
index f0dd1231..00000000
--- a/cuda_bindings/benchmarks/test_numba.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import pytest
-import numpy as np
-try:
-    from numba import cuda
-    skip_tests = False
-except ImportError:
-    skip_tests = True
-
-def launch_empty(kernel, stream):
-    kernel[1,1, stream]()
-
-def launch(kernel, stream, arg):
-    kernel[1,1, stream](arg)
-
-# Measure launch latency with no parmaeters
-@pytest.mark.skipif(skip_tests, reason="Numba is not installed")
-@pytest.mark.benchmark(group="numba", min_rounds=1000)
-def test_launch_latency_empty_kernel(benchmark):
-    stream = cuda.stream()
-
-    @cuda.jit
-    def empty_kernel():
-        return
-
-    benchmark(launch_empty, empty_kernel, stream)
-
-    cuda.synchronize()
-
-# Measure launch latency with a single parameter
-@pytest.mark.skipif(skip_tests, reason="Numba is not installed")
-@pytest.mark.benchmark(group="numba", min_rounds=1000)
-def test_launch_latency_small_kernel(benchmark):
-    stream = cuda.stream()
-
-    arg = cuda.device_array(1, dtype=np.float32, stream=stream)
-
-    @cuda.jit
-    def small_kernel(array):
-        array[0] = 0.0
-
-    benchmark(launch, small_kernel, stream, arg)
-
-    cuda.synchronize()
diff --git a/cuda_bindings/benchmarks/test_pointer_attributes.py b/cuda_bindings/benchmarks/test_pointer_attributes.py
deleted file mode 100644
index 72de3964..00000000
--- a/cuda_bindings/benchmarks/test_pointer_attributes.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import pytest
-from cuda import cuda
-import ctypes
-import random
-
-from .perf_test_utils import ASSERT_DRV, init_cuda
-
-random.seed(0)
-
-idx = 0
-def query_attribute(attribute, ptrs):
-    global idx
-    ptr = ptrs[idx]
-    idx = (idx + 1 ) % len(ptrs)
-
-    cuda.cuPointerGetAttribute(attribute, ptr)
-
-def query_attributes(attributes, ptrs):
-    global idx
-    ptr = ptrs[idx]
-    idx = (idx + 1 ) % len(ptrs)
-
-    cuda.cuPointerGetAttributes(len(attributes), attributes, ptr)
-
-@pytest.mark.benchmark(group="pointer-attributes")
-# Measure cuPointerGetAttribute in the same way as C benchmarks
-def test_pointer_get_attribute(benchmark, init_cuda):
-    _ = init_cuda
-
-    ptrs = []
-    for _ in range(500):
-        err, ptr = cuda.cuMemAlloc(1 << 18)
-        ASSERT_DRV(err)
-        ptrs.append(ptr)
-
-    random.shuffle(ptrs)
-
-    benchmark(query_attribute, cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMORY_TYPE, ptrs)
-
-    for p in ptrs:
-        err, = cuda.cuMemFree(p)
-        ASSERT_DRV(err)
-
-@pytest.mark.benchmark(group="pointer-attributes")
-# Measure cuPointerGetAttributes with all attributes
-def test_pointer_get_attributes_all(benchmark, init_cuda):
-    _ = init_cuda
-
-    ptrs = []
-    for _ in range(500):
-        err, ptr = cuda.cuMemAlloc(1 << 18)
-        ASSERT_DRV(err)
-        ptrs.append(ptr)
-
-    random.shuffle(ptrs)
-
-    attributes = [cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_CONTEXT,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_HOST_POINTER,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_P2P_TOKENS,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_BUFFER_ID,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_MANAGED,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_RANGE_START_ADDR,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_RANGE_SIZE,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MAPPED,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_ACCESS_FLAGS,
-                  cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE]
-
-    benchmark(query_attributes, attributes, ptrs)
-
-    for p in ptrs:
-        err, = cuda.cuMemFree(p)
-        ASSERT_DRV(err)
-
-@pytest.mark.benchmark(group="pointer-attributes")
-# Measure cuPointerGetAttributes with a single attribute
-def test_pointer_get_attributes_single(benchmark, init_cuda):
-    _ = init_cuda
-
-    ptrs = []
-    for _ in range(500):
-        err, ptr = cuda.cuMemAlloc(1 << 18)
-        ASSERT_DRV(err)
-        ptrs.append(ptr)
-
-    random.shuffle(ptrs)
-
-    attributes = [cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMORY_TYPE,]
-
-    benchmark(query_attributes, attributes, ptrs)
-
-    for p in ptrs:
-        err, = cuda.cuMemFree(p)
-        ASSERT_DRV(err)
diff --git a/cuda_bindings/cuda/__init__.pxd b/cuda_bindings/cuda/__init__.pxd
deleted file mode 100644
index e69de29b..00000000
diff --git a/cuda_bindings/cuda/__init__.py b/cuda_bindings/cuda/__init__.py
deleted file mode 100644
index 8b302752..00000000
--- a/cuda_bindings/cuda/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-def __getattr__(name):
-    if name == "__version__":
-        import warnings
-        warnings.warn("accessing cuda.__version__ is deprecated, "
-                      "please switch to use cuda.bindings.__version__ instead",
-                      DeprecationWarning, stacklevel=2)
-        from . import bindings
-        return bindings.__version__
-
-    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/cuda_bindings/cuda/bindings/__init__.pxd b/cuda_bindings/cuda/bindings/__init__.pxd
deleted file mode 100644
index e69de29b..00000000
diff --git a/cuda_bindings/cuda/bindings/__init__.py b/cuda_bindings/cuda/bindings/__init__.py
deleted file mode 100644
index ecd3379a..00000000
--- a/cuda_bindings/cuda/bindings/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-
-from . import _version
-__version__ = _version.get_versions()['version']
diff --git a/cuda_bindings/cuda/bindings/_bindings/__init__.py b/cuda_bindings/cuda/bindings/_bindings/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in b/cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in
deleted file mode 100644
index 21cdb520..00000000
--- a/cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in
+++ /dev/null
@@ -1,2289 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from cuda.bindings.cydriver cimport *
-
-{{if 'cuGetErrorString' in found_functions}}
-
-cdef CUresult _cuGetErrorString(CUresult error, const char** pStr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGetErrorName' in found_functions}}
-
-cdef CUresult _cuGetErrorName(CUresult error, const char** pStr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuInit' in found_functions}}
-
-cdef CUresult _cuInit(unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDriverGetVersion' in found_functions}}
-
-cdef CUresult _cuDriverGetVersion(int* driverVersion) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGet' in found_functions}}
-
-cdef CUresult _cuDeviceGet(CUdevice* device, int ordinal) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetCount' in found_functions}}
-
-cdef CUresult _cuDeviceGetCount(int* count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetName' in found_functions}}
-
-cdef CUresult _cuDeviceGetName(char* name, int length, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetUuid' in found_functions}}
-
-cdef CUresult _cuDeviceGetUuid(CUuuid* uuid, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetUuid_v2' in found_functions}}
-
-cdef CUresult _cuDeviceGetUuid_v2(CUuuid* uuid, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetLuid' in found_functions}}
-
-cdef CUresult _cuDeviceGetLuid(char* luid, unsigned int* deviceNodeMask, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceTotalMem_v2' in found_functions}}
-
-cdef CUresult _cuDeviceTotalMem_v2(size_t* numbytes, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-
-cdef CUresult _cuDeviceGetTexture1DLinearMaxWidth(size_t* maxWidthInElements, CUarray_format pformat, unsigned numChannels, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetAttribute' in found_functions}}
-
-cdef CUresult _cuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetNvSciSyncAttributes' in found_functions}}
-
-cdef CUresult _cuDeviceGetNvSciSyncAttributes(void* nvSciSyncAttrList, CUdevice dev, int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceSetMemPool' in found_functions}}
-
-cdef CUresult _cuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetMemPool' in found_functions}}
-
-cdef CUresult _cuDeviceGetMemPool(CUmemoryPool* pool, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetDefaultMemPool' in found_functions}}
-
-cdef CUresult _cuDeviceGetDefaultMemPool(CUmemoryPool* pool_out, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetExecAffinitySupport' in found_functions}}
-
-cdef CUresult _cuDeviceGetExecAffinitySupport(int* pi, CUexecAffinityType typename, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFlushGPUDirectRDMAWrites' in found_functions}}
-
-cdef CUresult _cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetProperties' in found_functions}}
-
-cdef CUresult _cuDeviceGetProperties(CUdevprop* prop, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceComputeCapability' in found_functions}}
-
-cdef CUresult _cuDeviceComputeCapability(int* major, int* minor, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxRetain' in found_functions}}
-
-cdef CUresult _cuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxRelease_v2' in found_functions}}
-
-cdef CUresult _cuDevicePrimaryCtxRelease_v2(CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxSetFlags_v2' in found_functions}}
-
-cdef CUresult _cuDevicePrimaryCtxSetFlags_v2(CUdevice dev, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxGetState' in found_functions}}
-
-cdef CUresult _cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int* flags, int* active) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxReset_v2' in found_functions}}
-
-cdef CUresult _cuDevicePrimaryCtxReset_v2(CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxCreate_v2' in found_functions}}
-
-cdef CUresult _cuCtxCreate_v2(CUcontext* pctx, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxCreate_v3' in found_functions}}
-
-cdef CUresult _cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxCreate_v4' in found_functions}}
-
-cdef CUresult _cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxDestroy_v2' in found_functions}}
-
-cdef CUresult _cuCtxDestroy_v2(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxPushCurrent_v2' in found_functions}}
-
-cdef CUresult _cuCtxPushCurrent_v2(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxPopCurrent_v2' in found_functions}}
-
-cdef CUresult _cuCtxPopCurrent_v2(CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSetCurrent' in found_functions}}
-
-cdef CUresult _cuCtxSetCurrent(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetCurrent' in found_functions}}
-
-cdef CUresult _cuCtxGetCurrent(CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetDevice' in found_functions}}
-
-cdef CUresult _cuCtxGetDevice(CUdevice* device) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetFlags' in found_functions}}
-
-cdef CUresult _cuCtxGetFlags(unsigned int* flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSetFlags' in found_functions}}
-
-cdef CUresult _cuCtxSetFlags(unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetId' in found_functions}}
-
-cdef CUresult _cuCtxGetId(CUcontext ctx, unsigned long long* ctxId) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSynchronize' in found_functions}}
-
-cdef CUresult _cuCtxSynchronize() except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSetLimit' in found_functions}}
-
-cdef CUresult _cuCtxSetLimit(CUlimit limit, size_t value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetLimit' in found_functions}}
-
-cdef CUresult _cuCtxGetLimit(size_t* pvalue, CUlimit limit) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetCacheConfig' in found_functions}}
-
-cdef CUresult _cuCtxGetCacheConfig(CUfunc_cache* pconfig) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSetCacheConfig' in found_functions}}
-
-cdef CUresult _cuCtxSetCacheConfig(CUfunc_cache config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetApiVersion' in found_functions}}
-
-cdef CUresult _cuCtxGetApiVersion(CUcontext ctx, unsigned int* version) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetStreamPriorityRange' in found_functions}}
-
-cdef CUresult _cuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxResetPersistingL2Cache' in found_functions}}
-
-cdef CUresult _cuCtxResetPersistingL2Cache() except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetExecAffinity' in found_functions}}
-
-cdef CUresult _cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAffinityType typename) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxRecordEvent' in found_functions}}
-
-cdef CUresult _cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxWaitEvent' in found_functions}}
-
-cdef CUresult _cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxAttach' in found_functions}}
-
-cdef CUresult _cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxDetach' in found_functions}}
-
-cdef CUresult _cuCtxDetach(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetSharedMemConfig' in found_functions}}
-
-cdef CUresult _cuCtxGetSharedMemConfig(CUsharedconfig* pConfig) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSetSharedMemConfig' in found_functions}}
-
-cdef CUresult _cuCtxSetSharedMemConfig(CUsharedconfig config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleLoad' in found_functions}}
-
-cdef CUresult _cuModuleLoad(CUmodule* module, const char* fname) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleLoadData' in found_functions}}
-
-cdef CUresult _cuModuleLoadData(CUmodule* module, const void* image) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleLoadDataEx' in found_functions}}
-
-cdef CUresult _cuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleLoadFatBinary' in found_functions}}
-
-cdef CUresult _cuModuleLoadFatBinary(CUmodule* module, const void* fatCubin) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleUnload' in found_functions}}
-
-cdef CUresult _cuModuleUnload(CUmodule hmod) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetLoadingMode' in found_functions}}
-
-cdef CUresult _cuModuleGetLoadingMode(CUmoduleLoadingMode* mode) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetFunction' in found_functions}}
-
-cdef CUresult _cuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetFunctionCount' in found_functions}}
-
-cdef CUresult _cuModuleGetFunctionCount(unsigned int* count, CUmodule mod) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleEnumerateFunctions' in found_functions}}
-
-cdef CUresult _cuModuleEnumerateFunctions(CUfunction* functions, unsigned int numFunctions, CUmodule mod) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetGlobal_v2' in found_functions}}
-
-cdef CUresult _cuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* numbytes, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLinkCreate_v2' in found_functions}}
-
-cdef CUresult _cuLinkCreate_v2(unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLinkAddData_v2' in found_functions}}
-
-cdef CUresult _cuLinkAddData_v2(CUlinkState state, CUjitInputType typename, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLinkAddFile_v2' in found_functions}}
-
-cdef CUresult _cuLinkAddFile_v2(CUlinkState state, CUjitInputType typename, const char* path, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLinkComplete' in found_functions}}
-
-cdef CUresult _cuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLinkDestroy' in found_functions}}
-
-cdef CUresult _cuLinkDestroy(CUlinkState state) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetTexRef' in found_functions}}
-
-cdef CUresult _cuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetSurfRef' in found_functions}}
-
-cdef CUresult _cuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryLoadData' in found_functions}}
-
-cdef CUresult _cuLibraryLoadData(CUlibrary* library, const void* code, CUjit_option* jitOptions, void** jitOptionsValues, unsigned int numJitOptions, CUlibraryOption* libraryOptions, void** libraryOptionValues, unsigned int numLibraryOptions) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryLoadFromFile' in found_functions}}
-
-cdef CUresult _cuLibraryLoadFromFile(CUlibrary* library, const char* fileName, CUjit_option* jitOptions, void** jitOptionsValues, unsigned int numJitOptions, CUlibraryOption* libraryOptions, void** libraryOptionValues, unsigned int numLibraryOptions) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryUnload' in found_functions}}
-
-cdef CUresult _cuLibraryUnload(CUlibrary library) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetKernel' in found_functions}}
-
-cdef CUresult _cuLibraryGetKernel(CUkernel* pKernel, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetKernelCount' in found_functions}}
-
-cdef CUresult _cuLibraryGetKernelCount(unsigned int* count, CUlibrary lib) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryEnumerateKernels' in found_functions}}
-
-cdef CUresult _cuLibraryEnumerateKernels(CUkernel* kernels, unsigned int numKernels, CUlibrary lib) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetModule' in found_functions}}
-
-cdef CUresult _cuLibraryGetModule(CUmodule* pMod, CUlibrary library) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelGetFunction' in found_functions}}
-
-cdef CUresult _cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelGetLibrary' in found_functions}}
-
-cdef CUresult _cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetGlobal' in found_functions}}
-
-cdef CUresult _cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetManaged' in found_functions}}
-
-cdef CUresult _cuLibraryGetManaged(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetUnifiedFunction' in found_functions}}
-
-cdef CUresult _cuLibraryGetUnifiedFunction(void** fptr, CUlibrary library, const char* symbol) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelGetAttribute' in found_functions}}
-
-cdef CUresult _cuKernelGetAttribute(int* pi, CUfunction_attribute attrib, CUkernel kernel, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelSetAttribute' in found_functions}}
-
-cdef CUresult _cuKernelSetAttribute(CUfunction_attribute attrib, int val, CUkernel kernel, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelSetCacheConfig' in found_functions}}
-
-cdef CUresult _cuKernelSetCacheConfig(CUkernel kernel, CUfunc_cache config, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelGetName' in found_functions}}
-
-cdef CUresult _cuKernelGetName(const char** name, CUkernel hfunc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelGetParamInfo' in found_functions}}
-
-cdef CUresult _cuKernelGetParamInfo(CUkernel kernel, size_t paramIndex, size_t* paramOffset, size_t* paramSize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetInfo_v2' in found_functions}}
-
-cdef CUresult _cuMemGetInfo_v2(size_t* free, size_t* total) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAlloc_v2' in found_functions}}
-
-cdef CUresult _cuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAllocPitch_v2' in found_functions}}
-
-cdef CUresult _cuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemFree_v2' in found_functions}}
-
-cdef CUresult _cuMemFree_v2(CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetAddressRange_v2' in found_functions}}
-
-cdef CUresult _cuMemGetAddressRange_v2(CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAllocHost_v2' in found_functions}}
-
-cdef CUresult _cuMemAllocHost_v2(void** pp, size_t bytesize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemFreeHost' in found_functions}}
-
-cdef CUresult _cuMemFreeHost(void* p) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemHostAlloc' in found_functions}}
-
-cdef CUresult _cuMemHostAlloc(void** pp, size_t bytesize, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemHostGetDevicePointer_v2' in found_functions}}
-
-cdef CUresult _cuMemHostGetDevicePointer_v2(CUdeviceptr* pdptr, void* p, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemHostGetFlags' in found_functions}}
-
-cdef CUresult _cuMemHostGetFlags(unsigned int* pFlags, void* p) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAllocManaged' in found_functions}}
-
-cdef CUresult _cuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceRegisterAsyncNotification' in found_functions}}
-
-cdef CUresult _cuDeviceRegisterAsyncNotification(CUdevice device, CUasyncCallback callbackFunc, void* userData, CUasyncCallbackHandle* callback) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceUnregisterAsyncNotification' in found_functions}}
-
-cdef CUresult _cuDeviceUnregisterAsyncNotification(CUdevice device, CUasyncCallbackHandle callback) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetByPCIBusId' in found_functions}}
-
-cdef CUresult _cuDeviceGetByPCIBusId(CUdevice* dev, const char* pciBusId) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetPCIBusId' in found_functions}}
-
-cdef CUresult _cuDeviceGetPCIBusId(char* pciBusId, int length, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuIpcGetEventHandle' in found_functions}}
-
-cdef CUresult _cuIpcGetEventHandle(CUipcEventHandle* pHandle, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuIpcOpenEventHandle' in found_functions}}
-
-cdef CUresult _cuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuIpcGetMemHandle' in found_functions}}
-
-cdef CUresult _cuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuIpcOpenMemHandle_v2' in found_functions}}
-
-cdef CUresult _cuIpcOpenMemHandle_v2(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuIpcCloseMemHandle' in found_functions}}
-
-cdef CUresult _cuIpcCloseMemHandle(CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemHostRegister_v2' in found_functions}}
-
-cdef CUresult _cuMemHostRegister_v2(void* p, size_t bytesize, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemHostUnregister' in found_functions}}
-
-cdef CUresult _cuMemHostUnregister(void* p) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy' in found_functions}}
-
-cdef CUresult _cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyPeer' in found_functions}}
-
-cdef CUresult _cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyHtoD_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyDtoH_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyDtoH_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyDtoD_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyDtoD_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyDtoA_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyAtoD_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyHtoA_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyHtoA_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyAtoH_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyAtoH_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyAtoA_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyAtoA_v2(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy2D_v2' in found_functions}}
-
-cdef CUresult _cuMemcpy2D_v2(const CUDA_MEMCPY2D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy2DUnaligned_v2' in found_functions}}
-
-cdef CUresult _cuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy3D_v2' in found_functions}}
-
-cdef CUresult _cuMemcpy3D_v2(const CUDA_MEMCPY3D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy3DPeer' in found_functions}}
-
-cdef CUresult _cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyAsync' in found_functions}}
-
-cdef CUresult _cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyPeerAsync' in found_functions}}
-
-cdef CUresult _cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyHtoDAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyDtoHAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyDtoHAsync_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyDtoDAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyDtoDAsync_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyHtoAAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyHtoAAsync_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyAtoHAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyAtoHAsync_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy2DAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpy2DAsync_v2(const CUDA_MEMCPY2D* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy3DAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpy3DAsync_v2(const CUDA_MEMCPY3D* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy3DPeerAsync' in found_functions}}
-
-cdef CUresult _cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD8_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD8_v2(CUdeviceptr dstDevice, unsigned char uc, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD16_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD16_v2(CUdeviceptr dstDevice, unsigned short us, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD32_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD32_v2(CUdeviceptr dstDevice, unsigned int ui, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D8_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D16_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D32_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD8Async' in found_functions}}
-
-cdef CUresult _cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD16Async' in found_functions}}
-
-cdef CUresult _cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD32Async' in found_functions}}
-
-cdef CUresult _cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D8Async' in found_functions}}
-
-cdef CUresult _cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D16Async' in found_functions}}
-
-cdef CUresult _cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D32Async' in found_functions}}
-
-cdef CUresult _cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayCreate_v2' in found_functions}}
-
-cdef CUresult _cuArrayCreate_v2(CUarray* pHandle, const CUDA_ARRAY_DESCRIPTOR* pAllocateArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayGetDescriptor_v2' in found_functions}}
-
-cdef CUresult _cuArrayGetDescriptor_v2(CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor, CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayGetSparseProperties' in found_functions}}
-
-cdef CUresult _cuArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUarray array) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMipmappedArrayGetSparseProperties' in found_functions}}
-
-cdef CUresult _cuMipmappedArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUmipmappedArray mipmap) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayGetMemoryRequirements' in found_functions}}
-
-cdef CUresult _cuArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS* memoryRequirements, CUarray array, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMipmappedArrayGetMemoryRequirements' in found_functions}}
-
-cdef CUresult _cuMipmappedArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS* memoryRequirements, CUmipmappedArray mipmap, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayGetPlane' in found_functions}}
-
-cdef CUresult _cuArrayGetPlane(CUarray* pPlaneArray, CUarray hArray, unsigned int planeIdx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayDestroy' in found_functions}}
-
-cdef CUresult _cuArrayDestroy(CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArray3DCreate_v2' in found_functions}}
-
-cdef CUresult _cuArray3DCreate_v2(CUarray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArray3DGetDescriptor_v2' in found_functions}}
-
-cdef CUresult _cuArray3DGetDescriptor_v2(CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor, CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMipmappedArrayCreate' in found_functions}}
-
-cdef CUresult _cuMipmappedArrayCreate(CUmipmappedArray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, unsigned int numMipmapLevels) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMipmappedArrayGetLevel' in found_functions}}
-
-cdef CUresult _cuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned int level) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMipmappedArrayDestroy' in found_functions}}
-
-cdef CUresult _cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetHandleForAddressRange' in found_functions}}
-
-cdef CUresult _cuMemGetHandleForAddressRange(void* handle, CUdeviceptr dptr, size_t size, CUmemRangeHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAddressReserve' in found_functions}}
-
-cdef CUresult _cuMemAddressReserve(CUdeviceptr* ptr, size_t size, size_t alignment, CUdeviceptr addr, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAddressFree' in found_functions}}
-
-cdef CUresult _cuMemAddressFree(CUdeviceptr ptr, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemCreate' in found_functions}}
-
-cdef CUresult _cuMemCreate(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemRelease' in found_functions}}
-
-cdef CUresult _cuMemRelease(CUmemGenericAllocationHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemMap' in found_functions}}
-
-cdef CUresult _cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemMapArrayAsync' in found_functions}}
-
-cdef CUresult _cuMemMapArrayAsync(CUarrayMapInfo* mapInfoList, unsigned int count, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemUnmap' in found_functions}}
-
-cdef CUresult _cuMemUnmap(CUdeviceptr ptr, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemSetAccess' in found_functions}}
-
-cdef CUresult _cuMemSetAccess(CUdeviceptr ptr, size_t size, const CUmemAccessDesc* desc, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetAccess' in found_functions}}
-
-cdef CUresult _cuMemGetAccess(unsigned long long* flags, const CUmemLocation* location, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemExportToShareableHandle' in found_functions}}
-
-cdef CUresult _cuMemExportToShareableHandle(void* shareableHandle, CUmemGenericAllocationHandle handle, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemImportFromShareableHandle' in found_functions}}
-
-cdef CUresult _cuMemImportFromShareableHandle(CUmemGenericAllocationHandle* handle, void* osHandle, CUmemAllocationHandleType shHandleType) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetAllocationGranularity' in found_functions}}
-
-cdef CUresult _cuMemGetAllocationGranularity(size_t* granularity, const CUmemAllocationProp* prop, CUmemAllocationGranularity_flags option) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetAllocationPropertiesFromHandle' in found_functions}}
-
-cdef CUresult _cuMemGetAllocationPropertiesFromHandle(CUmemAllocationProp* prop, CUmemGenericAllocationHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemRetainAllocationHandle' in found_functions}}
-
-cdef CUresult _cuMemRetainAllocationHandle(CUmemGenericAllocationHandle* handle, void* addr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemFreeAsync' in found_functions}}
-
-cdef CUresult _cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAllocAsync' in found_functions}}
-
-cdef CUresult _cuMemAllocAsync(CUdeviceptr* dptr, size_t bytesize, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolTrimTo' in found_functions}}
-
-cdef CUresult _cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolSetAttribute' in found_functions}}
-
-cdef CUresult _cuMemPoolSetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolGetAttribute' in found_functions}}
-
-cdef CUresult _cuMemPoolGetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolSetAccess' in found_functions}}
-
-cdef CUresult _cuMemPoolSetAccess(CUmemoryPool pool, const CUmemAccessDesc* map, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolGetAccess' in found_functions}}
-
-cdef CUresult _cuMemPoolGetAccess(CUmemAccess_flags* flags, CUmemoryPool memPool, CUmemLocation* location) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolCreate' in found_functions}}
-
-cdef CUresult _cuMemPoolCreate(CUmemoryPool* pool, const CUmemPoolProps* poolProps) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolDestroy' in found_functions}}
-
-cdef CUresult _cuMemPoolDestroy(CUmemoryPool pool) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAllocFromPoolAsync' in found_functions}}
-
-cdef CUresult _cuMemAllocFromPoolAsync(CUdeviceptr* dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolExportToShareableHandle' in found_functions}}
-
-cdef CUresult _cuMemPoolExportToShareableHandle(void* handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolImportFromShareableHandle' in found_functions}}
-
-cdef CUresult _cuMemPoolImportFromShareableHandle(CUmemoryPool* pool_out, void* handle, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolExportPointer' in found_functions}}
-
-cdef CUresult _cuMemPoolExportPointer(CUmemPoolPtrExportData* shareData_out, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolImportPointer' in found_functions}}
-
-cdef CUresult _cuMemPoolImportPointer(CUdeviceptr* ptr_out, CUmemoryPool pool, CUmemPoolPtrExportData* shareData) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastCreate' in found_functions}}
-
-cdef CUresult _cuMulticastCreate(CUmemGenericAllocationHandle* mcHandle, const CUmulticastObjectProp* prop) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastAddDevice' in found_functions}}
-
-cdef CUresult _cuMulticastAddDevice(CUmemGenericAllocationHandle mcHandle, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastBindMem' in found_functions}}
-
-cdef CUresult _cuMulticastBindMem(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUmemGenericAllocationHandle memHandle, size_t memOffset, size_t size, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastBindAddr' in found_functions}}
-
-cdef CUresult _cuMulticastBindAddr(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUdeviceptr memptr, size_t size, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastUnbind' in found_functions}}
-
-cdef CUresult _cuMulticastUnbind(CUmemGenericAllocationHandle mcHandle, CUdevice dev, size_t mcOffset, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastGetGranularity' in found_functions}}
-
-cdef CUresult _cuMulticastGetGranularity(size_t* granularity, const CUmulticastObjectProp* prop, CUmulticastGranularity_flags option) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuPointerGetAttribute' in found_functions}}
-
-cdef CUresult _cuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPrefetchAsync' in found_functions}}
-
-cdef CUresult _cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPrefetchAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemPrefetchAsync_v2(CUdeviceptr devPtr, size_t count, CUmemLocation location, unsigned int flags, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAdvise' in found_functions}}
-
-cdef CUresult _cuMemAdvise(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAdvise_v2' in found_functions}}
-
-cdef CUresult _cuMemAdvise_v2(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUmemLocation location) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemRangeGetAttribute' in found_functions}}
-
-cdef CUresult _cuMemRangeGetAttribute(void* data, size_t dataSize, CUmem_range_attribute attribute, CUdeviceptr devPtr, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemRangeGetAttributes' in found_functions}}
-
-cdef CUresult _cuMemRangeGetAttributes(void** data, size_t* dataSizes, CUmem_range_attribute* attributes, size_t numAttributes, CUdeviceptr devPtr, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuPointerSetAttribute' in found_functions}}
-
-cdef CUresult _cuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuPointerGetAttributes' in found_functions}}
-
-cdef CUresult _cuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute* attributes, void** data, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamCreate' in found_functions}}
-
-cdef CUresult _cuStreamCreate(CUstream* phStream, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamCreateWithPriority' in found_functions}}
-
-cdef CUresult _cuStreamCreateWithPriority(CUstream* phStream, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetPriority' in found_functions}}
-
-cdef CUresult _cuStreamGetPriority(CUstream hStream, int* priority) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetFlags' in found_functions}}
-
-cdef CUresult _cuStreamGetFlags(CUstream hStream, unsigned int* flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetId' in found_functions}}
-
-cdef CUresult _cuStreamGetId(CUstream hStream, unsigned long long* streamId) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetCtx' in found_functions}}
-
-cdef CUresult _cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetCtx_v2' in found_functions}}
-
-cdef CUresult _cuStreamGetCtx_v2(CUstream hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamWaitEvent' in found_functions}}
-
-cdef CUresult _cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamAddCallback' in found_functions}}
-
-cdef CUresult _cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void* userData, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamBeginCapture_v2' in found_functions}}
-
-cdef CUresult _cuStreamBeginCapture_v2(CUstream hStream, CUstreamCaptureMode mode) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamBeginCaptureToGraph' in found_functions}}
-
-cdef CUresult _cuStreamBeginCaptureToGraph(CUstream hStream, CUgraph hGraph, const CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, CUstreamCaptureMode mode) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuThreadExchangeStreamCaptureMode' in found_functions}}
-
-cdef CUresult _cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode* mode) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamEndCapture' in found_functions}}
-
-cdef CUresult _cuStreamEndCapture(CUstream hStream, CUgraph* phGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamIsCapturing' in found_functions}}
-
-cdef CUresult _cuStreamIsCapturing(CUstream hStream, CUstreamCaptureStatus* captureStatus) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetCaptureInfo_v2' in found_functions}}
-
-cdef CUresult _cuStreamGetCaptureInfo_v2(CUstream hStream, CUstreamCaptureStatus* captureStatus_out, cuuint64_t* id_out, CUgraph* graph_out, const CUgraphNode** dependencies_out, size_t* numDependencies_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetCaptureInfo_v3' in found_functions}}
-
-cdef CUresult _cuStreamGetCaptureInfo_v3(CUstream hStream, CUstreamCaptureStatus* captureStatus_out, cuuint64_t* id_out, CUgraph* graph_out, const CUgraphNode** dependencies_out, const CUgraphEdgeData** edgeData_out, size_t* numDependencies_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamUpdateCaptureDependencies' in found_functions}}
-
-cdef CUresult _cuStreamUpdateCaptureDependencies(CUstream hStream, CUgraphNode* dependencies, size_t numDependencies, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}
-
-cdef CUresult _cuStreamUpdateCaptureDependencies_v2(CUstream hStream, CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamAttachMemAsync' in found_functions}}
-
-cdef CUresult _cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamQuery' in found_functions}}
-
-cdef CUresult _cuStreamQuery(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamSynchronize' in found_functions}}
-
-cdef CUresult _cuStreamSynchronize(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamDestroy_v2' in found_functions}}
-
-cdef CUresult _cuStreamDestroy_v2(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamCopyAttributes' in found_functions}}
-
-cdef CUresult _cuStreamCopyAttributes(CUstream dst, CUstream src) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetAttribute' in found_functions}}
-
-cdef CUresult _cuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, CUstreamAttrValue* value_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamSetAttribute' in found_functions}}
-
-cdef CUresult _cuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr, const CUstreamAttrValue* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventCreate' in found_functions}}
-
-cdef CUresult _cuEventCreate(CUevent* phEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventRecord' in found_functions}}
-
-cdef CUresult _cuEventRecord(CUevent hEvent, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventRecordWithFlags' in found_functions}}
-
-cdef CUresult _cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventQuery' in found_functions}}
-
-cdef CUresult _cuEventQuery(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventSynchronize' in found_functions}}
-
-cdef CUresult _cuEventSynchronize(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventDestroy_v2' in found_functions}}
-
-cdef CUresult _cuEventDestroy_v2(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventElapsedTime' in found_functions}}
-
-cdef CUresult _cuEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuImportExternalMemory' in found_functions}}
-
-cdef CUresult _cuImportExternalMemory(CUexternalMemory* extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC* memHandleDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuExternalMemoryGetMappedBuffer' in found_functions}}
-
-cdef CUresult _cuExternalMemoryGetMappedBuffer(CUdeviceptr* devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC* bufferDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuExternalMemoryGetMappedMipmappedArray' in found_functions}}
-
-cdef CUresult _cuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray* mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC* mipmapDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDestroyExternalMemory' in found_functions}}
-
-cdef CUresult _cuDestroyExternalMemory(CUexternalMemory extMem) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuImportExternalSemaphore' in found_functions}}
-
-cdef CUresult _cuImportExternalSemaphore(CUexternalSemaphore* extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC* semHandleDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSignalExternalSemaphoresAsync' in found_functions}}
-
-cdef CUresult _cuSignalExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuWaitExternalSemaphoresAsync' in found_functions}}
-
-cdef CUresult _cuWaitExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDestroyExternalSemaphore' in found_functions}}
-
-cdef CUresult _cuDestroyExternalSemaphore(CUexternalSemaphore extSem) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamWaitValue32_v2' in found_functions}}
-
-cdef CUresult _cuStreamWaitValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamWaitValue64_v2' in found_functions}}
-
-cdef CUresult _cuStreamWaitValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamWriteValue32_v2' in found_functions}}
-
-cdef CUresult _cuStreamWriteValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamWriteValue64_v2' in found_functions}}
-
-cdef CUresult _cuStreamWriteValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamBatchMemOp_v2' in found_functions}}
-
-cdef CUresult _cuStreamBatchMemOp_v2(CUstream stream, unsigned int count, CUstreamBatchMemOpParams* paramArray, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncGetAttribute' in found_functions}}
-
-cdef CUresult _cuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncSetAttribute' in found_functions}}
-
-cdef CUresult _cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncSetCacheConfig' in found_functions}}
-
-cdef CUresult _cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncGetModule' in found_functions}}
-
-cdef CUresult _cuFuncGetModule(CUmodule* hmod, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncGetName' in found_functions}}
-
-cdef CUresult _cuFuncGetName(const char** name, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncGetParamInfo' in found_functions}}
-
-cdef CUresult _cuFuncGetParamInfo(CUfunction func, size_t paramIndex, size_t* paramOffset, size_t* paramSize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncIsLoaded' in found_functions}}
-
-cdef CUresult _cuFuncIsLoaded(CUfunctionLoadingState* state, CUfunction function) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncLoad' in found_functions}}
-
-cdef CUresult _cuFuncLoad(CUfunction function) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchKernel' in found_functions}}
-
-cdef CUresult _cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchKernelEx' in found_functions}}
-
-cdef CUresult _cuLaunchKernelEx(const CUlaunchConfig* config, CUfunction f, void** kernelParams, void** extra) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchCooperativeKernel' in found_functions}}
-
-cdef CUresult _cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchCooperativeKernelMultiDevice' in found_functions}}
-
-cdef CUresult _cuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS* launchParamsList, unsigned int numDevices, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchHostFunc' in found_functions}}
-
-cdef CUresult _cuLaunchHostFunc(CUstream hStream, CUhostFn fn, void* userData) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncSetBlockShape' in found_functions}}
-
-cdef CUresult _cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncSetSharedSize' in found_functions}}
-
-cdef CUresult _cuFuncSetSharedSize(CUfunction hfunc, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuParamSetSize' in found_functions}}
-
-cdef CUresult _cuParamSetSize(CUfunction hfunc, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuParamSeti' in found_functions}}
-
-cdef CUresult _cuParamSeti(CUfunction hfunc, int offset, unsigned int value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuParamSetf' in found_functions}}
-
-cdef CUresult _cuParamSetf(CUfunction hfunc, int offset, float value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuParamSetv' in found_functions}}
-
-cdef CUresult _cuParamSetv(CUfunction hfunc, int offset, void* ptr, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunch' in found_functions}}
-
-cdef CUresult _cuLaunch(CUfunction f) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchGrid' in found_functions}}
-
-cdef CUresult _cuLaunchGrid(CUfunction f, int grid_width, int grid_height) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchGridAsync' in found_functions}}
-
-cdef CUresult _cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuParamSetTexRef' in found_functions}}
-
-cdef CUresult _cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncSetSharedMemConfig' in found_functions}}
-
-cdef CUresult _cuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphCreate' in found_functions}}
-
-cdef CUresult _cuGraphCreate(CUgraph* phGraph, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddKernelNode_v2' in found_functions}}
-
-cdef CUresult _cuGraphAddKernelNode_v2(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphKernelNodeGetParams_v2' in found_functions}}
-
-cdef CUresult _cuGraphKernelNodeGetParams_v2(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphKernelNodeSetParams_v2' in found_functions}}
-
-cdef CUresult _cuGraphKernelNodeSetParams_v2(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddMemcpyNode' in found_functions}}
-
-cdef CUresult _cuGraphAddMemcpyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMCPY3D* copyParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemcpyNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemcpyNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddMemsetNode' in found_functions}}
-
-cdef CUresult _cuGraphAddMemsetNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemsetNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemsetNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemsetNodeSetParams(CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddHostNode' in found_functions}}
-
-cdef CUresult _cuGraphAddHostNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphHostNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphHostNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphHostNodeSetParams(CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddChildGraphNode' in found_functions}}
-
-cdef CUresult _cuGraphAddChildGraphNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUgraph childGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphChildGraphNodeGetGraph' in found_functions}}
-
-cdef CUresult _cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph* phGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddEmptyNode' in found_functions}}
-
-cdef CUresult _cuGraphAddEmptyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddEventRecordNode' in found_functions}}
-
-cdef CUresult _cuGraphAddEventRecordNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphEventRecordNodeGetEvent' in found_functions}}
-
-cdef CUresult _cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent* event_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphEventRecordNodeSetEvent' in found_functions}}
-
-cdef CUresult _cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddEventWaitNode' in found_functions}}
-
-cdef CUresult _cuGraphAddEventWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphEventWaitNodeGetEvent' in found_functions}}
-
-cdef CUresult _cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent* event_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphEventWaitNodeSetEvent' in found_functions}}
-
-cdef CUresult _cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddExternalSemaphoresSignalNode' in found_functions}}
-
-cdef CUresult _cuGraphAddExternalSemaphoresSignalNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExternalSemaphoresSignalNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddExternalSemaphoresWaitNode' in found_functions}}
-
-cdef CUresult _cuGraphAddExternalSemaphoresWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddBatchMemOpNode' in found_functions}}
-
-cdef CUresult _cuGraphAddBatchMemOpNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphBatchMemOpNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphBatchMemOpNodeGetParams(CUgraphNode hNode, CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphBatchMemOpNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphBatchMemOpNodeSetParams(CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecBatchMemOpNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecBatchMemOpNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddMemAllocNode' in found_functions}}
-
-cdef CUresult _cuGraphAddMemAllocNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUDA_MEM_ALLOC_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemAllocNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemAllocNodeGetParams(CUgraphNode hNode, CUDA_MEM_ALLOC_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddMemFreeNode' in found_functions}}
-
-cdef CUresult _cuGraphAddMemFreeNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemFreeNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemFreeNodeGetParams(CUgraphNode hNode, CUdeviceptr* dptr_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGraphMemTrim' in found_functions}}
-
-cdef CUresult _cuDeviceGraphMemTrim(CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetGraphMemAttribute' in found_functions}}
-
-cdef CUresult _cuDeviceGetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceSetGraphMemAttribute' in found_functions}}
-
-cdef CUresult _cuDeviceSetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphClone' in found_functions}}
-
-cdef CUresult _cuGraphClone(CUgraph* phGraphClone, CUgraph originalGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeFindInClone' in found_functions}}
-
-cdef CUresult _cuGraphNodeFindInClone(CUgraphNode* phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetType' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType* typename) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphGetNodes' in found_functions}}
-
-cdef CUresult _cuGraphGetNodes(CUgraph hGraph, CUgraphNode* nodes, size_t* numNodes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphGetRootNodes' in found_functions}}
-
-cdef CUresult _cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode* rootNodes, size_t* numRootNodes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphGetEdges' in found_functions}}
-
-cdef CUresult _cuGraphGetEdges(CUgraph hGraph, CUgraphNode* from_, CUgraphNode* to, size_t* numEdges) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphGetEdges_v2' in found_functions}}
-
-cdef CUresult _cuGraphGetEdges_v2(CUgraph hGraph, CUgraphNode* from_, CUgraphNode* to, CUgraphEdgeData* edgeData, size_t* numEdges) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetDependencies' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode* dependencies, size_t* numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetDependencies_v2' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetDependencies_v2(CUgraphNode hNode, CUgraphNode* dependencies, CUgraphEdgeData* edgeData, size_t* numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetDependentNodes' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode* dependentNodes, size_t* numDependentNodes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetDependentNodes_v2' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetDependentNodes_v2(CUgraphNode hNode, CUgraphNode* dependentNodes, CUgraphEdgeData* edgeData, size_t* numDependentNodes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddDependencies' in found_functions}}
-
-cdef CUresult _cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddDependencies_v2' in found_functions}}
-
-cdef CUresult _cuGraphAddDependencies_v2(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, const CUgraphEdgeData* edgeData, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphRemoveDependencies' in found_functions}}
-
-cdef CUresult _cuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphRemoveDependencies_v2' in found_functions}}
-
-cdef CUresult _cuGraphRemoveDependencies_v2(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, const CUgraphEdgeData* edgeData, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphDestroyNode' in found_functions}}
-
-cdef CUresult _cuGraphDestroyNode(CUgraphNode hNode) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphInstantiateWithFlags' in found_functions}}
-
-cdef CUresult _cuGraphInstantiateWithFlags(CUgraphExec* phGraphExec, CUgraph hGraph, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphInstantiateWithParams' in found_functions}}
-
-cdef CUresult _cuGraphInstantiateWithParams(CUgraphExec* phGraphExec, CUgraph hGraph, CUDA_GRAPH_INSTANTIATE_PARAMS* instantiateParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecGetFlags' in found_functions}}
-
-cdef CUresult _cuGraphExecGetFlags(CUgraphExec hGraphExec, cuuint64_t* flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecKernelNodeSetParams_v2' in found_functions}}
-
-cdef CUresult _cuGraphExecKernelNodeSetParams_v2(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecMemcpyNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMCPY3D* copyParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecMemsetNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecMemsetNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecHostNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecChildGraphNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecEventRecordNodeSetEvent' in found_functions}}
-
-cdef CUresult _cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecEventWaitNodeSetEvent' in found_functions}}
-
-cdef CUresult _cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeSetEnabled' in found_functions}}
-
-cdef CUresult _cuGraphNodeSetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int isEnabled) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetEnabled' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int* isEnabled) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphUpload' in found_functions}}
-
-cdef CUresult _cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphLaunch' in found_functions}}
-
-cdef CUresult _cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecDestroy' in found_functions}}
-
-cdef CUresult _cuGraphExecDestroy(CUgraphExec hGraphExec) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphDestroy' in found_functions}}
-
-cdef CUresult _cuGraphDestroy(CUgraph hGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecUpdate_v2' in found_functions}}
-
-cdef CUresult _cuGraphExecUpdate_v2(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphExecUpdateResultInfo* resultInfo) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphKernelNodeCopyAttributes' in found_functions}}
-
-cdef CUresult _cuGraphKernelNodeCopyAttributes(CUgraphNode dst, CUgraphNode src) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphKernelNodeGetAttribute' in found_functions}}
-
-cdef CUresult _cuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, CUkernelNodeAttrValue* value_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphKernelNodeSetAttribute' in found_functions}}
-
-cdef CUresult _cuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, const CUkernelNodeAttrValue* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphDebugDotPrint' in found_functions}}
-
-cdef CUresult _cuGraphDebugDotPrint(CUgraph hGraph, const char* path, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuUserObjectCreate' in found_functions}}
-
-cdef CUresult _cuUserObjectCreate(CUuserObject* object_out, void* ptr, CUhostFn destroy, unsigned int initialRefcount, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuUserObjectRetain' in found_functions}}
-
-cdef CUresult _cuUserObjectRetain(CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuUserObjectRelease' in found_functions}}
-
-cdef CUresult _cuUserObjectRelease(CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphRetainUserObject' in found_functions}}
-
-cdef CUresult _cuGraphRetainUserObject(CUgraph graph, CUuserObject object, unsigned int count, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphReleaseUserObject' in found_functions}}
-
-cdef CUresult _cuGraphReleaseUserObject(CUgraph graph, CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddNode' in found_functions}}
-
-cdef CUresult _cuGraphAddNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddNode_v2' in found_functions}}
-
-cdef CUresult _cuGraphAddNode_v2(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphNodeSetParams(CUgraphNode hNode, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphConditionalHandleCreate' in found_functions}}
-
-cdef CUresult _cuGraphConditionalHandleCreate(CUgraphConditionalHandle* pHandle_out, CUgraph hGraph, CUcontext ctx, unsigned int defaultLaunchValue, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialBlockSize' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialBlockSizeWithFlags' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-
-cdef CUresult _cuOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, CUfunction func, int numBlocks, int blockSize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialClusterSize' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxPotentialClusterSize(int* clusterSize, CUfunction func, const CUlaunchConfig* config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveClusters' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxActiveClusters(int* numClusters, CUfunction func, const CUlaunchConfig* config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetArray' in found_functions}}
-
-cdef CUresult _cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetMipmappedArray' in found_functions}}
-
-cdef CUresult _cuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetAddress_v2' in found_functions}}
-
-cdef CUresult _cuTexRefSetAddress_v2(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t numbytes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetAddress2D_v3' in found_functions}}
-
-cdef CUresult _cuTexRefSetAddress2D_v3(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR* desc, CUdeviceptr dptr, size_t Pitch) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetFormat' in found_functions}}
-
-cdef CUresult _cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetAddressMode' in found_functions}}
-
-cdef CUresult _cuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetFilterMode' in found_functions}}
-
-cdef CUresult _cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetMipmapFilterMode' in found_functions}}
-
-cdef CUresult _cuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetMipmapLevelBias' in found_functions}}
-
-cdef CUresult _cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetMipmapLevelClamp' in found_functions}}
-
-cdef CUresult _cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetMaxAnisotropy' in found_functions}}
-
-cdef CUresult _cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetBorderColor' in found_functions}}
-
-cdef CUresult _cuTexRefSetBorderColor(CUtexref hTexRef, float* pBorderColor) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetFlags' in found_functions}}
-
-cdef CUresult _cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetAddress_v2' in found_functions}}
-
-cdef CUresult _cuTexRefGetAddress_v2(CUdeviceptr* pdptr, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetArray' in found_functions}}
-
-cdef CUresult _cuTexRefGetArray(CUarray* phArray, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetMipmappedArray' in found_functions}}
-
-cdef CUresult _cuTexRefGetMipmappedArray(CUmipmappedArray* phMipmappedArray, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetAddressMode' in found_functions}}
-
-cdef CUresult _cuTexRefGetAddressMode(CUaddress_mode* pam, CUtexref hTexRef, int dim) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetFilterMode' in found_functions}}
-
-cdef CUresult _cuTexRefGetFilterMode(CUfilter_mode* pfm, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetFormat' in found_functions}}
-
-cdef CUresult _cuTexRefGetFormat(CUarray_format* pFormat, int* pNumChannels, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetMipmapFilterMode' in found_functions}}
-
-cdef CUresult _cuTexRefGetMipmapFilterMode(CUfilter_mode* pfm, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetMipmapLevelBias' in found_functions}}
-
-cdef CUresult _cuTexRefGetMipmapLevelBias(float* pbias, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetMipmapLevelClamp' in found_functions}}
-
-cdef CUresult _cuTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetMaxAnisotropy' in found_functions}}
-
-cdef CUresult _cuTexRefGetMaxAnisotropy(int* pmaxAniso, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetBorderColor' in found_functions}}
-
-cdef CUresult _cuTexRefGetBorderColor(float* pBorderColor, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetFlags' in found_functions}}
-
-cdef CUresult _cuTexRefGetFlags(unsigned int* pFlags, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefCreate' in found_functions}}
-
-cdef CUresult _cuTexRefCreate(CUtexref* pTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefDestroy' in found_functions}}
-
-cdef CUresult _cuTexRefDestroy(CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSurfRefSetArray' in found_functions}}
-
-cdef CUresult _cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSurfRefGetArray' in found_functions}}
-
-cdef CUresult _cuSurfRefGetArray(CUarray* phArray, CUsurfref hSurfRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexObjectCreate' in found_functions}}
-
-cdef CUresult _cuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const CUDA_RESOURCE_VIEW_DESC* pResViewDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexObjectDestroy' in found_functions}}
-
-cdef CUresult _cuTexObjectDestroy(CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexObjectGetResourceDesc' in found_functions}}
-
-cdef CUresult _cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexObjectGetTextureDesc' in found_functions}}
-
-cdef CUresult _cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC* pTexDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexObjectGetResourceViewDesc' in found_functions}}
-
-cdef CUresult _cuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC* pResViewDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSurfObjectCreate' in found_functions}}
-
-cdef CUresult _cuSurfObjectCreate(CUsurfObject* pSurfObject, const CUDA_RESOURCE_DESC* pResDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSurfObjectDestroy' in found_functions}}
-
-cdef CUresult _cuSurfObjectDestroy(CUsurfObject surfObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSurfObjectGetResourceDesc' in found_functions}}
-
-cdef CUresult _cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUsurfObject surfObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTensorMapEncodeTiled' in found_functions}}
-
-cdef CUresult _cuTensorMapEncodeTiled(CUtensorMap* tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void* globalAddress, const cuuint64_t* globalDim, const cuuint64_t* globalStrides, const cuuint32_t* boxDim, const cuuint32_t* elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTensorMapEncodeIm2col' in found_functions}}
-
-cdef CUresult _cuTensorMapEncodeIm2col(CUtensorMap* tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void* globalAddress, const cuuint64_t* globalDim, const cuuint64_t* globalStrides, const int* pixelBoxLowerCorner, const int* pixelBoxUpperCorner, cuuint32_t channelsPerPixel, cuuint32_t pixelsPerColumn, const cuuint32_t* elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTensorMapReplaceAddress' in found_functions}}
-
-cdef CUresult _cuTensorMapReplaceAddress(CUtensorMap* tensorMap, void* globalAddress) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceCanAccessPeer' in found_functions}}
-
-cdef CUresult _cuDeviceCanAccessPeer(int* canAccessPeer, CUdevice dev, CUdevice peerDev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxEnablePeerAccess' in found_functions}}
-
-cdef CUresult _cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxDisablePeerAccess' in found_functions}}
-
-cdef CUresult _cuCtxDisablePeerAccess(CUcontext peerContext) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetP2PAttribute' in found_functions}}
-
-cdef CUresult _cuDeviceGetP2PAttribute(int* value, CUdevice_P2PAttribute attrib, CUdevice srcDevice, CUdevice dstDevice) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsUnregisterResource' in found_functions}}
-
-cdef CUresult _cuGraphicsUnregisterResource(CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsSubResourceGetMappedArray' in found_functions}}
-
-cdef CUresult _cuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-
-cdef CUresult _cuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsResourceGetMappedPointer_v2' in found_functions}}
-
-cdef CUresult _cuGraphicsResourceGetMappedPointer_v2(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsResourceSetMapFlags_v2' in found_functions}}
-
-cdef CUresult _cuGraphicsResourceSetMapFlags_v2(CUgraphicsResource resource, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsMapResources' in found_functions}}
-
-cdef CUresult _cuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsUnmapResources' in found_functions}}
-
-cdef CUresult _cuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGetProcAddress_v2' in found_functions}}
-
-cdef CUresult _cuGetProcAddress_v2(const char* symbol, void** pfn, int cudaVersion, cuuint64_t flags, CUdriverProcAddressQueryResult* symbolStatus) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCoredumpGetAttribute' in found_functions}}
-
-cdef CUresult _cuCoredumpGetAttribute(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCoredumpGetAttributeGlobal' in found_functions}}
-
-cdef CUresult _cuCoredumpGetAttributeGlobal(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCoredumpSetAttribute' in found_functions}}
-
-cdef CUresult _cuCoredumpSetAttribute(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCoredumpSetAttributeGlobal' in found_functions}}
-
-cdef CUresult _cuCoredumpSetAttributeGlobal(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGetExportTable' in found_functions}}
-
-cdef CUresult _cuGetExportTable(const void** ppExportTable, const CUuuid* pExportTableId) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxCreate' in found_functions}}
-
-cdef CUresult _cuGreenCtxCreate(CUgreenCtx* phCtx, CUdevResourceDesc desc, CUdevice dev, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxDestroy' in found_functions}}
-
-cdef CUresult _cuGreenCtxDestroy(CUgreenCtx hCtx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxFromGreenCtx' in found_functions}}
-
-cdef CUresult _cuCtxFromGreenCtx(CUcontext* pContext, CUgreenCtx hCtx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetDevResource' in found_functions}}
-
-cdef CUresult _cuDeviceGetDevResource(CUdevice device, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetDevResource' in found_functions}}
-
-cdef CUresult _cuCtxGetDevResource(CUcontext hCtx, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxGetDevResource' in found_functions}}
-
-cdef CUresult _cuGreenCtxGetDevResource(CUgreenCtx hCtx, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevSmResourceSplitByCount' in found_functions}}
-
-cdef CUresult _cuDevSmResourceSplitByCount(CUdevResource* result, unsigned int* nbGroups, const CUdevResource* input, CUdevResource* remaining, unsigned int useFlags, unsigned int minCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevResourceGenerateDesc' in found_functions}}
-
-cdef CUresult _cuDevResourceGenerateDesc(CUdevResourceDesc* phDesc, CUdevResource* resources, unsigned int nbResources) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxRecordEvent' in found_functions}}
-
-cdef CUresult _cuGreenCtxRecordEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxWaitEvent' in found_functions}}
-
-cdef CUresult _cuGreenCtxWaitEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetGreenCtx' in found_functions}}
-
-cdef CUresult _cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxStreamCreate' in found_functions}}
-
-cdef CUresult _cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuProfilerStart' in found_functions}}
-
-cdef CUresult _cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuProfilerStop' in found_functions}}
-
-cdef CUresult _cuProfilerStop() except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsEGLRegisterImage(CUgraphicsResource* pCudaResource, EGLImageKHR image, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamConsumerConnect(CUeglStreamConnection* conn, EGLStreamKHR stream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamConsumerConnectWithFlags(CUeglStreamConnection* conn, EGLStreamKHR stream, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamConsumerDisconnect(CUeglStreamConnection* conn) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamConsumerAcquireFrame(CUeglStreamConnection* conn, CUgraphicsResource* pCudaResource, CUstream* pStream, unsigned int timeout) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamConsumerReleaseFrame(CUeglStreamConnection* conn, CUgraphicsResource pCudaResource, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamProducerConnect(CUeglStreamConnection* conn, EGLStreamKHR stream, EGLint width, EGLint height) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamProducerDisconnect(CUeglStreamConnection* conn) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamProducerPresentFrame(CUeglStreamConnection* conn, CUeglFrame eglframe, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamProducerReturnFrame(CUeglStreamConnection* conn, CUeglFrame* eglframe, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsResourceGetMappedEglFrame(CUeglFrame* eglFrame, CUgraphicsResource resource, unsigned int index, unsigned int mipLevel) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEventCreateFromEGLSync(CUevent* phEvent, EGLSyncKHR eglSync, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGLGetDevices_v2(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuVDPAUGetDevice(CUdevice* pDevice, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuVDPAUCtxCreate_v2(CUcontext* pCtx, unsigned int flags, CUdevice device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsVDPAURegisterVideoSurface(CUgraphicsResource* pCudaResource, VdpVideoSurface vdpSurface, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsVDPAURegisterOutputSurface(CUgraphicsResource* pCudaResource, VdpOutputSurface vdpSurface, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
diff --git a/cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in
deleted file mode 100644
index a1c260e1..00000000
--- a/cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in
+++ /dev/null
@@ -1,14081 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-{{if 'Windows' == platform.system()}}
-import win32api
-import struct
-from pywintypes import error
-{{else}}
-cimport cuda.bindings._lib.dlfcn as dlfcn
-{{endif}}
-import os
-import sys
-cimport cuda.bindings._bindings.loader as loader
-cdef bint __cuPythonInit = False
-{{if 'cuGetErrorString' in found_functions}}cdef void *__cuGetErrorString = NULL{{endif}}
-{{if 'cuGetErrorName' in found_functions}}cdef void *__cuGetErrorName = NULL{{endif}}
-{{if 'cuInit' in found_functions}}cdef void *__cuInit = NULL{{endif}}
-{{if 'cuDriverGetVersion' in found_functions}}cdef void *__cuDriverGetVersion = NULL{{endif}}
-{{if 'cuDeviceGet' in found_functions}}cdef void *__cuDeviceGet = NULL{{endif}}
-{{if 'cuDeviceGetCount' in found_functions}}cdef void *__cuDeviceGetCount = NULL{{endif}}
-{{if 'cuDeviceGetName' in found_functions}}cdef void *__cuDeviceGetName = NULL{{endif}}
-{{if 'cuDeviceGetUuid' in found_functions}}cdef void *__cuDeviceGetUuid = NULL{{endif}}
-{{if 'cuDeviceGetUuid_v2' in found_functions}}cdef void *__cuDeviceGetUuid_v2 = NULL{{endif}}
-{{if 'cuDeviceGetLuid' in found_functions}}cdef void *__cuDeviceGetLuid = NULL{{endif}}
-{{if 'cuDeviceTotalMem_v2' in found_functions}}cdef void *__cuDeviceTotalMem_v2 = NULL{{endif}}
-{{if 'cuDeviceGetTexture1DLinearMaxWidth' in found_functions}}cdef void *__cuDeviceGetTexture1DLinearMaxWidth = NULL{{endif}}
-{{if 'cuDeviceGetAttribute' in found_functions}}cdef void *__cuDeviceGetAttribute = NULL{{endif}}
-{{if 'cuDeviceGetNvSciSyncAttributes' in found_functions}}cdef void *__cuDeviceGetNvSciSyncAttributes = NULL{{endif}}
-{{if 'cuDeviceSetMemPool' in found_functions}}cdef void *__cuDeviceSetMemPool = NULL{{endif}}
-{{if 'cuDeviceGetMemPool' in found_functions}}cdef void *__cuDeviceGetMemPool = NULL{{endif}}
-{{if 'cuDeviceGetDefaultMemPool' in found_functions}}cdef void *__cuDeviceGetDefaultMemPool = NULL{{endif}}
-{{if 'cuDeviceGetExecAffinitySupport' in found_functions}}cdef void *__cuDeviceGetExecAffinitySupport = NULL{{endif}}
-{{if 'cuFlushGPUDirectRDMAWrites' in found_functions}}cdef void *__cuFlushGPUDirectRDMAWrites = NULL{{endif}}
-{{if 'cuDeviceGetProperties' in found_functions}}cdef void *__cuDeviceGetProperties = NULL{{endif}}
-{{if 'cuDeviceComputeCapability' in found_functions}}cdef void *__cuDeviceComputeCapability = NULL{{endif}}
-{{if 'cuDevicePrimaryCtxRetain' in found_functions}}cdef void *__cuDevicePrimaryCtxRetain = NULL{{endif}}
-{{if 'cuDevicePrimaryCtxRelease_v2' in found_functions}}cdef void *__cuDevicePrimaryCtxRelease_v2 = NULL{{endif}}
-{{if 'cuDevicePrimaryCtxSetFlags_v2' in found_functions}}cdef void *__cuDevicePrimaryCtxSetFlags_v2 = NULL{{endif}}
-{{if 'cuDevicePrimaryCtxGetState' in found_functions}}cdef void *__cuDevicePrimaryCtxGetState = NULL{{endif}}
-{{if 'cuDevicePrimaryCtxReset_v2' in found_functions}}cdef void *__cuDevicePrimaryCtxReset_v2 = NULL{{endif}}
-{{if 'cuCtxCreate_v2' in found_functions}}cdef void *__cuCtxCreate_v2 = NULL{{endif}}
-{{if 'cuCtxCreate_v3' in found_functions}}cdef void *__cuCtxCreate_v3 = NULL{{endif}}
-{{if 'cuCtxCreate_v4' in found_functions}}cdef void *__cuCtxCreate_v4 = NULL{{endif}}
-{{if 'cuCtxDestroy_v2' in found_functions}}cdef void *__cuCtxDestroy_v2 = NULL{{endif}}
-{{if 'cuCtxPushCurrent_v2' in found_functions}}cdef void *__cuCtxPushCurrent_v2 = NULL{{endif}}
-{{if 'cuCtxPopCurrent_v2' in found_functions}}cdef void *__cuCtxPopCurrent_v2 = NULL{{endif}}
-{{if 'cuCtxSetCurrent' in found_functions}}cdef void *__cuCtxSetCurrent = NULL{{endif}}
-{{if 'cuCtxGetCurrent' in found_functions}}cdef void *__cuCtxGetCurrent = NULL{{endif}}
-{{if 'cuCtxGetDevice' in found_functions}}cdef void *__cuCtxGetDevice = NULL{{endif}}
-{{if 'cuCtxGetFlags' in found_functions}}cdef void *__cuCtxGetFlags = NULL{{endif}}
-{{if 'cuCtxSetFlags' in found_functions}}cdef void *__cuCtxSetFlags = NULL{{endif}}
-{{if 'cuCtxGetId' in found_functions}}cdef void *__cuCtxGetId = NULL{{endif}}
-{{if 'cuCtxSynchronize' in found_functions}}cdef void *__cuCtxSynchronize = NULL{{endif}}
-{{if 'cuCtxSetLimit' in found_functions}}cdef void *__cuCtxSetLimit = NULL{{endif}}
-{{if 'cuCtxGetLimit' in found_functions}}cdef void *__cuCtxGetLimit = NULL{{endif}}
-{{if 'cuCtxGetCacheConfig' in found_functions}}cdef void *__cuCtxGetCacheConfig = NULL{{endif}}
-{{if 'cuCtxSetCacheConfig' in found_functions}}cdef void *__cuCtxSetCacheConfig = NULL{{endif}}
-{{if 'cuCtxGetApiVersion' in found_functions}}cdef void *__cuCtxGetApiVersion = NULL{{endif}}
-{{if 'cuCtxGetStreamPriorityRange' in found_functions}}cdef void *__cuCtxGetStreamPriorityRange = NULL{{endif}}
-{{if 'cuCtxResetPersistingL2Cache' in found_functions}}cdef void *__cuCtxResetPersistingL2Cache = NULL{{endif}}
-{{if 'cuCtxGetExecAffinity' in found_functions}}cdef void *__cuCtxGetExecAffinity = NULL{{endif}}
-{{if 'cuCtxRecordEvent' in found_functions}}cdef void *__cuCtxRecordEvent = NULL{{endif}}
-{{if 'cuCtxWaitEvent' in found_functions}}cdef void *__cuCtxWaitEvent = NULL{{endif}}
-{{if 'cuCtxAttach' in found_functions}}cdef void *__cuCtxAttach = NULL{{endif}}
-{{if 'cuCtxDetach' in found_functions}}cdef void *__cuCtxDetach = NULL{{endif}}
-{{if 'cuCtxGetSharedMemConfig' in found_functions}}cdef void *__cuCtxGetSharedMemConfig = NULL{{endif}}
-{{if 'cuCtxSetSharedMemConfig' in found_functions}}cdef void *__cuCtxSetSharedMemConfig = NULL{{endif}}
-{{if 'cuModuleLoad' in found_functions}}cdef void *__cuModuleLoad = NULL{{endif}}
-{{if 'cuModuleLoadData' in found_functions}}cdef void *__cuModuleLoadData = NULL{{endif}}
-{{if 'cuModuleLoadDataEx' in found_functions}}cdef void *__cuModuleLoadDataEx = NULL{{endif}}
-{{if 'cuModuleLoadFatBinary' in found_functions}}cdef void *__cuModuleLoadFatBinary = NULL{{endif}}
-{{if 'cuModuleUnload' in found_functions}}cdef void *__cuModuleUnload = NULL{{endif}}
-{{if 'cuModuleGetLoadingMode' in found_functions}}cdef void *__cuModuleGetLoadingMode = NULL{{endif}}
-{{if 'cuModuleGetFunction' in found_functions}}cdef void *__cuModuleGetFunction = NULL{{endif}}
-{{if 'cuModuleGetFunctionCount' in found_functions}}cdef void *__cuModuleGetFunctionCount = NULL{{endif}}
-{{if 'cuModuleEnumerateFunctions' in found_functions}}cdef void *__cuModuleEnumerateFunctions = NULL{{endif}}
-{{if 'cuModuleGetGlobal_v2' in found_functions}}cdef void *__cuModuleGetGlobal_v2 = NULL{{endif}}
-{{if 'cuLinkCreate_v2' in found_functions}}cdef void *__cuLinkCreate_v2 = NULL{{endif}}
-{{if 'cuLinkAddData_v2' in found_functions}}cdef void *__cuLinkAddData_v2 = NULL{{endif}}
-{{if 'cuLinkAddFile_v2' in found_functions}}cdef void *__cuLinkAddFile_v2 = NULL{{endif}}
-{{if 'cuLinkComplete' in found_functions}}cdef void *__cuLinkComplete = NULL{{endif}}
-{{if 'cuLinkDestroy' in found_functions}}cdef void *__cuLinkDestroy = NULL{{endif}}
-{{if 'cuModuleGetTexRef' in found_functions}}cdef void *__cuModuleGetTexRef = NULL{{endif}}
-{{if 'cuModuleGetSurfRef' in found_functions}}cdef void *__cuModuleGetSurfRef = NULL{{endif}}
-{{if 'cuLibraryLoadData' in found_functions}}cdef void *__cuLibraryLoadData = NULL{{endif}}
-{{if 'cuLibraryLoadFromFile' in found_functions}}cdef void *__cuLibraryLoadFromFile = NULL{{endif}}
-{{if 'cuLibraryUnload' in found_functions}}cdef void *__cuLibraryUnload = NULL{{endif}}
-{{if 'cuLibraryGetKernel' in found_functions}}cdef void *__cuLibraryGetKernel = NULL{{endif}}
-{{if 'cuLibraryGetKernelCount' in found_functions}}cdef void *__cuLibraryGetKernelCount = NULL{{endif}}
-{{if 'cuLibraryEnumerateKernels' in found_functions}}cdef void *__cuLibraryEnumerateKernels = NULL{{endif}}
-{{if 'cuLibraryGetModule' in found_functions}}cdef void *__cuLibraryGetModule = NULL{{endif}}
-{{if 'cuKernelGetFunction' in found_functions}}cdef void *__cuKernelGetFunction = NULL{{endif}}
-{{if 'cuKernelGetLibrary' in found_functions}}cdef void *__cuKernelGetLibrary = NULL{{endif}}
-{{if 'cuLibraryGetGlobal' in found_functions}}cdef void *__cuLibraryGetGlobal = NULL{{endif}}
-{{if 'cuLibraryGetManaged' in found_functions}}cdef void *__cuLibraryGetManaged = NULL{{endif}}
-{{if 'cuLibraryGetUnifiedFunction' in found_functions}}cdef void *__cuLibraryGetUnifiedFunction = NULL{{endif}}
-{{if 'cuKernelGetAttribute' in found_functions}}cdef void *__cuKernelGetAttribute = NULL{{endif}}
-{{if 'cuKernelSetAttribute' in found_functions}}cdef void *__cuKernelSetAttribute = NULL{{endif}}
-{{if 'cuKernelSetCacheConfig' in found_functions}}cdef void *__cuKernelSetCacheConfig = NULL{{endif}}
-{{if 'cuKernelGetName' in found_functions}}cdef void *__cuKernelGetName = NULL{{endif}}
-{{if 'cuKernelGetParamInfo' in found_functions}}cdef void *__cuKernelGetParamInfo = NULL{{endif}}
-{{if 'cuMemGetInfo_v2' in found_functions}}cdef void *__cuMemGetInfo_v2 = NULL{{endif}}
-{{if 'cuMemAlloc_v2' in found_functions}}cdef void *__cuMemAlloc_v2 = NULL{{endif}}
-{{if 'cuMemAllocPitch_v2' in found_functions}}cdef void *__cuMemAllocPitch_v2 = NULL{{endif}}
-{{if 'cuMemFree_v2' in found_functions}}cdef void *__cuMemFree_v2 = NULL{{endif}}
-{{if 'cuMemGetAddressRange_v2' in found_functions}}cdef void *__cuMemGetAddressRange_v2 = NULL{{endif}}
-{{if 'cuMemAllocHost_v2' in found_functions}}cdef void *__cuMemAllocHost_v2 = NULL{{endif}}
-{{if 'cuMemFreeHost' in found_functions}}cdef void *__cuMemFreeHost = NULL{{endif}}
-{{if 'cuMemHostAlloc' in found_functions}}cdef void *__cuMemHostAlloc = NULL{{endif}}
-{{if 'cuMemHostGetDevicePointer_v2' in found_functions}}cdef void *__cuMemHostGetDevicePointer_v2 = NULL{{endif}}
-{{if 'cuMemHostGetFlags' in found_functions}}cdef void *__cuMemHostGetFlags = NULL{{endif}}
-{{if 'cuMemAllocManaged' in found_functions}}cdef void *__cuMemAllocManaged = NULL{{endif}}
-{{if 'cuDeviceRegisterAsyncNotification' in found_functions}}cdef void *__cuDeviceRegisterAsyncNotification = NULL{{endif}}
-{{if 'cuDeviceUnregisterAsyncNotification' in found_functions}}cdef void *__cuDeviceUnregisterAsyncNotification = NULL{{endif}}
-{{if 'cuDeviceGetByPCIBusId' in found_functions}}cdef void *__cuDeviceGetByPCIBusId = NULL{{endif}}
-{{if 'cuDeviceGetPCIBusId' in found_functions}}cdef void *__cuDeviceGetPCIBusId = NULL{{endif}}
-{{if 'cuIpcGetEventHandle' in found_functions}}cdef void *__cuIpcGetEventHandle = NULL{{endif}}
-{{if 'cuIpcOpenEventHandle' in found_functions}}cdef void *__cuIpcOpenEventHandle = NULL{{endif}}
-{{if 'cuIpcGetMemHandle' in found_functions}}cdef void *__cuIpcGetMemHandle = NULL{{endif}}
-{{if 'cuIpcOpenMemHandle_v2' in found_functions}}cdef void *__cuIpcOpenMemHandle_v2 = NULL{{endif}}
-{{if 'cuIpcCloseMemHandle' in found_functions}}cdef void *__cuIpcCloseMemHandle = NULL{{endif}}
-{{if 'cuMemHostRegister_v2' in found_functions}}cdef void *__cuMemHostRegister_v2 = NULL{{endif}}
-{{if 'cuMemHostUnregister' in found_functions}}cdef void *__cuMemHostUnregister = NULL{{endif}}
-{{if 'cuMemcpy' in found_functions}}cdef void *__cuMemcpy = NULL{{endif}}
-{{if 'cuMemcpyPeer' in found_functions}}cdef void *__cuMemcpyPeer = NULL{{endif}}
-{{if 'cuMemcpyHtoD_v2' in found_functions}}cdef void *__cuMemcpyHtoD_v2 = NULL{{endif}}
-{{if 'cuMemcpyDtoH_v2' in found_functions}}cdef void *__cuMemcpyDtoH_v2 = NULL{{endif}}
-{{if 'cuMemcpyDtoD_v2' in found_functions}}cdef void *__cuMemcpyDtoD_v2 = NULL{{endif}}
-{{if 'cuMemcpyDtoA_v2' in found_functions}}cdef void *__cuMemcpyDtoA_v2 = NULL{{endif}}
-{{if 'cuMemcpyAtoD_v2' in found_functions}}cdef void *__cuMemcpyAtoD_v2 = NULL{{endif}}
-{{if 'cuMemcpyHtoA_v2' in found_functions}}cdef void *__cuMemcpyHtoA_v2 = NULL{{endif}}
-{{if 'cuMemcpyAtoH_v2' in found_functions}}cdef void *__cuMemcpyAtoH_v2 = NULL{{endif}}
-{{if 'cuMemcpyAtoA_v2' in found_functions}}cdef void *__cuMemcpyAtoA_v2 = NULL{{endif}}
-{{if 'cuMemcpy2D_v2' in found_functions}}cdef void *__cuMemcpy2D_v2 = NULL{{endif}}
-{{if 'cuMemcpy2DUnaligned_v2' in found_functions}}cdef void *__cuMemcpy2DUnaligned_v2 = NULL{{endif}}
-{{if 'cuMemcpy3D_v2' in found_functions}}cdef void *__cuMemcpy3D_v2 = NULL{{endif}}
-{{if 'cuMemcpy3DPeer' in found_functions}}cdef void *__cuMemcpy3DPeer = NULL{{endif}}
-{{if 'cuMemcpyAsync' in found_functions}}cdef void *__cuMemcpyAsync = NULL{{endif}}
-{{if 'cuMemcpyPeerAsync' in found_functions}}cdef void *__cuMemcpyPeerAsync = NULL{{endif}}
-{{if 'cuMemcpyHtoDAsync_v2' in found_functions}}cdef void *__cuMemcpyHtoDAsync_v2 = NULL{{endif}}
-{{if 'cuMemcpyDtoHAsync_v2' in found_functions}}cdef void *__cuMemcpyDtoHAsync_v2 = NULL{{endif}}
-{{if 'cuMemcpyDtoDAsync_v2' in found_functions}}cdef void *__cuMemcpyDtoDAsync_v2 = NULL{{endif}}
-{{if 'cuMemcpyHtoAAsync_v2' in found_functions}}cdef void *__cuMemcpyHtoAAsync_v2 = NULL{{endif}}
-{{if 'cuMemcpyAtoHAsync_v2' in found_functions}}cdef void *__cuMemcpyAtoHAsync_v2 = NULL{{endif}}
-{{if 'cuMemcpy2DAsync_v2' in found_functions}}cdef void *__cuMemcpy2DAsync_v2 = NULL{{endif}}
-{{if 'cuMemcpy3DAsync_v2' in found_functions}}cdef void *__cuMemcpy3DAsync_v2 = NULL{{endif}}
-{{if 'cuMemcpy3DPeerAsync' in found_functions}}cdef void *__cuMemcpy3DPeerAsync = NULL{{endif}}
-{{if 'cuMemsetD8_v2' in found_functions}}cdef void *__cuMemsetD8_v2 = NULL{{endif}}
-{{if 'cuMemsetD16_v2' in found_functions}}cdef void *__cuMemsetD16_v2 = NULL{{endif}}
-{{if 'cuMemsetD32_v2' in found_functions}}cdef void *__cuMemsetD32_v2 = NULL{{endif}}
-{{if 'cuMemsetD2D8_v2' in found_functions}}cdef void *__cuMemsetD2D8_v2 = NULL{{endif}}
-{{if 'cuMemsetD2D16_v2' in found_functions}}cdef void *__cuMemsetD2D16_v2 = NULL{{endif}}
-{{if 'cuMemsetD2D32_v2' in found_functions}}cdef void *__cuMemsetD2D32_v2 = NULL{{endif}}
-{{if 'cuMemsetD8Async' in found_functions}}cdef void *__cuMemsetD8Async = NULL{{endif}}
-{{if 'cuMemsetD16Async' in found_functions}}cdef void *__cuMemsetD16Async = NULL{{endif}}
-{{if 'cuMemsetD32Async' in found_functions}}cdef void *__cuMemsetD32Async = NULL{{endif}}
-{{if 'cuMemsetD2D8Async' in found_functions}}cdef void *__cuMemsetD2D8Async = NULL{{endif}}
-{{if 'cuMemsetD2D16Async' in found_functions}}cdef void *__cuMemsetD2D16Async = NULL{{endif}}
-{{if 'cuMemsetD2D32Async' in found_functions}}cdef void *__cuMemsetD2D32Async = NULL{{endif}}
-{{if 'cuArrayCreate_v2' in found_functions}}cdef void *__cuArrayCreate_v2 = NULL{{endif}}
-{{if 'cuArrayGetDescriptor_v2' in found_functions}}cdef void *__cuArrayGetDescriptor_v2 = NULL{{endif}}
-{{if 'cuArrayGetSparseProperties' in found_functions}}cdef void *__cuArrayGetSparseProperties = NULL{{endif}}
-{{if 'cuMipmappedArrayGetSparseProperties' in found_functions}}cdef void *__cuMipmappedArrayGetSparseProperties = NULL{{endif}}
-{{if 'cuArrayGetMemoryRequirements' in found_functions}}cdef void *__cuArrayGetMemoryRequirements = NULL{{endif}}
-{{if 'cuMipmappedArrayGetMemoryRequirements' in found_functions}}cdef void *__cuMipmappedArrayGetMemoryRequirements = NULL{{endif}}
-{{if 'cuArrayGetPlane' in found_functions}}cdef void *__cuArrayGetPlane = NULL{{endif}}
-{{if 'cuArrayDestroy' in found_functions}}cdef void *__cuArrayDestroy = NULL{{endif}}
-{{if 'cuArray3DCreate_v2' in found_functions}}cdef void *__cuArray3DCreate_v2 = NULL{{endif}}
-{{if 'cuArray3DGetDescriptor_v2' in found_functions}}cdef void *__cuArray3DGetDescriptor_v2 = NULL{{endif}}
-{{if 'cuMipmappedArrayCreate' in found_functions}}cdef void *__cuMipmappedArrayCreate = NULL{{endif}}
-{{if 'cuMipmappedArrayGetLevel' in found_functions}}cdef void *__cuMipmappedArrayGetLevel = NULL{{endif}}
-{{if 'cuMipmappedArrayDestroy' in found_functions}}cdef void *__cuMipmappedArrayDestroy = NULL{{endif}}
-{{if 'cuMemGetHandleForAddressRange' in found_functions}}cdef void *__cuMemGetHandleForAddressRange = NULL{{endif}}
-{{if 'cuMemAddressReserve' in found_functions}}cdef void *__cuMemAddressReserve = NULL{{endif}}
-{{if 'cuMemAddressFree' in found_functions}}cdef void *__cuMemAddressFree = NULL{{endif}}
-{{if 'cuMemCreate' in found_functions}}cdef void *__cuMemCreate = NULL{{endif}}
-{{if 'cuMemRelease' in found_functions}}cdef void *__cuMemRelease = NULL{{endif}}
-{{if 'cuMemMap' in found_functions}}cdef void *__cuMemMap = NULL{{endif}}
-{{if 'cuMemMapArrayAsync' in found_functions}}cdef void *__cuMemMapArrayAsync = NULL{{endif}}
-{{if 'cuMemUnmap' in found_functions}}cdef void *__cuMemUnmap = NULL{{endif}}
-{{if 'cuMemSetAccess' in found_functions}}cdef void *__cuMemSetAccess = NULL{{endif}}
-{{if 'cuMemGetAccess' in found_functions}}cdef void *__cuMemGetAccess = NULL{{endif}}
-{{if 'cuMemExportToShareableHandle' in found_functions}}cdef void *__cuMemExportToShareableHandle = NULL{{endif}}
-{{if 'cuMemImportFromShareableHandle' in found_functions}}cdef void *__cuMemImportFromShareableHandle = NULL{{endif}}
-{{if 'cuMemGetAllocationGranularity' in found_functions}}cdef void *__cuMemGetAllocationGranularity = NULL{{endif}}
-{{if 'cuMemGetAllocationPropertiesFromHandle' in found_functions}}cdef void *__cuMemGetAllocationPropertiesFromHandle = NULL{{endif}}
-{{if 'cuMemRetainAllocationHandle' in found_functions}}cdef void *__cuMemRetainAllocationHandle = NULL{{endif}}
-{{if 'cuMemFreeAsync' in found_functions}}cdef void *__cuMemFreeAsync = NULL{{endif}}
-{{if 'cuMemAllocAsync' in found_functions}}cdef void *__cuMemAllocAsync = NULL{{endif}}
-{{if 'cuMemPoolTrimTo' in found_functions}}cdef void *__cuMemPoolTrimTo = NULL{{endif}}
-{{if 'cuMemPoolSetAttribute' in found_functions}}cdef void *__cuMemPoolSetAttribute = NULL{{endif}}
-{{if 'cuMemPoolGetAttribute' in found_functions}}cdef void *__cuMemPoolGetAttribute = NULL{{endif}}
-{{if 'cuMemPoolSetAccess' in found_functions}}cdef void *__cuMemPoolSetAccess = NULL{{endif}}
-{{if 'cuMemPoolGetAccess' in found_functions}}cdef void *__cuMemPoolGetAccess = NULL{{endif}}
-{{if 'cuMemPoolCreate' in found_functions}}cdef void *__cuMemPoolCreate = NULL{{endif}}
-{{if 'cuMemPoolDestroy' in found_functions}}cdef void *__cuMemPoolDestroy = NULL{{endif}}
-{{if 'cuMemAllocFromPoolAsync' in found_functions}}cdef void *__cuMemAllocFromPoolAsync = NULL{{endif}}
-{{if 'cuMemPoolExportToShareableHandle' in found_functions}}cdef void *__cuMemPoolExportToShareableHandle = NULL{{endif}}
-{{if 'cuMemPoolImportFromShareableHandle' in found_functions}}cdef void *__cuMemPoolImportFromShareableHandle = NULL{{endif}}
-{{if 'cuMemPoolExportPointer' in found_functions}}cdef void *__cuMemPoolExportPointer = NULL{{endif}}
-{{if 'cuMemPoolImportPointer' in found_functions}}cdef void *__cuMemPoolImportPointer = NULL{{endif}}
-{{if 'cuMulticastCreate' in found_functions}}cdef void *__cuMulticastCreate = NULL{{endif}}
-{{if 'cuMulticastAddDevice' in found_functions}}cdef void *__cuMulticastAddDevice = NULL{{endif}}
-{{if 'cuMulticastBindMem' in found_functions}}cdef void *__cuMulticastBindMem = NULL{{endif}}
-{{if 'cuMulticastBindAddr' in found_functions}}cdef void *__cuMulticastBindAddr = NULL{{endif}}
-{{if 'cuMulticastUnbind' in found_functions}}cdef void *__cuMulticastUnbind = NULL{{endif}}
-{{if 'cuMulticastGetGranularity' in found_functions}}cdef void *__cuMulticastGetGranularity = NULL{{endif}}
-{{if 'cuPointerGetAttribute' in found_functions}}cdef void *__cuPointerGetAttribute = NULL{{endif}}
-{{if 'cuMemPrefetchAsync' in found_functions}}cdef void *__cuMemPrefetchAsync = NULL{{endif}}
-{{if 'cuMemPrefetchAsync_v2' in found_functions}}cdef void *__cuMemPrefetchAsync_v2 = NULL{{endif}}
-{{if 'cuMemAdvise' in found_functions}}cdef void *__cuMemAdvise = NULL{{endif}}
-{{if 'cuMemAdvise_v2' in found_functions}}cdef void *__cuMemAdvise_v2 = NULL{{endif}}
-{{if 'cuMemRangeGetAttribute' in found_functions}}cdef void *__cuMemRangeGetAttribute = NULL{{endif}}
-{{if 'cuMemRangeGetAttributes' in found_functions}}cdef void *__cuMemRangeGetAttributes = NULL{{endif}}
-{{if 'cuPointerSetAttribute' in found_functions}}cdef void *__cuPointerSetAttribute = NULL{{endif}}
-{{if 'cuPointerGetAttributes' in found_functions}}cdef void *__cuPointerGetAttributes = NULL{{endif}}
-{{if 'cuStreamCreate' in found_functions}}cdef void *__cuStreamCreate = NULL{{endif}}
-{{if 'cuStreamCreateWithPriority' in found_functions}}cdef void *__cuStreamCreateWithPriority = NULL{{endif}}
-{{if 'cuStreamGetPriority' in found_functions}}cdef void *__cuStreamGetPriority = NULL{{endif}}
-{{if 'cuStreamGetFlags' in found_functions}}cdef void *__cuStreamGetFlags = NULL{{endif}}
-{{if 'cuStreamGetId' in found_functions}}cdef void *__cuStreamGetId = NULL{{endif}}
-{{if 'cuStreamGetCtx' in found_functions}}cdef void *__cuStreamGetCtx = NULL{{endif}}
-{{if 'cuStreamGetCtx_v2' in found_functions}}cdef void *__cuStreamGetCtx_v2 = NULL{{endif}}
-{{if 'cuStreamWaitEvent' in found_functions}}cdef void *__cuStreamWaitEvent = NULL{{endif}}
-{{if 'cuStreamAddCallback' in found_functions}}cdef void *__cuStreamAddCallback = NULL{{endif}}
-{{if 'cuStreamBeginCapture_v2' in found_functions}}cdef void *__cuStreamBeginCapture_v2 = NULL{{endif}}
-{{if 'cuStreamBeginCaptureToGraph' in found_functions}}cdef void *__cuStreamBeginCaptureToGraph = NULL{{endif}}
-{{if 'cuThreadExchangeStreamCaptureMode' in found_functions}}cdef void *__cuThreadExchangeStreamCaptureMode = NULL{{endif}}
-{{if 'cuStreamEndCapture' in found_functions}}cdef void *__cuStreamEndCapture = NULL{{endif}}
-{{if 'cuStreamIsCapturing' in found_functions}}cdef void *__cuStreamIsCapturing = NULL{{endif}}
-{{if 'cuStreamGetCaptureInfo_v2' in found_functions}}cdef void *__cuStreamGetCaptureInfo_v2 = NULL{{endif}}
-{{if 'cuStreamGetCaptureInfo_v3' in found_functions}}cdef void *__cuStreamGetCaptureInfo_v3 = NULL{{endif}}
-{{if 'cuStreamUpdateCaptureDependencies' in found_functions}}cdef void *__cuStreamUpdateCaptureDependencies = NULL{{endif}}
-{{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}cdef void *__cuStreamUpdateCaptureDependencies_v2 = NULL{{endif}}
-{{if 'cuStreamAttachMemAsync' in found_functions}}cdef void *__cuStreamAttachMemAsync = NULL{{endif}}
-{{if 'cuStreamQuery' in found_functions}}cdef void *__cuStreamQuery = NULL{{endif}}
-{{if 'cuStreamSynchronize' in found_functions}}cdef void *__cuStreamSynchronize = NULL{{endif}}
-{{if 'cuStreamDestroy_v2' in found_functions}}cdef void *__cuStreamDestroy_v2 = NULL{{endif}}
-{{if 'cuStreamCopyAttributes' in found_functions}}cdef void *__cuStreamCopyAttributes = NULL{{endif}}
-{{if 'cuStreamGetAttribute' in found_functions}}cdef void *__cuStreamGetAttribute = NULL{{endif}}
-{{if 'cuStreamSetAttribute' in found_functions}}cdef void *__cuStreamSetAttribute = NULL{{endif}}
-{{if 'cuEventCreate' in found_functions}}cdef void *__cuEventCreate = NULL{{endif}}
-{{if 'cuEventRecord' in found_functions}}cdef void *__cuEventRecord = NULL{{endif}}
-{{if 'cuEventRecordWithFlags' in found_functions}}cdef void *__cuEventRecordWithFlags = NULL{{endif}}
-{{if 'cuEventQuery' in found_functions}}cdef void *__cuEventQuery = NULL{{endif}}
-{{if 'cuEventSynchronize' in found_functions}}cdef void *__cuEventSynchronize = NULL{{endif}}
-{{if 'cuEventDestroy_v2' in found_functions}}cdef void *__cuEventDestroy_v2 = NULL{{endif}}
-{{if 'cuEventElapsedTime' in found_functions}}cdef void *__cuEventElapsedTime = NULL{{endif}}
-{{if 'cuImportExternalMemory' in found_functions}}cdef void *__cuImportExternalMemory = NULL{{endif}}
-{{if 'cuExternalMemoryGetMappedBuffer' in found_functions}}cdef void *__cuExternalMemoryGetMappedBuffer = NULL{{endif}}
-{{if 'cuExternalMemoryGetMappedMipmappedArray' in found_functions}}cdef void *__cuExternalMemoryGetMappedMipmappedArray = NULL{{endif}}
-{{if 'cuDestroyExternalMemory' in found_functions}}cdef void *__cuDestroyExternalMemory = NULL{{endif}}
-{{if 'cuImportExternalSemaphore' in found_functions}}cdef void *__cuImportExternalSemaphore = NULL{{endif}}
-{{if 'cuSignalExternalSemaphoresAsync' in found_functions}}cdef void *__cuSignalExternalSemaphoresAsync = NULL{{endif}}
-{{if 'cuWaitExternalSemaphoresAsync' in found_functions}}cdef void *__cuWaitExternalSemaphoresAsync = NULL{{endif}}
-{{if 'cuDestroyExternalSemaphore' in found_functions}}cdef void *__cuDestroyExternalSemaphore = NULL{{endif}}
-{{if 'cuStreamWaitValue32_v2' in found_functions}}cdef void *__cuStreamWaitValue32_v2 = NULL{{endif}}
-{{if 'cuStreamWaitValue64_v2' in found_functions}}cdef void *__cuStreamWaitValue64_v2 = NULL{{endif}}
-{{if 'cuStreamWriteValue32_v2' in found_functions}}cdef void *__cuStreamWriteValue32_v2 = NULL{{endif}}
-{{if 'cuStreamWriteValue64_v2' in found_functions}}cdef void *__cuStreamWriteValue64_v2 = NULL{{endif}}
-{{if 'cuStreamBatchMemOp_v2' in found_functions}}cdef void *__cuStreamBatchMemOp_v2 = NULL{{endif}}
-{{if 'cuFuncGetAttribute' in found_functions}}cdef void *__cuFuncGetAttribute = NULL{{endif}}
-{{if 'cuFuncSetAttribute' in found_functions}}cdef void *__cuFuncSetAttribute = NULL{{endif}}
-{{if 'cuFuncSetCacheConfig' in found_functions}}cdef void *__cuFuncSetCacheConfig = NULL{{endif}}
-{{if 'cuFuncGetModule' in found_functions}}cdef void *__cuFuncGetModule = NULL{{endif}}
-{{if 'cuFuncGetName' in found_functions}}cdef void *__cuFuncGetName = NULL{{endif}}
-{{if 'cuFuncGetParamInfo' in found_functions}}cdef void *__cuFuncGetParamInfo = NULL{{endif}}
-{{if 'cuFuncIsLoaded' in found_functions}}cdef void *__cuFuncIsLoaded = NULL{{endif}}
-{{if 'cuFuncLoad' in found_functions}}cdef void *__cuFuncLoad = NULL{{endif}}
-{{if 'cuLaunchKernel' in found_functions}}cdef void *__cuLaunchKernel = NULL{{endif}}
-{{if 'cuLaunchKernelEx' in found_functions}}cdef void *__cuLaunchKernelEx = NULL{{endif}}
-{{if 'cuLaunchCooperativeKernel' in found_functions}}cdef void *__cuLaunchCooperativeKernel = NULL{{endif}}
-{{if 'cuLaunchCooperativeKernelMultiDevice' in found_functions}}cdef void *__cuLaunchCooperativeKernelMultiDevice = NULL{{endif}}
-{{if 'cuLaunchHostFunc' in found_functions}}cdef void *__cuLaunchHostFunc = NULL{{endif}}
-{{if 'cuFuncSetBlockShape' in found_functions}}cdef void *__cuFuncSetBlockShape = NULL{{endif}}
-{{if 'cuFuncSetSharedSize' in found_functions}}cdef void *__cuFuncSetSharedSize = NULL{{endif}}
-{{if 'cuParamSetSize' in found_functions}}cdef void *__cuParamSetSize = NULL{{endif}}
-{{if 'cuParamSeti' in found_functions}}cdef void *__cuParamSeti = NULL{{endif}}
-{{if 'cuParamSetf' in found_functions}}cdef void *__cuParamSetf = NULL{{endif}}
-{{if 'cuParamSetv' in found_functions}}cdef void *__cuParamSetv = NULL{{endif}}
-{{if 'cuLaunch' in found_functions}}cdef void *__cuLaunch = NULL{{endif}}
-{{if 'cuLaunchGrid' in found_functions}}cdef void *__cuLaunchGrid = NULL{{endif}}
-{{if 'cuLaunchGridAsync' in found_functions}}cdef void *__cuLaunchGridAsync = NULL{{endif}}
-{{if 'cuParamSetTexRef' in found_functions}}cdef void *__cuParamSetTexRef = NULL{{endif}}
-{{if 'cuFuncSetSharedMemConfig' in found_functions}}cdef void *__cuFuncSetSharedMemConfig = NULL{{endif}}
-{{if 'cuGraphCreate' in found_functions}}cdef void *__cuGraphCreate = NULL{{endif}}
-{{if 'cuGraphAddKernelNode_v2' in found_functions}}cdef void *__cuGraphAddKernelNode_v2 = NULL{{endif}}
-{{if 'cuGraphKernelNodeGetParams_v2' in found_functions}}cdef void *__cuGraphKernelNodeGetParams_v2 = NULL{{endif}}
-{{if 'cuGraphKernelNodeSetParams_v2' in found_functions}}cdef void *__cuGraphKernelNodeSetParams_v2 = NULL{{endif}}
-{{if 'cuGraphAddMemcpyNode' in found_functions}}cdef void *__cuGraphAddMemcpyNode = NULL{{endif}}
-{{if 'cuGraphMemcpyNodeGetParams' in found_functions}}cdef void *__cuGraphMemcpyNodeGetParams = NULL{{endif}}
-{{if 'cuGraphMemcpyNodeSetParams' in found_functions}}cdef void *__cuGraphMemcpyNodeSetParams = NULL{{endif}}
-{{if 'cuGraphAddMemsetNode' in found_functions}}cdef void *__cuGraphAddMemsetNode = NULL{{endif}}
-{{if 'cuGraphMemsetNodeGetParams' in found_functions}}cdef void *__cuGraphMemsetNodeGetParams = NULL{{endif}}
-{{if 'cuGraphMemsetNodeSetParams' in found_functions}}cdef void *__cuGraphMemsetNodeSetParams = NULL{{endif}}
-{{if 'cuGraphAddHostNode' in found_functions}}cdef void *__cuGraphAddHostNode = NULL{{endif}}
-{{if 'cuGraphHostNodeGetParams' in found_functions}}cdef void *__cuGraphHostNodeGetParams = NULL{{endif}}
-{{if 'cuGraphHostNodeSetParams' in found_functions}}cdef void *__cuGraphHostNodeSetParams = NULL{{endif}}
-{{if 'cuGraphAddChildGraphNode' in found_functions}}cdef void *__cuGraphAddChildGraphNode = NULL{{endif}}
-{{if 'cuGraphChildGraphNodeGetGraph' in found_functions}}cdef void *__cuGraphChildGraphNodeGetGraph = NULL{{endif}}
-{{if 'cuGraphAddEmptyNode' in found_functions}}cdef void *__cuGraphAddEmptyNode = NULL{{endif}}
-{{if 'cuGraphAddEventRecordNode' in found_functions}}cdef void *__cuGraphAddEventRecordNode = NULL{{endif}}
-{{if 'cuGraphEventRecordNodeGetEvent' in found_functions}}cdef void *__cuGraphEventRecordNodeGetEvent = NULL{{endif}}
-{{if 'cuGraphEventRecordNodeSetEvent' in found_functions}}cdef void *__cuGraphEventRecordNodeSetEvent = NULL{{endif}}
-{{if 'cuGraphAddEventWaitNode' in found_functions}}cdef void *__cuGraphAddEventWaitNode = NULL{{endif}}
-{{if 'cuGraphEventWaitNodeGetEvent' in found_functions}}cdef void *__cuGraphEventWaitNodeGetEvent = NULL{{endif}}
-{{if 'cuGraphEventWaitNodeSetEvent' in found_functions}}cdef void *__cuGraphEventWaitNodeSetEvent = NULL{{endif}}
-{{if 'cuGraphAddExternalSemaphoresSignalNode' in found_functions}}cdef void *__cuGraphAddExternalSemaphoresSignalNode = NULL{{endif}}
-{{if 'cuGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}cdef void *__cuGraphExternalSemaphoresSignalNodeGetParams = NULL{{endif}}
-{{if 'cuGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}cdef void *__cuGraphExternalSemaphoresSignalNodeSetParams = NULL{{endif}}
-{{if 'cuGraphAddExternalSemaphoresWaitNode' in found_functions}}cdef void *__cuGraphAddExternalSemaphoresWaitNode = NULL{{endif}}
-{{if 'cuGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}cdef void *__cuGraphExternalSemaphoresWaitNodeGetParams = NULL{{endif}}
-{{if 'cuGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}cdef void *__cuGraphExternalSemaphoresWaitNodeSetParams = NULL{{endif}}
-{{if 'cuGraphAddBatchMemOpNode' in found_functions}}cdef void *__cuGraphAddBatchMemOpNode = NULL{{endif}}
-{{if 'cuGraphBatchMemOpNodeGetParams' in found_functions}}cdef void *__cuGraphBatchMemOpNodeGetParams = NULL{{endif}}
-{{if 'cuGraphBatchMemOpNodeSetParams' in found_functions}}cdef void *__cuGraphBatchMemOpNodeSetParams = NULL{{endif}}
-{{if 'cuGraphExecBatchMemOpNodeSetParams' in found_functions}}cdef void *__cuGraphExecBatchMemOpNodeSetParams = NULL{{endif}}
-{{if 'cuGraphAddMemAllocNode' in found_functions}}cdef void *__cuGraphAddMemAllocNode = NULL{{endif}}
-{{if 'cuGraphMemAllocNodeGetParams' in found_functions}}cdef void *__cuGraphMemAllocNodeGetParams = NULL{{endif}}
-{{if 'cuGraphAddMemFreeNode' in found_functions}}cdef void *__cuGraphAddMemFreeNode = NULL{{endif}}
-{{if 'cuGraphMemFreeNodeGetParams' in found_functions}}cdef void *__cuGraphMemFreeNodeGetParams = NULL{{endif}}
-{{if 'cuDeviceGraphMemTrim' in found_functions}}cdef void *__cuDeviceGraphMemTrim = NULL{{endif}}
-{{if 'cuDeviceGetGraphMemAttribute' in found_functions}}cdef void *__cuDeviceGetGraphMemAttribute = NULL{{endif}}
-{{if 'cuDeviceSetGraphMemAttribute' in found_functions}}cdef void *__cuDeviceSetGraphMemAttribute = NULL{{endif}}
-{{if 'cuGraphClone' in found_functions}}cdef void *__cuGraphClone = NULL{{endif}}
-{{if 'cuGraphNodeFindInClone' in found_functions}}cdef void *__cuGraphNodeFindInClone = NULL{{endif}}
-{{if 'cuGraphNodeGetType' in found_functions}}cdef void *__cuGraphNodeGetType = NULL{{endif}}
-{{if 'cuGraphGetNodes' in found_functions}}cdef void *__cuGraphGetNodes = NULL{{endif}}
-{{if 'cuGraphGetRootNodes' in found_functions}}cdef void *__cuGraphGetRootNodes = NULL{{endif}}
-{{if 'cuGraphGetEdges' in found_functions}}cdef void *__cuGraphGetEdges = NULL{{endif}}
-{{if 'cuGraphGetEdges_v2' in found_functions}}cdef void *__cuGraphGetEdges_v2 = NULL{{endif}}
-{{if 'cuGraphNodeGetDependencies' in found_functions}}cdef void *__cuGraphNodeGetDependencies = NULL{{endif}}
-{{if 'cuGraphNodeGetDependencies_v2' in found_functions}}cdef void *__cuGraphNodeGetDependencies_v2 = NULL{{endif}}
-{{if 'cuGraphNodeGetDependentNodes' in found_functions}}cdef void *__cuGraphNodeGetDependentNodes = NULL{{endif}}
-{{if 'cuGraphNodeGetDependentNodes_v2' in found_functions}}cdef void *__cuGraphNodeGetDependentNodes_v2 = NULL{{endif}}
-{{if 'cuGraphAddDependencies' in found_functions}}cdef void *__cuGraphAddDependencies = NULL{{endif}}
-{{if 'cuGraphAddDependencies_v2' in found_functions}}cdef void *__cuGraphAddDependencies_v2 = NULL{{endif}}
-{{if 'cuGraphRemoveDependencies' in found_functions}}cdef void *__cuGraphRemoveDependencies = NULL{{endif}}
-{{if 'cuGraphRemoveDependencies_v2' in found_functions}}cdef void *__cuGraphRemoveDependencies_v2 = NULL{{endif}}
-{{if 'cuGraphDestroyNode' in found_functions}}cdef void *__cuGraphDestroyNode = NULL{{endif}}
-{{if 'cuGraphInstantiateWithFlags' in found_functions}}cdef void *__cuGraphInstantiateWithFlags = NULL{{endif}}
-{{if 'cuGraphInstantiateWithParams' in found_functions}}cdef void *__cuGraphInstantiateWithParams = NULL{{endif}}
-{{if 'cuGraphExecGetFlags' in found_functions}}cdef void *__cuGraphExecGetFlags = NULL{{endif}}
-{{if 'cuGraphExecKernelNodeSetParams_v2' in found_functions}}cdef void *__cuGraphExecKernelNodeSetParams_v2 = NULL{{endif}}
-{{if 'cuGraphExecMemcpyNodeSetParams' in found_functions}}cdef void *__cuGraphExecMemcpyNodeSetParams = NULL{{endif}}
-{{if 'cuGraphExecMemsetNodeSetParams' in found_functions}}cdef void *__cuGraphExecMemsetNodeSetParams = NULL{{endif}}
-{{if 'cuGraphExecHostNodeSetParams' in found_functions}}cdef void *__cuGraphExecHostNodeSetParams = NULL{{endif}}
-{{if 'cuGraphExecChildGraphNodeSetParams' in found_functions}}cdef void *__cuGraphExecChildGraphNodeSetParams = NULL{{endif}}
-{{if 'cuGraphExecEventRecordNodeSetEvent' in found_functions}}cdef void *__cuGraphExecEventRecordNodeSetEvent = NULL{{endif}}
-{{if 'cuGraphExecEventWaitNodeSetEvent' in found_functions}}cdef void *__cuGraphExecEventWaitNodeSetEvent = NULL{{endif}}
-{{if 'cuGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}cdef void *__cuGraphExecExternalSemaphoresSignalNodeSetParams = NULL{{endif}}
-{{if 'cuGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}cdef void *__cuGraphExecExternalSemaphoresWaitNodeSetParams = NULL{{endif}}
-{{if 'cuGraphNodeSetEnabled' in found_functions}}cdef void *__cuGraphNodeSetEnabled = NULL{{endif}}
-{{if 'cuGraphNodeGetEnabled' in found_functions}}cdef void *__cuGraphNodeGetEnabled = NULL{{endif}}
-{{if 'cuGraphUpload' in found_functions}}cdef void *__cuGraphUpload = NULL{{endif}}
-{{if 'cuGraphLaunch' in found_functions}}cdef void *__cuGraphLaunch = NULL{{endif}}
-{{if 'cuGraphExecDestroy' in found_functions}}cdef void *__cuGraphExecDestroy = NULL{{endif}}
-{{if 'cuGraphDestroy' in found_functions}}cdef void *__cuGraphDestroy = NULL{{endif}}
-{{if 'cuGraphExecUpdate_v2' in found_functions}}cdef void *__cuGraphExecUpdate_v2 = NULL{{endif}}
-{{if 'cuGraphKernelNodeCopyAttributes' in found_functions}}cdef void *__cuGraphKernelNodeCopyAttributes = NULL{{endif}}
-{{if 'cuGraphKernelNodeGetAttribute' in found_functions}}cdef void *__cuGraphKernelNodeGetAttribute = NULL{{endif}}
-{{if 'cuGraphKernelNodeSetAttribute' in found_functions}}cdef void *__cuGraphKernelNodeSetAttribute = NULL{{endif}}
-{{if 'cuGraphDebugDotPrint' in found_functions}}cdef void *__cuGraphDebugDotPrint = NULL{{endif}}
-{{if 'cuUserObjectCreate' in found_functions}}cdef void *__cuUserObjectCreate = NULL{{endif}}
-{{if 'cuUserObjectRetain' in found_functions}}cdef void *__cuUserObjectRetain = NULL{{endif}}
-{{if 'cuUserObjectRelease' in found_functions}}cdef void *__cuUserObjectRelease = NULL{{endif}}
-{{if 'cuGraphRetainUserObject' in found_functions}}cdef void *__cuGraphRetainUserObject = NULL{{endif}}
-{{if 'cuGraphReleaseUserObject' in found_functions}}cdef void *__cuGraphReleaseUserObject = NULL{{endif}}
-{{if 'cuGraphAddNode' in found_functions}}cdef void *__cuGraphAddNode = NULL{{endif}}
-{{if 'cuGraphAddNode_v2' in found_functions}}cdef void *__cuGraphAddNode_v2 = NULL{{endif}}
-{{if 'cuGraphNodeSetParams' in found_functions}}cdef void *__cuGraphNodeSetParams = NULL{{endif}}
-{{if 'cuGraphExecNodeSetParams' in found_functions}}cdef void *__cuGraphExecNodeSetParams = NULL{{endif}}
-{{if 'cuGraphConditionalHandleCreate' in found_functions}}cdef void *__cuGraphConditionalHandleCreate = NULL{{endif}}
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}cdef void *__cuOccupancyMaxActiveBlocksPerMultiprocessor = NULL{{endif}}
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}cdef void *__cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = NULL{{endif}}
-{{if 'cuOccupancyMaxPotentialBlockSize' in found_functions}}cdef void *__cuOccupancyMaxPotentialBlockSize = NULL{{endif}}
-{{if 'cuOccupancyMaxPotentialBlockSizeWithFlags' in found_functions}}cdef void *__cuOccupancyMaxPotentialBlockSizeWithFlags = NULL{{endif}}
-{{if 'cuOccupancyAvailableDynamicSMemPerBlock' in found_functions}}cdef void *__cuOccupancyAvailableDynamicSMemPerBlock = NULL{{endif}}
-{{if 'cuOccupancyMaxPotentialClusterSize' in found_functions}}cdef void *__cuOccupancyMaxPotentialClusterSize = NULL{{endif}}
-{{if 'cuOccupancyMaxActiveClusters' in found_functions}}cdef void *__cuOccupancyMaxActiveClusters = NULL{{endif}}
-{{if 'cuTexRefSetArray' in found_functions}}cdef void *__cuTexRefSetArray = NULL{{endif}}
-{{if 'cuTexRefSetMipmappedArray' in found_functions}}cdef void *__cuTexRefSetMipmappedArray = NULL{{endif}}
-{{if 'cuTexRefSetAddress_v2' in found_functions}}cdef void *__cuTexRefSetAddress_v2 = NULL{{endif}}
-{{if 'cuTexRefSetAddress2D_v3' in found_functions}}cdef void *__cuTexRefSetAddress2D_v3 = NULL{{endif}}
-{{if 'cuTexRefSetFormat' in found_functions}}cdef void *__cuTexRefSetFormat = NULL{{endif}}
-{{if 'cuTexRefSetAddressMode' in found_functions}}cdef void *__cuTexRefSetAddressMode = NULL{{endif}}
-{{if 'cuTexRefSetFilterMode' in found_functions}}cdef void *__cuTexRefSetFilterMode = NULL{{endif}}
-{{if 'cuTexRefSetMipmapFilterMode' in found_functions}}cdef void *__cuTexRefSetMipmapFilterMode = NULL{{endif}}
-{{if 'cuTexRefSetMipmapLevelBias' in found_functions}}cdef void *__cuTexRefSetMipmapLevelBias = NULL{{endif}}
-{{if 'cuTexRefSetMipmapLevelClamp' in found_functions}}cdef void *__cuTexRefSetMipmapLevelClamp = NULL{{endif}}
-{{if 'cuTexRefSetMaxAnisotropy' in found_functions}}cdef void *__cuTexRefSetMaxAnisotropy = NULL{{endif}}
-{{if 'cuTexRefSetBorderColor' in found_functions}}cdef void *__cuTexRefSetBorderColor = NULL{{endif}}
-{{if 'cuTexRefSetFlags' in found_functions}}cdef void *__cuTexRefSetFlags = NULL{{endif}}
-{{if 'cuTexRefGetAddress_v2' in found_functions}}cdef void *__cuTexRefGetAddress_v2 = NULL{{endif}}
-{{if 'cuTexRefGetArray' in found_functions}}cdef void *__cuTexRefGetArray = NULL{{endif}}
-{{if 'cuTexRefGetMipmappedArray' in found_functions}}cdef void *__cuTexRefGetMipmappedArray = NULL{{endif}}
-{{if 'cuTexRefGetAddressMode' in found_functions}}cdef void *__cuTexRefGetAddressMode = NULL{{endif}}
-{{if 'cuTexRefGetFilterMode' in found_functions}}cdef void *__cuTexRefGetFilterMode = NULL{{endif}}
-{{if 'cuTexRefGetFormat' in found_functions}}cdef void *__cuTexRefGetFormat = NULL{{endif}}
-{{if 'cuTexRefGetMipmapFilterMode' in found_functions}}cdef void *__cuTexRefGetMipmapFilterMode = NULL{{endif}}
-{{if 'cuTexRefGetMipmapLevelBias' in found_functions}}cdef void *__cuTexRefGetMipmapLevelBias = NULL{{endif}}
-{{if 'cuTexRefGetMipmapLevelClamp' in found_functions}}cdef void *__cuTexRefGetMipmapLevelClamp = NULL{{endif}}
-{{if 'cuTexRefGetMaxAnisotropy' in found_functions}}cdef void *__cuTexRefGetMaxAnisotropy = NULL{{endif}}
-{{if 'cuTexRefGetBorderColor' in found_functions}}cdef void *__cuTexRefGetBorderColor = NULL{{endif}}
-{{if 'cuTexRefGetFlags' in found_functions}}cdef void *__cuTexRefGetFlags = NULL{{endif}}
-{{if 'cuTexRefCreate' in found_functions}}cdef void *__cuTexRefCreate = NULL{{endif}}
-{{if 'cuTexRefDestroy' in found_functions}}cdef void *__cuTexRefDestroy = NULL{{endif}}
-{{if 'cuSurfRefSetArray' in found_functions}}cdef void *__cuSurfRefSetArray = NULL{{endif}}
-{{if 'cuSurfRefGetArray' in found_functions}}cdef void *__cuSurfRefGetArray = NULL{{endif}}
-{{if 'cuTexObjectCreate' in found_functions}}cdef void *__cuTexObjectCreate = NULL{{endif}}
-{{if 'cuTexObjectDestroy' in found_functions}}cdef void *__cuTexObjectDestroy = NULL{{endif}}
-{{if 'cuTexObjectGetResourceDesc' in found_functions}}cdef void *__cuTexObjectGetResourceDesc = NULL{{endif}}
-{{if 'cuTexObjectGetTextureDesc' in found_functions}}cdef void *__cuTexObjectGetTextureDesc = NULL{{endif}}
-{{if 'cuTexObjectGetResourceViewDesc' in found_functions}}cdef void *__cuTexObjectGetResourceViewDesc = NULL{{endif}}
-{{if 'cuSurfObjectCreate' in found_functions}}cdef void *__cuSurfObjectCreate = NULL{{endif}}
-{{if 'cuSurfObjectDestroy' in found_functions}}cdef void *__cuSurfObjectDestroy = NULL{{endif}}
-{{if 'cuSurfObjectGetResourceDesc' in found_functions}}cdef void *__cuSurfObjectGetResourceDesc = NULL{{endif}}
-{{if 'cuTensorMapEncodeTiled' in found_functions}}cdef void *__cuTensorMapEncodeTiled = NULL{{endif}}
-{{if 'cuTensorMapEncodeIm2col' in found_functions}}cdef void *__cuTensorMapEncodeIm2col = NULL{{endif}}
-{{if 'cuTensorMapReplaceAddress' in found_functions}}cdef void *__cuTensorMapReplaceAddress = NULL{{endif}}
-{{if 'cuDeviceCanAccessPeer' in found_functions}}cdef void *__cuDeviceCanAccessPeer = NULL{{endif}}
-{{if 'cuCtxEnablePeerAccess' in found_functions}}cdef void *__cuCtxEnablePeerAccess = NULL{{endif}}
-{{if 'cuCtxDisablePeerAccess' in found_functions}}cdef void *__cuCtxDisablePeerAccess = NULL{{endif}}
-{{if 'cuDeviceGetP2PAttribute' in found_functions}}cdef void *__cuDeviceGetP2PAttribute = NULL{{endif}}
-{{if 'cuGraphicsUnregisterResource' in found_functions}}cdef void *__cuGraphicsUnregisterResource = NULL{{endif}}
-{{if 'cuGraphicsSubResourceGetMappedArray' in found_functions}}cdef void *__cuGraphicsSubResourceGetMappedArray = NULL{{endif}}
-{{if 'cuGraphicsResourceGetMappedMipmappedArray' in found_functions}}cdef void *__cuGraphicsResourceGetMappedMipmappedArray = NULL{{endif}}
-{{if 'cuGraphicsResourceGetMappedPointer_v2' in found_functions}}cdef void *__cuGraphicsResourceGetMappedPointer_v2 = NULL{{endif}}
-{{if 'cuGraphicsResourceSetMapFlags_v2' in found_functions}}cdef void *__cuGraphicsResourceSetMapFlags_v2 = NULL{{endif}}
-{{if 'cuGraphicsMapResources' in found_functions}}cdef void *__cuGraphicsMapResources = NULL{{endif}}
-{{if 'cuGraphicsUnmapResources' in found_functions}}cdef void *__cuGraphicsUnmapResources = NULL{{endif}}
-{{if 'cuGetProcAddress_v2' in found_functions}}cdef void *__cuGetProcAddress_v2 = NULL{{endif}}
-{{if 'cuCoredumpGetAttribute' in found_functions}}cdef void *__cuCoredumpGetAttribute = NULL{{endif}}
-{{if 'cuCoredumpGetAttributeGlobal' in found_functions}}cdef void *__cuCoredumpGetAttributeGlobal = NULL{{endif}}
-{{if 'cuCoredumpSetAttribute' in found_functions}}cdef void *__cuCoredumpSetAttribute = NULL{{endif}}
-{{if 'cuCoredumpSetAttributeGlobal' in found_functions}}cdef void *__cuCoredumpSetAttributeGlobal = NULL{{endif}}
-{{if 'cuGetExportTable' in found_functions}}cdef void *__cuGetExportTable = NULL{{endif}}
-{{if 'cuGreenCtxCreate' in found_functions}}cdef void *__cuGreenCtxCreate = NULL{{endif}}
-{{if 'cuGreenCtxDestroy' in found_functions}}cdef void *__cuGreenCtxDestroy = NULL{{endif}}
-{{if 'cuCtxFromGreenCtx' in found_functions}}cdef void *__cuCtxFromGreenCtx = NULL{{endif}}
-{{if 'cuDeviceGetDevResource' in found_functions}}cdef void *__cuDeviceGetDevResource = NULL{{endif}}
-{{if 'cuCtxGetDevResource' in found_functions}}cdef void *__cuCtxGetDevResource = NULL{{endif}}
-{{if 'cuGreenCtxGetDevResource' in found_functions}}cdef void *__cuGreenCtxGetDevResource = NULL{{endif}}
-{{if 'cuDevSmResourceSplitByCount' in found_functions}}cdef void *__cuDevSmResourceSplitByCount = NULL{{endif}}
-{{if 'cuDevResourceGenerateDesc' in found_functions}}cdef void *__cuDevResourceGenerateDesc = NULL{{endif}}
-{{if 'cuGreenCtxRecordEvent' in found_functions}}cdef void *__cuGreenCtxRecordEvent = NULL{{endif}}
-{{if 'cuGreenCtxWaitEvent' in found_functions}}cdef void *__cuGreenCtxWaitEvent = NULL{{endif}}
-{{if 'cuStreamGetGreenCtx' in found_functions}}cdef void *__cuStreamGetGreenCtx = NULL{{endif}}
-{{if 'cuGreenCtxStreamCreate' in found_functions}}cdef void *__cuGreenCtxStreamCreate = NULL{{endif}}
-{{if 'cuProfilerStart' in found_functions}}cdef void *__cuProfilerStart = NULL{{endif}}
-{{if 'cuProfilerStop' in found_functions}}cdef void *__cuProfilerStop = NULL{{endif}}
-{{if True}}cdef void *__cuGraphicsEGLRegisterImage = NULL{{endif}}
-{{if True}}cdef void *__cuEGLStreamConsumerConnect = NULL{{endif}}
-{{if True}}cdef void *__cuEGLStreamConsumerConnectWithFlags = NULL{{endif}}
-{{if True}}cdef void *__cuEGLStreamConsumerDisconnect = NULL{{endif}}
-{{if True}}cdef void *__cuEGLStreamConsumerAcquireFrame = NULL{{endif}}
-{{if True}}cdef void *__cuEGLStreamConsumerReleaseFrame = NULL{{endif}}
-{{if True}}cdef void *__cuEGLStreamProducerConnect = NULL{{endif}}
-{{if True}}cdef void *__cuEGLStreamProducerDisconnect = NULL{{endif}}
-{{if True}}cdef void *__cuEGLStreamProducerPresentFrame = NULL{{endif}}
-{{if True}}cdef void *__cuEGLStreamProducerReturnFrame = NULL{{endif}}
-{{if True}}cdef void *__cuGraphicsResourceGetMappedEglFrame = NULL{{endif}}
-{{if True}}cdef void *__cuEventCreateFromEGLSync = NULL{{endif}}
-{{if True}}cdef void *__cuGraphicsGLRegisterBuffer = NULL{{endif}}
-{{if True}}cdef void *__cuGraphicsGLRegisterImage = NULL{{endif}}
-{{if True}}cdef void *__cuGLGetDevices_v2 = NULL{{endif}}
-{{if True}}cdef void *__cuVDPAUGetDevice = NULL{{endif}}
-{{if True}}cdef void *__cuVDPAUCtxCreate_v2 = NULL{{endif}}
-{{if True}}cdef void *__cuGraphicsVDPAURegisterVideoSurface = NULL{{endif}}
-{{if True}}cdef void *__cuGraphicsVDPAURegisterOutputSurface = NULL{{endif}}
-
-cdef int cuPythonInit() except -1 nogil:
-    global __cuPythonInit
-    cdef bint usePTDS
-    if __cuPythonInit:
-        return 0
-    __cuPythonInit = True
-    with gil:
-        usePTDS = os.getenv('CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM', default=0)
-
-    # Load library
-    cdef char libPath[260]
-    libPath[0] = 0
-    with gil:
-        status = loader.getCUDALibraryPath(libPath, sys.maxsize > 2**32)
-        if status == 0 and len(libPath) != 0:
-            path = libPath.decode('utf-8')
-        else:
-            {{if 'Windows' == platform.system()}}
-            path = 'nvcuda.dll'
-            {{else}}
-            path = 'libcuda.so.1'
-            {{endif}}
-
-        {{if 'Windows' == platform.system()}}
-        LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800
-        try:
-            handle = win32api.LoadLibraryEx(path, 0, LOAD_LIBRARY_SEARCH_SYSTEM32)
-        except error as e:
-            raise RuntimeError('Failed to LoadLibraryEx ' + path)
-        {{else}}
-        handle = dlfcn.dlopen(bytes(path, encoding='utf-8'), dlfcn.RTLD_NOW)
-        if (handle == NULL):
-            raise RuntimeError('Failed to dlopen ' + path)
-        {{endif}}
-
-    # Get latest __cuGetProcAddress_v2
-    {{if 'Windows' == platform.system()}}
-    with gil:
-        try:
-            global __cuGetProcAddress_v2
-            __cuGetProcAddress_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGetProcAddress_v2')
-        except:
-            pass
-    {{else}}
-    global __cuGetProcAddress_v2
-    __cuGetProcAddress_v2 = dlfcn.dlsym(handle, 'cuGetProcAddress_v2')
-    {{endif}}
-
-    # Load using cuGetProcAddress if available
-    if __cuGetProcAddress_v2 != NULL:
-        if usePTDS:
-            # Get all PTDS version of functions
-            pass
-            {{if 'cuMemcpy' in found_functions}}
-            global __cuMemcpy
-            cuGetProcAddress('cuMemcpy', &__cuMemcpy, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyPeer' in found_functions}}
-            global __cuMemcpyPeer
-            cuGetProcAddress('cuMemcpyPeer', &__cuMemcpyPeer, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyHtoD_v2' in found_functions}}
-            global __cuMemcpyHtoD_v2
-            cuGetProcAddress('cuMemcpyHtoD', &__cuMemcpyHtoD_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyDtoH_v2' in found_functions}}
-            global __cuMemcpyDtoH_v2
-            cuGetProcAddress('cuMemcpyDtoH', &__cuMemcpyDtoH_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyDtoD_v2' in found_functions}}
-            global __cuMemcpyDtoD_v2
-            cuGetProcAddress('cuMemcpyDtoD', &__cuMemcpyDtoD_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyDtoA_v2' in found_functions}}
-            global __cuMemcpyDtoA_v2
-            cuGetProcAddress('cuMemcpyDtoA', &__cuMemcpyDtoA_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyAtoD_v2' in found_functions}}
-            global __cuMemcpyAtoD_v2
-            cuGetProcAddress('cuMemcpyAtoD', &__cuMemcpyAtoD_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyHtoA_v2' in found_functions}}
-            global __cuMemcpyHtoA_v2
-            cuGetProcAddress('cuMemcpyHtoA', &__cuMemcpyHtoA_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyAtoH_v2' in found_functions}}
-            global __cuMemcpyAtoH_v2
-            cuGetProcAddress('cuMemcpyAtoH', &__cuMemcpyAtoH_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyAtoA_v2' in found_functions}}
-            global __cuMemcpyAtoA_v2
-            cuGetProcAddress('cuMemcpyAtoA', &__cuMemcpyAtoA_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpy2D_v2' in found_functions}}
-            global __cuMemcpy2D_v2
-            cuGetProcAddress('cuMemcpy2D', &__cuMemcpy2D_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpy2DUnaligned_v2' in found_functions}}
-            global __cuMemcpy2DUnaligned_v2
-            cuGetProcAddress('cuMemcpy2DUnaligned', &__cuMemcpy2DUnaligned_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpy3D_v2' in found_functions}}
-            global __cuMemcpy3D_v2
-            cuGetProcAddress('cuMemcpy3D', &__cuMemcpy3D_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpy3DPeer' in found_functions}}
-            global __cuMemcpy3DPeer
-            cuGetProcAddress('cuMemcpy3DPeer', &__cuMemcpy3DPeer, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyAsync' in found_functions}}
-            global __cuMemcpyAsync
-            cuGetProcAddress('cuMemcpyAsync', &__cuMemcpyAsync, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyPeerAsync' in found_functions}}
-            global __cuMemcpyPeerAsync
-            cuGetProcAddress('cuMemcpyPeerAsync', &__cuMemcpyPeerAsync, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyHtoDAsync_v2' in found_functions}}
-            global __cuMemcpyHtoDAsync_v2
-            cuGetProcAddress('cuMemcpyHtoDAsync', &__cuMemcpyHtoDAsync_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyDtoHAsync_v2' in found_functions}}
-            global __cuMemcpyDtoHAsync_v2
-            cuGetProcAddress('cuMemcpyDtoHAsync', &__cuMemcpyDtoHAsync_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyDtoDAsync_v2' in found_functions}}
-            global __cuMemcpyDtoDAsync_v2
-            cuGetProcAddress('cuMemcpyDtoDAsync', &__cuMemcpyDtoDAsync_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyHtoAAsync_v2' in found_functions}}
-            global __cuMemcpyHtoAAsync_v2
-            cuGetProcAddress('cuMemcpyHtoAAsync', &__cuMemcpyHtoAAsync_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpyAtoHAsync_v2' in found_functions}}
-            global __cuMemcpyAtoHAsync_v2
-            cuGetProcAddress('cuMemcpyAtoHAsync', &__cuMemcpyAtoHAsync_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpy2DAsync_v2' in found_functions}}
-            global __cuMemcpy2DAsync_v2
-            cuGetProcAddress('cuMemcpy2DAsync', &__cuMemcpy2DAsync_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpy3DAsync_v2' in found_functions}}
-            global __cuMemcpy3DAsync_v2
-            cuGetProcAddress('cuMemcpy3DAsync', &__cuMemcpy3DAsync_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemcpy3DPeerAsync' in found_functions}}
-            global __cuMemcpy3DPeerAsync
-            cuGetProcAddress('cuMemcpy3DPeerAsync', &__cuMemcpy3DPeerAsync, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD8_v2' in found_functions}}
-            global __cuMemsetD8_v2
-            cuGetProcAddress('cuMemsetD8', &__cuMemsetD8_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD16_v2' in found_functions}}
-            global __cuMemsetD16_v2
-            cuGetProcAddress('cuMemsetD16', &__cuMemsetD16_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD32_v2' in found_functions}}
-            global __cuMemsetD32_v2
-            cuGetProcAddress('cuMemsetD32', &__cuMemsetD32_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D8_v2' in found_functions}}
-            global __cuMemsetD2D8_v2
-            cuGetProcAddress('cuMemsetD2D8', &__cuMemsetD2D8_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D16_v2' in found_functions}}
-            global __cuMemsetD2D16_v2
-            cuGetProcAddress('cuMemsetD2D16', &__cuMemsetD2D16_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D32_v2' in found_functions}}
-            global __cuMemsetD2D32_v2
-            cuGetProcAddress('cuMemsetD2D32', &__cuMemsetD2D32_v2, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD8Async' in found_functions}}
-            global __cuMemsetD8Async
-            cuGetProcAddress('cuMemsetD8Async', &__cuMemsetD8Async, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD16Async' in found_functions}}
-            global __cuMemsetD16Async
-            cuGetProcAddress('cuMemsetD16Async', &__cuMemsetD16Async, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD32Async' in found_functions}}
-            global __cuMemsetD32Async
-            cuGetProcAddress('cuMemsetD32Async', &__cuMemsetD32Async, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D8Async' in found_functions}}
-            global __cuMemsetD2D8Async
-            cuGetProcAddress('cuMemsetD2D8Async', &__cuMemsetD2D8Async, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D16Async' in found_functions}}
-            global __cuMemsetD2D16Async
-            cuGetProcAddress('cuMemsetD2D16Async', &__cuMemsetD2D16Async, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D32Async' in found_functions}}
-            global __cuMemsetD2D32Async
-            cuGetProcAddress('cuMemsetD2D32Async', &__cuMemsetD2D32Async, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemMapArrayAsync' in found_functions}}
-            global __cuMemMapArrayAsync
-            cuGetProcAddress('cuMemMapArrayAsync', &__cuMemMapArrayAsync, 11010, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemFreeAsync' in found_functions}}
-            global __cuMemFreeAsync
-            cuGetProcAddress('cuMemFreeAsync', &__cuMemFreeAsync, 11020, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemAllocAsync' in found_functions}}
-            global __cuMemAllocAsync
-            cuGetProcAddress('cuMemAllocAsync', &__cuMemAllocAsync, 11020, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemAllocFromPoolAsync' in found_functions}}
-            global __cuMemAllocFromPoolAsync
-            cuGetProcAddress('cuMemAllocFromPoolAsync', &__cuMemAllocFromPoolAsync, 11020, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemPrefetchAsync' in found_functions}}
-            global __cuMemPrefetchAsync
-            cuGetProcAddress('cuMemPrefetchAsync', &__cuMemPrefetchAsync, 8000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuMemPrefetchAsync_v2' in found_functions}}
-            global __cuMemPrefetchAsync_v2
-            cuGetProcAddress('cuMemPrefetchAsync', &__cuMemPrefetchAsync_v2, 12020, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamGetPriority' in found_functions}}
-            global __cuStreamGetPriority
-            cuGetProcAddress('cuStreamGetPriority', &__cuStreamGetPriority, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamGetFlags' in found_functions}}
-            global __cuStreamGetFlags
-            cuGetProcAddress('cuStreamGetFlags', &__cuStreamGetFlags, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamGetId' in found_functions}}
-            global __cuStreamGetId
-            cuGetProcAddress('cuStreamGetId', &__cuStreamGetId, 12000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamGetCtx' in found_functions}}
-            global __cuStreamGetCtx
-            cuGetProcAddress('cuStreamGetCtx', &__cuStreamGetCtx, 9020, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamGetCtx_v2' in found_functions}}
-            global __cuStreamGetCtx_v2
-            cuGetProcAddress('cuStreamGetCtx', &__cuStreamGetCtx_v2, 12050, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamWaitEvent' in found_functions}}
-            global __cuStreamWaitEvent
-            cuGetProcAddress('cuStreamWaitEvent', &__cuStreamWaitEvent, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamAddCallback' in found_functions}}
-            global __cuStreamAddCallback
-            cuGetProcAddress('cuStreamAddCallback', &__cuStreamAddCallback, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamBeginCapture_v2' in found_functions}}
-            global __cuStreamBeginCapture_v2
-            cuGetProcAddress('cuStreamBeginCapture', &__cuStreamBeginCapture_v2, 10010, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamBeginCaptureToGraph' in found_functions}}
-            global __cuStreamBeginCaptureToGraph
-            cuGetProcAddress('cuStreamBeginCaptureToGraph', &__cuStreamBeginCaptureToGraph, 12030, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamEndCapture' in found_functions}}
-            global __cuStreamEndCapture
-            cuGetProcAddress('cuStreamEndCapture', &__cuStreamEndCapture, 10000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamIsCapturing' in found_functions}}
-            global __cuStreamIsCapturing
-            cuGetProcAddress('cuStreamIsCapturing', &__cuStreamIsCapturing, 10000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamGetCaptureInfo_v2' in found_functions}}
-            global __cuStreamGetCaptureInfo_v2
-            cuGetProcAddress('cuStreamGetCaptureInfo', &__cuStreamGetCaptureInfo_v2, 11030, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamGetCaptureInfo_v3' in found_functions}}
-            global __cuStreamGetCaptureInfo_v3
-            cuGetProcAddress('cuStreamGetCaptureInfo', &__cuStreamGetCaptureInfo_v3, 12030, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamUpdateCaptureDependencies' in found_functions}}
-            global __cuStreamUpdateCaptureDependencies
-            cuGetProcAddress('cuStreamUpdateCaptureDependencies', &__cuStreamUpdateCaptureDependencies, 11030, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}
-            global __cuStreamUpdateCaptureDependencies_v2
-            cuGetProcAddress('cuStreamUpdateCaptureDependencies', &__cuStreamUpdateCaptureDependencies_v2, 12030, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamAttachMemAsync' in found_functions}}
-            global __cuStreamAttachMemAsync
-            cuGetProcAddress('cuStreamAttachMemAsync', &__cuStreamAttachMemAsync, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamQuery' in found_functions}}
-            global __cuStreamQuery
-            cuGetProcAddress('cuStreamQuery', &__cuStreamQuery, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamSynchronize' in found_functions}}
-            global __cuStreamSynchronize
-            cuGetProcAddress('cuStreamSynchronize', &__cuStreamSynchronize, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamCopyAttributes' in found_functions}}
-            global __cuStreamCopyAttributes
-            cuGetProcAddress('cuStreamCopyAttributes', &__cuStreamCopyAttributes, 11000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamGetAttribute' in found_functions}}
-            global __cuStreamGetAttribute
-            cuGetProcAddress('cuStreamGetAttribute', &__cuStreamGetAttribute, 11000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamSetAttribute' in found_functions}}
-            global __cuStreamSetAttribute
-            cuGetProcAddress('cuStreamSetAttribute', &__cuStreamSetAttribute, 11000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuEventRecord' in found_functions}}
-            global __cuEventRecord
-            cuGetProcAddress('cuEventRecord', &__cuEventRecord, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuEventRecordWithFlags' in found_functions}}
-            global __cuEventRecordWithFlags
-            cuGetProcAddress('cuEventRecordWithFlags', &__cuEventRecordWithFlags, 11010, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuSignalExternalSemaphoresAsync' in found_functions}}
-            global __cuSignalExternalSemaphoresAsync
-            cuGetProcAddress('cuSignalExternalSemaphoresAsync', &__cuSignalExternalSemaphoresAsync, 10000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuWaitExternalSemaphoresAsync' in found_functions}}
-            global __cuWaitExternalSemaphoresAsync
-            cuGetProcAddress('cuWaitExternalSemaphoresAsync', &__cuWaitExternalSemaphoresAsync, 10000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamWaitValue32_v2' in found_functions}}
-            global __cuStreamWaitValue32_v2
-            cuGetProcAddress('cuStreamWaitValue32', &__cuStreamWaitValue32_v2, 11070, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamWaitValue64_v2' in found_functions}}
-            global __cuStreamWaitValue64_v2
-            cuGetProcAddress('cuStreamWaitValue64', &__cuStreamWaitValue64_v2, 11070, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamWriteValue32_v2' in found_functions}}
-            global __cuStreamWriteValue32_v2
-            cuGetProcAddress('cuStreamWriteValue32', &__cuStreamWriteValue32_v2, 11070, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamWriteValue64_v2' in found_functions}}
-            global __cuStreamWriteValue64_v2
-            cuGetProcAddress('cuStreamWriteValue64', &__cuStreamWriteValue64_v2, 11070, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuStreamBatchMemOp_v2' in found_functions}}
-            global __cuStreamBatchMemOp_v2
-            cuGetProcAddress('cuStreamBatchMemOp', &__cuStreamBatchMemOp_v2, 11070, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuLaunchKernel' in found_functions}}
-            global __cuLaunchKernel
-            cuGetProcAddress('cuLaunchKernel', &__cuLaunchKernel, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuLaunchKernelEx' in found_functions}}
-            global __cuLaunchKernelEx
-            cuGetProcAddress('cuLaunchKernelEx', &__cuLaunchKernelEx, 11060, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuLaunchCooperativeKernel' in found_functions}}
-            global __cuLaunchCooperativeKernel
-            cuGetProcAddress('cuLaunchCooperativeKernel', &__cuLaunchCooperativeKernel, 9000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuLaunchHostFunc' in found_functions}}
-            global __cuLaunchHostFunc
-            cuGetProcAddress('cuLaunchHostFunc', &__cuLaunchHostFunc, 10000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuGraphInstantiateWithParams' in found_functions}}
-            global __cuGraphInstantiateWithParams
-            cuGetProcAddress('cuGraphInstantiateWithParams', &__cuGraphInstantiateWithParams, 12000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuGraphUpload' in found_functions}}
-            global __cuGraphUpload
-            cuGetProcAddress('cuGraphUpload', &__cuGraphUpload, 11010, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuGraphLaunch' in found_functions}}
-            global __cuGraphLaunch
-            cuGetProcAddress('cuGraphLaunch', &__cuGraphLaunch, 10000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuGraphicsMapResources' in found_functions}}
-            global __cuGraphicsMapResources
-            cuGetProcAddress('cuGraphicsMapResources', &__cuGraphicsMapResources, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-            {{if 'cuGraphicsUnmapResources' in found_functions}}
-            global __cuGraphicsUnmapResources
-            cuGetProcAddress('cuGraphicsUnmapResources', &__cuGraphicsUnmapResources, 7000, CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM, NULL)
-            {{endif}}
-        else:
-            # Else get the regular version
-            pass
-            {{if 'cuMemcpy' in found_functions}}
-            global __cuMemcpy
-            cuGetProcAddress('cuMemcpy', &__cuMemcpy, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyPeer' in found_functions}}
-            global __cuMemcpyPeer
-            cuGetProcAddress('cuMemcpyPeer', &__cuMemcpyPeer, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyHtoD_v2' in found_functions}}
-            global __cuMemcpyHtoD_v2
-            cuGetProcAddress('cuMemcpyHtoD', &__cuMemcpyHtoD_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyDtoH_v2' in found_functions}}
-            global __cuMemcpyDtoH_v2
-            cuGetProcAddress('cuMemcpyDtoH', &__cuMemcpyDtoH_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyDtoD_v2' in found_functions}}
-            global __cuMemcpyDtoD_v2
-            cuGetProcAddress('cuMemcpyDtoD', &__cuMemcpyDtoD_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyDtoA_v2' in found_functions}}
-            global __cuMemcpyDtoA_v2
-            cuGetProcAddress('cuMemcpyDtoA', &__cuMemcpyDtoA_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyAtoD_v2' in found_functions}}
-            global __cuMemcpyAtoD_v2
-            cuGetProcAddress('cuMemcpyAtoD', &__cuMemcpyAtoD_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyHtoA_v2' in found_functions}}
-            global __cuMemcpyHtoA_v2
-            cuGetProcAddress('cuMemcpyHtoA', &__cuMemcpyHtoA_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyAtoH_v2' in found_functions}}
-            global __cuMemcpyAtoH_v2
-            cuGetProcAddress('cuMemcpyAtoH', &__cuMemcpyAtoH_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyAtoA_v2' in found_functions}}
-            global __cuMemcpyAtoA_v2
-            cuGetProcAddress('cuMemcpyAtoA', &__cuMemcpyAtoA_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpy2D_v2' in found_functions}}
-            global __cuMemcpy2D_v2
-            cuGetProcAddress('cuMemcpy2D', &__cuMemcpy2D_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpy2DUnaligned_v2' in found_functions}}
-            global __cuMemcpy2DUnaligned_v2
-            cuGetProcAddress('cuMemcpy2DUnaligned', &__cuMemcpy2DUnaligned_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpy3D_v2' in found_functions}}
-            global __cuMemcpy3D_v2
-            cuGetProcAddress('cuMemcpy3D', &__cuMemcpy3D_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpy3DPeer' in found_functions}}
-            global __cuMemcpy3DPeer
-            cuGetProcAddress('cuMemcpy3DPeer', &__cuMemcpy3DPeer, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyAsync' in found_functions}}
-            global __cuMemcpyAsync
-            cuGetProcAddress('cuMemcpyAsync', &__cuMemcpyAsync, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyPeerAsync' in found_functions}}
-            global __cuMemcpyPeerAsync
-            cuGetProcAddress('cuMemcpyPeerAsync', &__cuMemcpyPeerAsync, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyHtoDAsync_v2' in found_functions}}
-            global __cuMemcpyHtoDAsync_v2
-            cuGetProcAddress('cuMemcpyHtoDAsync', &__cuMemcpyHtoDAsync_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyDtoHAsync_v2' in found_functions}}
-            global __cuMemcpyDtoHAsync_v2
-            cuGetProcAddress('cuMemcpyDtoHAsync', &__cuMemcpyDtoHAsync_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyDtoDAsync_v2' in found_functions}}
-            global __cuMemcpyDtoDAsync_v2
-            cuGetProcAddress('cuMemcpyDtoDAsync', &__cuMemcpyDtoDAsync_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyHtoAAsync_v2' in found_functions}}
-            global __cuMemcpyHtoAAsync_v2
-            cuGetProcAddress('cuMemcpyHtoAAsync', &__cuMemcpyHtoAAsync_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpyAtoHAsync_v2' in found_functions}}
-            global __cuMemcpyAtoHAsync_v2
-            cuGetProcAddress('cuMemcpyAtoHAsync', &__cuMemcpyAtoHAsync_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpy2DAsync_v2' in found_functions}}
-            global __cuMemcpy2DAsync_v2
-            cuGetProcAddress('cuMemcpy2DAsync', &__cuMemcpy2DAsync_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpy3DAsync_v2' in found_functions}}
-            global __cuMemcpy3DAsync_v2
-            cuGetProcAddress('cuMemcpy3DAsync', &__cuMemcpy3DAsync_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemcpy3DPeerAsync' in found_functions}}
-            global __cuMemcpy3DPeerAsync
-            cuGetProcAddress('cuMemcpy3DPeerAsync', &__cuMemcpy3DPeerAsync, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD8_v2' in found_functions}}
-            global __cuMemsetD8_v2
-            cuGetProcAddress('cuMemsetD8', &__cuMemsetD8_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD16_v2' in found_functions}}
-            global __cuMemsetD16_v2
-            cuGetProcAddress('cuMemsetD16', &__cuMemsetD16_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD32_v2' in found_functions}}
-            global __cuMemsetD32_v2
-            cuGetProcAddress('cuMemsetD32', &__cuMemsetD32_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D8_v2' in found_functions}}
-            global __cuMemsetD2D8_v2
-            cuGetProcAddress('cuMemsetD2D8', &__cuMemsetD2D8_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D16_v2' in found_functions}}
-            global __cuMemsetD2D16_v2
-            cuGetProcAddress('cuMemsetD2D16', &__cuMemsetD2D16_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D32_v2' in found_functions}}
-            global __cuMemsetD2D32_v2
-            cuGetProcAddress('cuMemsetD2D32', &__cuMemsetD2D32_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD8Async' in found_functions}}
-            global __cuMemsetD8Async
-            cuGetProcAddress('cuMemsetD8Async', &__cuMemsetD8Async, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD16Async' in found_functions}}
-            global __cuMemsetD16Async
-            cuGetProcAddress('cuMemsetD16Async', &__cuMemsetD16Async, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD32Async' in found_functions}}
-            global __cuMemsetD32Async
-            cuGetProcAddress('cuMemsetD32Async', &__cuMemsetD32Async, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D8Async' in found_functions}}
-            global __cuMemsetD2D8Async
-            cuGetProcAddress('cuMemsetD2D8Async', &__cuMemsetD2D8Async, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D16Async' in found_functions}}
-            global __cuMemsetD2D16Async
-            cuGetProcAddress('cuMemsetD2D16Async', &__cuMemsetD2D16Async, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemsetD2D32Async' in found_functions}}
-            global __cuMemsetD2D32Async
-            cuGetProcAddress('cuMemsetD2D32Async', &__cuMemsetD2D32Async, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemMapArrayAsync' in found_functions}}
-            global __cuMemMapArrayAsync
-            cuGetProcAddress('cuMemMapArrayAsync', &__cuMemMapArrayAsync, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemFreeAsync' in found_functions}}
-            global __cuMemFreeAsync
-            cuGetProcAddress('cuMemFreeAsync', &__cuMemFreeAsync, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemAllocAsync' in found_functions}}
-            global __cuMemAllocAsync
-            cuGetProcAddress('cuMemAllocAsync', &__cuMemAllocAsync, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemAllocFromPoolAsync' in found_functions}}
-            global __cuMemAllocFromPoolAsync
-            cuGetProcAddress('cuMemAllocFromPoolAsync', &__cuMemAllocFromPoolAsync, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemPrefetchAsync' in found_functions}}
-            global __cuMemPrefetchAsync
-            cuGetProcAddress('cuMemPrefetchAsync', &__cuMemPrefetchAsync, 8000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuMemPrefetchAsync_v2' in found_functions}}
-            global __cuMemPrefetchAsync_v2
-            cuGetProcAddress('cuMemPrefetchAsync', &__cuMemPrefetchAsync_v2, 12020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamGetPriority' in found_functions}}
-            global __cuStreamGetPriority
-            cuGetProcAddress('cuStreamGetPriority', &__cuStreamGetPriority, 5050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamGetFlags' in found_functions}}
-            global __cuStreamGetFlags
-            cuGetProcAddress('cuStreamGetFlags', &__cuStreamGetFlags, 5050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamGetId' in found_functions}}
-            global __cuStreamGetId
-            cuGetProcAddress('cuStreamGetId', &__cuStreamGetId, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamGetCtx' in found_functions}}
-            global __cuStreamGetCtx
-            cuGetProcAddress('cuStreamGetCtx', &__cuStreamGetCtx, 9020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamGetCtx_v2' in found_functions}}
-            global __cuStreamGetCtx_v2
-            cuGetProcAddress('cuStreamGetCtx', &__cuStreamGetCtx_v2, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamWaitEvent' in found_functions}}
-            global __cuStreamWaitEvent
-            cuGetProcAddress('cuStreamWaitEvent', &__cuStreamWaitEvent, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamAddCallback' in found_functions}}
-            global __cuStreamAddCallback
-            cuGetProcAddress('cuStreamAddCallback', &__cuStreamAddCallback, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamBeginCapture_v2' in found_functions}}
-            global __cuStreamBeginCapture_v2
-            cuGetProcAddress('cuStreamBeginCapture', &__cuStreamBeginCapture_v2, 10010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamBeginCaptureToGraph' in found_functions}}
-            global __cuStreamBeginCaptureToGraph
-            cuGetProcAddress('cuStreamBeginCaptureToGraph', &__cuStreamBeginCaptureToGraph, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamEndCapture' in found_functions}}
-            global __cuStreamEndCapture
-            cuGetProcAddress('cuStreamEndCapture', &__cuStreamEndCapture, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamIsCapturing' in found_functions}}
-            global __cuStreamIsCapturing
-            cuGetProcAddress('cuStreamIsCapturing', &__cuStreamIsCapturing, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamGetCaptureInfo_v2' in found_functions}}
-            global __cuStreamGetCaptureInfo_v2
-            cuGetProcAddress('cuStreamGetCaptureInfo', &__cuStreamGetCaptureInfo_v2, 11030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamGetCaptureInfo_v3' in found_functions}}
-            global __cuStreamGetCaptureInfo_v3
-            cuGetProcAddress('cuStreamGetCaptureInfo', &__cuStreamGetCaptureInfo_v3, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamUpdateCaptureDependencies' in found_functions}}
-            global __cuStreamUpdateCaptureDependencies
-            cuGetProcAddress('cuStreamUpdateCaptureDependencies', &__cuStreamUpdateCaptureDependencies, 11030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}
-            global __cuStreamUpdateCaptureDependencies_v2
-            cuGetProcAddress('cuStreamUpdateCaptureDependencies', &__cuStreamUpdateCaptureDependencies_v2, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamAttachMemAsync' in found_functions}}
-            global __cuStreamAttachMemAsync
-            cuGetProcAddress('cuStreamAttachMemAsync', &__cuStreamAttachMemAsync, 6000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamQuery' in found_functions}}
-            global __cuStreamQuery
-            cuGetProcAddress('cuStreamQuery', &__cuStreamQuery, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamSynchronize' in found_functions}}
-            global __cuStreamSynchronize
-            cuGetProcAddress('cuStreamSynchronize', &__cuStreamSynchronize, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamCopyAttributes' in found_functions}}
-            global __cuStreamCopyAttributes
-            cuGetProcAddress('cuStreamCopyAttributes', &__cuStreamCopyAttributes, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamGetAttribute' in found_functions}}
-            global __cuStreamGetAttribute
-            cuGetProcAddress('cuStreamGetAttribute', &__cuStreamGetAttribute, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamSetAttribute' in found_functions}}
-            global __cuStreamSetAttribute
-            cuGetProcAddress('cuStreamSetAttribute', &__cuStreamSetAttribute, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuEventRecord' in found_functions}}
-            global __cuEventRecord
-            cuGetProcAddress('cuEventRecord', &__cuEventRecord, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuEventRecordWithFlags' in found_functions}}
-            global __cuEventRecordWithFlags
-            cuGetProcAddress('cuEventRecordWithFlags', &__cuEventRecordWithFlags, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuSignalExternalSemaphoresAsync' in found_functions}}
-            global __cuSignalExternalSemaphoresAsync
-            cuGetProcAddress('cuSignalExternalSemaphoresAsync', &__cuSignalExternalSemaphoresAsync, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuWaitExternalSemaphoresAsync' in found_functions}}
-            global __cuWaitExternalSemaphoresAsync
-            cuGetProcAddress('cuWaitExternalSemaphoresAsync', &__cuWaitExternalSemaphoresAsync, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamWaitValue32_v2' in found_functions}}
-            global __cuStreamWaitValue32_v2
-            cuGetProcAddress('cuStreamWaitValue32', &__cuStreamWaitValue32_v2, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamWaitValue64_v2' in found_functions}}
-            global __cuStreamWaitValue64_v2
-            cuGetProcAddress('cuStreamWaitValue64', &__cuStreamWaitValue64_v2, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamWriteValue32_v2' in found_functions}}
-            global __cuStreamWriteValue32_v2
-            cuGetProcAddress('cuStreamWriteValue32', &__cuStreamWriteValue32_v2, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamWriteValue64_v2' in found_functions}}
-            global __cuStreamWriteValue64_v2
-            cuGetProcAddress('cuStreamWriteValue64', &__cuStreamWriteValue64_v2, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuStreamBatchMemOp_v2' in found_functions}}
-            global __cuStreamBatchMemOp_v2
-            cuGetProcAddress('cuStreamBatchMemOp', &__cuStreamBatchMemOp_v2, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuLaunchKernel' in found_functions}}
-            global __cuLaunchKernel
-            cuGetProcAddress('cuLaunchKernel', &__cuLaunchKernel, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuLaunchKernelEx' in found_functions}}
-            global __cuLaunchKernelEx
-            cuGetProcAddress('cuLaunchKernelEx', &__cuLaunchKernelEx, 11060, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuLaunchCooperativeKernel' in found_functions}}
-            global __cuLaunchCooperativeKernel
-            cuGetProcAddress('cuLaunchCooperativeKernel', &__cuLaunchCooperativeKernel, 9000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuLaunchHostFunc' in found_functions}}
-            global __cuLaunchHostFunc
-            cuGetProcAddress('cuLaunchHostFunc', &__cuLaunchHostFunc, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuGraphInstantiateWithParams' in found_functions}}
-            global __cuGraphInstantiateWithParams
-            cuGetProcAddress('cuGraphInstantiateWithParams', &__cuGraphInstantiateWithParams, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuGraphUpload' in found_functions}}
-            global __cuGraphUpload
-            cuGetProcAddress('cuGraphUpload', &__cuGraphUpload, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuGraphLaunch' in found_functions}}
-            global __cuGraphLaunch
-            cuGetProcAddress('cuGraphLaunch', &__cuGraphLaunch, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuGraphicsMapResources' in found_functions}}
-            global __cuGraphicsMapResources
-            cuGetProcAddress('cuGraphicsMapResources', &__cuGraphicsMapResources, 3000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-            {{if 'cuGraphicsUnmapResources' in found_functions}}
-            global __cuGraphicsUnmapResources
-            cuGetProcAddress('cuGraphicsUnmapResources', &__cuGraphicsUnmapResources, 3000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-            {{endif}}
-        # Get remaining functions
-        {{if 'cuGetErrorString' in found_functions}}
-        global __cuGetErrorString
-        cuGetProcAddress('cuGetErrorString', &__cuGetErrorString, 6000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGetErrorName' in found_functions}}
-        global __cuGetErrorName
-        cuGetProcAddress('cuGetErrorName', &__cuGetErrorName, 6000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuInit' in found_functions}}
-        global __cuInit
-        cuGetProcAddress('cuInit', &__cuInit, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDriverGetVersion' in found_functions}}
-        global __cuDriverGetVersion
-        cuGetProcAddress('cuDriverGetVersion', &__cuDriverGetVersion, 2020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGet' in found_functions}}
-        global __cuDeviceGet
-        cuGetProcAddress('cuDeviceGet', &__cuDeviceGet, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetCount' in found_functions}}
-        global __cuDeviceGetCount
-        cuGetProcAddress('cuDeviceGetCount', &__cuDeviceGetCount, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetName' in found_functions}}
-        global __cuDeviceGetName
-        cuGetProcAddress('cuDeviceGetName', &__cuDeviceGetName, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetUuid' in found_functions}}
-        global __cuDeviceGetUuid
-        cuGetProcAddress('cuDeviceGetUuid', &__cuDeviceGetUuid, 9020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetUuid_v2' in found_functions}}
-        global __cuDeviceGetUuid_v2
-        cuGetProcAddress('cuDeviceGetUuid', &__cuDeviceGetUuid_v2, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetLuid' in found_functions}}
-        global __cuDeviceGetLuid
-        cuGetProcAddress('cuDeviceGetLuid', &__cuDeviceGetLuid, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceTotalMem_v2' in found_functions}}
-        global __cuDeviceTotalMem_v2
-        cuGetProcAddress('cuDeviceTotalMem', &__cuDeviceTotalMem_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-        global __cuDeviceGetTexture1DLinearMaxWidth
-        cuGetProcAddress('cuDeviceGetTexture1DLinearMaxWidth', &__cuDeviceGetTexture1DLinearMaxWidth, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetAttribute' in found_functions}}
-        global __cuDeviceGetAttribute
-        cuGetProcAddress('cuDeviceGetAttribute', &__cuDeviceGetAttribute, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetNvSciSyncAttributes' in found_functions}}
-        global __cuDeviceGetNvSciSyncAttributes
-        cuGetProcAddress('cuDeviceGetNvSciSyncAttributes', &__cuDeviceGetNvSciSyncAttributes, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceSetMemPool' in found_functions}}
-        global __cuDeviceSetMemPool
-        cuGetProcAddress('cuDeviceSetMemPool', &__cuDeviceSetMemPool, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetMemPool' in found_functions}}
-        global __cuDeviceGetMemPool
-        cuGetProcAddress('cuDeviceGetMemPool', &__cuDeviceGetMemPool, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetDefaultMemPool' in found_functions}}
-        global __cuDeviceGetDefaultMemPool
-        cuGetProcAddress('cuDeviceGetDefaultMemPool', &__cuDeviceGetDefaultMemPool, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetExecAffinitySupport' in found_functions}}
-        global __cuDeviceGetExecAffinitySupport
-        cuGetProcAddress('cuDeviceGetExecAffinitySupport', &__cuDeviceGetExecAffinitySupport, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFlushGPUDirectRDMAWrites' in found_functions}}
-        global __cuFlushGPUDirectRDMAWrites
-        cuGetProcAddress('cuFlushGPUDirectRDMAWrites', &__cuFlushGPUDirectRDMAWrites, 11030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetProperties' in found_functions}}
-        global __cuDeviceGetProperties
-        cuGetProcAddress('cuDeviceGetProperties', &__cuDeviceGetProperties, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceComputeCapability' in found_functions}}
-        global __cuDeviceComputeCapability
-        cuGetProcAddress('cuDeviceComputeCapability', &__cuDeviceComputeCapability, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDevicePrimaryCtxRetain' in found_functions}}
-        global __cuDevicePrimaryCtxRetain
-        cuGetProcAddress('cuDevicePrimaryCtxRetain', &__cuDevicePrimaryCtxRetain, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDevicePrimaryCtxRelease_v2' in found_functions}}
-        global __cuDevicePrimaryCtxRelease_v2
-        cuGetProcAddress('cuDevicePrimaryCtxRelease', &__cuDevicePrimaryCtxRelease_v2, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDevicePrimaryCtxSetFlags_v2' in found_functions}}
-        global __cuDevicePrimaryCtxSetFlags_v2
-        cuGetProcAddress('cuDevicePrimaryCtxSetFlags', &__cuDevicePrimaryCtxSetFlags_v2, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDevicePrimaryCtxGetState' in found_functions}}
-        global __cuDevicePrimaryCtxGetState
-        cuGetProcAddress('cuDevicePrimaryCtxGetState', &__cuDevicePrimaryCtxGetState, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDevicePrimaryCtxReset_v2' in found_functions}}
-        global __cuDevicePrimaryCtxReset_v2
-        cuGetProcAddress('cuDevicePrimaryCtxReset', &__cuDevicePrimaryCtxReset_v2, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxCreate_v2' in found_functions}}
-        global __cuCtxCreate_v2
-        cuGetProcAddress('cuCtxCreate', &__cuCtxCreate_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxCreate_v3' in found_functions}}
-        global __cuCtxCreate_v3
-        cuGetProcAddress('cuCtxCreate', &__cuCtxCreate_v3, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxCreate_v4' in found_functions}}
-        global __cuCtxCreate_v4
-        cuGetProcAddress('cuCtxCreate', &__cuCtxCreate_v4, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxDestroy_v2' in found_functions}}
-        global __cuCtxDestroy_v2
-        cuGetProcAddress('cuCtxDestroy', &__cuCtxDestroy_v2, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxPushCurrent_v2' in found_functions}}
-        global __cuCtxPushCurrent_v2
-        cuGetProcAddress('cuCtxPushCurrent', &__cuCtxPushCurrent_v2, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxPopCurrent_v2' in found_functions}}
-        global __cuCtxPopCurrent_v2
-        cuGetProcAddress('cuCtxPopCurrent', &__cuCtxPopCurrent_v2, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxSetCurrent' in found_functions}}
-        global __cuCtxSetCurrent
-        cuGetProcAddress('cuCtxSetCurrent', &__cuCtxSetCurrent, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxGetCurrent' in found_functions}}
-        global __cuCtxGetCurrent
-        cuGetProcAddress('cuCtxGetCurrent', &__cuCtxGetCurrent, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxGetDevice' in found_functions}}
-        global __cuCtxGetDevice
-        cuGetProcAddress('cuCtxGetDevice', &__cuCtxGetDevice, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxGetFlags' in found_functions}}
-        global __cuCtxGetFlags
-        cuGetProcAddress('cuCtxGetFlags', &__cuCtxGetFlags, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxSetFlags' in found_functions}}
-        global __cuCtxSetFlags
-        cuGetProcAddress('cuCtxSetFlags', &__cuCtxSetFlags, 12010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxGetId' in found_functions}}
-        global __cuCtxGetId
-        cuGetProcAddress('cuCtxGetId', &__cuCtxGetId, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxSynchronize' in found_functions}}
-        global __cuCtxSynchronize
-        cuGetProcAddress('cuCtxSynchronize', &__cuCtxSynchronize, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxSetLimit' in found_functions}}
-        global __cuCtxSetLimit
-        cuGetProcAddress('cuCtxSetLimit', &__cuCtxSetLimit, 3010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxGetLimit' in found_functions}}
-        global __cuCtxGetLimit
-        cuGetProcAddress('cuCtxGetLimit', &__cuCtxGetLimit, 3010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxGetCacheConfig' in found_functions}}
-        global __cuCtxGetCacheConfig
-        cuGetProcAddress('cuCtxGetCacheConfig', &__cuCtxGetCacheConfig, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxSetCacheConfig' in found_functions}}
-        global __cuCtxSetCacheConfig
-        cuGetProcAddress('cuCtxSetCacheConfig', &__cuCtxSetCacheConfig, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxGetApiVersion' in found_functions}}
-        global __cuCtxGetApiVersion
-        cuGetProcAddress('cuCtxGetApiVersion', &__cuCtxGetApiVersion, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxGetStreamPriorityRange' in found_functions}}
-        global __cuCtxGetStreamPriorityRange
-        cuGetProcAddress('cuCtxGetStreamPriorityRange', &__cuCtxGetStreamPriorityRange, 5050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxResetPersistingL2Cache' in found_functions}}
-        global __cuCtxResetPersistingL2Cache
-        cuGetProcAddress('cuCtxResetPersistingL2Cache', &__cuCtxResetPersistingL2Cache, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxGetExecAffinity' in found_functions}}
-        global __cuCtxGetExecAffinity
-        cuGetProcAddress('cuCtxGetExecAffinity', &__cuCtxGetExecAffinity, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxRecordEvent' in found_functions}}
-        global __cuCtxRecordEvent
-        cuGetProcAddress('cuCtxRecordEvent', &__cuCtxRecordEvent, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxWaitEvent' in found_functions}}
-        global __cuCtxWaitEvent
-        cuGetProcAddress('cuCtxWaitEvent', &__cuCtxWaitEvent, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxAttach' in found_functions}}
-        global __cuCtxAttach
-        cuGetProcAddress('cuCtxAttach', &__cuCtxAttach, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxDetach' in found_functions}}
-        global __cuCtxDetach
-        cuGetProcAddress('cuCtxDetach', &__cuCtxDetach, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxGetSharedMemConfig' in found_functions}}
-        global __cuCtxGetSharedMemConfig
-        cuGetProcAddress('cuCtxGetSharedMemConfig', &__cuCtxGetSharedMemConfig, 4020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxSetSharedMemConfig' in found_functions}}
-        global __cuCtxSetSharedMemConfig
-        cuGetProcAddress('cuCtxSetSharedMemConfig', &__cuCtxSetSharedMemConfig, 4020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleLoad' in found_functions}}
-        global __cuModuleLoad
-        cuGetProcAddress('cuModuleLoad', &__cuModuleLoad, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleLoadData' in found_functions}}
-        global __cuModuleLoadData
-        cuGetProcAddress('cuModuleLoadData', &__cuModuleLoadData, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleLoadDataEx' in found_functions}}
-        global __cuModuleLoadDataEx
-        cuGetProcAddress('cuModuleLoadDataEx', &__cuModuleLoadDataEx, 2010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleLoadFatBinary' in found_functions}}
-        global __cuModuleLoadFatBinary
-        cuGetProcAddress('cuModuleLoadFatBinary', &__cuModuleLoadFatBinary, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleUnload' in found_functions}}
-        global __cuModuleUnload
-        cuGetProcAddress('cuModuleUnload', &__cuModuleUnload, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleGetLoadingMode' in found_functions}}
-        global __cuModuleGetLoadingMode
-        cuGetProcAddress('cuModuleGetLoadingMode', &__cuModuleGetLoadingMode, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleGetFunction' in found_functions}}
-        global __cuModuleGetFunction
-        cuGetProcAddress('cuModuleGetFunction', &__cuModuleGetFunction, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleGetFunctionCount' in found_functions}}
-        global __cuModuleGetFunctionCount
-        cuGetProcAddress('cuModuleGetFunctionCount', &__cuModuleGetFunctionCount, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleEnumerateFunctions' in found_functions}}
-        global __cuModuleEnumerateFunctions
-        cuGetProcAddress('cuModuleEnumerateFunctions', &__cuModuleEnumerateFunctions, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleGetGlobal_v2' in found_functions}}
-        global __cuModuleGetGlobal_v2
-        cuGetProcAddress('cuModuleGetGlobal', &__cuModuleGetGlobal_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLinkCreate_v2' in found_functions}}
-        global __cuLinkCreate_v2
-        cuGetProcAddress('cuLinkCreate', &__cuLinkCreate_v2, 6050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLinkAddData_v2' in found_functions}}
-        global __cuLinkAddData_v2
-        cuGetProcAddress('cuLinkAddData', &__cuLinkAddData_v2, 6050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLinkAddFile_v2' in found_functions}}
-        global __cuLinkAddFile_v2
-        cuGetProcAddress('cuLinkAddFile', &__cuLinkAddFile_v2, 6050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLinkComplete' in found_functions}}
-        global __cuLinkComplete
-        cuGetProcAddress('cuLinkComplete', &__cuLinkComplete, 5050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLinkDestroy' in found_functions}}
-        global __cuLinkDestroy
-        cuGetProcAddress('cuLinkDestroy', &__cuLinkDestroy, 5050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleGetTexRef' in found_functions}}
-        global __cuModuleGetTexRef
-        cuGetProcAddress('cuModuleGetTexRef', &__cuModuleGetTexRef, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuModuleGetSurfRef' in found_functions}}
-        global __cuModuleGetSurfRef
-        cuGetProcAddress('cuModuleGetSurfRef', &__cuModuleGetSurfRef, 3000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLibraryLoadData' in found_functions}}
-        global __cuLibraryLoadData
-        cuGetProcAddress('cuLibraryLoadData', &__cuLibraryLoadData, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLibraryLoadFromFile' in found_functions}}
-        global __cuLibraryLoadFromFile
-        cuGetProcAddress('cuLibraryLoadFromFile', &__cuLibraryLoadFromFile, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLibraryUnload' in found_functions}}
-        global __cuLibraryUnload
-        cuGetProcAddress('cuLibraryUnload', &__cuLibraryUnload, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLibraryGetKernel' in found_functions}}
-        global __cuLibraryGetKernel
-        cuGetProcAddress('cuLibraryGetKernel', &__cuLibraryGetKernel, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLibraryGetKernelCount' in found_functions}}
-        global __cuLibraryGetKernelCount
-        cuGetProcAddress('cuLibraryGetKernelCount', &__cuLibraryGetKernelCount, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLibraryEnumerateKernels' in found_functions}}
-        global __cuLibraryEnumerateKernels
-        cuGetProcAddress('cuLibraryEnumerateKernels', &__cuLibraryEnumerateKernels, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLibraryGetModule' in found_functions}}
-        global __cuLibraryGetModule
-        cuGetProcAddress('cuLibraryGetModule', &__cuLibraryGetModule, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuKernelGetFunction' in found_functions}}
-        global __cuKernelGetFunction
-        cuGetProcAddress('cuKernelGetFunction', &__cuKernelGetFunction, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuKernelGetLibrary' in found_functions}}
-        global __cuKernelGetLibrary
-        cuGetProcAddress('cuKernelGetLibrary', &__cuKernelGetLibrary, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLibraryGetGlobal' in found_functions}}
-        global __cuLibraryGetGlobal
-        cuGetProcAddress('cuLibraryGetGlobal', &__cuLibraryGetGlobal, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLibraryGetManaged' in found_functions}}
-        global __cuLibraryGetManaged
-        cuGetProcAddress('cuLibraryGetManaged', &__cuLibraryGetManaged, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLibraryGetUnifiedFunction' in found_functions}}
-        global __cuLibraryGetUnifiedFunction
-        cuGetProcAddress('cuLibraryGetUnifiedFunction', &__cuLibraryGetUnifiedFunction, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuKernelGetAttribute' in found_functions}}
-        global __cuKernelGetAttribute
-        cuGetProcAddress('cuKernelGetAttribute', &__cuKernelGetAttribute, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuKernelSetAttribute' in found_functions}}
-        global __cuKernelSetAttribute
-        cuGetProcAddress('cuKernelSetAttribute', &__cuKernelSetAttribute, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuKernelSetCacheConfig' in found_functions}}
-        global __cuKernelSetCacheConfig
-        cuGetProcAddress('cuKernelSetCacheConfig', &__cuKernelSetCacheConfig, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuKernelGetName' in found_functions}}
-        global __cuKernelGetName
-        cuGetProcAddress('cuKernelGetName', &__cuKernelGetName, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuKernelGetParamInfo' in found_functions}}
-        global __cuKernelGetParamInfo
-        cuGetProcAddress('cuKernelGetParamInfo', &__cuKernelGetParamInfo, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemGetInfo_v2' in found_functions}}
-        global __cuMemGetInfo_v2
-        cuGetProcAddress('cuMemGetInfo', &__cuMemGetInfo_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemAlloc_v2' in found_functions}}
-        global __cuMemAlloc_v2
-        cuGetProcAddress('cuMemAlloc', &__cuMemAlloc_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemAllocPitch_v2' in found_functions}}
-        global __cuMemAllocPitch_v2
-        cuGetProcAddress('cuMemAllocPitch', &__cuMemAllocPitch_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemFree_v2' in found_functions}}
-        global __cuMemFree_v2
-        cuGetProcAddress('cuMemFree', &__cuMemFree_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemGetAddressRange_v2' in found_functions}}
-        global __cuMemGetAddressRange_v2
-        cuGetProcAddress('cuMemGetAddressRange', &__cuMemGetAddressRange_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemAllocHost_v2' in found_functions}}
-        global __cuMemAllocHost_v2
-        cuGetProcAddress('cuMemAllocHost', &__cuMemAllocHost_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemFreeHost' in found_functions}}
-        global __cuMemFreeHost
-        cuGetProcAddress('cuMemFreeHost', &__cuMemFreeHost, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemHostAlloc' in found_functions}}
-        global __cuMemHostAlloc
-        cuGetProcAddress('cuMemHostAlloc', &__cuMemHostAlloc, 2020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemHostGetDevicePointer_v2' in found_functions}}
-        global __cuMemHostGetDevicePointer_v2
-        cuGetProcAddress('cuMemHostGetDevicePointer', &__cuMemHostGetDevicePointer_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemHostGetFlags' in found_functions}}
-        global __cuMemHostGetFlags
-        cuGetProcAddress('cuMemHostGetFlags', &__cuMemHostGetFlags, 2030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemAllocManaged' in found_functions}}
-        global __cuMemAllocManaged
-        cuGetProcAddress('cuMemAllocManaged', &__cuMemAllocManaged, 6000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceRegisterAsyncNotification' in found_functions}}
-        global __cuDeviceRegisterAsyncNotification
-        cuGetProcAddress('cuDeviceRegisterAsyncNotification', &__cuDeviceRegisterAsyncNotification, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceUnregisterAsyncNotification' in found_functions}}
-        global __cuDeviceUnregisterAsyncNotification
-        cuGetProcAddress('cuDeviceUnregisterAsyncNotification', &__cuDeviceUnregisterAsyncNotification, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetByPCIBusId' in found_functions}}
-        global __cuDeviceGetByPCIBusId
-        cuGetProcAddress('cuDeviceGetByPCIBusId', &__cuDeviceGetByPCIBusId, 4010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetPCIBusId' in found_functions}}
-        global __cuDeviceGetPCIBusId
-        cuGetProcAddress('cuDeviceGetPCIBusId', &__cuDeviceGetPCIBusId, 4010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuIpcGetEventHandle' in found_functions}}
-        global __cuIpcGetEventHandle
-        cuGetProcAddress('cuIpcGetEventHandle', &__cuIpcGetEventHandle, 4010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuIpcOpenEventHandle' in found_functions}}
-        global __cuIpcOpenEventHandle
-        cuGetProcAddress('cuIpcOpenEventHandle', &__cuIpcOpenEventHandle, 4010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuIpcGetMemHandle' in found_functions}}
-        global __cuIpcGetMemHandle
-        cuGetProcAddress('cuIpcGetMemHandle', &__cuIpcGetMemHandle, 4010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuIpcOpenMemHandle_v2' in found_functions}}
-        global __cuIpcOpenMemHandle_v2
-        cuGetProcAddress('cuIpcOpenMemHandle', &__cuIpcOpenMemHandle_v2, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuIpcCloseMemHandle' in found_functions}}
-        global __cuIpcCloseMemHandle
-        cuGetProcAddress('cuIpcCloseMemHandle', &__cuIpcCloseMemHandle, 4010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemHostRegister_v2' in found_functions}}
-        global __cuMemHostRegister_v2
-        cuGetProcAddress('cuMemHostRegister', &__cuMemHostRegister_v2, 6050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemHostUnregister' in found_functions}}
-        global __cuMemHostUnregister
-        cuGetProcAddress('cuMemHostUnregister', &__cuMemHostUnregister, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuArrayCreate_v2' in found_functions}}
-        global __cuArrayCreate_v2
-        cuGetProcAddress('cuArrayCreate', &__cuArrayCreate_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuArrayGetDescriptor_v2' in found_functions}}
-        global __cuArrayGetDescriptor_v2
-        cuGetProcAddress('cuArrayGetDescriptor', &__cuArrayGetDescriptor_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuArrayGetSparseProperties' in found_functions}}
-        global __cuArrayGetSparseProperties
-        cuGetProcAddress('cuArrayGetSparseProperties', &__cuArrayGetSparseProperties, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMipmappedArrayGetSparseProperties' in found_functions}}
-        global __cuMipmappedArrayGetSparseProperties
-        cuGetProcAddress('cuMipmappedArrayGetSparseProperties', &__cuMipmappedArrayGetSparseProperties, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuArrayGetMemoryRequirements' in found_functions}}
-        global __cuArrayGetMemoryRequirements
-        cuGetProcAddress('cuArrayGetMemoryRequirements', &__cuArrayGetMemoryRequirements, 11060, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMipmappedArrayGetMemoryRequirements' in found_functions}}
-        global __cuMipmappedArrayGetMemoryRequirements
-        cuGetProcAddress('cuMipmappedArrayGetMemoryRequirements', &__cuMipmappedArrayGetMemoryRequirements, 11060, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuArrayGetPlane' in found_functions}}
-        global __cuArrayGetPlane
-        cuGetProcAddress('cuArrayGetPlane', &__cuArrayGetPlane, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuArrayDestroy' in found_functions}}
-        global __cuArrayDestroy
-        cuGetProcAddress('cuArrayDestroy', &__cuArrayDestroy, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuArray3DCreate_v2' in found_functions}}
-        global __cuArray3DCreate_v2
-        cuGetProcAddress('cuArray3DCreate', &__cuArray3DCreate_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuArray3DGetDescriptor_v2' in found_functions}}
-        global __cuArray3DGetDescriptor_v2
-        cuGetProcAddress('cuArray3DGetDescriptor', &__cuArray3DGetDescriptor_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMipmappedArrayCreate' in found_functions}}
-        global __cuMipmappedArrayCreate
-        cuGetProcAddress('cuMipmappedArrayCreate', &__cuMipmappedArrayCreate, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMipmappedArrayGetLevel' in found_functions}}
-        global __cuMipmappedArrayGetLevel
-        cuGetProcAddress('cuMipmappedArrayGetLevel', &__cuMipmappedArrayGetLevel, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMipmappedArrayDestroy' in found_functions}}
-        global __cuMipmappedArrayDestroy
-        cuGetProcAddress('cuMipmappedArrayDestroy', &__cuMipmappedArrayDestroy, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemGetHandleForAddressRange' in found_functions}}
-        global __cuMemGetHandleForAddressRange
-        cuGetProcAddress('cuMemGetHandleForAddressRange', &__cuMemGetHandleForAddressRange, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemAddressReserve' in found_functions}}
-        global __cuMemAddressReserve
-        cuGetProcAddress('cuMemAddressReserve', &__cuMemAddressReserve, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemAddressFree' in found_functions}}
-        global __cuMemAddressFree
-        cuGetProcAddress('cuMemAddressFree', &__cuMemAddressFree, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemCreate' in found_functions}}
-        global __cuMemCreate
-        cuGetProcAddress('cuMemCreate', &__cuMemCreate, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemRelease' in found_functions}}
-        global __cuMemRelease
-        cuGetProcAddress('cuMemRelease', &__cuMemRelease, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemMap' in found_functions}}
-        global __cuMemMap
-        cuGetProcAddress('cuMemMap', &__cuMemMap, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemUnmap' in found_functions}}
-        global __cuMemUnmap
-        cuGetProcAddress('cuMemUnmap', &__cuMemUnmap, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemSetAccess' in found_functions}}
-        global __cuMemSetAccess
-        cuGetProcAddress('cuMemSetAccess', &__cuMemSetAccess, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemGetAccess' in found_functions}}
-        global __cuMemGetAccess
-        cuGetProcAddress('cuMemGetAccess', &__cuMemGetAccess, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemExportToShareableHandle' in found_functions}}
-        global __cuMemExportToShareableHandle
-        cuGetProcAddress('cuMemExportToShareableHandle', &__cuMemExportToShareableHandle, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemImportFromShareableHandle' in found_functions}}
-        global __cuMemImportFromShareableHandle
-        cuGetProcAddress('cuMemImportFromShareableHandle', &__cuMemImportFromShareableHandle, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemGetAllocationGranularity' in found_functions}}
-        global __cuMemGetAllocationGranularity
-        cuGetProcAddress('cuMemGetAllocationGranularity', &__cuMemGetAllocationGranularity, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemGetAllocationPropertiesFromHandle' in found_functions}}
-        global __cuMemGetAllocationPropertiesFromHandle
-        cuGetProcAddress('cuMemGetAllocationPropertiesFromHandle', &__cuMemGetAllocationPropertiesFromHandle, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemRetainAllocationHandle' in found_functions}}
-        global __cuMemRetainAllocationHandle
-        cuGetProcAddress('cuMemRetainAllocationHandle', &__cuMemRetainAllocationHandle, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemPoolTrimTo' in found_functions}}
-        global __cuMemPoolTrimTo
-        cuGetProcAddress('cuMemPoolTrimTo', &__cuMemPoolTrimTo, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemPoolSetAttribute' in found_functions}}
-        global __cuMemPoolSetAttribute
-        cuGetProcAddress('cuMemPoolSetAttribute', &__cuMemPoolSetAttribute, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemPoolGetAttribute' in found_functions}}
-        global __cuMemPoolGetAttribute
-        cuGetProcAddress('cuMemPoolGetAttribute', &__cuMemPoolGetAttribute, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemPoolSetAccess' in found_functions}}
-        global __cuMemPoolSetAccess
-        cuGetProcAddress('cuMemPoolSetAccess', &__cuMemPoolSetAccess, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemPoolGetAccess' in found_functions}}
-        global __cuMemPoolGetAccess
-        cuGetProcAddress('cuMemPoolGetAccess', &__cuMemPoolGetAccess, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemPoolCreate' in found_functions}}
-        global __cuMemPoolCreate
-        cuGetProcAddress('cuMemPoolCreate', &__cuMemPoolCreate, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemPoolDestroy' in found_functions}}
-        global __cuMemPoolDestroy
-        cuGetProcAddress('cuMemPoolDestroy', &__cuMemPoolDestroy, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemPoolExportToShareableHandle' in found_functions}}
-        global __cuMemPoolExportToShareableHandle
-        cuGetProcAddress('cuMemPoolExportToShareableHandle', &__cuMemPoolExportToShareableHandle, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemPoolImportFromShareableHandle' in found_functions}}
-        global __cuMemPoolImportFromShareableHandle
-        cuGetProcAddress('cuMemPoolImportFromShareableHandle', &__cuMemPoolImportFromShareableHandle, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemPoolExportPointer' in found_functions}}
-        global __cuMemPoolExportPointer
-        cuGetProcAddress('cuMemPoolExportPointer', &__cuMemPoolExportPointer, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemPoolImportPointer' in found_functions}}
-        global __cuMemPoolImportPointer
-        cuGetProcAddress('cuMemPoolImportPointer', &__cuMemPoolImportPointer, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMulticastCreate' in found_functions}}
-        global __cuMulticastCreate
-        cuGetProcAddress('cuMulticastCreate', &__cuMulticastCreate, 12010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMulticastAddDevice' in found_functions}}
-        global __cuMulticastAddDevice
-        cuGetProcAddress('cuMulticastAddDevice', &__cuMulticastAddDevice, 12010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMulticastBindMem' in found_functions}}
-        global __cuMulticastBindMem
-        cuGetProcAddress('cuMulticastBindMem', &__cuMulticastBindMem, 12010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMulticastBindAddr' in found_functions}}
-        global __cuMulticastBindAddr
-        cuGetProcAddress('cuMulticastBindAddr', &__cuMulticastBindAddr, 12010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMulticastUnbind' in found_functions}}
-        global __cuMulticastUnbind
-        cuGetProcAddress('cuMulticastUnbind', &__cuMulticastUnbind, 12010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMulticastGetGranularity' in found_functions}}
-        global __cuMulticastGetGranularity
-        cuGetProcAddress('cuMulticastGetGranularity', &__cuMulticastGetGranularity, 12010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuPointerGetAttribute' in found_functions}}
-        global __cuPointerGetAttribute
-        cuGetProcAddress('cuPointerGetAttribute', &__cuPointerGetAttribute, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemAdvise' in found_functions}}
-        global __cuMemAdvise
-        cuGetProcAddress('cuMemAdvise', &__cuMemAdvise, 8000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemAdvise_v2' in found_functions}}
-        global __cuMemAdvise_v2
-        cuGetProcAddress('cuMemAdvise', &__cuMemAdvise_v2, 12020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemRangeGetAttribute' in found_functions}}
-        global __cuMemRangeGetAttribute
-        cuGetProcAddress('cuMemRangeGetAttribute', &__cuMemRangeGetAttribute, 8000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuMemRangeGetAttributes' in found_functions}}
-        global __cuMemRangeGetAttributes
-        cuGetProcAddress('cuMemRangeGetAttributes', &__cuMemRangeGetAttributes, 8000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuPointerSetAttribute' in found_functions}}
-        global __cuPointerSetAttribute
-        cuGetProcAddress('cuPointerSetAttribute', &__cuPointerSetAttribute, 6000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuPointerGetAttributes' in found_functions}}
-        global __cuPointerGetAttributes
-        cuGetProcAddress('cuPointerGetAttributes', &__cuPointerGetAttributes, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuStreamCreate' in found_functions}}
-        global __cuStreamCreate
-        cuGetProcAddress('cuStreamCreate', &__cuStreamCreate, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuStreamCreateWithPriority' in found_functions}}
-        global __cuStreamCreateWithPriority
-        cuGetProcAddress('cuStreamCreateWithPriority', &__cuStreamCreateWithPriority, 5050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuThreadExchangeStreamCaptureMode' in found_functions}}
-        global __cuThreadExchangeStreamCaptureMode
-        cuGetProcAddress('cuThreadExchangeStreamCaptureMode', &__cuThreadExchangeStreamCaptureMode, 10010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuStreamDestroy_v2' in found_functions}}
-        global __cuStreamDestroy_v2
-        cuGetProcAddress('cuStreamDestroy', &__cuStreamDestroy_v2, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuEventCreate' in found_functions}}
-        global __cuEventCreate
-        cuGetProcAddress('cuEventCreate', &__cuEventCreate, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuEventQuery' in found_functions}}
-        global __cuEventQuery
-        cuGetProcAddress('cuEventQuery', &__cuEventQuery, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuEventSynchronize' in found_functions}}
-        global __cuEventSynchronize
-        cuGetProcAddress('cuEventSynchronize', &__cuEventSynchronize, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuEventDestroy_v2' in found_functions}}
-        global __cuEventDestroy_v2
-        cuGetProcAddress('cuEventDestroy', &__cuEventDestroy_v2, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuEventElapsedTime' in found_functions}}
-        global __cuEventElapsedTime
-        cuGetProcAddress('cuEventElapsedTime', &__cuEventElapsedTime, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuImportExternalMemory' in found_functions}}
-        global __cuImportExternalMemory
-        cuGetProcAddress('cuImportExternalMemory', &__cuImportExternalMemory, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuExternalMemoryGetMappedBuffer' in found_functions}}
-        global __cuExternalMemoryGetMappedBuffer
-        cuGetProcAddress('cuExternalMemoryGetMappedBuffer', &__cuExternalMemoryGetMappedBuffer, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuExternalMemoryGetMappedMipmappedArray' in found_functions}}
-        global __cuExternalMemoryGetMappedMipmappedArray
-        cuGetProcAddress('cuExternalMemoryGetMappedMipmappedArray', &__cuExternalMemoryGetMappedMipmappedArray, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDestroyExternalMemory' in found_functions}}
-        global __cuDestroyExternalMemory
-        cuGetProcAddress('cuDestroyExternalMemory', &__cuDestroyExternalMemory, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuImportExternalSemaphore' in found_functions}}
-        global __cuImportExternalSemaphore
-        cuGetProcAddress('cuImportExternalSemaphore', &__cuImportExternalSemaphore, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDestroyExternalSemaphore' in found_functions}}
-        global __cuDestroyExternalSemaphore
-        cuGetProcAddress('cuDestroyExternalSemaphore', &__cuDestroyExternalSemaphore, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFuncGetAttribute' in found_functions}}
-        global __cuFuncGetAttribute
-        cuGetProcAddress('cuFuncGetAttribute', &__cuFuncGetAttribute, 2020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFuncSetAttribute' in found_functions}}
-        global __cuFuncSetAttribute
-        cuGetProcAddress('cuFuncSetAttribute', &__cuFuncSetAttribute, 9000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFuncSetCacheConfig' in found_functions}}
-        global __cuFuncSetCacheConfig
-        cuGetProcAddress('cuFuncSetCacheConfig', &__cuFuncSetCacheConfig, 3000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFuncGetModule' in found_functions}}
-        global __cuFuncGetModule
-        cuGetProcAddress('cuFuncGetModule', &__cuFuncGetModule, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFuncGetName' in found_functions}}
-        global __cuFuncGetName
-        cuGetProcAddress('cuFuncGetName', &__cuFuncGetName, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFuncGetParamInfo' in found_functions}}
-        global __cuFuncGetParamInfo
-        cuGetProcAddress('cuFuncGetParamInfo', &__cuFuncGetParamInfo, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFuncIsLoaded' in found_functions}}
-        global __cuFuncIsLoaded
-        cuGetProcAddress('cuFuncIsLoaded', &__cuFuncIsLoaded, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFuncLoad' in found_functions}}
-        global __cuFuncLoad
-        cuGetProcAddress('cuFuncLoad', &__cuFuncLoad, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLaunchCooperativeKernelMultiDevice' in found_functions}}
-        global __cuLaunchCooperativeKernelMultiDevice
-        cuGetProcAddress('cuLaunchCooperativeKernelMultiDevice', &__cuLaunchCooperativeKernelMultiDevice, 9000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFuncSetBlockShape' in found_functions}}
-        global __cuFuncSetBlockShape
-        cuGetProcAddress('cuFuncSetBlockShape', &__cuFuncSetBlockShape, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFuncSetSharedSize' in found_functions}}
-        global __cuFuncSetSharedSize
-        cuGetProcAddress('cuFuncSetSharedSize', &__cuFuncSetSharedSize, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuParamSetSize' in found_functions}}
-        global __cuParamSetSize
-        cuGetProcAddress('cuParamSetSize', &__cuParamSetSize, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuParamSeti' in found_functions}}
-        global __cuParamSeti
-        cuGetProcAddress('cuParamSeti', &__cuParamSeti, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuParamSetf' in found_functions}}
-        global __cuParamSetf
-        cuGetProcAddress('cuParamSetf', &__cuParamSetf, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuParamSetv' in found_functions}}
-        global __cuParamSetv
-        cuGetProcAddress('cuParamSetv', &__cuParamSetv, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLaunch' in found_functions}}
-        global __cuLaunch
-        cuGetProcAddress('cuLaunch', &__cuLaunch, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLaunchGrid' in found_functions}}
-        global __cuLaunchGrid
-        cuGetProcAddress('cuLaunchGrid', &__cuLaunchGrid, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuLaunchGridAsync' in found_functions}}
-        global __cuLaunchGridAsync
-        cuGetProcAddress('cuLaunchGridAsync', &__cuLaunchGridAsync, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuParamSetTexRef' in found_functions}}
-        global __cuParamSetTexRef
-        cuGetProcAddress('cuParamSetTexRef', &__cuParamSetTexRef, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuFuncSetSharedMemConfig' in found_functions}}
-        global __cuFuncSetSharedMemConfig
-        cuGetProcAddress('cuFuncSetSharedMemConfig', &__cuFuncSetSharedMemConfig, 4020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphCreate' in found_functions}}
-        global __cuGraphCreate
-        cuGetProcAddress('cuGraphCreate', &__cuGraphCreate, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddKernelNode_v2' in found_functions}}
-        global __cuGraphAddKernelNode_v2
-        cuGetProcAddress('cuGraphAddKernelNode', &__cuGraphAddKernelNode_v2, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphKernelNodeGetParams_v2' in found_functions}}
-        global __cuGraphKernelNodeGetParams_v2
-        cuGetProcAddress('cuGraphKernelNodeGetParams', &__cuGraphKernelNodeGetParams_v2, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphKernelNodeSetParams_v2' in found_functions}}
-        global __cuGraphKernelNodeSetParams_v2
-        cuGetProcAddress('cuGraphKernelNodeSetParams', &__cuGraphKernelNodeSetParams_v2, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddMemcpyNode' in found_functions}}
-        global __cuGraphAddMemcpyNode
-        cuGetProcAddress('cuGraphAddMemcpyNode', &__cuGraphAddMemcpyNode, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphMemcpyNodeGetParams' in found_functions}}
-        global __cuGraphMemcpyNodeGetParams
-        cuGetProcAddress('cuGraphMemcpyNodeGetParams', &__cuGraphMemcpyNodeGetParams, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphMemcpyNodeSetParams' in found_functions}}
-        global __cuGraphMemcpyNodeSetParams
-        cuGetProcAddress('cuGraphMemcpyNodeSetParams', &__cuGraphMemcpyNodeSetParams, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddMemsetNode' in found_functions}}
-        global __cuGraphAddMemsetNode
-        cuGetProcAddress('cuGraphAddMemsetNode', &__cuGraphAddMemsetNode, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphMemsetNodeGetParams' in found_functions}}
-        global __cuGraphMemsetNodeGetParams
-        cuGetProcAddress('cuGraphMemsetNodeGetParams', &__cuGraphMemsetNodeGetParams, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphMemsetNodeSetParams' in found_functions}}
-        global __cuGraphMemsetNodeSetParams
-        cuGetProcAddress('cuGraphMemsetNodeSetParams', &__cuGraphMemsetNodeSetParams, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddHostNode' in found_functions}}
-        global __cuGraphAddHostNode
-        cuGetProcAddress('cuGraphAddHostNode', &__cuGraphAddHostNode, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphHostNodeGetParams' in found_functions}}
-        global __cuGraphHostNodeGetParams
-        cuGetProcAddress('cuGraphHostNodeGetParams', &__cuGraphHostNodeGetParams, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphHostNodeSetParams' in found_functions}}
-        global __cuGraphHostNodeSetParams
-        cuGetProcAddress('cuGraphHostNodeSetParams', &__cuGraphHostNodeSetParams, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddChildGraphNode' in found_functions}}
-        global __cuGraphAddChildGraphNode
-        cuGetProcAddress('cuGraphAddChildGraphNode', &__cuGraphAddChildGraphNode, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphChildGraphNodeGetGraph' in found_functions}}
-        global __cuGraphChildGraphNodeGetGraph
-        cuGetProcAddress('cuGraphChildGraphNodeGetGraph', &__cuGraphChildGraphNodeGetGraph, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddEmptyNode' in found_functions}}
-        global __cuGraphAddEmptyNode
-        cuGetProcAddress('cuGraphAddEmptyNode', &__cuGraphAddEmptyNode, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddEventRecordNode' in found_functions}}
-        global __cuGraphAddEventRecordNode
-        cuGetProcAddress('cuGraphAddEventRecordNode', &__cuGraphAddEventRecordNode, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphEventRecordNodeGetEvent' in found_functions}}
-        global __cuGraphEventRecordNodeGetEvent
-        cuGetProcAddress('cuGraphEventRecordNodeGetEvent', &__cuGraphEventRecordNodeGetEvent, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphEventRecordNodeSetEvent' in found_functions}}
-        global __cuGraphEventRecordNodeSetEvent
-        cuGetProcAddress('cuGraphEventRecordNodeSetEvent', &__cuGraphEventRecordNodeSetEvent, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddEventWaitNode' in found_functions}}
-        global __cuGraphAddEventWaitNode
-        cuGetProcAddress('cuGraphAddEventWaitNode', &__cuGraphAddEventWaitNode, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphEventWaitNodeGetEvent' in found_functions}}
-        global __cuGraphEventWaitNodeGetEvent
-        cuGetProcAddress('cuGraphEventWaitNodeGetEvent', &__cuGraphEventWaitNodeGetEvent, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphEventWaitNodeSetEvent' in found_functions}}
-        global __cuGraphEventWaitNodeSetEvent
-        cuGetProcAddress('cuGraphEventWaitNodeSetEvent', &__cuGraphEventWaitNodeSetEvent, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddExternalSemaphoresSignalNode' in found_functions}}
-        global __cuGraphAddExternalSemaphoresSignalNode
-        cuGetProcAddress('cuGraphAddExternalSemaphoresSignalNode', &__cuGraphAddExternalSemaphoresSignalNode, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-        global __cuGraphExternalSemaphoresSignalNodeGetParams
-        cuGetProcAddress('cuGraphExternalSemaphoresSignalNodeGetParams', &__cuGraphExternalSemaphoresSignalNodeGetParams, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-        global __cuGraphExternalSemaphoresSignalNodeSetParams
-        cuGetProcAddress('cuGraphExternalSemaphoresSignalNodeSetParams', &__cuGraphExternalSemaphoresSignalNodeSetParams, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddExternalSemaphoresWaitNode' in found_functions}}
-        global __cuGraphAddExternalSemaphoresWaitNode
-        cuGetProcAddress('cuGraphAddExternalSemaphoresWaitNode', &__cuGraphAddExternalSemaphoresWaitNode, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-        global __cuGraphExternalSemaphoresWaitNodeGetParams
-        cuGetProcAddress('cuGraphExternalSemaphoresWaitNodeGetParams', &__cuGraphExternalSemaphoresWaitNodeGetParams, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-        global __cuGraphExternalSemaphoresWaitNodeSetParams
-        cuGetProcAddress('cuGraphExternalSemaphoresWaitNodeSetParams', &__cuGraphExternalSemaphoresWaitNodeSetParams, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddBatchMemOpNode' in found_functions}}
-        global __cuGraphAddBatchMemOpNode
-        cuGetProcAddress('cuGraphAddBatchMemOpNode', &__cuGraphAddBatchMemOpNode, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphBatchMemOpNodeGetParams' in found_functions}}
-        global __cuGraphBatchMemOpNodeGetParams
-        cuGetProcAddress('cuGraphBatchMemOpNodeGetParams', &__cuGraphBatchMemOpNodeGetParams, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphBatchMemOpNodeSetParams' in found_functions}}
-        global __cuGraphBatchMemOpNodeSetParams
-        cuGetProcAddress('cuGraphBatchMemOpNodeSetParams', &__cuGraphBatchMemOpNodeSetParams, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecBatchMemOpNodeSetParams' in found_functions}}
-        global __cuGraphExecBatchMemOpNodeSetParams
-        cuGetProcAddress('cuGraphExecBatchMemOpNodeSetParams', &__cuGraphExecBatchMemOpNodeSetParams, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddMemAllocNode' in found_functions}}
-        global __cuGraphAddMemAllocNode
-        cuGetProcAddress('cuGraphAddMemAllocNode', &__cuGraphAddMemAllocNode, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphMemAllocNodeGetParams' in found_functions}}
-        global __cuGraphMemAllocNodeGetParams
-        cuGetProcAddress('cuGraphMemAllocNodeGetParams', &__cuGraphMemAllocNodeGetParams, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddMemFreeNode' in found_functions}}
-        global __cuGraphAddMemFreeNode
-        cuGetProcAddress('cuGraphAddMemFreeNode', &__cuGraphAddMemFreeNode, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphMemFreeNodeGetParams' in found_functions}}
-        global __cuGraphMemFreeNodeGetParams
-        cuGetProcAddress('cuGraphMemFreeNodeGetParams', &__cuGraphMemFreeNodeGetParams, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGraphMemTrim' in found_functions}}
-        global __cuDeviceGraphMemTrim
-        cuGetProcAddress('cuDeviceGraphMemTrim', &__cuDeviceGraphMemTrim, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetGraphMemAttribute' in found_functions}}
-        global __cuDeviceGetGraphMemAttribute
-        cuGetProcAddress('cuDeviceGetGraphMemAttribute', &__cuDeviceGetGraphMemAttribute, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceSetGraphMemAttribute' in found_functions}}
-        global __cuDeviceSetGraphMemAttribute
-        cuGetProcAddress('cuDeviceSetGraphMemAttribute', &__cuDeviceSetGraphMemAttribute, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphClone' in found_functions}}
-        global __cuGraphClone
-        cuGetProcAddress('cuGraphClone', &__cuGraphClone, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphNodeFindInClone' in found_functions}}
-        global __cuGraphNodeFindInClone
-        cuGetProcAddress('cuGraphNodeFindInClone', &__cuGraphNodeFindInClone, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphNodeGetType' in found_functions}}
-        global __cuGraphNodeGetType
-        cuGetProcAddress('cuGraphNodeGetType', &__cuGraphNodeGetType, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphGetNodes' in found_functions}}
-        global __cuGraphGetNodes
-        cuGetProcAddress('cuGraphGetNodes', &__cuGraphGetNodes, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphGetRootNodes' in found_functions}}
-        global __cuGraphGetRootNodes
-        cuGetProcAddress('cuGraphGetRootNodes', &__cuGraphGetRootNodes, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphGetEdges' in found_functions}}
-        global __cuGraphGetEdges
-        cuGetProcAddress('cuGraphGetEdges', &__cuGraphGetEdges, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphGetEdges_v2' in found_functions}}
-        global __cuGraphGetEdges_v2
-        cuGetProcAddress('cuGraphGetEdges', &__cuGraphGetEdges_v2, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphNodeGetDependencies' in found_functions}}
-        global __cuGraphNodeGetDependencies
-        cuGetProcAddress('cuGraphNodeGetDependencies', &__cuGraphNodeGetDependencies, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphNodeGetDependencies_v2' in found_functions}}
-        global __cuGraphNodeGetDependencies_v2
-        cuGetProcAddress('cuGraphNodeGetDependencies', &__cuGraphNodeGetDependencies_v2, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphNodeGetDependentNodes' in found_functions}}
-        global __cuGraphNodeGetDependentNodes
-        cuGetProcAddress('cuGraphNodeGetDependentNodes', &__cuGraphNodeGetDependentNodes, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphNodeGetDependentNodes_v2' in found_functions}}
-        global __cuGraphNodeGetDependentNodes_v2
-        cuGetProcAddress('cuGraphNodeGetDependentNodes', &__cuGraphNodeGetDependentNodes_v2, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddDependencies' in found_functions}}
-        global __cuGraphAddDependencies
-        cuGetProcAddress('cuGraphAddDependencies', &__cuGraphAddDependencies, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddDependencies_v2' in found_functions}}
-        global __cuGraphAddDependencies_v2
-        cuGetProcAddress('cuGraphAddDependencies', &__cuGraphAddDependencies_v2, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphRemoveDependencies' in found_functions}}
-        global __cuGraphRemoveDependencies
-        cuGetProcAddress('cuGraphRemoveDependencies', &__cuGraphRemoveDependencies, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphRemoveDependencies_v2' in found_functions}}
-        global __cuGraphRemoveDependencies_v2
-        cuGetProcAddress('cuGraphRemoveDependencies', &__cuGraphRemoveDependencies_v2, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphDestroyNode' in found_functions}}
-        global __cuGraphDestroyNode
-        cuGetProcAddress('cuGraphDestroyNode', &__cuGraphDestroyNode, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphInstantiateWithFlags' in found_functions}}
-        global __cuGraphInstantiateWithFlags
-        cuGetProcAddress('cuGraphInstantiateWithFlags', &__cuGraphInstantiateWithFlags, 11040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecGetFlags' in found_functions}}
-        global __cuGraphExecGetFlags
-        cuGetProcAddress('cuGraphExecGetFlags', &__cuGraphExecGetFlags, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecKernelNodeSetParams_v2' in found_functions}}
-        global __cuGraphExecKernelNodeSetParams_v2
-        cuGetProcAddress('cuGraphExecKernelNodeSetParams', &__cuGraphExecKernelNodeSetParams_v2, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecMemcpyNodeSetParams' in found_functions}}
-        global __cuGraphExecMemcpyNodeSetParams
-        cuGetProcAddress('cuGraphExecMemcpyNodeSetParams', &__cuGraphExecMemcpyNodeSetParams, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecMemsetNodeSetParams' in found_functions}}
-        global __cuGraphExecMemsetNodeSetParams
-        cuGetProcAddress('cuGraphExecMemsetNodeSetParams', &__cuGraphExecMemsetNodeSetParams, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecHostNodeSetParams' in found_functions}}
-        global __cuGraphExecHostNodeSetParams
-        cuGetProcAddress('cuGraphExecHostNodeSetParams', &__cuGraphExecHostNodeSetParams, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecChildGraphNodeSetParams' in found_functions}}
-        global __cuGraphExecChildGraphNodeSetParams
-        cuGetProcAddress('cuGraphExecChildGraphNodeSetParams', &__cuGraphExecChildGraphNodeSetParams, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecEventRecordNodeSetEvent' in found_functions}}
-        global __cuGraphExecEventRecordNodeSetEvent
-        cuGetProcAddress('cuGraphExecEventRecordNodeSetEvent', &__cuGraphExecEventRecordNodeSetEvent, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecEventWaitNodeSetEvent' in found_functions}}
-        global __cuGraphExecEventWaitNodeSetEvent
-        cuGetProcAddress('cuGraphExecEventWaitNodeSetEvent', &__cuGraphExecEventWaitNodeSetEvent, 11010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-        global __cuGraphExecExternalSemaphoresSignalNodeSetParams
-        cuGetProcAddress('cuGraphExecExternalSemaphoresSignalNodeSetParams', &__cuGraphExecExternalSemaphoresSignalNodeSetParams, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-        global __cuGraphExecExternalSemaphoresWaitNodeSetParams
-        cuGetProcAddress('cuGraphExecExternalSemaphoresWaitNodeSetParams', &__cuGraphExecExternalSemaphoresWaitNodeSetParams, 11020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphNodeSetEnabled' in found_functions}}
-        global __cuGraphNodeSetEnabled
-        cuGetProcAddress('cuGraphNodeSetEnabled', &__cuGraphNodeSetEnabled, 11060, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphNodeGetEnabled' in found_functions}}
-        global __cuGraphNodeGetEnabled
-        cuGetProcAddress('cuGraphNodeGetEnabled', &__cuGraphNodeGetEnabled, 11060, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecDestroy' in found_functions}}
-        global __cuGraphExecDestroy
-        cuGetProcAddress('cuGraphExecDestroy', &__cuGraphExecDestroy, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphDestroy' in found_functions}}
-        global __cuGraphDestroy
-        cuGetProcAddress('cuGraphDestroy', &__cuGraphDestroy, 10000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecUpdate_v2' in found_functions}}
-        global __cuGraphExecUpdate_v2
-        cuGetProcAddress('cuGraphExecUpdate', &__cuGraphExecUpdate_v2, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphKernelNodeCopyAttributes' in found_functions}}
-        global __cuGraphKernelNodeCopyAttributes
-        cuGetProcAddress('cuGraphKernelNodeCopyAttributes', &__cuGraphKernelNodeCopyAttributes, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphKernelNodeGetAttribute' in found_functions}}
-        global __cuGraphKernelNodeGetAttribute
-        cuGetProcAddress('cuGraphKernelNodeGetAttribute', &__cuGraphKernelNodeGetAttribute, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphKernelNodeSetAttribute' in found_functions}}
-        global __cuGraphKernelNodeSetAttribute
-        cuGetProcAddress('cuGraphKernelNodeSetAttribute', &__cuGraphKernelNodeSetAttribute, 11000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphDebugDotPrint' in found_functions}}
-        global __cuGraphDebugDotPrint
-        cuGetProcAddress('cuGraphDebugDotPrint', &__cuGraphDebugDotPrint, 11030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuUserObjectCreate' in found_functions}}
-        global __cuUserObjectCreate
-        cuGetProcAddress('cuUserObjectCreate', &__cuUserObjectCreate, 11030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuUserObjectRetain' in found_functions}}
-        global __cuUserObjectRetain
-        cuGetProcAddress('cuUserObjectRetain', &__cuUserObjectRetain, 11030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuUserObjectRelease' in found_functions}}
-        global __cuUserObjectRelease
-        cuGetProcAddress('cuUserObjectRelease', &__cuUserObjectRelease, 11030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphRetainUserObject' in found_functions}}
-        global __cuGraphRetainUserObject
-        cuGetProcAddress('cuGraphRetainUserObject', &__cuGraphRetainUserObject, 11030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphReleaseUserObject' in found_functions}}
-        global __cuGraphReleaseUserObject
-        cuGetProcAddress('cuGraphReleaseUserObject', &__cuGraphReleaseUserObject, 11030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddNode' in found_functions}}
-        global __cuGraphAddNode
-        cuGetProcAddress('cuGraphAddNode', &__cuGraphAddNode, 12020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphAddNode_v2' in found_functions}}
-        global __cuGraphAddNode_v2
-        cuGetProcAddress('cuGraphAddNode', &__cuGraphAddNode_v2, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphNodeSetParams' in found_functions}}
-        global __cuGraphNodeSetParams
-        cuGetProcAddress('cuGraphNodeSetParams', &__cuGraphNodeSetParams, 12020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphExecNodeSetParams' in found_functions}}
-        global __cuGraphExecNodeSetParams
-        cuGetProcAddress('cuGraphExecNodeSetParams', &__cuGraphExecNodeSetParams, 12020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphConditionalHandleCreate' in found_functions}}
-        global __cuGraphConditionalHandleCreate
-        cuGetProcAddress('cuGraphConditionalHandleCreate', &__cuGraphConditionalHandleCreate, 12030, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-        global __cuOccupancyMaxActiveBlocksPerMultiprocessor
-        cuGetProcAddress('cuOccupancyMaxActiveBlocksPerMultiprocessor', &__cuOccupancyMaxActiveBlocksPerMultiprocessor, 6050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-        global __cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
-        cuGetProcAddress('cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags', &__cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuOccupancyMaxPotentialBlockSize' in found_functions}}
-        global __cuOccupancyMaxPotentialBlockSize
-        cuGetProcAddress('cuOccupancyMaxPotentialBlockSize', &__cuOccupancyMaxPotentialBlockSize, 6050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuOccupancyMaxPotentialBlockSizeWithFlags' in found_functions}}
-        global __cuOccupancyMaxPotentialBlockSizeWithFlags
-        cuGetProcAddress('cuOccupancyMaxPotentialBlockSizeWithFlags', &__cuOccupancyMaxPotentialBlockSizeWithFlags, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-        global __cuOccupancyAvailableDynamicSMemPerBlock
-        cuGetProcAddress('cuOccupancyAvailableDynamicSMemPerBlock', &__cuOccupancyAvailableDynamicSMemPerBlock, 10020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuOccupancyMaxPotentialClusterSize' in found_functions}}
-        global __cuOccupancyMaxPotentialClusterSize
-        cuGetProcAddress('cuOccupancyMaxPotentialClusterSize', &__cuOccupancyMaxPotentialClusterSize, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuOccupancyMaxActiveClusters' in found_functions}}
-        global __cuOccupancyMaxActiveClusters
-        cuGetProcAddress('cuOccupancyMaxActiveClusters', &__cuOccupancyMaxActiveClusters, 11070, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetArray' in found_functions}}
-        global __cuTexRefSetArray
-        cuGetProcAddress('cuTexRefSetArray', &__cuTexRefSetArray, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetMipmappedArray' in found_functions}}
-        global __cuTexRefSetMipmappedArray
-        cuGetProcAddress('cuTexRefSetMipmappedArray', &__cuTexRefSetMipmappedArray, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetAddress_v2' in found_functions}}
-        global __cuTexRefSetAddress_v2
-        cuGetProcAddress('cuTexRefSetAddress', &__cuTexRefSetAddress_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetAddress2D_v3' in found_functions}}
-        global __cuTexRefSetAddress2D_v3
-        cuGetProcAddress('cuTexRefSetAddress2D', &__cuTexRefSetAddress2D_v3, 4010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetFormat' in found_functions}}
-        global __cuTexRefSetFormat
-        cuGetProcAddress('cuTexRefSetFormat', &__cuTexRefSetFormat, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetAddressMode' in found_functions}}
-        global __cuTexRefSetAddressMode
-        cuGetProcAddress('cuTexRefSetAddressMode', &__cuTexRefSetAddressMode, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetFilterMode' in found_functions}}
-        global __cuTexRefSetFilterMode
-        cuGetProcAddress('cuTexRefSetFilterMode', &__cuTexRefSetFilterMode, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetMipmapFilterMode' in found_functions}}
-        global __cuTexRefSetMipmapFilterMode
-        cuGetProcAddress('cuTexRefSetMipmapFilterMode', &__cuTexRefSetMipmapFilterMode, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetMipmapLevelBias' in found_functions}}
-        global __cuTexRefSetMipmapLevelBias
-        cuGetProcAddress('cuTexRefSetMipmapLevelBias', &__cuTexRefSetMipmapLevelBias, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetMipmapLevelClamp' in found_functions}}
-        global __cuTexRefSetMipmapLevelClamp
-        cuGetProcAddress('cuTexRefSetMipmapLevelClamp', &__cuTexRefSetMipmapLevelClamp, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetMaxAnisotropy' in found_functions}}
-        global __cuTexRefSetMaxAnisotropy
-        cuGetProcAddress('cuTexRefSetMaxAnisotropy', &__cuTexRefSetMaxAnisotropy, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetBorderColor' in found_functions}}
-        global __cuTexRefSetBorderColor
-        cuGetProcAddress('cuTexRefSetBorderColor', &__cuTexRefSetBorderColor, 8000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefSetFlags' in found_functions}}
-        global __cuTexRefSetFlags
-        cuGetProcAddress('cuTexRefSetFlags', &__cuTexRefSetFlags, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetAddress_v2' in found_functions}}
-        global __cuTexRefGetAddress_v2
-        cuGetProcAddress('cuTexRefGetAddress', &__cuTexRefGetAddress_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetArray' in found_functions}}
-        global __cuTexRefGetArray
-        cuGetProcAddress('cuTexRefGetArray', &__cuTexRefGetArray, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetMipmappedArray' in found_functions}}
-        global __cuTexRefGetMipmappedArray
-        cuGetProcAddress('cuTexRefGetMipmappedArray', &__cuTexRefGetMipmappedArray, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetAddressMode' in found_functions}}
-        global __cuTexRefGetAddressMode
-        cuGetProcAddress('cuTexRefGetAddressMode', &__cuTexRefGetAddressMode, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetFilterMode' in found_functions}}
-        global __cuTexRefGetFilterMode
-        cuGetProcAddress('cuTexRefGetFilterMode', &__cuTexRefGetFilterMode, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetFormat' in found_functions}}
-        global __cuTexRefGetFormat
-        cuGetProcAddress('cuTexRefGetFormat', &__cuTexRefGetFormat, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetMipmapFilterMode' in found_functions}}
-        global __cuTexRefGetMipmapFilterMode
-        cuGetProcAddress('cuTexRefGetMipmapFilterMode', &__cuTexRefGetMipmapFilterMode, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetMipmapLevelBias' in found_functions}}
-        global __cuTexRefGetMipmapLevelBias
-        cuGetProcAddress('cuTexRefGetMipmapLevelBias', &__cuTexRefGetMipmapLevelBias, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetMipmapLevelClamp' in found_functions}}
-        global __cuTexRefGetMipmapLevelClamp
-        cuGetProcAddress('cuTexRefGetMipmapLevelClamp', &__cuTexRefGetMipmapLevelClamp, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetMaxAnisotropy' in found_functions}}
-        global __cuTexRefGetMaxAnisotropy
-        cuGetProcAddress('cuTexRefGetMaxAnisotropy', &__cuTexRefGetMaxAnisotropy, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetBorderColor' in found_functions}}
-        global __cuTexRefGetBorderColor
-        cuGetProcAddress('cuTexRefGetBorderColor', &__cuTexRefGetBorderColor, 8000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefGetFlags' in found_functions}}
-        global __cuTexRefGetFlags
-        cuGetProcAddress('cuTexRefGetFlags', &__cuTexRefGetFlags, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefCreate' in found_functions}}
-        global __cuTexRefCreate
-        cuGetProcAddress('cuTexRefCreate', &__cuTexRefCreate, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexRefDestroy' in found_functions}}
-        global __cuTexRefDestroy
-        cuGetProcAddress('cuTexRefDestroy', &__cuTexRefDestroy, 2000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuSurfRefSetArray' in found_functions}}
-        global __cuSurfRefSetArray
-        cuGetProcAddress('cuSurfRefSetArray', &__cuSurfRefSetArray, 3000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuSurfRefGetArray' in found_functions}}
-        global __cuSurfRefGetArray
-        cuGetProcAddress('cuSurfRefGetArray', &__cuSurfRefGetArray, 3000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexObjectCreate' in found_functions}}
-        global __cuTexObjectCreate
-        cuGetProcAddress('cuTexObjectCreate', &__cuTexObjectCreate, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexObjectDestroy' in found_functions}}
-        global __cuTexObjectDestroy
-        cuGetProcAddress('cuTexObjectDestroy', &__cuTexObjectDestroy, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexObjectGetResourceDesc' in found_functions}}
-        global __cuTexObjectGetResourceDesc
-        cuGetProcAddress('cuTexObjectGetResourceDesc', &__cuTexObjectGetResourceDesc, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexObjectGetTextureDesc' in found_functions}}
-        global __cuTexObjectGetTextureDesc
-        cuGetProcAddress('cuTexObjectGetTextureDesc', &__cuTexObjectGetTextureDesc, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTexObjectGetResourceViewDesc' in found_functions}}
-        global __cuTexObjectGetResourceViewDesc
-        cuGetProcAddress('cuTexObjectGetResourceViewDesc', &__cuTexObjectGetResourceViewDesc, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuSurfObjectCreate' in found_functions}}
-        global __cuSurfObjectCreate
-        cuGetProcAddress('cuSurfObjectCreate', &__cuSurfObjectCreate, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuSurfObjectDestroy' in found_functions}}
-        global __cuSurfObjectDestroy
-        cuGetProcAddress('cuSurfObjectDestroy', &__cuSurfObjectDestroy, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuSurfObjectGetResourceDesc' in found_functions}}
-        global __cuSurfObjectGetResourceDesc
-        cuGetProcAddress('cuSurfObjectGetResourceDesc', &__cuSurfObjectGetResourceDesc, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTensorMapEncodeTiled' in found_functions}}
-        global __cuTensorMapEncodeTiled
-        cuGetProcAddress('cuTensorMapEncodeTiled', &__cuTensorMapEncodeTiled, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTensorMapEncodeIm2col' in found_functions}}
-        global __cuTensorMapEncodeIm2col
-        cuGetProcAddress('cuTensorMapEncodeIm2col', &__cuTensorMapEncodeIm2col, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuTensorMapReplaceAddress' in found_functions}}
-        global __cuTensorMapReplaceAddress
-        cuGetProcAddress('cuTensorMapReplaceAddress', &__cuTensorMapReplaceAddress, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceCanAccessPeer' in found_functions}}
-        global __cuDeviceCanAccessPeer
-        cuGetProcAddress('cuDeviceCanAccessPeer', &__cuDeviceCanAccessPeer, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxEnablePeerAccess' in found_functions}}
-        global __cuCtxEnablePeerAccess
-        cuGetProcAddress('cuCtxEnablePeerAccess', &__cuCtxEnablePeerAccess, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxDisablePeerAccess' in found_functions}}
-        global __cuCtxDisablePeerAccess
-        cuGetProcAddress('cuCtxDisablePeerAccess', &__cuCtxDisablePeerAccess, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetP2PAttribute' in found_functions}}
-        global __cuDeviceGetP2PAttribute
-        cuGetProcAddress('cuDeviceGetP2PAttribute', &__cuDeviceGetP2PAttribute, 8000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphicsUnregisterResource' in found_functions}}
-        global __cuGraphicsUnregisterResource
-        cuGetProcAddress('cuGraphicsUnregisterResource', &__cuGraphicsUnregisterResource, 3000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphicsSubResourceGetMappedArray' in found_functions}}
-        global __cuGraphicsSubResourceGetMappedArray
-        cuGetProcAddress('cuGraphicsSubResourceGetMappedArray', &__cuGraphicsSubResourceGetMappedArray, 3000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-        global __cuGraphicsResourceGetMappedMipmappedArray
-        cuGetProcAddress('cuGraphicsResourceGetMappedMipmappedArray', &__cuGraphicsResourceGetMappedMipmappedArray, 5000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphicsResourceGetMappedPointer_v2' in found_functions}}
-        global __cuGraphicsResourceGetMappedPointer_v2
-        cuGetProcAddress('cuGraphicsResourceGetMappedPointer', &__cuGraphicsResourceGetMappedPointer_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGraphicsResourceSetMapFlags_v2' in found_functions}}
-        global __cuGraphicsResourceSetMapFlags_v2
-        cuGetProcAddress('cuGraphicsResourceSetMapFlags', &__cuGraphicsResourceSetMapFlags_v2, 6050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGetProcAddress_v2' in found_functions}}
-        global __cuGetProcAddress_v2
-        cuGetProcAddress('cuGetProcAddress', &__cuGetProcAddress_v2, 12000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCoredumpGetAttribute' in found_functions}}
-        global __cuCoredumpGetAttribute
-        cuGetProcAddress('cuCoredumpGetAttribute', &__cuCoredumpGetAttribute, 12010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCoredumpGetAttributeGlobal' in found_functions}}
-        global __cuCoredumpGetAttributeGlobal
-        cuGetProcAddress('cuCoredumpGetAttributeGlobal', &__cuCoredumpGetAttributeGlobal, 12010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCoredumpSetAttribute' in found_functions}}
-        global __cuCoredumpSetAttribute
-        cuGetProcAddress('cuCoredumpSetAttribute', &__cuCoredumpSetAttribute, 12010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCoredumpSetAttributeGlobal' in found_functions}}
-        global __cuCoredumpSetAttributeGlobal
-        cuGetProcAddress('cuCoredumpSetAttributeGlobal', &__cuCoredumpSetAttributeGlobal, 12010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGetExportTable' in found_functions}}
-        global __cuGetExportTable
-        cuGetProcAddress('cuGetExportTable', &__cuGetExportTable, 3000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGreenCtxCreate' in found_functions}}
-        global __cuGreenCtxCreate
-        cuGetProcAddress('cuGreenCtxCreate', &__cuGreenCtxCreate, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGreenCtxDestroy' in found_functions}}
-        global __cuGreenCtxDestroy
-        cuGetProcAddress('cuGreenCtxDestroy', &__cuGreenCtxDestroy, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxFromGreenCtx' in found_functions}}
-        global __cuCtxFromGreenCtx
-        cuGetProcAddress('cuCtxFromGreenCtx', &__cuCtxFromGreenCtx, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDeviceGetDevResource' in found_functions}}
-        global __cuDeviceGetDevResource
-        cuGetProcAddress('cuDeviceGetDevResource', &__cuDeviceGetDevResource, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuCtxGetDevResource' in found_functions}}
-        global __cuCtxGetDevResource
-        cuGetProcAddress('cuCtxGetDevResource', &__cuCtxGetDevResource, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGreenCtxGetDevResource' in found_functions}}
-        global __cuGreenCtxGetDevResource
-        cuGetProcAddress('cuGreenCtxGetDevResource', &__cuGreenCtxGetDevResource, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDevSmResourceSplitByCount' in found_functions}}
-        global __cuDevSmResourceSplitByCount
-        cuGetProcAddress('cuDevSmResourceSplitByCount', &__cuDevSmResourceSplitByCount, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuDevResourceGenerateDesc' in found_functions}}
-        global __cuDevResourceGenerateDesc
-        cuGetProcAddress('cuDevResourceGenerateDesc', &__cuDevResourceGenerateDesc, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGreenCtxRecordEvent' in found_functions}}
-        global __cuGreenCtxRecordEvent
-        cuGetProcAddress('cuGreenCtxRecordEvent', &__cuGreenCtxRecordEvent, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGreenCtxWaitEvent' in found_functions}}
-        global __cuGreenCtxWaitEvent
-        cuGetProcAddress('cuGreenCtxWaitEvent', &__cuGreenCtxWaitEvent, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuStreamGetGreenCtx' in found_functions}}
-        global __cuStreamGetGreenCtx
-        cuGetProcAddress('cuStreamGetGreenCtx', &__cuStreamGetGreenCtx, 12040, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuGreenCtxStreamCreate' in found_functions}}
-        global __cuGreenCtxStreamCreate
-        cuGetProcAddress('cuGreenCtxStreamCreate', &__cuGreenCtxStreamCreate, 12050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuProfilerStart' in found_functions}}
-        global __cuProfilerStart
-        cuGetProcAddress('cuProfilerStart', &__cuProfilerStart, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if 'cuProfilerStop' in found_functions}}
-        global __cuProfilerStop
-        cuGetProcAddress('cuProfilerStop', &__cuProfilerStop, 4000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuGraphicsEGLRegisterImage
-        cuGetProcAddress('cuGraphicsEGLRegisterImage', &__cuGraphicsEGLRegisterImage, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuEGLStreamConsumerConnect
-        cuGetProcAddress('cuEGLStreamConsumerConnect', &__cuEGLStreamConsumerConnect, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuEGLStreamConsumerConnectWithFlags
-        cuGetProcAddress('cuEGLStreamConsumerConnectWithFlags', &__cuEGLStreamConsumerConnectWithFlags, 8000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuEGLStreamConsumerDisconnect
-        cuGetProcAddress('cuEGLStreamConsumerDisconnect', &__cuEGLStreamConsumerDisconnect, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuEGLStreamConsumerAcquireFrame
-        cuGetProcAddress('cuEGLStreamConsumerAcquireFrame', &__cuEGLStreamConsumerAcquireFrame, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuEGLStreamConsumerReleaseFrame
-        cuGetProcAddress('cuEGLStreamConsumerReleaseFrame', &__cuEGLStreamConsumerReleaseFrame, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuEGLStreamProducerConnect
-        cuGetProcAddress('cuEGLStreamProducerConnect', &__cuEGLStreamProducerConnect, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuEGLStreamProducerDisconnect
-        cuGetProcAddress('cuEGLStreamProducerDisconnect', &__cuEGLStreamProducerDisconnect, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuEGLStreamProducerPresentFrame
-        cuGetProcAddress('cuEGLStreamProducerPresentFrame', &__cuEGLStreamProducerPresentFrame, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuEGLStreamProducerReturnFrame
-        cuGetProcAddress('cuEGLStreamProducerReturnFrame', &__cuEGLStreamProducerReturnFrame, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuGraphicsResourceGetMappedEglFrame
-        cuGetProcAddress('cuGraphicsResourceGetMappedEglFrame', &__cuGraphicsResourceGetMappedEglFrame, 7000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuEventCreateFromEGLSync
-        cuGetProcAddress('cuEventCreateFromEGLSync', &__cuEventCreateFromEGLSync, 9000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuGraphicsGLRegisterBuffer
-        cuGetProcAddress('cuGraphicsGLRegisterBuffer', &__cuGraphicsGLRegisterBuffer, 3000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuGraphicsGLRegisterImage
-        cuGetProcAddress('cuGraphicsGLRegisterImage', &__cuGraphicsGLRegisterImage, 3000, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuGLGetDevices_v2
-        cuGetProcAddress('cuGLGetDevices', &__cuGLGetDevices_v2, 6050, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuVDPAUGetDevice
-        cuGetProcAddress('cuVDPAUGetDevice', &__cuVDPAUGetDevice, 3010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuVDPAUCtxCreate_v2
-        cuGetProcAddress('cuVDPAUCtxCreate', &__cuVDPAUCtxCreate_v2, 3020, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuGraphicsVDPAURegisterVideoSurface
-        cuGetProcAddress('cuGraphicsVDPAURegisterVideoSurface', &__cuGraphicsVDPAURegisterVideoSurface, 3010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        {{if True}}
-        global __cuGraphicsVDPAURegisterOutputSurface
-        cuGetProcAddress('cuGraphicsVDPAURegisterOutputSurface', &__cuGraphicsVDPAURegisterOutputSurface, 3010, CU_GET_PROC_ADDRESS_DEFAULT, NULL)
-        {{endif}}
-        return 0
-
-    {{if 'Windows' == platform.system()}}
-    # Load using win32GetAddr
-    with gil:
-        if usePTDS:
-            # Get all PTDS version of functions
-            pass
-            {{if 'cuMemcpy' in found_functions}}
-            try:
-                global __cuMemcpy
-                __cuMemcpy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyPeer' in found_functions}}
-            try:
-                global __cuMemcpyPeer
-                __cuMemcpyPeer = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyPeer_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyHtoD_v2' in found_functions}}
-            try:
-                global __cuMemcpyHtoD_v2
-                __cuMemcpyHtoD_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyHtoD_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyDtoH_v2' in found_functions}}
-            try:
-                global __cuMemcpyDtoH_v2
-                __cuMemcpyDtoH_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyDtoH_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyDtoD_v2' in found_functions}}
-            try:
-                global __cuMemcpyDtoD_v2
-                __cuMemcpyDtoD_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyDtoD_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyDtoA_v2' in found_functions}}
-            try:
-                global __cuMemcpyDtoA_v2
-                __cuMemcpyDtoA_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyDtoA_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyAtoD_v2' in found_functions}}
-            try:
-                global __cuMemcpyAtoD_v2
-                __cuMemcpyAtoD_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyAtoD_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyHtoA_v2' in found_functions}}
-            try:
-                global __cuMemcpyHtoA_v2
-                __cuMemcpyHtoA_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyHtoA_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyAtoH_v2' in found_functions}}
-            try:
-                global __cuMemcpyAtoH_v2
-                __cuMemcpyAtoH_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyAtoH_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyAtoA_v2' in found_functions}}
-            try:
-                global __cuMemcpyAtoA_v2
-                __cuMemcpyAtoA_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyAtoA_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy2D_v2' in found_functions}}
-            try:
-                global __cuMemcpy2D_v2
-                __cuMemcpy2D_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy2D_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy2DUnaligned_v2' in found_functions}}
-            try:
-                global __cuMemcpy2DUnaligned_v2
-                __cuMemcpy2DUnaligned_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy2DUnaligned_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy3D_v2' in found_functions}}
-            try:
-                global __cuMemcpy3D_v2
-                __cuMemcpy3D_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy3D_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy3DPeer' in found_functions}}
-            try:
-                global __cuMemcpy3DPeer
-                __cuMemcpy3DPeer = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy3DPeer_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyAsync' in found_functions}}
-            try:
-                global __cuMemcpyAsync
-                __cuMemcpyAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyAsync_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyPeerAsync' in found_functions}}
-            try:
-                global __cuMemcpyPeerAsync
-                __cuMemcpyPeerAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyPeerAsync_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyHtoDAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpyHtoDAsync_v2
-                __cuMemcpyHtoDAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyHtoDAsync_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyDtoHAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpyDtoHAsync_v2
-                __cuMemcpyDtoHAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyDtoHAsync_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyDtoDAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpyDtoDAsync_v2
-                __cuMemcpyDtoDAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyDtoDAsync_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyHtoAAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpyHtoAAsync_v2
-                __cuMemcpyHtoAAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyHtoAAsync_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyAtoHAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpyAtoHAsync_v2
-                __cuMemcpyAtoHAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyAtoHAsync_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy2DAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpy2DAsync_v2
-                __cuMemcpy2DAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy2DAsync_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy3DAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpy3DAsync_v2
-                __cuMemcpy3DAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy3DAsync_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy3DPeerAsync' in found_functions}}
-            try:
-                global __cuMemcpy3DPeerAsync
-                __cuMemcpy3DPeerAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy3DPeerAsync_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD8_v2' in found_functions}}
-            try:
-                global __cuMemsetD8_v2
-                __cuMemsetD8_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD8_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD16_v2' in found_functions}}
-            try:
-                global __cuMemsetD16_v2
-                __cuMemsetD16_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD16_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD32_v2' in found_functions}}
-            try:
-                global __cuMemsetD32_v2
-                __cuMemsetD32_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD32_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D8_v2' in found_functions}}
-            try:
-                global __cuMemsetD2D8_v2
-                __cuMemsetD2D8_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D8_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D16_v2' in found_functions}}
-            try:
-                global __cuMemsetD2D16_v2
-                __cuMemsetD2D16_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D16_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D32_v2' in found_functions}}
-            try:
-                global __cuMemsetD2D32_v2
-                __cuMemsetD2D32_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D32_v2_ptds')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD8Async' in found_functions}}
-            try:
-                global __cuMemsetD8Async
-                __cuMemsetD8Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD8Async_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD16Async' in found_functions}}
-            try:
-                global __cuMemsetD16Async
-                __cuMemsetD16Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD16Async_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD32Async' in found_functions}}
-            try:
-                global __cuMemsetD32Async
-                __cuMemsetD32Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD32Async_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D8Async' in found_functions}}
-            try:
-                global __cuMemsetD2D8Async
-                __cuMemsetD2D8Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D8Async_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D16Async' in found_functions}}
-            try:
-                global __cuMemsetD2D16Async
-                __cuMemsetD2D16Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D16Async_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D32Async' in found_functions}}
-            try:
-                global __cuMemsetD2D32Async
-                __cuMemsetD2D32Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D32Async_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemMapArrayAsync' in found_functions}}
-            try:
-                global __cuMemMapArrayAsync
-                __cuMemMapArrayAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemMapArrayAsync_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemFreeAsync' in found_functions}}
-            try:
-                global __cuMemFreeAsync
-                __cuMemFreeAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemFreeAsync_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemAllocAsync' in found_functions}}
-            try:
-                global __cuMemAllocAsync
-                __cuMemAllocAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAllocAsync_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemAllocFromPoolAsync' in found_functions}}
-            try:
-                global __cuMemAllocFromPoolAsync
-                __cuMemAllocFromPoolAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAllocFromPoolAsync_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemPrefetchAsync' in found_functions}}
-            try:
-                global __cuMemPrefetchAsync
-                __cuMemPrefetchAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPrefetchAsync_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemPrefetchAsync_v2' in found_functions}}
-            try:
-                global __cuMemPrefetchAsync_v2
-                __cuMemPrefetchAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPrefetchAsync_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetPriority' in found_functions}}
-            try:
-                global __cuStreamGetPriority
-                __cuStreamGetPriority = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetPriority_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetFlags' in found_functions}}
-            try:
-                global __cuStreamGetFlags
-                __cuStreamGetFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetFlags_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetId' in found_functions}}
-            try:
-                global __cuStreamGetId
-                __cuStreamGetId = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetId_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetCtx' in found_functions}}
-            try:
-                global __cuStreamGetCtx
-                __cuStreamGetCtx = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetCtx_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetCtx_v2' in found_functions}}
-            try:
-                global __cuStreamGetCtx_v2
-                __cuStreamGetCtx_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetCtx_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamWaitEvent' in found_functions}}
-            try:
-                global __cuStreamWaitEvent
-                __cuStreamWaitEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamWaitEvent_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamAddCallback' in found_functions}}
-            try:
-                global __cuStreamAddCallback
-                __cuStreamAddCallback = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamAddCallback_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamBeginCapture_v2' in found_functions}}
-            try:
-                global __cuStreamBeginCapture_v2
-                __cuStreamBeginCapture_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamBeginCapture_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamBeginCaptureToGraph' in found_functions}}
-            try:
-                global __cuStreamBeginCaptureToGraph
-                __cuStreamBeginCaptureToGraph = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamBeginCaptureToGraph_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamEndCapture' in found_functions}}
-            try:
-                global __cuStreamEndCapture
-                __cuStreamEndCapture = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamEndCapture_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamIsCapturing' in found_functions}}
-            try:
-                global __cuStreamIsCapturing
-                __cuStreamIsCapturing = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamIsCapturing_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetCaptureInfo_v2' in found_functions}}
-            try:
-                global __cuStreamGetCaptureInfo_v2
-                __cuStreamGetCaptureInfo_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetCaptureInfo_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetCaptureInfo_v3' in found_functions}}
-            try:
-                global __cuStreamGetCaptureInfo_v3
-                __cuStreamGetCaptureInfo_v3 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetCaptureInfo_v3_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamUpdateCaptureDependencies' in found_functions}}
-            try:
-                global __cuStreamUpdateCaptureDependencies
-                __cuStreamUpdateCaptureDependencies = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamUpdateCaptureDependencies_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}
-            try:
-                global __cuStreamUpdateCaptureDependencies_v2
-                __cuStreamUpdateCaptureDependencies_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamUpdateCaptureDependencies_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamAttachMemAsync' in found_functions}}
-            try:
-                global __cuStreamAttachMemAsync
-                __cuStreamAttachMemAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamAttachMemAsync_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamQuery' in found_functions}}
-            try:
-                global __cuStreamQuery
-                __cuStreamQuery = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamQuery_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamSynchronize' in found_functions}}
-            try:
-                global __cuStreamSynchronize
-                __cuStreamSynchronize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamSynchronize_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamCopyAttributes' in found_functions}}
-            try:
-                global __cuStreamCopyAttributes
-                __cuStreamCopyAttributes = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamCopyAttributes_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetAttribute' in found_functions}}
-            try:
-                global __cuStreamGetAttribute
-                __cuStreamGetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetAttribute_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamSetAttribute' in found_functions}}
-            try:
-                global __cuStreamSetAttribute
-                __cuStreamSetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamSetAttribute_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuEventRecord' in found_functions}}
-            try:
-                global __cuEventRecord
-                __cuEventRecord = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEventRecord_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuEventRecordWithFlags' in found_functions}}
-            try:
-                global __cuEventRecordWithFlags
-                __cuEventRecordWithFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEventRecordWithFlags_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuSignalExternalSemaphoresAsync' in found_functions}}
-            try:
-                global __cuSignalExternalSemaphoresAsync
-                __cuSignalExternalSemaphoresAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuSignalExternalSemaphoresAsync_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuWaitExternalSemaphoresAsync' in found_functions}}
-            try:
-                global __cuWaitExternalSemaphoresAsync
-                __cuWaitExternalSemaphoresAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuWaitExternalSemaphoresAsync_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamWaitValue32_v2' in found_functions}}
-            try:
-                global __cuStreamWaitValue32_v2
-                __cuStreamWaitValue32_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamWaitValue32_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamWaitValue64_v2' in found_functions}}
-            try:
-                global __cuStreamWaitValue64_v2
-                __cuStreamWaitValue64_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamWaitValue64_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamWriteValue32_v2' in found_functions}}
-            try:
-                global __cuStreamWriteValue32_v2
-                __cuStreamWriteValue32_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamWriteValue32_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamWriteValue64_v2' in found_functions}}
-            try:
-                global __cuStreamWriteValue64_v2
-                __cuStreamWriteValue64_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamWriteValue64_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamBatchMemOp_v2' in found_functions}}
-            try:
-                global __cuStreamBatchMemOp_v2
-                __cuStreamBatchMemOp_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamBatchMemOp_v2_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuLaunchKernel' in found_functions}}
-            try:
-                global __cuLaunchKernel
-                __cuLaunchKernel = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunchKernel_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuLaunchKernelEx' in found_functions}}
-            try:
-                global __cuLaunchKernelEx
-                __cuLaunchKernelEx = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunchKernelEx_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuLaunchCooperativeKernel' in found_functions}}
-            try:
-                global __cuLaunchCooperativeKernel
-                __cuLaunchCooperativeKernel = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunchCooperativeKernel_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuLaunchHostFunc' in found_functions}}
-            try:
-                global __cuLaunchHostFunc
-                __cuLaunchHostFunc = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunchHostFunc_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuGraphInstantiateWithParams' in found_functions}}
-            try:
-                global __cuGraphInstantiateWithParams
-                __cuGraphInstantiateWithParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphInstantiateWithParams_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuGraphUpload' in found_functions}}
-            try:
-                global __cuGraphUpload
-                __cuGraphUpload = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphUpload_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuGraphLaunch' in found_functions}}
-            try:
-                global __cuGraphLaunch
-                __cuGraphLaunch = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphLaunch_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuGraphicsMapResources' in found_functions}}
-            try:
-                global __cuGraphicsMapResources
-                __cuGraphicsMapResources = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsMapResources_ptsz')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuGraphicsUnmapResources' in found_functions}}
-            try:
-                global __cuGraphicsUnmapResources
-                __cuGraphicsUnmapResources = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsUnmapResources_ptsz')
-            except:
-                pass
-            {{endif}}
-        else:
-            # Else get the regular version
-            pass
-            {{if 'cuMemcpy' in found_functions}}
-            try:
-                global __cuMemcpy
-                __cuMemcpy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyPeer' in found_functions}}
-            try:
-                global __cuMemcpyPeer
-                __cuMemcpyPeer = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyPeer')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyHtoD_v2' in found_functions}}
-            try:
-                global __cuMemcpyHtoD_v2
-                __cuMemcpyHtoD_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyHtoD_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyDtoH_v2' in found_functions}}
-            try:
-                global __cuMemcpyDtoH_v2
-                __cuMemcpyDtoH_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyDtoH_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyDtoD_v2' in found_functions}}
-            try:
-                global __cuMemcpyDtoD_v2
-                __cuMemcpyDtoD_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyDtoD_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyDtoA_v2' in found_functions}}
-            try:
-                global __cuMemcpyDtoA_v2
-                __cuMemcpyDtoA_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyDtoA_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyAtoD_v2' in found_functions}}
-            try:
-                global __cuMemcpyAtoD_v2
-                __cuMemcpyAtoD_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyAtoD_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyHtoA_v2' in found_functions}}
-            try:
-                global __cuMemcpyHtoA_v2
-                __cuMemcpyHtoA_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyHtoA_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyAtoH_v2' in found_functions}}
-            try:
-                global __cuMemcpyAtoH_v2
-                __cuMemcpyAtoH_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyAtoH_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyAtoA_v2' in found_functions}}
-            try:
-                global __cuMemcpyAtoA_v2
-                __cuMemcpyAtoA_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyAtoA_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy2D_v2' in found_functions}}
-            try:
-                global __cuMemcpy2D_v2
-                __cuMemcpy2D_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy2D_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy2DUnaligned_v2' in found_functions}}
-            try:
-                global __cuMemcpy2DUnaligned_v2
-                __cuMemcpy2DUnaligned_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy2DUnaligned_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy3D_v2' in found_functions}}
-            try:
-                global __cuMemcpy3D_v2
-                __cuMemcpy3D_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy3D_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy3DPeer' in found_functions}}
-            try:
-                global __cuMemcpy3DPeer
-                __cuMemcpy3DPeer = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy3DPeer')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyAsync' in found_functions}}
-            try:
-                global __cuMemcpyAsync
-                __cuMemcpyAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyAsync')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyPeerAsync' in found_functions}}
-            try:
-                global __cuMemcpyPeerAsync
-                __cuMemcpyPeerAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyPeerAsync')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyHtoDAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpyHtoDAsync_v2
-                __cuMemcpyHtoDAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyHtoDAsync_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyDtoHAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpyDtoHAsync_v2
-                __cuMemcpyDtoHAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyDtoHAsync_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyDtoDAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpyDtoDAsync_v2
-                __cuMemcpyDtoDAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyDtoDAsync_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyHtoAAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpyHtoAAsync_v2
-                __cuMemcpyHtoAAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyHtoAAsync_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpyAtoHAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpyAtoHAsync_v2
-                __cuMemcpyAtoHAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpyAtoHAsync_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy2DAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpy2DAsync_v2
-                __cuMemcpy2DAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy2DAsync_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy3DAsync_v2' in found_functions}}
-            try:
-                global __cuMemcpy3DAsync_v2
-                __cuMemcpy3DAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy3DAsync_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemcpy3DPeerAsync' in found_functions}}
-            try:
-                global __cuMemcpy3DPeerAsync
-                __cuMemcpy3DPeerAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemcpy3DPeerAsync')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD8_v2' in found_functions}}
-            try:
-                global __cuMemsetD8_v2
-                __cuMemsetD8_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD8_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD16_v2' in found_functions}}
-            try:
-                global __cuMemsetD16_v2
-                __cuMemsetD16_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD16_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD32_v2' in found_functions}}
-            try:
-                global __cuMemsetD32_v2
-                __cuMemsetD32_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD32_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D8_v2' in found_functions}}
-            try:
-                global __cuMemsetD2D8_v2
-                __cuMemsetD2D8_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D8_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D16_v2' in found_functions}}
-            try:
-                global __cuMemsetD2D16_v2
-                __cuMemsetD2D16_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D16_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D32_v2' in found_functions}}
-            try:
-                global __cuMemsetD2D32_v2
-                __cuMemsetD2D32_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D32_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD8Async' in found_functions}}
-            try:
-                global __cuMemsetD8Async
-                __cuMemsetD8Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD8Async')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD16Async' in found_functions}}
-            try:
-                global __cuMemsetD16Async
-                __cuMemsetD16Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD16Async')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD32Async' in found_functions}}
-            try:
-                global __cuMemsetD32Async
-                __cuMemsetD32Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD32Async')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D8Async' in found_functions}}
-            try:
-                global __cuMemsetD2D8Async
-                __cuMemsetD2D8Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D8Async')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D16Async' in found_functions}}
-            try:
-                global __cuMemsetD2D16Async
-                __cuMemsetD2D16Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D16Async')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemsetD2D32Async' in found_functions}}
-            try:
-                global __cuMemsetD2D32Async
-                __cuMemsetD2D32Async = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemsetD2D32Async')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemMapArrayAsync' in found_functions}}
-            try:
-                global __cuMemMapArrayAsync
-                __cuMemMapArrayAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemMapArrayAsync')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemFreeAsync' in found_functions}}
-            try:
-                global __cuMemFreeAsync
-                __cuMemFreeAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemFreeAsync')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemAllocAsync' in found_functions}}
-            try:
-                global __cuMemAllocAsync
-                __cuMemAllocAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAllocAsync')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemAllocFromPoolAsync' in found_functions}}
-            try:
-                global __cuMemAllocFromPoolAsync
-                __cuMemAllocFromPoolAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAllocFromPoolAsync')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemPrefetchAsync' in found_functions}}
-            try:
-                global __cuMemPrefetchAsync
-                __cuMemPrefetchAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPrefetchAsync')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuMemPrefetchAsync_v2' in found_functions}}
-            try:
-                global __cuMemPrefetchAsync_v2
-                __cuMemPrefetchAsync_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPrefetchAsync_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetPriority' in found_functions}}
-            try:
-                global __cuStreamGetPriority
-                __cuStreamGetPriority = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetPriority')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetFlags' in found_functions}}
-            try:
-                global __cuStreamGetFlags
-                __cuStreamGetFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetFlags')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetId' in found_functions}}
-            try:
-                global __cuStreamGetId
-                __cuStreamGetId = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetId')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetCtx' in found_functions}}
-            try:
-                global __cuStreamGetCtx
-                __cuStreamGetCtx = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetCtx')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetCtx_v2' in found_functions}}
-            try:
-                global __cuStreamGetCtx_v2
-                __cuStreamGetCtx_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetCtx_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamWaitEvent' in found_functions}}
-            try:
-                global __cuStreamWaitEvent
-                __cuStreamWaitEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamWaitEvent')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamAddCallback' in found_functions}}
-            try:
-                global __cuStreamAddCallback
-                __cuStreamAddCallback = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamAddCallback')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamBeginCapture_v2' in found_functions}}
-            try:
-                global __cuStreamBeginCapture_v2
-                __cuStreamBeginCapture_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamBeginCapture_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamBeginCaptureToGraph' in found_functions}}
-            try:
-                global __cuStreamBeginCaptureToGraph
-                __cuStreamBeginCaptureToGraph = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamBeginCaptureToGraph')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamEndCapture' in found_functions}}
-            try:
-                global __cuStreamEndCapture
-                __cuStreamEndCapture = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamEndCapture')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamIsCapturing' in found_functions}}
-            try:
-                global __cuStreamIsCapturing
-                __cuStreamIsCapturing = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamIsCapturing')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetCaptureInfo_v2' in found_functions}}
-            try:
-                global __cuStreamGetCaptureInfo_v2
-                __cuStreamGetCaptureInfo_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetCaptureInfo_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetCaptureInfo_v3' in found_functions}}
-            try:
-                global __cuStreamGetCaptureInfo_v3
-                __cuStreamGetCaptureInfo_v3 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetCaptureInfo_v3')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamUpdateCaptureDependencies' in found_functions}}
-            try:
-                global __cuStreamUpdateCaptureDependencies
-                __cuStreamUpdateCaptureDependencies = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamUpdateCaptureDependencies')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}
-            try:
-                global __cuStreamUpdateCaptureDependencies_v2
-                __cuStreamUpdateCaptureDependencies_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamUpdateCaptureDependencies_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamAttachMemAsync' in found_functions}}
-            try:
-                global __cuStreamAttachMemAsync
-                __cuStreamAttachMemAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamAttachMemAsync')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamQuery' in found_functions}}
-            try:
-                global __cuStreamQuery
-                __cuStreamQuery = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamQuery')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamSynchronize' in found_functions}}
-            try:
-                global __cuStreamSynchronize
-                __cuStreamSynchronize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamSynchronize')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamCopyAttributes' in found_functions}}
-            try:
-                global __cuStreamCopyAttributes
-                __cuStreamCopyAttributes = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamCopyAttributes')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamGetAttribute' in found_functions}}
-            try:
-                global __cuStreamGetAttribute
-                __cuStreamGetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetAttribute')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamSetAttribute' in found_functions}}
-            try:
-                global __cuStreamSetAttribute
-                __cuStreamSetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamSetAttribute')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuEventRecord' in found_functions}}
-            try:
-                global __cuEventRecord
-                __cuEventRecord = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEventRecord')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuEventRecordWithFlags' in found_functions}}
-            try:
-                global __cuEventRecordWithFlags
-                __cuEventRecordWithFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEventRecordWithFlags')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuSignalExternalSemaphoresAsync' in found_functions}}
-            try:
-                global __cuSignalExternalSemaphoresAsync
-                __cuSignalExternalSemaphoresAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuSignalExternalSemaphoresAsync')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuWaitExternalSemaphoresAsync' in found_functions}}
-            try:
-                global __cuWaitExternalSemaphoresAsync
-                __cuWaitExternalSemaphoresAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuWaitExternalSemaphoresAsync')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamWaitValue32_v2' in found_functions}}
-            try:
-                global __cuStreamWaitValue32_v2
-                __cuStreamWaitValue32_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamWaitValue32_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamWaitValue64_v2' in found_functions}}
-            try:
-                global __cuStreamWaitValue64_v2
-                __cuStreamWaitValue64_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamWaitValue64_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamWriteValue32_v2' in found_functions}}
-            try:
-                global __cuStreamWriteValue32_v2
-                __cuStreamWriteValue32_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamWriteValue32_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamWriteValue64_v2' in found_functions}}
-            try:
-                global __cuStreamWriteValue64_v2
-                __cuStreamWriteValue64_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamWriteValue64_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuStreamBatchMemOp_v2' in found_functions}}
-            try:
-                global __cuStreamBatchMemOp_v2
-                __cuStreamBatchMemOp_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamBatchMemOp_v2')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuLaunchKernel' in found_functions}}
-            try:
-                global __cuLaunchKernel
-                __cuLaunchKernel = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunchKernel')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuLaunchKernelEx' in found_functions}}
-            try:
-                global __cuLaunchKernelEx
-                __cuLaunchKernelEx = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunchKernelEx')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuLaunchCooperativeKernel' in found_functions}}
-            try:
-                global __cuLaunchCooperativeKernel
-                __cuLaunchCooperativeKernel = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunchCooperativeKernel')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuLaunchHostFunc' in found_functions}}
-            try:
-                global __cuLaunchHostFunc
-                __cuLaunchHostFunc = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunchHostFunc')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuGraphInstantiateWithParams' in found_functions}}
-            try:
-                global __cuGraphInstantiateWithParams
-                __cuGraphInstantiateWithParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphInstantiateWithParams')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuGraphUpload' in found_functions}}
-            try:
-                global __cuGraphUpload
-                __cuGraphUpload = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphUpload')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuGraphLaunch' in found_functions}}
-            try:
-                global __cuGraphLaunch
-                __cuGraphLaunch = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphLaunch')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuGraphicsMapResources' in found_functions}}
-            try:
-                global __cuGraphicsMapResources
-                __cuGraphicsMapResources = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsMapResources')
-            except:
-                pass
-            {{endif}}
-            {{if 'cuGraphicsUnmapResources' in found_functions}}
-            try:
-                global __cuGraphicsUnmapResources
-                __cuGraphicsUnmapResources = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsUnmapResources')
-            except:
-                pass
-            {{endif}}
-        # Get remaining functions
-        {{if 'cuGetErrorString' in found_functions}}
-        try:
-            global __cuGetErrorString
-            __cuGetErrorString = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGetErrorString')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGetErrorName' in found_functions}}
-        try:
-            global __cuGetErrorName
-            __cuGetErrorName = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGetErrorName')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuInit' in found_functions}}
-        try:
-            global __cuInit
-            __cuInit = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuInit')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDriverGetVersion' in found_functions}}
-        try:
-            global __cuDriverGetVersion
-            __cuDriverGetVersion = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDriverGetVersion')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGet' in found_functions}}
-        try:
-            global __cuDeviceGet
-            __cuDeviceGet = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGet')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetCount' in found_functions}}
-        try:
-            global __cuDeviceGetCount
-            __cuDeviceGetCount = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetCount')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetName' in found_functions}}
-        try:
-            global __cuDeviceGetName
-            __cuDeviceGetName = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetName')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetUuid' in found_functions}}
-        try:
-            global __cuDeviceGetUuid
-            __cuDeviceGetUuid = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetUuid')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetUuid_v2' in found_functions}}
-        try:
-            global __cuDeviceGetUuid_v2
-            __cuDeviceGetUuid_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetUuid_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetLuid' in found_functions}}
-        try:
-            global __cuDeviceGetLuid
-            __cuDeviceGetLuid = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetLuid')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceTotalMem_v2' in found_functions}}
-        try:
-            global __cuDeviceTotalMem_v2
-            __cuDeviceTotalMem_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceTotalMem_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-        try:
-            global __cuDeviceGetTexture1DLinearMaxWidth
-            __cuDeviceGetTexture1DLinearMaxWidth = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetTexture1DLinearMaxWidth')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetAttribute' in found_functions}}
-        try:
-            global __cuDeviceGetAttribute
-            __cuDeviceGetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetNvSciSyncAttributes' in found_functions}}
-        try:
-            global __cuDeviceGetNvSciSyncAttributes
-            __cuDeviceGetNvSciSyncAttributes = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetNvSciSyncAttributes')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceSetMemPool' in found_functions}}
-        try:
-            global __cuDeviceSetMemPool
-            __cuDeviceSetMemPool = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceSetMemPool')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetMemPool' in found_functions}}
-        try:
-            global __cuDeviceGetMemPool
-            __cuDeviceGetMemPool = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetMemPool')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetDefaultMemPool' in found_functions}}
-        try:
-            global __cuDeviceGetDefaultMemPool
-            __cuDeviceGetDefaultMemPool = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetDefaultMemPool')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetExecAffinitySupport' in found_functions}}
-        try:
-            global __cuDeviceGetExecAffinitySupport
-            __cuDeviceGetExecAffinitySupport = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetExecAffinitySupport')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFlushGPUDirectRDMAWrites' in found_functions}}
-        try:
-            global __cuFlushGPUDirectRDMAWrites
-            __cuFlushGPUDirectRDMAWrites = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFlushGPUDirectRDMAWrites')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetProperties' in found_functions}}
-        try:
-            global __cuDeviceGetProperties
-            __cuDeviceGetProperties = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetProperties')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceComputeCapability' in found_functions}}
-        try:
-            global __cuDeviceComputeCapability
-            __cuDeviceComputeCapability = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceComputeCapability')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDevicePrimaryCtxRetain' in found_functions}}
-        try:
-            global __cuDevicePrimaryCtxRetain
-            __cuDevicePrimaryCtxRetain = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDevicePrimaryCtxRetain')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDevicePrimaryCtxRelease_v2' in found_functions}}
-        try:
-            global __cuDevicePrimaryCtxRelease_v2
-            __cuDevicePrimaryCtxRelease_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDevicePrimaryCtxRelease_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDevicePrimaryCtxSetFlags_v2' in found_functions}}
-        try:
-            global __cuDevicePrimaryCtxSetFlags_v2
-            __cuDevicePrimaryCtxSetFlags_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDevicePrimaryCtxSetFlags_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDevicePrimaryCtxGetState' in found_functions}}
-        try:
-            global __cuDevicePrimaryCtxGetState
-            __cuDevicePrimaryCtxGetState = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDevicePrimaryCtxGetState')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDevicePrimaryCtxReset_v2' in found_functions}}
-        try:
-            global __cuDevicePrimaryCtxReset_v2
-            __cuDevicePrimaryCtxReset_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDevicePrimaryCtxReset_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxCreate_v2' in found_functions}}
-        try:
-            global __cuCtxCreate_v2
-            __cuCtxCreate_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxCreate_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxCreate_v3' in found_functions}}
-        try:
-            global __cuCtxCreate_v3
-            __cuCtxCreate_v3 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxCreate_v3')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxCreate_v4' in found_functions}}
-        try:
-            global __cuCtxCreate_v4
-            __cuCtxCreate_v4 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxCreate_v4')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxDestroy_v2' in found_functions}}
-        try:
-            global __cuCtxDestroy_v2
-            __cuCtxDestroy_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxDestroy_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxPushCurrent_v2' in found_functions}}
-        try:
-            global __cuCtxPushCurrent_v2
-            __cuCtxPushCurrent_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxPushCurrent_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxPopCurrent_v2' in found_functions}}
-        try:
-            global __cuCtxPopCurrent_v2
-            __cuCtxPopCurrent_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxPopCurrent_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxSetCurrent' in found_functions}}
-        try:
-            global __cuCtxSetCurrent
-            __cuCtxSetCurrent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxSetCurrent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxGetCurrent' in found_functions}}
-        try:
-            global __cuCtxGetCurrent
-            __cuCtxGetCurrent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxGetCurrent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxGetDevice' in found_functions}}
-        try:
-            global __cuCtxGetDevice
-            __cuCtxGetDevice = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxGetDevice')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxGetFlags' in found_functions}}
-        try:
-            global __cuCtxGetFlags
-            __cuCtxGetFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxGetFlags')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxSetFlags' in found_functions}}
-        try:
-            global __cuCtxSetFlags
-            __cuCtxSetFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxSetFlags')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxGetId' in found_functions}}
-        try:
-            global __cuCtxGetId
-            __cuCtxGetId = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxGetId')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxSynchronize' in found_functions}}
-        try:
-            global __cuCtxSynchronize
-            __cuCtxSynchronize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxSynchronize')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxSetLimit' in found_functions}}
-        try:
-            global __cuCtxSetLimit
-            __cuCtxSetLimit = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxSetLimit')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxGetLimit' in found_functions}}
-        try:
-            global __cuCtxGetLimit
-            __cuCtxGetLimit = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxGetLimit')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxGetCacheConfig' in found_functions}}
-        try:
-            global __cuCtxGetCacheConfig
-            __cuCtxGetCacheConfig = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxGetCacheConfig')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxSetCacheConfig' in found_functions}}
-        try:
-            global __cuCtxSetCacheConfig
-            __cuCtxSetCacheConfig = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxSetCacheConfig')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxGetApiVersion' in found_functions}}
-        try:
-            global __cuCtxGetApiVersion
-            __cuCtxGetApiVersion = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxGetApiVersion')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxGetStreamPriorityRange' in found_functions}}
-        try:
-            global __cuCtxGetStreamPriorityRange
-            __cuCtxGetStreamPriorityRange = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxGetStreamPriorityRange')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxResetPersistingL2Cache' in found_functions}}
-        try:
-            global __cuCtxResetPersistingL2Cache
-            __cuCtxResetPersistingL2Cache = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxResetPersistingL2Cache')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxGetExecAffinity' in found_functions}}
-        try:
-            global __cuCtxGetExecAffinity
-            __cuCtxGetExecAffinity = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxGetExecAffinity')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxRecordEvent' in found_functions}}
-        try:
-            global __cuCtxRecordEvent
-            __cuCtxRecordEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxRecordEvent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxWaitEvent' in found_functions}}
-        try:
-            global __cuCtxWaitEvent
-            __cuCtxWaitEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxWaitEvent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxAttach' in found_functions}}
-        try:
-            global __cuCtxAttach
-            __cuCtxAttach = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxAttach')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxDetach' in found_functions}}
-        try:
-            global __cuCtxDetach
-            __cuCtxDetach = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxDetach')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxGetSharedMemConfig' in found_functions}}
-        try:
-            global __cuCtxGetSharedMemConfig
-            __cuCtxGetSharedMemConfig = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxGetSharedMemConfig')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxSetSharedMemConfig' in found_functions}}
-        try:
-            global __cuCtxSetSharedMemConfig
-            __cuCtxSetSharedMemConfig = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxSetSharedMemConfig')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleLoad' in found_functions}}
-        try:
-            global __cuModuleLoad
-            __cuModuleLoad = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleLoad')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleLoadData' in found_functions}}
-        try:
-            global __cuModuleLoadData
-            __cuModuleLoadData = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleLoadData')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleLoadDataEx' in found_functions}}
-        try:
-            global __cuModuleLoadDataEx
-            __cuModuleLoadDataEx = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleLoadDataEx')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleLoadFatBinary' in found_functions}}
-        try:
-            global __cuModuleLoadFatBinary
-            __cuModuleLoadFatBinary = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleLoadFatBinary')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleUnload' in found_functions}}
-        try:
-            global __cuModuleUnload
-            __cuModuleUnload = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleUnload')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleGetLoadingMode' in found_functions}}
-        try:
-            global __cuModuleGetLoadingMode
-            __cuModuleGetLoadingMode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleGetLoadingMode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleGetFunction' in found_functions}}
-        try:
-            global __cuModuleGetFunction
-            __cuModuleGetFunction = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleGetFunction')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleGetFunctionCount' in found_functions}}
-        try:
-            global __cuModuleGetFunctionCount
-            __cuModuleGetFunctionCount = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleGetFunctionCount')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleEnumerateFunctions' in found_functions}}
-        try:
-            global __cuModuleEnumerateFunctions
-            __cuModuleEnumerateFunctions = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleEnumerateFunctions')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleGetGlobal_v2' in found_functions}}
-        try:
-            global __cuModuleGetGlobal_v2
-            __cuModuleGetGlobal_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleGetGlobal_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLinkCreate_v2' in found_functions}}
-        try:
-            global __cuLinkCreate_v2
-            __cuLinkCreate_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLinkCreate_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLinkAddData_v2' in found_functions}}
-        try:
-            global __cuLinkAddData_v2
-            __cuLinkAddData_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLinkAddData_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLinkAddFile_v2' in found_functions}}
-        try:
-            global __cuLinkAddFile_v2
-            __cuLinkAddFile_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLinkAddFile_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLinkComplete' in found_functions}}
-        try:
-            global __cuLinkComplete
-            __cuLinkComplete = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLinkComplete')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLinkDestroy' in found_functions}}
-        try:
-            global __cuLinkDestroy
-            __cuLinkDestroy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLinkDestroy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleGetTexRef' in found_functions}}
-        try:
-            global __cuModuleGetTexRef
-            __cuModuleGetTexRef = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleGetTexRef')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuModuleGetSurfRef' in found_functions}}
-        try:
-            global __cuModuleGetSurfRef
-            __cuModuleGetSurfRef = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuModuleGetSurfRef')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLibraryLoadData' in found_functions}}
-        try:
-            global __cuLibraryLoadData
-            __cuLibraryLoadData = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLibraryLoadData')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLibraryLoadFromFile' in found_functions}}
-        try:
-            global __cuLibraryLoadFromFile
-            __cuLibraryLoadFromFile = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLibraryLoadFromFile')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLibraryUnload' in found_functions}}
-        try:
-            global __cuLibraryUnload
-            __cuLibraryUnload = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLibraryUnload')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLibraryGetKernel' in found_functions}}
-        try:
-            global __cuLibraryGetKernel
-            __cuLibraryGetKernel = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLibraryGetKernel')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLibraryGetKernelCount' in found_functions}}
-        try:
-            global __cuLibraryGetKernelCount
-            __cuLibraryGetKernelCount = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLibraryGetKernelCount')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLibraryEnumerateKernels' in found_functions}}
-        try:
-            global __cuLibraryEnumerateKernels
-            __cuLibraryEnumerateKernels = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLibraryEnumerateKernels')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLibraryGetModule' in found_functions}}
-        try:
-            global __cuLibraryGetModule
-            __cuLibraryGetModule = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLibraryGetModule')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuKernelGetFunction' in found_functions}}
-        try:
-            global __cuKernelGetFunction
-            __cuKernelGetFunction = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuKernelGetFunction')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuKernelGetLibrary' in found_functions}}
-        try:
-            global __cuKernelGetLibrary
-            __cuKernelGetLibrary = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuKernelGetLibrary')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLibraryGetGlobal' in found_functions}}
-        try:
-            global __cuLibraryGetGlobal
-            __cuLibraryGetGlobal = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLibraryGetGlobal')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLibraryGetManaged' in found_functions}}
-        try:
-            global __cuLibraryGetManaged
-            __cuLibraryGetManaged = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLibraryGetManaged')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLibraryGetUnifiedFunction' in found_functions}}
-        try:
-            global __cuLibraryGetUnifiedFunction
-            __cuLibraryGetUnifiedFunction = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLibraryGetUnifiedFunction')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuKernelGetAttribute' in found_functions}}
-        try:
-            global __cuKernelGetAttribute
-            __cuKernelGetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuKernelGetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuKernelSetAttribute' in found_functions}}
-        try:
-            global __cuKernelSetAttribute
-            __cuKernelSetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuKernelSetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuKernelSetCacheConfig' in found_functions}}
-        try:
-            global __cuKernelSetCacheConfig
-            __cuKernelSetCacheConfig = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuKernelSetCacheConfig')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuKernelGetName' in found_functions}}
-        try:
-            global __cuKernelGetName
-            __cuKernelGetName = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuKernelGetName')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuKernelGetParamInfo' in found_functions}}
-        try:
-            global __cuKernelGetParamInfo
-            __cuKernelGetParamInfo = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuKernelGetParamInfo')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemGetInfo_v2' in found_functions}}
-        try:
-            global __cuMemGetInfo_v2
-            __cuMemGetInfo_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemGetInfo_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemAlloc_v2' in found_functions}}
-        try:
-            global __cuMemAlloc_v2
-            __cuMemAlloc_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAlloc_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemAllocPitch_v2' in found_functions}}
-        try:
-            global __cuMemAllocPitch_v2
-            __cuMemAllocPitch_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAllocPitch_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemFree_v2' in found_functions}}
-        try:
-            global __cuMemFree_v2
-            __cuMemFree_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemFree_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemGetAddressRange_v2' in found_functions}}
-        try:
-            global __cuMemGetAddressRange_v2
-            __cuMemGetAddressRange_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemGetAddressRange_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemAllocHost_v2' in found_functions}}
-        try:
-            global __cuMemAllocHost_v2
-            __cuMemAllocHost_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAllocHost_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemFreeHost' in found_functions}}
-        try:
-            global __cuMemFreeHost
-            __cuMemFreeHost = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemFreeHost')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemHostAlloc' in found_functions}}
-        try:
-            global __cuMemHostAlloc
-            __cuMemHostAlloc = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemHostAlloc')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemHostGetDevicePointer_v2' in found_functions}}
-        try:
-            global __cuMemHostGetDevicePointer_v2
-            __cuMemHostGetDevicePointer_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemHostGetDevicePointer_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemHostGetFlags' in found_functions}}
-        try:
-            global __cuMemHostGetFlags
-            __cuMemHostGetFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemHostGetFlags')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemAllocManaged' in found_functions}}
-        try:
-            global __cuMemAllocManaged
-            __cuMemAllocManaged = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAllocManaged')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceRegisterAsyncNotification' in found_functions}}
-        try:
-            global __cuDeviceRegisterAsyncNotification
-            __cuDeviceRegisterAsyncNotification = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceRegisterAsyncNotification')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceUnregisterAsyncNotification' in found_functions}}
-        try:
-            global __cuDeviceUnregisterAsyncNotification
-            __cuDeviceUnregisterAsyncNotification = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceUnregisterAsyncNotification')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetByPCIBusId' in found_functions}}
-        try:
-            global __cuDeviceGetByPCIBusId
-            __cuDeviceGetByPCIBusId = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetByPCIBusId')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetPCIBusId' in found_functions}}
-        try:
-            global __cuDeviceGetPCIBusId
-            __cuDeviceGetPCIBusId = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetPCIBusId')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuIpcGetEventHandle' in found_functions}}
-        try:
-            global __cuIpcGetEventHandle
-            __cuIpcGetEventHandle = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuIpcGetEventHandle')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuIpcOpenEventHandle' in found_functions}}
-        try:
-            global __cuIpcOpenEventHandle
-            __cuIpcOpenEventHandle = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuIpcOpenEventHandle')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuIpcGetMemHandle' in found_functions}}
-        try:
-            global __cuIpcGetMemHandle
-            __cuIpcGetMemHandle = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuIpcGetMemHandle')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuIpcOpenMemHandle_v2' in found_functions}}
-        try:
-            global __cuIpcOpenMemHandle_v2
-            __cuIpcOpenMemHandle_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuIpcOpenMemHandle_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuIpcCloseMemHandle' in found_functions}}
-        try:
-            global __cuIpcCloseMemHandle
-            __cuIpcCloseMemHandle = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuIpcCloseMemHandle')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemHostRegister_v2' in found_functions}}
-        try:
-            global __cuMemHostRegister_v2
-            __cuMemHostRegister_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemHostRegister_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemHostUnregister' in found_functions}}
-        try:
-            global __cuMemHostUnregister
-            __cuMemHostUnregister = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemHostUnregister')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuArrayCreate_v2' in found_functions}}
-        try:
-            global __cuArrayCreate_v2
-            __cuArrayCreate_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuArrayCreate_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuArrayGetDescriptor_v2' in found_functions}}
-        try:
-            global __cuArrayGetDescriptor_v2
-            __cuArrayGetDescriptor_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuArrayGetDescriptor_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuArrayGetSparseProperties' in found_functions}}
-        try:
-            global __cuArrayGetSparseProperties
-            __cuArrayGetSparseProperties = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuArrayGetSparseProperties')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMipmappedArrayGetSparseProperties' in found_functions}}
-        try:
-            global __cuMipmappedArrayGetSparseProperties
-            __cuMipmappedArrayGetSparseProperties = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMipmappedArrayGetSparseProperties')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuArrayGetMemoryRequirements' in found_functions}}
-        try:
-            global __cuArrayGetMemoryRequirements
-            __cuArrayGetMemoryRequirements = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuArrayGetMemoryRequirements')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMipmappedArrayGetMemoryRequirements' in found_functions}}
-        try:
-            global __cuMipmappedArrayGetMemoryRequirements
-            __cuMipmappedArrayGetMemoryRequirements = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMipmappedArrayGetMemoryRequirements')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuArrayGetPlane' in found_functions}}
-        try:
-            global __cuArrayGetPlane
-            __cuArrayGetPlane = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuArrayGetPlane')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuArrayDestroy' in found_functions}}
-        try:
-            global __cuArrayDestroy
-            __cuArrayDestroy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuArrayDestroy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuArray3DCreate_v2' in found_functions}}
-        try:
-            global __cuArray3DCreate_v2
-            __cuArray3DCreate_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuArray3DCreate_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuArray3DGetDescriptor_v2' in found_functions}}
-        try:
-            global __cuArray3DGetDescriptor_v2
-            __cuArray3DGetDescriptor_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuArray3DGetDescriptor_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMipmappedArrayCreate' in found_functions}}
-        try:
-            global __cuMipmappedArrayCreate
-            __cuMipmappedArrayCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMipmappedArrayCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMipmappedArrayGetLevel' in found_functions}}
-        try:
-            global __cuMipmappedArrayGetLevel
-            __cuMipmappedArrayGetLevel = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMipmappedArrayGetLevel')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMipmappedArrayDestroy' in found_functions}}
-        try:
-            global __cuMipmappedArrayDestroy
-            __cuMipmappedArrayDestroy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMipmappedArrayDestroy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemGetHandleForAddressRange' in found_functions}}
-        try:
-            global __cuMemGetHandleForAddressRange
-            __cuMemGetHandleForAddressRange = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemGetHandleForAddressRange')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemAddressReserve' in found_functions}}
-        try:
-            global __cuMemAddressReserve
-            __cuMemAddressReserve = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAddressReserve')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemAddressFree' in found_functions}}
-        try:
-            global __cuMemAddressFree
-            __cuMemAddressFree = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAddressFree')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemCreate' in found_functions}}
-        try:
-            global __cuMemCreate
-            __cuMemCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemRelease' in found_functions}}
-        try:
-            global __cuMemRelease
-            __cuMemRelease = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemRelease')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemMap' in found_functions}}
-        try:
-            global __cuMemMap
-            __cuMemMap = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemMap')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemUnmap' in found_functions}}
-        try:
-            global __cuMemUnmap
-            __cuMemUnmap = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemUnmap')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemSetAccess' in found_functions}}
-        try:
-            global __cuMemSetAccess
-            __cuMemSetAccess = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemSetAccess')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemGetAccess' in found_functions}}
-        try:
-            global __cuMemGetAccess
-            __cuMemGetAccess = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemGetAccess')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemExportToShareableHandle' in found_functions}}
-        try:
-            global __cuMemExportToShareableHandle
-            __cuMemExportToShareableHandle = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemExportToShareableHandle')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemImportFromShareableHandle' in found_functions}}
-        try:
-            global __cuMemImportFromShareableHandle
-            __cuMemImportFromShareableHandle = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemImportFromShareableHandle')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemGetAllocationGranularity' in found_functions}}
-        try:
-            global __cuMemGetAllocationGranularity
-            __cuMemGetAllocationGranularity = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemGetAllocationGranularity')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemGetAllocationPropertiesFromHandle' in found_functions}}
-        try:
-            global __cuMemGetAllocationPropertiesFromHandle
-            __cuMemGetAllocationPropertiesFromHandle = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemGetAllocationPropertiesFromHandle')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemRetainAllocationHandle' in found_functions}}
-        try:
-            global __cuMemRetainAllocationHandle
-            __cuMemRetainAllocationHandle = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemRetainAllocationHandle')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemPoolTrimTo' in found_functions}}
-        try:
-            global __cuMemPoolTrimTo
-            __cuMemPoolTrimTo = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPoolTrimTo')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemPoolSetAttribute' in found_functions}}
-        try:
-            global __cuMemPoolSetAttribute
-            __cuMemPoolSetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPoolSetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemPoolGetAttribute' in found_functions}}
-        try:
-            global __cuMemPoolGetAttribute
-            __cuMemPoolGetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPoolGetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemPoolSetAccess' in found_functions}}
-        try:
-            global __cuMemPoolSetAccess
-            __cuMemPoolSetAccess = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPoolSetAccess')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemPoolGetAccess' in found_functions}}
-        try:
-            global __cuMemPoolGetAccess
-            __cuMemPoolGetAccess = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPoolGetAccess')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemPoolCreate' in found_functions}}
-        try:
-            global __cuMemPoolCreate
-            __cuMemPoolCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPoolCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemPoolDestroy' in found_functions}}
-        try:
-            global __cuMemPoolDestroy
-            __cuMemPoolDestroy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPoolDestroy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemPoolExportToShareableHandle' in found_functions}}
-        try:
-            global __cuMemPoolExportToShareableHandle
-            __cuMemPoolExportToShareableHandle = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPoolExportToShareableHandle')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemPoolImportFromShareableHandle' in found_functions}}
-        try:
-            global __cuMemPoolImportFromShareableHandle
-            __cuMemPoolImportFromShareableHandle = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPoolImportFromShareableHandle')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemPoolExportPointer' in found_functions}}
-        try:
-            global __cuMemPoolExportPointer
-            __cuMemPoolExportPointer = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPoolExportPointer')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemPoolImportPointer' in found_functions}}
-        try:
-            global __cuMemPoolImportPointer
-            __cuMemPoolImportPointer = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemPoolImportPointer')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMulticastCreate' in found_functions}}
-        try:
-            global __cuMulticastCreate
-            __cuMulticastCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMulticastCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMulticastAddDevice' in found_functions}}
-        try:
-            global __cuMulticastAddDevice
-            __cuMulticastAddDevice = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMulticastAddDevice')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMulticastBindMem' in found_functions}}
-        try:
-            global __cuMulticastBindMem
-            __cuMulticastBindMem = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMulticastBindMem')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMulticastBindAddr' in found_functions}}
-        try:
-            global __cuMulticastBindAddr
-            __cuMulticastBindAddr = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMulticastBindAddr')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMulticastUnbind' in found_functions}}
-        try:
-            global __cuMulticastUnbind
-            __cuMulticastUnbind = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMulticastUnbind')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMulticastGetGranularity' in found_functions}}
-        try:
-            global __cuMulticastGetGranularity
-            __cuMulticastGetGranularity = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMulticastGetGranularity')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuPointerGetAttribute' in found_functions}}
-        try:
-            global __cuPointerGetAttribute
-            __cuPointerGetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuPointerGetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemAdvise' in found_functions}}
-        try:
-            global __cuMemAdvise
-            __cuMemAdvise = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAdvise')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemAdvise_v2' in found_functions}}
-        try:
-            global __cuMemAdvise_v2
-            __cuMemAdvise_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemAdvise_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemRangeGetAttribute' in found_functions}}
-        try:
-            global __cuMemRangeGetAttribute
-            __cuMemRangeGetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemRangeGetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuMemRangeGetAttributes' in found_functions}}
-        try:
-            global __cuMemRangeGetAttributes
-            __cuMemRangeGetAttributes = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuMemRangeGetAttributes')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuPointerSetAttribute' in found_functions}}
-        try:
-            global __cuPointerSetAttribute
-            __cuPointerSetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuPointerSetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuPointerGetAttributes' in found_functions}}
-        try:
-            global __cuPointerGetAttributes
-            __cuPointerGetAttributes = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuPointerGetAttributes')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuStreamCreate' in found_functions}}
-        try:
-            global __cuStreamCreate
-            __cuStreamCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuStreamCreateWithPriority' in found_functions}}
-        try:
-            global __cuStreamCreateWithPriority
-            __cuStreamCreateWithPriority = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamCreateWithPriority')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuThreadExchangeStreamCaptureMode' in found_functions}}
-        try:
-            global __cuThreadExchangeStreamCaptureMode
-            __cuThreadExchangeStreamCaptureMode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuThreadExchangeStreamCaptureMode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuStreamDestroy_v2' in found_functions}}
-        try:
-            global __cuStreamDestroy_v2
-            __cuStreamDestroy_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamDestroy_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuEventCreate' in found_functions}}
-        try:
-            global __cuEventCreate
-            __cuEventCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEventCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuEventQuery' in found_functions}}
-        try:
-            global __cuEventQuery
-            __cuEventQuery = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEventQuery')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuEventSynchronize' in found_functions}}
-        try:
-            global __cuEventSynchronize
-            __cuEventSynchronize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEventSynchronize')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuEventDestroy_v2' in found_functions}}
-        try:
-            global __cuEventDestroy_v2
-            __cuEventDestroy_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEventDestroy_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuEventElapsedTime' in found_functions}}
-        try:
-            global __cuEventElapsedTime
-            __cuEventElapsedTime = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEventElapsedTime')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuImportExternalMemory' in found_functions}}
-        try:
-            global __cuImportExternalMemory
-            __cuImportExternalMemory = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuImportExternalMemory')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuExternalMemoryGetMappedBuffer' in found_functions}}
-        try:
-            global __cuExternalMemoryGetMappedBuffer
-            __cuExternalMemoryGetMappedBuffer = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuExternalMemoryGetMappedBuffer')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuExternalMemoryGetMappedMipmappedArray' in found_functions}}
-        try:
-            global __cuExternalMemoryGetMappedMipmappedArray
-            __cuExternalMemoryGetMappedMipmappedArray = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuExternalMemoryGetMappedMipmappedArray')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDestroyExternalMemory' in found_functions}}
-        try:
-            global __cuDestroyExternalMemory
-            __cuDestroyExternalMemory = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDestroyExternalMemory')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuImportExternalSemaphore' in found_functions}}
-        try:
-            global __cuImportExternalSemaphore
-            __cuImportExternalSemaphore = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuImportExternalSemaphore')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDestroyExternalSemaphore' in found_functions}}
-        try:
-            global __cuDestroyExternalSemaphore
-            __cuDestroyExternalSemaphore = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDestroyExternalSemaphore')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFuncGetAttribute' in found_functions}}
-        try:
-            global __cuFuncGetAttribute
-            __cuFuncGetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFuncGetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFuncSetAttribute' in found_functions}}
-        try:
-            global __cuFuncSetAttribute
-            __cuFuncSetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFuncSetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFuncSetCacheConfig' in found_functions}}
-        try:
-            global __cuFuncSetCacheConfig
-            __cuFuncSetCacheConfig = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFuncSetCacheConfig')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFuncGetModule' in found_functions}}
-        try:
-            global __cuFuncGetModule
-            __cuFuncGetModule = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFuncGetModule')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFuncGetName' in found_functions}}
-        try:
-            global __cuFuncGetName
-            __cuFuncGetName = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFuncGetName')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFuncGetParamInfo' in found_functions}}
-        try:
-            global __cuFuncGetParamInfo
-            __cuFuncGetParamInfo = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFuncGetParamInfo')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFuncIsLoaded' in found_functions}}
-        try:
-            global __cuFuncIsLoaded
-            __cuFuncIsLoaded = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFuncIsLoaded')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFuncLoad' in found_functions}}
-        try:
-            global __cuFuncLoad
-            __cuFuncLoad = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFuncLoad')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLaunchCooperativeKernelMultiDevice' in found_functions}}
-        try:
-            global __cuLaunchCooperativeKernelMultiDevice
-            __cuLaunchCooperativeKernelMultiDevice = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunchCooperativeKernelMultiDevice')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFuncSetBlockShape' in found_functions}}
-        try:
-            global __cuFuncSetBlockShape
-            __cuFuncSetBlockShape = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFuncSetBlockShape')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFuncSetSharedSize' in found_functions}}
-        try:
-            global __cuFuncSetSharedSize
-            __cuFuncSetSharedSize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFuncSetSharedSize')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuParamSetSize' in found_functions}}
-        try:
-            global __cuParamSetSize
-            __cuParamSetSize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuParamSetSize')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuParamSeti' in found_functions}}
-        try:
-            global __cuParamSeti
-            __cuParamSeti = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuParamSeti')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuParamSetf' in found_functions}}
-        try:
-            global __cuParamSetf
-            __cuParamSetf = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuParamSetf')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuParamSetv' in found_functions}}
-        try:
-            global __cuParamSetv
-            __cuParamSetv = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuParamSetv')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLaunch' in found_functions}}
-        try:
-            global __cuLaunch
-            __cuLaunch = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunch')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLaunchGrid' in found_functions}}
-        try:
-            global __cuLaunchGrid
-            __cuLaunchGrid = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunchGrid')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuLaunchGridAsync' in found_functions}}
-        try:
-            global __cuLaunchGridAsync
-            __cuLaunchGridAsync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuLaunchGridAsync')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuParamSetTexRef' in found_functions}}
-        try:
-            global __cuParamSetTexRef
-            __cuParamSetTexRef = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuParamSetTexRef')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuFuncSetSharedMemConfig' in found_functions}}
-        try:
-            global __cuFuncSetSharedMemConfig
-            __cuFuncSetSharedMemConfig = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuFuncSetSharedMemConfig')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphCreate' in found_functions}}
-        try:
-            global __cuGraphCreate
-            __cuGraphCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddKernelNode_v2' in found_functions}}
-        try:
-            global __cuGraphAddKernelNode_v2
-            __cuGraphAddKernelNode_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddKernelNode_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphKernelNodeGetParams_v2' in found_functions}}
-        try:
-            global __cuGraphKernelNodeGetParams_v2
-            __cuGraphKernelNodeGetParams_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphKernelNodeGetParams_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphKernelNodeSetParams_v2' in found_functions}}
-        try:
-            global __cuGraphKernelNodeSetParams_v2
-            __cuGraphKernelNodeSetParams_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphKernelNodeSetParams_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddMemcpyNode' in found_functions}}
-        try:
-            global __cuGraphAddMemcpyNode
-            __cuGraphAddMemcpyNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddMemcpyNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphMemcpyNodeGetParams' in found_functions}}
-        try:
-            global __cuGraphMemcpyNodeGetParams
-            __cuGraphMemcpyNodeGetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphMemcpyNodeGetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphMemcpyNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphMemcpyNodeSetParams
-            __cuGraphMemcpyNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphMemcpyNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddMemsetNode' in found_functions}}
-        try:
-            global __cuGraphAddMemsetNode
-            __cuGraphAddMemsetNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddMemsetNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphMemsetNodeGetParams' in found_functions}}
-        try:
-            global __cuGraphMemsetNodeGetParams
-            __cuGraphMemsetNodeGetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphMemsetNodeGetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphMemsetNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphMemsetNodeSetParams
-            __cuGraphMemsetNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphMemsetNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddHostNode' in found_functions}}
-        try:
-            global __cuGraphAddHostNode
-            __cuGraphAddHostNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddHostNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphHostNodeGetParams' in found_functions}}
-        try:
-            global __cuGraphHostNodeGetParams
-            __cuGraphHostNodeGetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphHostNodeGetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphHostNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphHostNodeSetParams
-            __cuGraphHostNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphHostNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddChildGraphNode' in found_functions}}
-        try:
-            global __cuGraphAddChildGraphNode
-            __cuGraphAddChildGraphNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddChildGraphNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphChildGraphNodeGetGraph' in found_functions}}
-        try:
-            global __cuGraphChildGraphNodeGetGraph
-            __cuGraphChildGraphNodeGetGraph = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphChildGraphNodeGetGraph')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddEmptyNode' in found_functions}}
-        try:
-            global __cuGraphAddEmptyNode
-            __cuGraphAddEmptyNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddEmptyNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddEventRecordNode' in found_functions}}
-        try:
-            global __cuGraphAddEventRecordNode
-            __cuGraphAddEventRecordNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddEventRecordNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphEventRecordNodeGetEvent' in found_functions}}
-        try:
-            global __cuGraphEventRecordNodeGetEvent
-            __cuGraphEventRecordNodeGetEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphEventRecordNodeGetEvent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphEventRecordNodeSetEvent' in found_functions}}
-        try:
-            global __cuGraphEventRecordNodeSetEvent
-            __cuGraphEventRecordNodeSetEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphEventRecordNodeSetEvent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddEventWaitNode' in found_functions}}
-        try:
-            global __cuGraphAddEventWaitNode
-            __cuGraphAddEventWaitNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddEventWaitNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphEventWaitNodeGetEvent' in found_functions}}
-        try:
-            global __cuGraphEventWaitNodeGetEvent
-            __cuGraphEventWaitNodeGetEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphEventWaitNodeGetEvent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphEventWaitNodeSetEvent' in found_functions}}
-        try:
-            global __cuGraphEventWaitNodeSetEvent
-            __cuGraphEventWaitNodeSetEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphEventWaitNodeSetEvent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddExternalSemaphoresSignalNode' in found_functions}}
-        try:
-            global __cuGraphAddExternalSemaphoresSignalNode
-            __cuGraphAddExternalSemaphoresSignalNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddExternalSemaphoresSignalNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-        try:
-            global __cuGraphExternalSemaphoresSignalNodeGetParams
-            __cuGraphExternalSemaphoresSignalNodeGetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExternalSemaphoresSignalNodeGetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphExternalSemaphoresSignalNodeSetParams
-            __cuGraphExternalSemaphoresSignalNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExternalSemaphoresSignalNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddExternalSemaphoresWaitNode' in found_functions}}
-        try:
-            global __cuGraphAddExternalSemaphoresWaitNode
-            __cuGraphAddExternalSemaphoresWaitNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddExternalSemaphoresWaitNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-        try:
-            global __cuGraphExternalSemaphoresWaitNodeGetParams
-            __cuGraphExternalSemaphoresWaitNodeGetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExternalSemaphoresWaitNodeGetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphExternalSemaphoresWaitNodeSetParams
-            __cuGraphExternalSemaphoresWaitNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExternalSemaphoresWaitNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddBatchMemOpNode' in found_functions}}
-        try:
-            global __cuGraphAddBatchMemOpNode
-            __cuGraphAddBatchMemOpNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddBatchMemOpNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphBatchMemOpNodeGetParams' in found_functions}}
-        try:
-            global __cuGraphBatchMemOpNodeGetParams
-            __cuGraphBatchMemOpNodeGetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphBatchMemOpNodeGetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphBatchMemOpNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphBatchMemOpNodeSetParams
-            __cuGraphBatchMemOpNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphBatchMemOpNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecBatchMemOpNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphExecBatchMemOpNodeSetParams
-            __cuGraphExecBatchMemOpNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecBatchMemOpNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddMemAllocNode' in found_functions}}
-        try:
-            global __cuGraphAddMemAllocNode
-            __cuGraphAddMemAllocNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddMemAllocNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphMemAllocNodeGetParams' in found_functions}}
-        try:
-            global __cuGraphMemAllocNodeGetParams
-            __cuGraphMemAllocNodeGetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphMemAllocNodeGetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddMemFreeNode' in found_functions}}
-        try:
-            global __cuGraphAddMemFreeNode
-            __cuGraphAddMemFreeNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddMemFreeNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphMemFreeNodeGetParams' in found_functions}}
-        try:
-            global __cuGraphMemFreeNodeGetParams
-            __cuGraphMemFreeNodeGetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphMemFreeNodeGetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGraphMemTrim' in found_functions}}
-        try:
-            global __cuDeviceGraphMemTrim
-            __cuDeviceGraphMemTrim = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGraphMemTrim')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetGraphMemAttribute' in found_functions}}
-        try:
-            global __cuDeviceGetGraphMemAttribute
-            __cuDeviceGetGraphMemAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetGraphMemAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceSetGraphMemAttribute' in found_functions}}
-        try:
-            global __cuDeviceSetGraphMemAttribute
-            __cuDeviceSetGraphMemAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceSetGraphMemAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphClone' in found_functions}}
-        try:
-            global __cuGraphClone
-            __cuGraphClone = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphClone')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphNodeFindInClone' in found_functions}}
-        try:
-            global __cuGraphNodeFindInClone
-            __cuGraphNodeFindInClone = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphNodeFindInClone')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphNodeGetType' in found_functions}}
-        try:
-            global __cuGraphNodeGetType
-            __cuGraphNodeGetType = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphNodeGetType')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphGetNodes' in found_functions}}
-        try:
-            global __cuGraphGetNodes
-            __cuGraphGetNodes = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphGetNodes')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphGetRootNodes' in found_functions}}
-        try:
-            global __cuGraphGetRootNodes
-            __cuGraphGetRootNodes = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphGetRootNodes')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphGetEdges' in found_functions}}
-        try:
-            global __cuGraphGetEdges
-            __cuGraphGetEdges = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphGetEdges')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphGetEdges_v2' in found_functions}}
-        try:
-            global __cuGraphGetEdges_v2
-            __cuGraphGetEdges_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphGetEdges_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphNodeGetDependencies' in found_functions}}
-        try:
-            global __cuGraphNodeGetDependencies
-            __cuGraphNodeGetDependencies = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphNodeGetDependencies')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphNodeGetDependencies_v2' in found_functions}}
-        try:
-            global __cuGraphNodeGetDependencies_v2
-            __cuGraphNodeGetDependencies_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphNodeGetDependencies_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphNodeGetDependentNodes' in found_functions}}
-        try:
-            global __cuGraphNodeGetDependentNodes
-            __cuGraphNodeGetDependentNodes = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphNodeGetDependentNodes')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphNodeGetDependentNodes_v2' in found_functions}}
-        try:
-            global __cuGraphNodeGetDependentNodes_v2
-            __cuGraphNodeGetDependentNodes_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphNodeGetDependentNodes_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddDependencies' in found_functions}}
-        try:
-            global __cuGraphAddDependencies
-            __cuGraphAddDependencies = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddDependencies')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddDependencies_v2' in found_functions}}
-        try:
-            global __cuGraphAddDependencies_v2
-            __cuGraphAddDependencies_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddDependencies_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphRemoveDependencies' in found_functions}}
-        try:
-            global __cuGraphRemoveDependencies
-            __cuGraphRemoveDependencies = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphRemoveDependencies')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphRemoveDependencies_v2' in found_functions}}
-        try:
-            global __cuGraphRemoveDependencies_v2
-            __cuGraphRemoveDependencies_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphRemoveDependencies_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphDestroyNode' in found_functions}}
-        try:
-            global __cuGraphDestroyNode
-            __cuGraphDestroyNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphDestroyNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphInstantiateWithFlags' in found_functions}}
-        try:
-            global __cuGraphInstantiateWithFlags
-            __cuGraphInstantiateWithFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphInstantiateWithFlags')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecGetFlags' in found_functions}}
-        try:
-            global __cuGraphExecGetFlags
-            __cuGraphExecGetFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecGetFlags')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecKernelNodeSetParams_v2' in found_functions}}
-        try:
-            global __cuGraphExecKernelNodeSetParams_v2
-            __cuGraphExecKernelNodeSetParams_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecKernelNodeSetParams_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecMemcpyNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphExecMemcpyNodeSetParams
-            __cuGraphExecMemcpyNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecMemcpyNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecMemsetNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphExecMemsetNodeSetParams
-            __cuGraphExecMemsetNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecMemsetNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecHostNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphExecHostNodeSetParams
-            __cuGraphExecHostNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecHostNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecChildGraphNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphExecChildGraphNodeSetParams
-            __cuGraphExecChildGraphNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecChildGraphNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecEventRecordNodeSetEvent' in found_functions}}
-        try:
-            global __cuGraphExecEventRecordNodeSetEvent
-            __cuGraphExecEventRecordNodeSetEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecEventRecordNodeSetEvent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecEventWaitNodeSetEvent' in found_functions}}
-        try:
-            global __cuGraphExecEventWaitNodeSetEvent
-            __cuGraphExecEventWaitNodeSetEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecEventWaitNodeSetEvent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphExecExternalSemaphoresSignalNodeSetParams
-            __cuGraphExecExternalSemaphoresSignalNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecExternalSemaphoresSignalNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphExecExternalSemaphoresWaitNodeSetParams
-            __cuGraphExecExternalSemaphoresWaitNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecExternalSemaphoresWaitNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphNodeSetEnabled' in found_functions}}
-        try:
-            global __cuGraphNodeSetEnabled
-            __cuGraphNodeSetEnabled = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphNodeSetEnabled')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphNodeGetEnabled' in found_functions}}
-        try:
-            global __cuGraphNodeGetEnabled
-            __cuGraphNodeGetEnabled = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphNodeGetEnabled')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecDestroy' in found_functions}}
-        try:
-            global __cuGraphExecDestroy
-            __cuGraphExecDestroy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecDestroy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphDestroy' in found_functions}}
-        try:
-            global __cuGraphDestroy
-            __cuGraphDestroy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphDestroy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecUpdate_v2' in found_functions}}
-        try:
-            global __cuGraphExecUpdate_v2
-            __cuGraphExecUpdate_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecUpdate_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphKernelNodeCopyAttributes' in found_functions}}
-        try:
-            global __cuGraphKernelNodeCopyAttributes
-            __cuGraphKernelNodeCopyAttributes = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphKernelNodeCopyAttributes')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphKernelNodeGetAttribute' in found_functions}}
-        try:
-            global __cuGraphKernelNodeGetAttribute
-            __cuGraphKernelNodeGetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphKernelNodeGetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphKernelNodeSetAttribute' in found_functions}}
-        try:
-            global __cuGraphKernelNodeSetAttribute
-            __cuGraphKernelNodeSetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphKernelNodeSetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphDebugDotPrint' in found_functions}}
-        try:
-            global __cuGraphDebugDotPrint
-            __cuGraphDebugDotPrint = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphDebugDotPrint')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuUserObjectCreate' in found_functions}}
-        try:
-            global __cuUserObjectCreate
-            __cuUserObjectCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuUserObjectCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuUserObjectRetain' in found_functions}}
-        try:
-            global __cuUserObjectRetain
-            __cuUserObjectRetain = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuUserObjectRetain')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuUserObjectRelease' in found_functions}}
-        try:
-            global __cuUserObjectRelease
-            __cuUserObjectRelease = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuUserObjectRelease')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphRetainUserObject' in found_functions}}
-        try:
-            global __cuGraphRetainUserObject
-            __cuGraphRetainUserObject = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphRetainUserObject')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphReleaseUserObject' in found_functions}}
-        try:
-            global __cuGraphReleaseUserObject
-            __cuGraphReleaseUserObject = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphReleaseUserObject')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddNode' in found_functions}}
-        try:
-            global __cuGraphAddNode
-            __cuGraphAddNode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddNode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphAddNode_v2' in found_functions}}
-        try:
-            global __cuGraphAddNode_v2
-            __cuGraphAddNode_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphAddNode_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphNodeSetParams
-            __cuGraphNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphExecNodeSetParams' in found_functions}}
-        try:
-            global __cuGraphExecNodeSetParams
-            __cuGraphExecNodeSetParams = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphExecNodeSetParams')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphConditionalHandleCreate' in found_functions}}
-        try:
-            global __cuGraphConditionalHandleCreate
-            __cuGraphConditionalHandleCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphConditionalHandleCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-        try:
-            global __cuOccupancyMaxActiveBlocksPerMultiprocessor
-            __cuOccupancyMaxActiveBlocksPerMultiprocessor = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuOccupancyMaxActiveBlocksPerMultiprocessor')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-        try:
-            global __cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
-            __cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuOccupancyMaxPotentialBlockSize' in found_functions}}
-        try:
-            global __cuOccupancyMaxPotentialBlockSize
-            __cuOccupancyMaxPotentialBlockSize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuOccupancyMaxPotentialBlockSize')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuOccupancyMaxPotentialBlockSizeWithFlags' in found_functions}}
-        try:
-            global __cuOccupancyMaxPotentialBlockSizeWithFlags
-            __cuOccupancyMaxPotentialBlockSizeWithFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuOccupancyMaxPotentialBlockSizeWithFlags')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-        try:
-            global __cuOccupancyAvailableDynamicSMemPerBlock
-            __cuOccupancyAvailableDynamicSMemPerBlock = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuOccupancyAvailableDynamicSMemPerBlock')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuOccupancyMaxPotentialClusterSize' in found_functions}}
-        try:
-            global __cuOccupancyMaxPotentialClusterSize
-            __cuOccupancyMaxPotentialClusterSize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuOccupancyMaxPotentialClusterSize')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuOccupancyMaxActiveClusters' in found_functions}}
-        try:
-            global __cuOccupancyMaxActiveClusters
-            __cuOccupancyMaxActiveClusters = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuOccupancyMaxActiveClusters')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetArray' in found_functions}}
-        try:
-            global __cuTexRefSetArray
-            __cuTexRefSetArray = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetArray')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetMipmappedArray' in found_functions}}
-        try:
-            global __cuTexRefSetMipmappedArray
-            __cuTexRefSetMipmappedArray = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetMipmappedArray')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetAddress_v2' in found_functions}}
-        try:
-            global __cuTexRefSetAddress_v2
-            __cuTexRefSetAddress_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetAddress_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetAddress2D_v3' in found_functions}}
-        try:
-            global __cuTexRefSetAddress2D_v3
-            __cuTexRefSetAddress2D_v3 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetAddress2D_v3')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetFormat' in found_functions}}
-        try:
-            global __cuTexRefSetFormat
-            __cuTexRefSetFormat = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetFormat')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetAddressMode' in found_functions}}
-        try:
-            global __cuTexRefSetAddressMode
-            __cuTexRefSetAddressMode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetAddressMode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetFilterMode' in found_functions}}
-        try:
-            global __cuTexRefSetFilterMode
-            __cuTexRefSetFilterMode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetFilterMode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetMipmapFilterMode' in found_functions}}
-        try:
-            global __cuTexRefSetMipmapFilterMode
-            __cuTexRefSetMipmapFilterMode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetMipmapFilterMode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetMipmapLevelBias' in found_functions}}
-        try:
-            global __cuTexRefSetMipmapLevelBias
-            __cuTexRefSetMipmapLevelBias = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetMipmapLevelBias')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetMipmapLevelClamp' in found_functions}}
-        try:
-            global __cuTexRefSetMipmapLevelClamp
-            __cuTexRefSetMipmapLevelClamp = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetMipmapLevelClamp')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetMaxAnisotropy' in found_functions}}
-        try:
-            global __cuTexRefSetMaxAnisotropy
-            __cuTexRefSetMaxAnisotropy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetMaxAnisotropy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetBorderColor' in found_functions}}
-        try:
-            global __cuTexRefSetBorderColor
-            __cuTexRefSetBorderColor = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetBorderColor')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefSetFlags' in found_functions}}
-        try:
-            global __cuTexRefSetFlags
-            __cuTexRefSetFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefSetFlags')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetAddress_v2' in found_functions}}
-        try:
-            global __cuTexRefGetAddress_v2
-            __cuTexRefGetAddress_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetAddress_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetArray' in found_functions}}
-        try:
-            global __cuTexRefGetArray
-            __cuTexRefGetArray = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetArray')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetMipmappedArray' in found_functions}}
-        try:
-            global __cuTexRefGetMipmappedArray
-            __cuTexRefGetMipmappedArray = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetMipmappedArray')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetAddressMode' in found_functions}}
-        try:
-            global __cuTexRefGetAddressMode
-            __cuTexRefGetAddressMode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetAddressMode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetFilterMode' in found_functions}}
-        try:
-            global __cuTexRefGetFilterMode
-            __cuTexRefGetFilterMode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetFilterMode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetFormat' in found_functions}}
-        try:
-            global __cuTexRefGetFormat
-            __cuTexRefGetFormat = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetFormat')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetMipmapFilterMode' in found_functions}}
-        try:
-            global __cuTexRefGetMipmapFilterMode
-            __cuTexRefGetMipmapFilterMode = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetMipmapFilterMode')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetMipmapLevelBias' in found_functions}}
-        try:
-            global __cuTexRefGetMipmapLevelBias
-            __cuTexRefGetMipmapLevelBias = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetMipmapLevelBias')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetMipmapLevelClamp' in found_functions}}
-        try:
-            global __cuTexRefGetMipmapLevelClamp
-            __cuTexRefGetMipmapLevelClamp = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetMipmapLevelClamp')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetMaxAnisotropy' in found_functions}}
-        try:
-            global __cuTexRefGetMaxAnisotropy
-            __cuTexRefGetMaxAnisotropy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetMaxAnisotropy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetBorderColor' in found_functions}}
-        try:
-            global __cuTexRefGetBorderColor
-            __cuTexRefGetBorderColor = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetBorderColor')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefGetFlags' in found_functions}}
-        try:
-            global __cuTexRefGetFlags
-            __cuTexRefGetFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefGetFlags')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefCreate' in found_functions}}
-        try:
-            global __cuTexRefCreate
-            __cuTexRefCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexRefDestroy' in found_functions}}
-        try:
-            global __cuTexRefDestroy
-            __cuTexRefDestroy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexRefDestroy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuSurfRefSetArray' in found_functions}}
-        try:
-            global __cuSurfRefSetArray
-            __cuSurfRefSetArray = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuSurfRefSetArray')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuSurfRefGetArray' in found_functions}}
-        try:
-            global __cuSurfRefGetArray
-            __cuSurfRefGetArray = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuSurfRefGetArray')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexObjectCreate' in found_functions}}
-        try:
-            global __cuTexObjectCreate
-            __cuTexObjectCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexObjectCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexObjectDestroy' in found_functions}}
-        try:
-            global __cuTexObjectDestroy
-            __cuTexObjectDestroy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexObjectDestroy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexObjectGetResourceDesc' in found_functions}}
-        try:
-            global __cuTexObjectGetResourceDesc
-            __cuTexObjectGetResourceDesc = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexObjectGetResourceDesc')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexObjectGetTextureDesc' in found_functions}}
-        try:
-            global __cuTexObjectGetTextureDesc
-            __cuTexObjectGetTextureDesc = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexObjectGetTextureDesc')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTexObjectGetResourceViewDesc' in found_functions}}
-        try:
-            global __cuTexObjectGetResourceViewDesc
-            __cuTexObjectGetResourceViewDesc = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTexObjectGetResourceViewDesc')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuSurfObjectCreate' in found_functions}}
-        try:
-            global __cuSurfObjectCreate
-            __cuSurfObjectCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuSurfObjectCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuSurfObjectDestroy' in found_functions}}
-        try:
-            global __cuSurfObjectDestroy
-            __cuSurfObjectDestroy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuSurfObjectDestroy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuSurfObjectGetResourceDesc' in found_functions}}
-        try:
-            global __cuSurfObjectGetResourceDesc
-            __cuSurfObjectGetResourceDesc = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuSurfObjectGetResourceDesc')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTensorMapEncodeTiled' in found_functions}}
-        try:
-            global __cuTensorMapEncodeTiled
-            __cuTensorMapEncodeTiled = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTensorMapEncodeTiled')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTensorMapEncodeIm2col' in found_functions}}
-        try:
-            global __cuTensorMapEncodeIm2col
-            __cuTensorMapEncodeIm2col = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTensorMapEncodeIm2col')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuTensorMapReplaceAddress' in found_functions}}
-        try:
-            global __cuTensorMapReplaceAddress
-            __cuTensorMapReplaceAddress = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuTensorMapReplaceAddress')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceCanAccessPeer' in found_functions}}
-        try:
-            global __cuDeviceCanAccessPeer
-            __cuDeviceCanAccessPeer = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceCanAccessPeer')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxEnablePeerAccess' in found_functions}}
-        try:
-            global __cuCtxEnablePeerAccess
-            __cuCtxEnablePeerAccess = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxEnablePeerAccess')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxDisablePeerAccess' in found_functions}}
-        try:
-            global __cuCtxDisablePeerAccess
-            __cuCtxDisablePeerAccess = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxDisablePeerAccess')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetP2PAttribute' in found_functions}}
-        try:
-            global __cuDeviceGetP2PAttribute
-            __cuDeviceGetP2PAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetP2PAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphicsUnregisterResource' in found_functions}}
-        try:
-            global __cuGraphicsUnregisterResource
-            __cuGraphicsUnregisterResource = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsUnregisterResource')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphicsSubResourceGetMappedArray' in found_functions}}
-        try:
-            global __cuGraphicsSubResourceGetMappedArray
-            __cuGraphicsSubResourceGetMappedArray = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsSubResourceGetMappedArray')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-        try:
-            global __cuGraphicsResourceGetMappedMipmappedArray
-            __cuGraphicsResourceGetMappedMipmappedArray = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsResourceGetMappedMipmappedArray')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphicsResourceGetMappedPointer_v2' in found_functions}}
-        try:
-            global __cuGraphicsResourceGetMappedPointer_v2
-            __cuGraphicsResourceGetMappedPointer_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsResourceGetMappedPointer_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGraphicsResourceSetMapFlags_v2' in found_functions}}
-        try:
-            global __cuGraphicsResourceSetMapFlags_v2
-            __cuGraphicsResourceSetMapFlags_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsResourceSetMapFlags_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGetProcAddress_v2' in found_functions}}
-        try:
-            global __cuGetProcAddress_v2
-            __cuGetProcAddress_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGetProcAddress_v2')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCoredumpGetAttribute' in found_functions}}
-        try:
-            global __cuCoredumpGetAttribute
-            __cuCoredumpGetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCoredumpGetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCoredumpGetAttributeGlobal' in found_functions}}
-        try:
-            global __cuCoredumpGetAttributeGlobal
-            __cuCoredumpGetAttributeGlobal = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCoredumpGetAttributeGlobal')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCoredumpSetAttribute' in found_functions}}
-        try:
-            global __cuCoredumpSetAttribute
-            __cuCoredumpSetAttribute = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCoredumpSetAttribute')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCoredumpSetAttributeGlobal' in found_functions}}
-        try:
-            global __cuCoredumpSetAttributeGlobal
-            __cuCoredumpSetAttributeGlobal = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCoredumpSetAttributeGlobal')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGetExportTable' in found_functions}}
-        try:
-            global __cuGetExportTable
-            __cuGetExportTable = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGetExportTable')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGreenCtxCreate' in found_functions}}
-        try:
-            global __cuGreenCtxCreate
-            __cuGreenCtxCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGreenCtxCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGreenCtxDestroy' in found_functions}}
-        try:
-            global __cuGreenCtxDestroy
-            __cuGreenCtxDestroy = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGreenCtxDestroy')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxFromGreenCtx' in found_functions}}
-        try:
-            global __cuCtxFromGreenCtx
-            __cuCtxFromGreenCtx = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxFromGreenCtx')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDeviceGetDevResource' in found_functions}}
-        try:
-            global __cuDeviceGetDevResource
-            __cuDeviceGetDevResource = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDeviceGetDevResource')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuCtxGetDevResource' in found_functions}}
-        try:
-            global __cuCtxGetDevResource
-            __cuCtxGetDevResource = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuCtxGetDevResource')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGreenCtxGetDevResource' in found_functions}}
-        try:
-            global __cuGreenCtxGetDevResource
-            __cuGreenCtxGetDevResource = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGreenCtxGetDevResource')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDevSmResourceSplitByCount' in found_functions}}
-        try:
-            global __cuDevSmResourceSplitByCount
-            __cuDevSmResourceSplitByCount = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDevSmResourceSplitByCount')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuDevResourceGenerateDesc' in found_functions}}
-        try:
-            global __cuDevResourceGenerateDesc
-            __cuDevResourceGenerateDesc = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuDevResourceGenerateDesc')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGreenCtxRecordEvent' in found_functions}}
-        try:
-            global __cuGreenCtxRecordEvent
-            __cuGreenCtxRecordEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGreenCtxRecordEvent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGreenCtxWaitEvent' in found_functions}}
-        try:
-            global __cuGreenCtxWaitEvent
-            __cuGreenCtxWaitEvent = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGreenCtxWaitEvent')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuStreamGetGreenCtx' in found_functions}}
-        try:
-            global __cuStreamGetGreenCtx
-            __cuStreamGetGreenCtx = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuStreamGetGreenCtx')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuGreenCtxStreamCreate' in found_functions}}
-        try:
-            global __cuGreenCtxStreamCreate
-            __cuGreenCtxStreamCreate = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGreenCtxStreamCreate')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuProfilerStart' in found_functions}}
-        try:
-            global __cuProfilerStart
-            __cuProfilerStart = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuProfilerStart')
-        except:
-            pass
-        {{endif}}
-        {{if 'cuProfilerStop' in found_functions}}
-        try:
-            global __cuProfilerStop
-            __cuProfilerStop = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuProfilerStop')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuGraphicsEGLRegisterImage
-            __cuGraphicsEGLRegisterImage = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsEGLRegisterImage')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuEGLStreamConsumerConnect
-            __cuEGLStreamConsumerConnect = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEGLStreamConsumerConnect')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuEGLStreamConsumerConnectWithFlags
-            __cuEGLStreamConsumerConnectWithFlags = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEGLStreamConsumerConnectWithFlags')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuEGLStreamConsumerDisconnect
-            __cuEGLStreamConsumerDisconnect = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEGLStreamConsumerDisconnect')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuEGLStreamConsumerAcquireFrame
-            __cuEGLStreamConsumerAcquireFrame = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEGLStreamConsumerAcquireFrame')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuEGLStreamConsumerReleaseFrame
-            __cuEGLStreamConsumerReleaseFrame = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEGLStreamConsumerReleaseFrame')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuEGLStreamProducerConnect
-            __cuEGLStreamProducerConnect = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEGLStreamProducerConnect')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuEGLStreamProducerDisconnect
-            __cuEGLStreamProducerDisconnect = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEGLStreamProducerDisconnect')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuEGLStreamProducerPresentFrame
-            __cuEGLStreamProducerPresentFrame = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEGLStreamProducerPresentFrame')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuEGLStreamProducerReturnFrame
-            __cuEGLStreamProducerReturnFrame = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEGLStreamProducerReturnFrame')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuGraphicsResourceGetMappedEglFrame
-            __cuGraphicsResourceGetMappedEglFrame = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsResourceGetMappedEglFrame')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuEventCreateFromEGLSync
-            __cuEventCreateFromEGLSync = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuEventCreateFromEGLSync')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuGraphicsGLRegisterBuffer
-            __cuGraphicsGLRegisterBuffer = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsGLRegisterBuffer')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuGraphicsGLRegisterImage
-            __cuGraphicsGLRegisterImage = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsGLRegisterImage')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuGLGetDevices_v2
-            __cuGLGetDevices_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGLGetDevices_v2')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuVDPAUGetDevice
-            __cuVDPAUGetDevice = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuVDPAUGetDevice')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuVDPAUCtxCreate_v2
-            __cuVDPAUCtxCreate_v2 = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuVDPAUCtxCreate_v2')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuGraphicsVDPAURegisterVideoSurface
-            __cuGraphicsVDPAURegisterVideoSurface = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsVDPAURegisterVideoSurface')
-        except:
-            pass
-        {{endif}}
-        {{if True}}
-        try:
-            global __cuGraphicsVDPAURegisterOutputSurface
-            __cuGraphicsVDPAURegisterOutputSurface = <void*><unsigned long long>win32api.GetProcAddress(handle, 'cuGraphicsVDPAURegisterOutputSurface')
-        except:
-            pass
-        {{endif}}
-    {{else}}
-    # Load using dlsym
-    if usePTDS:
-        # Get all PTDS version of functions
-        pass
-        {{if 'cuMemcpy' in found_functions}}
-        global __cuMemcpy
-        __cuMemcpy = dlfcn.dlsym(handle, 'cuMemcpy_ptds')
-        {{endif}}
-        {{if 'cuMemcpyPeer' in found_functions}}
-        global __cuMemcpyPeer
-        __cuMemcpyPeer = dlfcn.dlsym(handle, 'cuMemcpyPeer_ptds')
-        {{endif}}
-        {{if 'cuMemcpyHtoD_v2' in found_functions}}
-        global __cuMemcpyHtoD_v2
-        __cuMemcpyHtoD_v2 = dlfcn.dlsym(handle, 'cuMemcpyHtoD_v2_ptds')
-        {{endif}}
-        {{if 'cuMemcpyDtoH_v2' in found_functions}}
-        global __cuMemcpyDtoH_v2
-        __cuMemcpyDtoH_v2 = dlfcn.dlsym(handle, 'cuMemcpyDtoH_v2_ptds')
-        {{endif}}
-        {{if 'cuMemcpyDtoD_v2' in found_functions}}
-        global __cuMemcpyDtoD_v2
-        __cuMemcpyDtoD_v2 = dlfcn.dlsym(handle, 'cuMemcpyDtoD_v2_ptds')
-        {{endif}}
-        {{if 'cuMemcpyDtoA_v2' in found_functions}}
-        global __cuMemcpyDtoA_v2
-        __cuMemcpyDtoA_v2 = dlfcn.dlsym(handle, 'cuMemcpyDtoA_v2_ptds')
-        {{endif}}
-        {{if 'cuMemcpyAtoD_v2' in found_functions}}
-        global __cuMemcpyAtoD_v2
-        __cuMemcpyAtoD_v2 = dlfcn.dlsym(handle, 'cuMemcpyAtoD_v2_ptds')
-        {{endif}}
-        {{if 'cuMemcpyHtoA_v2' in found_functions}}
-        global __cuMemcpyHtoA_v2
-        __cuMemcpyHtoA_v2 = dlfcn.dlsym(handle, 'cuMemcpyHtoA_v2_ptds')
-        {{endif}}
-        {{if 'cuMemcpyAtoH_v2' in found_functions}}
-        global __cuMemcpyAtoH_v2
-        __cuMemcpyAtoH_v2 = dlfcn.dlsym(handle, 'cuMemcpyAtoH_v2_ptds')
-        {{endif}}
-        {{if 'cuMemcpyAtoA_v2' in found_functions}}
-        global __cuMemcpyAtoA_v2
-        __cuMemcpyAtoA_v2 = dlfcn.dlsym(handle, 'cuMemcpyAtoA_v2_ptds')
-        {{endif}}
-        {{if 'cuMemcpy2D_v2' in found_functions}}
-        global __cuMemcpy2D_v2
-        __cuMemcpy2D_v2 = dlfcn.dlsym(handle, 'cuMemcpy2D_v2_ptds')
-        {{endif}}
-        {{if 'cuMemcpy2DUnaligned_v2' in found_functions}}
-        global __cuMemcpy2DUnaligned_v2
-        __cuMemcpy2DUnaligned_v2 = dlfcn.dlsym(handle, 'cuMemcpy2DUnaligned_v2_ptds')
-        {{endif}}
-        {{if 'cuMemcpy3D_v2' in found_functions}}
-        global __cuMemcpy3D_v2
-        __cuMemcpy3D_v2 = dlfcn.dlsym(handle, 'cuMemcpy3D_v2_ptds')
-        {{endif}}
-        {{if 'cuMemcpy3DPeer' in found_functions}}
-        global __cuMemcpy3DPeer
-        __cuMemcpy3DPeer = dlfcn.dlsym(handle, 'cuMemcpy3DPeer_ptds')
-        {{endif}}
-        {{if 'cuMemcpyAsync' in found_functions}}
-        global __cuMemcpyAsync
-        __cuMemcpyAsync = dlfcn.dlsym(handle, 'cuMemcpyAsync_ptsz')
-        {{endif}}
-        {{if 'cuMemcpyPeerAsync' in found_functions}}
-        global __cuMemcpyPeerAsync
-        __cuMemcpyPeerAsync = dlfcn.dlsym(handle, 'cuMemcpyPeerAsync_ptsz')
-        {{endif}}
-        {{if 'cuMemcpyHtoDAsync_v2' in found_functions}}
-        global __cuMemcpyHtoDAsync_v2
-        __cuMemcpyHtoDAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpyHtoDAsync_v2_ptsz')
-        {{endif}}
-        {{if 'cuMemcpyDtoHAsync_v2' in found_functions}}
-        global __cuMemcpyDtoHAsync_v2
-        __cuMemcpyDtoHAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpyDtoHAsync_v2_ptsz')
-        {{endif}}
-        {{if 'cuMemcpyDtoDAsync_v2' in found_functions}}
-        global __cuMemcpyDtoDAsync_v2
-        __cuMemcpyDtoDAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpyDtoDAsync_v2_ptsz')
-        {{endif}}
-        {{if 'cuMemcpyHtoAAsync_v2' in found_functions}}
-        global __cuMemcpyHtoAAsync_v2
-        __cuMemcpyHtoAAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpyHtoAAsync_v2_ptsz')
-        {{endif}}
-        {{if 'cuMemcpyAtoHAsync_v2' in found_functions}}
-        global __cuMemcpyAtoHAsync_v2
-        __cuMemcpyAtoHAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpyAtoHAsync_v2_ptsz')
-        {{endif}}
-        {{if 'cuMemcpy2DAsync_v2' in found_functions}}
-        global __cuMemcpy2DAsync_v2
-        __cuMemcpy2DAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpy2DAsync_v2_ptsz')
-        {{endif}}
-        {{if 'cuMemcpy3DAsync_v2' in found_functions}}
-        global __cuMemcpy3DAsync_v2
-        __cuMemcpy3DAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpy3DAsync_v2_ptsz')
-        {{endif}}
-        {{if 'cuMemcpy3DPeerAsync' in found_functions}}
-        global __cuMemcpy3DPeerAsync
-        __cuMemcpy3DPeerAsync = dlfcn.dlsym(handle, 'cuMemcpy3DPeerAsync_ptsz')
-        {{endif}}
-        {{if 'cuMemsetD8_v2' in found_functions}}
-        global __cuMemsetD8_v2
-        __cuMemsetD8_v2 = dlfcn.dlsym(handle, 'cuMemsetD8_v2_ptds')
-        {{endif}}
-        {{if 'cuMemsetD16_v2' in found_functions}}
-        global __cuMemsetD16_v2
-        __cuMemsetD16_v2 = dlfcn.dlsym(handle, 'cuMemsetD16_v2_ptds')
-        {{endif}}
-        {{if 'cuMemsetD32_v2' in found_functions}}
-        global __cuMemsetD32_v2
-        __cuMemsetD32_v2 = dlfcn.dlsym(handle, 'cuMemsetD32_v2_ptds')
-        {{endif}}
-        {{if 'cuMemsetD2D8_v2' in found_functions}}
-        global __cuMemsetD2D8_v2
-        __cuMemsetD2D8_v2 = dlfcn.dlsym(handle, 'cuMemsetD2D8_v2_ptds')
-        {{endif}}
-        {{if 'cuMemsetD2D16_v2' in found_functions}}
-        global __cuMemsetD2D16_v2
-        __cuMemsetD2D16_v2 = dlfcn.dlsym(handle, 'cuMemsetD2D16_v2_ptds')
-        {{endif}}
-        {{if 'cuMemsetD2D32_v2' in found_functions}}
-        global __cuMemsetD2D32_v2
-        __cuMemsetD2D32_v2 = dlfcn.dlsym(handle, 'cuMemsetD2D32_v2_ptds')
-        {{endif}}
-        {{if 'cuMemsetD8Async' in found_functions}}
-        global __cuMemsetD8Async
-        __cuMemsetD8Async = dlfcn.dlsym(handle, 'cuMemsetD8Async_ptsz')
-        {{endif}}
-        {{if 'cuMemsetD16Async' in found_functions}}
-        global __cuMemsetD16Async
-        __cuMemsetD16Async = dlfcn.dlsym(handle, 'cuMemsetD16Async_ptsz')
-        {{endif}}
-        {{if 'cuMemsetD32Async' in found_functions}}
-        global __cuMemsetD32Async
-        __cuMemsetD32Async = dlfcn.dlsym(handle, 'cuMemsetD32Async_ptsz')
-        {{endif}}
-        {{if 'cuMemsetD2D8Async' in found_functions}}
-        global __cuMemsetD2D8Async
-        __cuMemsetD2D8Async = dlfcn.dlsym(handle, 'cuMemsetD2D8Async_ptsz')
-        {{endif}}
-        {{if 'cuMemsetD2D16Async' in found_functions}}
-        global __cuMemsetD2D16Async
-        __cuMemsetD2D16Async = dlfcn.dlsym(handle, 'cuMemsetD2D16Async_ptsz')
-        {{endif}}
-        {{if 'cuMemsetD2D32Async' in found_functions}}
-        global __cuMemsetD2D32Async
-        __cuMemsetD2D32Async = dlfcn.dlsym(handle, 'cuMemsetD2D32Async_ptsz')
-        {{endif}}
-        {{if 'cuMemMapArrayAsync' in found_functions}}
-        global __cuMemMapArrayAsync
-        __cuMemMapArrayAsync = dlfcn.dlsym(handle, 'cuMemMapArrayAsync_ptsz')
-        {{endif}}
-        {{if 'cuMemFreeAsync' in found_functions}}
-        global __cuMemFreeAsync
-        __cuMemFreeAsync = dlfcn.dlsym(handle, 'cuMemFreeAsync_ptsz')
-        {{endif}}
-        {{if 'cuMemAllocAsync' in found_functions}}
-        global __cuMemAllocAsync
-        __cuMemAllocAsync = dlfcn.dlsym(handle, 'cuMemAllocAsync_ptsz')
-        {{endif}}
-        {{if 'cuMemAllocFromPoolAsync' in found_functions}}
-        global __cuMemAllocFromPoolAsync
-        __cuMemAllocFromPoolAsync = dlfcn.dlsym(handle, 'cuMemAllocFromPoolAsync_ptsz')
-        {{endif}}
-        {{if 'cuMemPrefetchAsync' in found_functions}}
-        global __cuMemPrefetchAsync
-        __cuMemPrefetchAsync = dlfcn.dlsym(handle, 'cuMemPrefetchAsync_ptsz')
-        {{endif}}
-        {{if 'cuMemPrefetchAsync_v2' in found_functions}}
-        global __cuMemPrefetchAsync_v2
-        __cuMemPrefetchAsync_v2 = dlfcn.dlsym(handle, 'cuMemPrefetchAsync_v2_ptsz')
-        {{endif}}
-        {{if 'cuStreamGetPriority' in found_functions}}
-        global __cuStreamGetPriority
-        __cuStreamGetPriority = dlfcn.dlsym(handle, 'cuStreamGetPriority_ptsz')
-        {{endif}}
-        {{if 'cuStreamGetFlags' in found_functions}}
-        global __cuStreamGetFlags
-        __cuStreamGetFlags = dlfcn.dlsym(handle, 'cuStreamGetFlags_ptsz')
-        {{endif}}
-        {{if 'cuStreamGetId' in found_functions}}
-        global __cuStreamGetId
-        __cuStreamGetId = dlfcn.dlsym(handle, 'cuStreamGetId_ptsz')
-        {{endif}}
-        {{if 'cuStreamGetCtx' in found_functions}}
-        global __cuStreamGetCtx
-        __cuStreamGetCtx = dlfcn.dlsym(handle, 'cuStreamGetCtx_ptsz')
-        {{endif}}
-        {{if 'cuStreamGetCtx_v2' in found_functions}}
-        global __cuStreamGetCtx_v2
-        __cuStreamGetCtx_v2 = dlfcn.dlsym(handle, 'cuStreamGetCtx_v2_ptsz')
-        {{endif}}
-        {{if 'cuStreamWaitEvent' in found_functions}}
-        global __cuStreamWaitEvent
-        __cuStreamWaitEvent = dlfcn.dlsym(handle, 'cuStreamWaitEvent_ptsz')
-        {{endif}}
-        {{if 'cuStreamAddCallback' in found_functions}}
-        global __cuStreamAddCallback
-        __cuStreamAddCallback = dlfcn.dlsym(handle, 'cuStreamAddCallback_ptsz')
-        {{endif}}
-        {{if 'cuStreamBeginCapture_v2' in found_functions}}
-        global __cuStreamBeginCapture_v2
-        __cuStreamBeginCapture_v2 = dlfcn.dlsym(handle, 'cuStreamBeginCapture_v2_ptsz')
-        {{endif}}
-        {{if 'cuStreamBeginCaptureToGraph' in found_functions}}
-        global __cuStreamBeginCaptureToGraph
-        __cuStreamBeginCaptureToGraph = dlfcn.dlsym(handle, 'cuStreamBeginCaptureToGraph_ptsz')
-        {{endif}}
-        {{if 'cuStreamEndCapture' in found_functions}}
-        global __cuStreamEndCapture
-        __cuStreamEndCapture = dlfcn.dlsym(handle, 'cuStreamEndCapture_ptsz')
-        {{endif}}
-        {{if 'cuStreamIsCapturing' in found_functions}}
-        global __cuStreamIsCapturing
-        __cuStreamIsCapturing = dlfcn.dlsym(handle, 'cuStreamIsCapturing_ptsz')
-        {{endif}}
-        {{if 'cuStreamGetCaptureInfo_v2' in found_functions}}
-        global __cuStreamGetCaptureInfo_v2
-        __cuStreamGetCaptureInfo_v2 = dlfcn.dlsym(handle, 'cuStreamGetCaptureInfo_v2_ptsz')
-        {{endif}}
-        {{if 'cuStreamGetCaptureInfo_v3' in found_functions}}
-        global __cuStreamGetCaptureInfo_v3
-        __cuStreamGetCaptureInfo_v3 = dlfcn.dlsym(handle, 'cuStreamGetCaptureInfo_v3_ptsz')
-        {{endif}}
-        {{if 'cuStreamUpdateCaptureDependencies' in found_functions}}
-        global __cuStreamUpdateCaptureDependencies
-        __cuStreamUpdateCaptureDependencies = dlfcn.dlsym(handle, 'cuStreamUpdateCaptureDependencies_ptsz')
-        {{endif}}
-        {{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}
-        global __cuStreamUpdateCaptureDependencies_v2
-        __cuStreamUpdateCaptureDependencies_v2 = dlfcn.dlsym(handle, 'cuStreamUpdateCaptureDependencies_v2_ptsz')
-        {{endif}}
-        {{if 'cuStreamAttachMemAsync' in found_functions}}
-        global __cuStreamAttachMemAsync
-        __cuStreamAttachMemAsync = dlfcn.dlsym(handle, 'cuStreamAttachMemAsync_ptsz')
-        {{endif}}
-        {{if 'cuStreamQuery' in found_functions}}
-        global __cuStreamQuery
-        __cuStreamQuery = dlfcn.dlsym(handle, 'cuStreamQuery_ptsz')
-        {{endif}}
-        {{if 'cuStreamSynchronize' in found_functions}}
-        global __cuStreamSynchronize
-        __cuStreamSynchronize = dlfcn.dlsym(handle, 'cuStreamSynchronize_ptsz')
-        {{endif}}
-        {{if 'cuStreamCopyAttributes' in found_functions}}
-        global __cuStreamCopyAttributes
-        __cuStreamCopyAttributes = dlfcn.dlsym(handle, 'cuStreamCopyAttributes_ptsz')
-        {{endif}}
-        {{if 'cuStreamGetAttribute' in found_functions}}
-        global __cuStreamGetAttribute
-        __cuStreamGetAttribute = dlfcn.dlsym(handle, 'cuStreamGetAttribute_ptsz')
-        {{endif}}
-        {{if 'cuStreamSetAttribute' in found_functions}}
-        global __cuStreamSetAttribute
-        __cuStreamSetAttribute = dlfcn.dlsym(handle, 'cuStreamSetAttribute_ptsz')
-        {{endif}}
-        {{if 'cuEventRecord' in found_functions}}
-        global __cuEventRecord
-        __cuEventRecord = dlfcn.dlsym(handle, 'cuEventRecord_ptsz')
-        {{endif}}
-        {{if 'cuEventRecordWithFlags' in found_functions}}
-        global __cuEventRecordWithFlags
-        __cuEventRecordWithFlags = dlfcn.dlsym(handle, 'cuEventRecordWithFlags_ptsz')
-        {{endif}}
-        {{if 'cuSignalExternalSemaphoresAsync' in found_functions}}
-        global __cuSignalExternalSemaphoresAsync
-        __cuSignalExternalSemaphoresAsync = dlfcn.dlsym(handle, 'cuSignalExternalSemaphoresAsync_ptsz')
-        {{endif}}
-        {{if 'cuWaitExternalSemaphoresAsync' in found_functions}}
-        global __cuWaitExternalSemaphoresAsync
-        __cuWaitExternalSemaphoresAsync = dlfcn.dlsym(handle, 'cuWaitExternalSemaphoresAsync_ptsz')
-        {{endif}}
-        {{if 'cuStreamWaitValue32_v2' in found_functions}}
-        global __cuStreamWaitValue32_v2
-        __cuStreamWaitValue32_v2 = dlfcn.dlsym(handle, 'cuStreamWaitValue32_v2_ptsz')
-        {{endif}}
-        {{if 'cuStreamWaitValue64_v2' in found_functions}}
-        global __cuStreamWaitValue64_v2
-        __cuStreamWaitValue64_v2 = dlfcn.dlsym(handle, 'cuStreamWaitValue64_v2_ptsz')
-        {{endif}}
-        {{if 'cuStreamWriteValue32_v2' in found_functions}}
-        global __cuStreamWriteValue32_v2
-        __cuStreamWriteValue32_v2 = dlfcn.dlsym(handle, 'cuStreamWriteValue32_v2_ptsz')
-        {{endif}}
-        {{if 'cuStreamWriteValue64_v2' in found_functions}}
-        global __cuStreamWriteValue64_v2
-        __cuStreamWriteValue64_v2 = dlfcn.dlsym(handle, 'cuStreamWriteValue64_v2_ptsz')
-        {{endif}}
-        {{if 'cuStreamBatchMemOp_v2' in found_functions}}
-        global __cuStreamBatchMemOp_v2
-        __cuStreamBatchMemOp_v2 = dlfcn.dlsym(handle, 'cuStreamBatchMemOp_v2_ptsz')
-        {{endif}}
-        {{if 'cuLaunchKernel' in found_functions}}
-        global __cuLaunchKernel
-        __cuLaunchKernel = dlfcn.dlsym(handle, 'cuLaunchKernel_ptsz')
-        {{endif}}
-        {{if 'cuLaunchKernelEx' in found_functions}}
-        global __cuLaunchKernelEx
-        __cuLaunchKernelEx = dlfcn.dlsym(handle, 'cuLaunchKernelEx_ptsz')
-        {{endif}}
-        {{if 'cuLaunchCooperativeKernel' in found_functions}}
-        global __cuLaunchCooperativeKernel
-        __cuLaunchCooperativeKernel = dlfcn.dlsym(handle, 'cuLaunchCooperativeKernel_ptsz')
-        {{endif}}
-        {{if 'cuLaunchHostFunc' in found_functions}}
-        global __cuLaunchHostFunc
-        __cuLaunchHostFunc = dlfcn.dlsym(handle, 'cuLaunchHostFunc_ptsz')
-        {{endif}}
-        {{if 'cuGraphInstantiateWithParams' in found_functions}}
-        global __cuGraphInstantiateWithParams
-        __cuGraphInstantiateWithParams = dlfcn.dlsym(handle, 'cuGraphInstantiateWithParams_ptsz')
-        {{endif}}
-        {{if 'cuGraphUpload' in found_functions}}
-        global __cuGraphUpload
-        __cuGraphUpload = dlfcn.dlsym(handle, 'cuGraphUpload_ptsz')
-        {{endif}}
-        {{if 'cuGraphLaunch' in found_functions}}
-        global __cuGraphLaunch
-        __cuGraphLaunch = dlfcn.dlsym(handle, 'cuGraphLaunch_ptsz')
-        {{endif}}
-        {{if 'cuGraphicsMapResources' in found_functions}}
-        global __cuGraphicsMapResources
-        __cuGraphicsMapResources = dlfcn.dlsym(handle, 'cuGraphicsMapResources_ptsz')
-        {{endif}}
-        {{if 'cuGraphicsUnmapResources' in found_functions}}
-        global __cuGraphicsUnmapResources
-        __cuGraphicsUnmapResources = dlfcn.dlsym(handle, 'cuGraphicsUnmapResources_ptsz')
-        {{endif}}
-    else:
-        # Else get the regular version
-        pass
-        {{if 'cuMemcpy' in found_functions}}
-        global __cuMemcpy
-        __cuMemcpy = dlfcn.dlsym(handle, 'cuMemcpy')
-        {{endif}}
-        {{if 'cuMemcpyPeer' in found_functions}}
-        global __cuMemcpyPeer
-        __cuMemcpyPeer = dlfcn.dlsym(handle, 'cuMemcpyPeer')
-        {{endif}}
-        {{if 'cuMemcpyHtoD_v2' in found_functions}}
-        global __cuMemcpyHtoD_v2
-        __cuMemcpyHtoD_v2 = dlfcn.dlsym(handle, 'cuMemcpyHtoD_v2')
-        {{endif}}
-        {{if 'cuMemcpyDtoH_v2' in found_functions}}
-        global __cuMemcpyDtoH_v2
-        __cuMemcpyDtoH_v2 = dlfcn.dlsym(handle, 'cuMemcpyDtoH_v2')
-        {{endif}}
-        {{if 'cuMemcpyDtoD_v2' in found_functions}}
-        global __cuMemcpyDtoD_v2
-        __cuMemcpyDtoD_v2 = dlfcn.dlsym(handle, 'cuMemcpyDtoD_v2')
-        {{endif}}
-        {{if 'cuMemcpyDtoA_v2' in found_functions}}
-        global __cuMemcpyDtoA_v2
-        __cuMemcpyDtoA_v2 = dlfcn.dlsym(handle, 'cuMemcpyDtoA_v2')
-        {{endif}}
-        {{if 'cuMemcpyAtoD_v2' in found_functions}}
-        global __cuMemcpyAtoD_v2
-        __cuMemcpyAtoD_v2 = dlfcn.dlsym(handle, 'cuMemcpyAtoD_v2')
-        {{endif}}
-        {{if 'cuMemcpyHtoA_v2' in found_functions}}
-        global __cuMemcpyHtoA_v2
-        __cuMemcpyHtoA_v2 = dlfcn.dlsym(handle, 'cuMemcpyHtoA_v2')
-        {{endif}}
-        {{if 'cuMemcpyAtoH_v2' in found_functions}}
-        global __cuMemcpyAtoH_v2
-        __cuMemcpyAtoH_v2 = dlfcn.dlsym(handle, 'cuMemcpyAtoH_v2')
-        {{endif}}
-        {{if 'cuMemcpyAtoA_v2' in found_functions}}
-        global __cuMemcpyAtoA_v2
-        __cuMemcpyAtoA_v2 = dlfcn.dlsym(handle, 'cuMemcpyAtoA_v2')
-        {{endif}}
-        {{if 'cuMemcpy2D_v2' in found_functions}}
-        global __cuMemcpy2D_v2
-        __cuMemcpy2D_v2 = dlfcn.dlsym(handle, 'cuMemcpy2D_v2')
-        {{endif}}
-        {{if 'cuMemcpy2DUnaligned_v2' in found_functions}}
-        global __cuMemcpy2DUnaligned_v2
-        __cuMemcpy2DUnaligned_v2 = dlfcn.dlsym(handle, 'cuMemcpy2DUnaligned_v2')
-        {{endif}}
-        {{if 'cuMemcpy3D_v2' in found_functions}}
-        global __cuMemcpy3D_v2
-        __cuMemcpy3D_v2 = dlfcn.dlsym(handle, 'cuMemcpy3D_v2')
-        {{endif}}
-        {{if 'cuMemcpy3DPeer' in found_functions}}
-        global __cuMemcpy3DPeer
-        __cuMemcpy3DPeer = dlfcn.dlsym(handle, 'cuMemcpy3DPeer')
-        {{endif}}
-        {{if 'cuMemcpyAsync' in found_functions}}
-        global __cuMemcpyAsync
-        __cuMemcpyAsync = dlfcn.dlsym(handle, 'cuMemcpyAsync')
-        {{endif}}
-        {{if 'cuMemcpyPeerAsync' in found_functions}}
-        global __cuMemcpyPeerAsync
-        __cuMemcpyPeerAsync = dlfcn.dlsym(handle, 'cuMemcpyPeerAsync')
-        {{endif}}
-        {{if 'cuMemcpyHtoDAsync_v2' in found_functions}}
-        global __cuMemcpyHtoDAsync_v2
-        __cuMemcpyHtoDAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpyHtoDAsync_v2')
-        {{endif}}
-        {{if 'cuMemcpyDtoHAsync_v2' in found_functions}}
-        global __cuMemcpyDtoHAsync_v2
-        __cuMemcpyDtoHAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpyDtoHAsync_v2')
-        {{endif}}
-        {{if 'cuMemcpyDtoDAsync_v2' in found_functions}}
-        global __cuMemcpyDtoDAsync_v2
-        __cuMemcpyDtoDAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpyDtoDAsync_v2')
-        {{endif}}
-        {{if 'cuMemcpyHtoAAsync_v2' in found_functions}}
-        global __cuMemcpyHtoAAsync_v2
-        __cuMemcpyHtoAAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpyHtoAAsync_v2')
-        {{endif}}
-        {{if 'cuMemcpyAtoHAsync_v2' in found_functions}}
-        global __cuMemcpyAtoHAsync_v2
-        __cuMemcpyAtoHAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpyAtoHAsync_v2')
-        {{endif}}
-        {{if 'cuMemcpy2DAsync_v2' in found_functions}}
-        global __cuMemcpy2DAsync_v2
-        __cuMemcpy2DAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpy2DAsync_v2')
-        {{endif}}
-        {{if 'cuMemcpy3DAsync_v2' in found_functions}}
-        global __cuMemcpy3DAsync_v2
-        __cuMemcpy3DAsync_v2 = dlfcn.dlsym(handle, 'cuMemcpy3DAsync_v2')
-        {{endif}}
-        {{if 'cuMemcpy3DPeerAsync' in found_functions}}
-        global __cuMemcpy3DPeerAsync
-        __cuMemcpy3DPeerAsync = dlfcn.dlsym(handle, 'cuMemcpy3DPeerAsync')
-        {{endif}}
-        {{if 'cuMemsetD8_v2' in found_functions}}
-        global __cuMemsetD8_v2
-        __cuMemsetD8_v2 = dlfcn.dlsym(handle, 'cuMemsetD8_v2')
-        {{endif}}
-        {{if 'cuMemsetD16_v2' in found_functions}}
-        global __cuMemsetD16_v2
-        __cuMemsetD16_v2 = dlfcn.dlsym(handle, 'cuMemsetD16_v2')
-        {{endif}}
-        {{if 'cuMemsetD32_v2' in found_functions}}
-        global __cuMemsetD32_v2
-        __cuMemsetD32_v2 = dlfcn.dlsym(handle, 'cuMemsetD32_v2')
-        {{endif}}
-        {{if 'cuMemsetD2D8_v2' in found_functions}}
-        global __cuMemsetD2D8_v2
-        __cuMemsetD2D8_v2 = dlfcn.dlsym(handle, 'cuMemsetD2D8_v2')
-        {{endif}}
-        {{if 'cuMemsetD2D16_v2' in found_functions}}
-        global __cuMemsetD2D16_v2
-        __cuMemsetD2D16_v2 = dlfcn.dlsym(handle, 'cuMemsetD2D16_v2')
-        {{endif}}
-        {{if 'cuMemsetD2D32_v2' in found_functions}}
-        global __cuMemsetD2D32_v2
-        __cuMemsetD2D32_v2 = dlfcn.dlsym(handle, 'cuMemsetD2D32_v2')
-        {{endif}}
-        {{if 'cuMemsetD8Async' in found_functions}}
-        global __cuMemsetD8Async
-        __cuMemsetD8Async = dlfcn.dlsym(handle, 'cuMemsetD8Async')
-        {{endif}}
-        {{if 'cuMemsetD16Async' in found_functions}}
-        global __cuMemsetD16Async
-        __cuMemsetD16Async = dlfcn.dlsym(handle, 'cuMemsetD16Async')
-        {{endif}}
-        {{if 'cuMemsetD32Async' in found_functions}}
-        global __cuMemsetD32Async
-        __cuMemsetD32Async = dlfcn.dlsym(handle, 'cuMemsetD32Async')
-        {{endif}}
-        {{if 'cuMemsetD2D8Async' in found_functions}}
-        global __cuMemsetD2D8Async
-        __cuMemsetD2D8Async = dlfcn.dlsym(handle, 'cuMemsetD2D8Async')
-        {{endif}}
-        {{if 'cuMemsetD2D16Async' in found_functions}}
-        global __cuMemsetD2D16Async
-        __cuMemsetD2D16Async = dlfcn.dlsym(handle, 'cuMemsetD2D16Async')
-        {{endif}}
-        {{if 'cuMemsetD2D32Async' in found_functions}}
-        global __cuMemsetD2D32Async
-        __cuMemsetD2D32Async = dlfcn.dlsym(handle, 'cuMemsetD2D32Async')
-        {{endif}}
-        {{if 'cuMemMapArrayAsync' in found_functions}}
-        global __cuMemMapArrayAsync
-        __cuMemMapArrayAsync = dlfcn.dlsym(handle, 'cuMemMapArrayAsync')
-        {{endif}}
-        {{if 'cuMemFreeAsync' in found_functions}}
-        global __cuMemFreeAsync
-        __cuMemFreeAsync = dlfcn.dlsym(handle, 'cuMemFreeAsync')
-        {{endif}}
-        {{if 'cuMemAllocAsync' in found_functions}}
-        global __cuMemAllocAsync
-        __cuMemAllocAsync = dlfcn.dlsym(handle, 'cuMemAllocAsync')
-        {{endif}}
-        {{if 'cuMemAllocFromPoolAsync' in found_functions}}
-        global __cuMemAllocFromPoolAsync
-        __cuMemAllocFromPoolAsync = dlfcn.dlsym(handle, 'cuMemAllocFromPoolAsync')
-        {{endif}}
-        {{if 'cuMemPrefetchAsync' in found_functions}}
-        global __cuMemPrefetchAsync
-        __cuMemPrefetchAsync = dlfcn.dlsym(handle, 'cuMemPrefetchAsync')
-        {{endif}}
-        {{if 'cuMemPrefetchAsync_v2' in found_functions}}
-        global __cuMemPrefetchAsync_v2
-        __cuMemPrefetchAsync_v2 = dlfcn.dlsym(handle, 'cuMemPrefetchAsync_v2')
-        {{endif}}
-        {{if 'cuStreamGetPriority' in found_functions}}
-        global __cuStreamGetPriority
-        __cuStreamGetPriority = dlfcn.dlsym(handle, 'cuStreamGetPriority')
-        {{endif}}
-        {{if 'cuStreamGetFlags' in found_functions}}
-        global __cuStreamGetFlags
-        __cuStreamGetFlags = dlfcn.dlsym(handle, 'cuStreamGetFlags')
-        {{endif}}
-        {{if 'cuStreamGetId' in found_functions}}
-        global __cuStreamGetId
-        __cuStreamGetId = dlfcn.dlsym(handle, 'cuStreamGetId')
-        {{endif}}
-        {{if 'cuStreamGetCtx' in found_functions}}
-        global __cuStreamGetCtx
-        __cuStreamGetCtx = dlfcn.dlsym(handle, 'cuStreamGetCtx')
-        {{endif}}
-        {{if 'cuStreamGetCtx_v2' in found_functions}}
-        global __cuStreamGetCtx_v2
-        __cuStreamGetCtx_v2 = dlfcn.dlsym(handle, 'cuStreamGetCtx_v2')
-        {{endif}}
-        {{if 'cuStreamWaitEvent' in found_functions}}
-        global __cuStreamWaitEvent
-        __cuStreamWaitEvent = dlfcn.dlsym(handle, 'cuStreamWaitEvent')
-        {{endif}}
-        {{if 'cuStreamAddCallback' in found_functions}}
-        global __cuStreamAddCallback
-        __cuStreamAddCallback = dlfcn.dlsym(handle, 'cuStreamAddCallback')
-        {{endif}}
-        {{if 'cuStreamBeginCapture_v2' in found_functions}}
-        global __cuStreamBeginCapture_v2
-        __cuStreamBeginCapture_v2 = dlfcn.dlsym(handle, 'cuStreamBeginCapture_v2')
-        {{endif}}
-        {{if 'cuStreamBeginCaptureToGraph' in found_functions}}
-        global __cuStreamBeginCaptureToGraph
-        __cuStreamBeginCaptureToGraph = dlfcn.dlsym(handle, 'cuStreamBeginCaptureToGraph')
-        {{endif}}
-        {{if 'cuStreamEndCapture' in found_functions}}
-        global __cuStreamEndCapture
-        __cuStreamEndCapture = dlfcn.dlsym(handle, 'cuStreamEndCapture')
-        {{endif}}
-        {{if 'cuStreamIsCapturing' in found_functions}}
-        global __cuStreamIsCapturing
-        __cuStreamIsCapturing = dlfcn.dlsym(handle, 'cuStreamIsCapturing')
-        {{endif}}
-        {{if 'cuStreamGetCaptureInfo_v2' in found_functions}}
-        global __cuStreamGetCaptureInfo_v2
-        __cuStreamGetCaptureInfo_v2 = dlfcn.dlsym(handle, 'cuStreamGetCaptureInfo_v2')
-        {{endif}}
-        {{if 'cuStreamGetCaptureInfo_v3' in found_functions}}
-        global __cuStreamGetCaptureInfo_v3
-        __cuStreamGetCaptureInfo_v3 = dlfcn.dlsym(handle, 'cuStreamGetCaptureInfo_v3')
-        {{endif}}
-        {{if 'cuStreamUpdateCaptureDependencies' in found_functions}}
-        global __cuStreamUpdateCaptureDependencies
-        __cuStreamUpdateCaptureDependencies = dlfcn.dlsym(handle, 'cuStreamUpdateCaptureDependencies')
-        {{endif}}
-        {{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}
-        global __cuStreamUpdateCaptureDependencies_v2
-        __cuStreamUpdateCaptureDependencies_v2 = dlfcn.dlsym(handle, 'cuStreamUpdateCaptureDependencies_v2')
-        {{endif}}
-        {{if 'cuStreamAttachMemAsync' in found_functions}}
-        global __cuStreamAttachMemAsync
-        __cuStreamAttachMemAsync = dlfcn.dlsym(handle, 'cuStreamAttachMemAsync')
-        {{endif}}
-        {{if 'cuStreamQuery' in found_functions}}
-        global __cuStreamQuery
-        __cuStreamQuery = dlfcn.dlsym(handle, 'cuStreamQuery')
-        {{endif}}
-        {{if 'cuStreamSynchronize' in found_functions}}
-        global __cuStreamSynchronize
-        __cuStreamSynchronize = dlfcn.dlsym(handle, 'cuStreamSynchronize')
-        {{endif}}
-        {{if 'cuStreamCopyAttributes' in found_functions}}
-        global __cuStreamCopyAttributes
-        __cuStreamCopyAttributes = dlfcn.dlsym(handle, 'cuStreamCopyAttributes')
-        {{endif}}
-        {{if 'cuStreamGetAttribute' in found_functions}}
-        global __cuStreamGetAttribute
-        __cuStreamGetAttribute = dlfcn.dlsym(handle, 'cuStreamGetAttribute')
-        {{endif}}
-        {{if 'cuStreamSetAttribute' in found_functions}}
-        global __cuStreamSetAttribute
-        __cuStreamSetAttribute = dlfcn.dlsym(handle, 'cuStreamSetAttribute')
-        {{endif}}
-        {{if 'cuEventRecord' in found_functions}}
-        global __cuEventRecord
-        __cuEventRecord = dlfcn.dlsym(handle, 'cuEventRecord')
-        {{endif}}
-        {{if 'cuEventRecordWithFlags' in found_functions}}
-        global __cuEventRecordWithFlags
-        __cuEventRecordWithFlags = dlfcn.dlsym(handle, 'cuEventRecordWithFlags')
-        {{endif}}
-        {{if 'cuSignalExternalSemaphoresAsync' in found_functions}}
-        global __cuSignalExternalSemaphoresAsync
-        __cuSignalExternalSemaphoresAsync = dlfcn.dlsym(handle, 'cuSignalExternalSemaphoresAsync')
-        {{endif}}
-        {{if 'cuWaitExternalSemaphoresAsync' in found_functions}}
-        global __cuWaitExternalSemaphoresAsync
-        __cuWaitExternalSemaphoresAsync = dlfcn.dlsym(handle, 'cuWaitExternalSemaphoresAsync')
-        {{endif}}
-        {{if 'cuStreamWaitValue32_v2' in found_functions}}
-        global __cuStreamWaitValue32_v2
-        __cuStreamWaitValue32_v2 = dlfcn.dlsym(handle, 'cuStreamWaitValue32_v2')
-        {{endif}}
-        {{if 'cuStreamWaitValue64_v2' in found_functions}}
-        global __cuStreamWaitValue64_v2
-        __cuStreamWaitValue64_v2 = dlfcn.dlsym(handle, 'cuStreamWaitValue64_v2')
-        {{endif}}
-        {{if 'cuStreamWriteValue32_v2' in found_functions}}
-        global __cuStreamWriteValue32_v2
-        __cuStreamWriteValue32_v2 = dlfcn.dlsym(handle, 'cuStreamWriteValue32_v2')
-        {{endif}}
-        {{if 'cuStreamWriteValue64_v2' in found_functions}}
-        global __cuStreamWriteValue64_v2
-        __cuStreamWriteValue64_v2 = dlfcn.dlsym(handle, 'cuStreamWriteValue64_v2')
-        {{endif}}
-        {{if 'cuStreamBatchMemOp_v2' in found_functions}}
-        global __cuStreamBatchMemOp_v2
-        __cuStreamBatchMemOp_v2 = dlfcn.dlsym(handle, 'cuStreamBatchMemOp_v2')
-        {{endif}}
-        {{if 'cuLaunchKernel' in found_functions}}
-        global __cuLaunchKernel
-        __cuLaunchKernel = dlfcn.dlsym(handle, 'cuLaunchKernel')
-        {{endif}}
-        {{if 'cuLaunchKernelEx' in found_functions}}
-        global __cuLaunchKernelEx
-        __cuLaunchKernelEx = dlfcn.dlsym(handle, 'cuLaunchKernelEx')
-        {{endif}}
-        {{if 'cuLaunchCooperativeKernel' in found_functions}}
-        global __cuLaunchCooperativeKernel
-        __cuLaunchCooperativeKernel = dlfcn.dlsym(handle, 'cuLaunchCooperativeKernel')
-        {{endif}}
-        {{if 'cuLaunchHostFunc' in found_functions}}
-        global __cuLaunchHostFunc
-        __cuLaunchHostFunc = dlfcn.dlsym(handle, 'cuLaunchHostFunc')
-        {{endif}}
-        {{if 'cuGraphInstantiateWithParams' in found_functions}}
-        global __cuGraphInstantiateWithParams
-        __cuGraphInstantiateWithParams = dlfcn.dlsym(handle, 'cuGraphInstantiateWithParams')
-        {{endif}}
-        {{if 'cuGraphUpload' in found_functions}}
-        global __cuGraphUpload
-        __cuGraphUpload = dlfcn.dlsym(handle, 'cuGraphUpload')
-        {{endif}}
-        {{if 'cuGraphLaunch' in found_functions}}
-        global __cuGraphLaunch
-        __cuGraphLaunch = dlfcn.dlsym(handle, 'cuGraphLaunch')
-        {{endif}}
-        {{if 'cuGraphicsMapResources' in found_functions}}
-        global __cuGraphicsMapResources
-        __cuGraphicsMapResources = dlfcn.dlsym(handle, 'cuGraphicsMapResources')
-        {{endif}}
-        {{if 'cuGraphicsUnmapResources' in found_functions}}
-        global __cuGraphicsUnmapResources
-        __cuGraphicsUnmapResources = dlfcn.dlsym(handle, 'cuGraphicsUnmapResources')
-        {{endif}}
-    # Get remaining functions
-    {{if 'cuGetErrorString' in found_functions}}
-    global __cuGetErrorString
-    __cuGetErrorString = dlfcn.dlsym(handle, 'cuGetErrorString')
-    {{endif}}
-    {{if 'cuGetErrorName' in found_functions}}
-    global __cuGetErrorName
-    __cuGetErrorName = dlfcn.dlsym(handle, 'cuGetErrorName')
-    {{endif}}
-    {{if 'cuInit' in found_functions}}
-    global __cuInit
-    __cuInit = dlfcn.dlsym(handle, 'cuInit')
-    {{endif}}
-    {{if 'cuDriverGetVersion' in found_functions}}
-    global __cuDriverGetVersion
-    __cuDriverGetVersion = dlfcn.dlsym(handle, 'cuDriverGetVersion')
-    {{endif}}
-    {{if 'cuDeviceGet' in found_functions}}
-    global __cuDeviceGet
-    __cuDeviceGet = dlfcn.dlsym(handle, 'cuDeviceGet')
-    {{endif}}
-    {{if 'cuDeviceGetCount' in found_functions}}
-    global __cuDeviceGetCount
-    __cuDeviceGetCount = dlfcn.dlsym(handle, 'cuDeviceGetCount')
-    {{endif}}
-    {{if 'cuDeviceGetName' in found_functions}}
-    global __cuDeviceGetName
-    __cuDeviceGetName = dlfcn.dlsym(handle, 'cuDeviceGetName')
-    {{endif}}
-    {{if 'cuDeviceGetUuid' in found_functions}}
-    global __cuDeviceGetUuid
-    __cuDeviceGetUuid = dlfcn.dlsym(handle, 'cuDeviceGetUuid')
-    {{endif}}
-    {{if 'cuDeviceGetUuid_v2' in found_functions}}
-    global __cuDeviceGetUuid_v2
-    __cuDeviceGetUuid_v2 = dlfcn.dlsym(handle, 'cuDeviceGetUuid_v2')
-    {{endif}}
-    {{if 'cuDeviceGetLuid' in found_functions}}
-    global __cuDeviceGetLuid
-    __cuDeviceGetLuid = dlfcn.dlsym(handle, 'cuDeviceGetLuid')
-    {{endif}}
-    {{if 'cuDeviceTotalMem_v2' in found_functions}}
-    global __cuDeviceTotalMem_v2
-    __cuDeviceTotalMem_v2 = dlfcn.dlsym(handle, 'cuDeviceTotalMem_v2')
-    {{endif}}
-    {{if 'cuDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-    global __cuDeviceGetTexture1DLinearMaxWidth
-    __cuDeviceGetTexture1DLinearMaxWidth = dlfcn.dlsym(handle, 'cuDeviceGetTexture1DLinearMaxWidth')
-    {{endif}}
-    {{if 'cuDeviceGetAttribute' in found_functions}}
-    global __cuDeviceGetAttribute
-    __cuDeviceGetAttribute = dlfcn.dlsym(handle, 'cuDeviceGetAttribute')
-    {{endif}}
-    {{if 'cuDeviceGetNvSciSyncAttributes' in found_functions}}
-    global __cuDeviceGetNvSciSyncAttributes
-    __cuDeviceGetNvSciSyncAttributes = dlfcn.dlsym(handle, 'cuDeviceGetNvSciSyncAttributes')
-    {{endif}}
-    {{if 'cuDeviceSetMemPool' in found_functions}}
-    global __cuDeviceSetMemPool
-    __cuDeviceSetMemPool = dlfcn.dlsym(handle, 'cuDeviceSetMemPool')
-    {{endif}}
-    {{if 'cuDeviceGetMemPool' in found_functions}}
-    global __cuDeviceGetMemPool
-    __cuDeviceGetMemPool = dlfcn.dlsym(handle, 'cuDeviceGetMemPool')
-    {{endif}}
-    {{if 'cuDeviceGetDefaultMemPool' in found_functions}}
-    global __cuDeviceGetDefaultMemPool
-    __cuDeviceGetDefaultMemPool = dlfcn.dlsym(handle, 'cuDeviceGetDefaultMemPool')
-    {{endif}}
-    {{if 'cuDeviceGetExecAffinitySupport' in found_functions}}
-    global __cuDeviceGetExecAffinitySupport
-    __cuDeviceGetExecAffinitySupport = dlfcn.dlsym(handle, 'cuDeviceGetExecAffinitySupport')
-    {{endif}}
-    {{if 'cuFlushGPUDirectRDMAWrites' in found_functions}}
-    global __cuFlushGPUDirectRDMAWrites
-    __cuFlushGPUDirectRDMAWrites = dlfcn.dlsym(handle, 'cuFlushGPUDirectRDMAWrites')
-    {{endif}}
-    {{if 'cuDeviceGetProperties' in found_functions}}
-    global __cuDeviceGetProperties
-    __cuDeviceGetProperties = dlfcn.dlsym(handle, 'cuDeviceGetProperties')
-    {{endif}}
-    {{if 'cuDeviceComputeCapability' in found_functions}}
-    global __cuDeviceComputeCapability
-    __cuDeviceComputeCapability = dlfcn.dlsym(handle, 'cuDeviceComputeCapability')
-    {{endif}}
-    {{if 'cuDevicePrimaryCtxRetain' in found_functions}}
-    global __cuDevicePrimaryCtxRetain
-    __cuDevicePrimaryCtxRetain = dlfcn.dlsym(handle, 'cuDevicePrimaryCtxRetain')
-    {{endif}}
-    {{if 'cuDevicePrimaryCtxRelease_v2' in found_functions}}
-    global __cuDevicePrimaryCtxRelease_v2
-    __cuDevicePrimaryCtxRelease_v2 = dlfcn.dlsym(handle, 'cuDevicePrimaryCtxRelease_v2')
-    {{endif}}
-    {{if 'cuDevicePrimaryCtxSetFlags_v2' in found_functions}}
-    global __cuDevicePrimaryCtxSetFlags_v2
-    __cuDevicePrimaryCtxSetFlags_v2 = dlfcn.dlsym(handle, 'cuDevicePrimaryCtxSetFlags_v2')
-    {{endif}}
-    {{if 'cuDevicePrimaryCtxGetState' in found_functions}}
-    global __cuDevicePrimaryCtxGetState
-    __cuDevicePrimaryCtxGetState = dlfcn.dlsym(handle, 'cuDevicePrimaryCtxGetState')
-    {{endif}}
-    {{if 'cuDevicePrimaryCtxReset_v2' in found_functions}}
-    global __cuDevicePrimaryCtxReset_v2
-    __cuDevicePrimaryCtxReset_v2 = dlfcn.dlsym(handle, 'cuDevicePrimaryCtxReset_v2')
-    {{endif}}
-    {{if 'cuCtxCreate_v2' in found_functions}}
-    global __cuCtxCreate_v2
-    __cuCtxCreate_v2 = dlfcn.dlsym(handle, 'cuCtxCreate_v2')
-    {{endif}}
-    {{if 'cuCtxCreate_v3' in found_functions}}
-    global __cuCtxCreate_v3
-    __cuCtxCreate_v3 = dlfcn.dlsym(handle, 'cuCtxCreate_v3')
-    {{endif}}
-    {{if 'cuCtxCreate_v4' in found_functions}}
-    global __cuCtxCreate_v4
-    __cuCtxCreate_v4 = dlfcn.dlsym(handle, 'cuCtxCreate_v4')
-    {{endif}}
-    {{if 'cuCtxDestroy_v2' in found_functions}}
-    global __cuCtxDestroy_v2
-    __cuCtxDestroy_v2 = dlfcn.dlsym(handle, 'cuCtxDestroy_v2')
-    {{endif}}
-    {{if 'cuCtxPushCurrent_v2' in found_functions}}
-    global __cuCtxPushCurrent_v2
-    __cuCtxPushCurrent_v2 = dlfcn.dlsym(handle, 'cuCtxPushCurrent_v2')
-    {{endif}}
-    {{if 'cuCtxPopCurrent_v2' in found_functions}}
-    global __cuCtxPopCurrent_v2
-    __cuCtxPopCurrent_v2 = dlfcn.dlsym(handle, 'cuCtxPopCurrent_v2')
-    {{endif}}
-    {{if 'cuCtxSetCurrent' in found_functions}}
-    global __cuCtxSetCurrent
-    __cuCtxSetCurrent = dlfcn.dlsym(handle, 'cuCtxSetCurrent')
-    {{endif}}
-    {{if 'cuCtxGetCurrent' in found_functions}}
-    global __cuCtxGetCurrent
-    __cuCtxGetCurrent = dlfcn.dlsym(handle, 'cuCtxGetCurrent')
-    {{endif}}
-    {{if 'cuCtxGetDevice' in found_functions}}
-    global __cuCtxGetDevice
-    __cuCtxGetDevice = dlfcn.dlsym(handle, 'cuCtxGetDevice')
-    {{endif}}
-    {{if 'cuCtxGetFlags' in found_functions}}
-    global __cuCtxGetFlags
-    __cuCtxGetFlags = dlfcn.dlsym(handle, 'cuCtxGetFlags')
-    {{endif}}
-    {{if 'cuCtxSetFlags' in found_functions}}
-    global __cuCtxSetFlags
-    __cuCtxSetFlags = dlfcn.dlsym(handle, 'cuCtxSetFlags')
-    {{endif}}
-    {{if 'cuCtxGetId' in found_functions}}
-    global __cuCtxGetId
-    __cuCtxGetId = dlfcn.dlsym(handle, 'cuCtxGetId')
-    {{endif}}
-    {{if 'cuCtxSynchronize' in found_functions}}
-    global __cuCtxSynchronize
-    __cuCtxSynchronize = dlfcn.dlsym(handle, 'cuCtxSynchronize')
-    {{endif}}
-    {{if 'cuCtxSetLimit' in found_functions}}
-    global __cuCtxSetLimit
-    __cuCtxSetLimit = dlfcn.dlsym(handle, 'cuCtxSetLimit')
-    {{endif}}
-    {{if 'cuCtxGetLimit' in found_functions}}
-    global __cuCtxGetLimit
-    __cuCtxGetLimit = dlfcn.dlsym(handle, 'cuCtxGetLimit')
-    {{endif}}
-    {{if 'cuCtxGetCacheConfig' in found_functions}}
-    global __cuCtxGetCacheConfig
-    __cuCtxGetCacheConfig = dlfcn.dlsym(handle, 'cuCtxGetCacheConfig')
-    {{endif}}
-    {{if 'cuCtxSetCacheConfig' in found_functions}}
-    global __cuCtxSetCacheConfig
-    __cuCtxSetCacheConfig = dlfcn.dlsym(handle, 'cuCtxSetCacheConfig')
-    {{endif}}
-    {{if 'cuCtxGetApiVersion' in found_functions}}
-    global __cuCtxGetApiVersion
-    __cuCtxGetApiVersion = dlfcn.dlsym(handle, 'cuCtxGetApiVersion')
-    {{endif}}
-    {{if 'cuCtxGetStreamPriorityRange' in found_functions}}
-    global __cuCtxGetStreamPriorityRange
-    __cuCtxGetStreamPriorityRange = dlfcn.dlsym(handle, 'cuCtxGetStreamPriorityRange')
-    {{endif}}
-    {{if 'cuCtxResetPersistingL2Cache' in found_functions}}
-    global __cuCtxResetPersistingL2Cache
-    __cuCtxResetPersistingL2Cache = dlfcn.dlsym(handle, 'cuCtxResetPersistingL2Cache')
-    {{endif}}
-    {{if 'cuCtxGetExecAffinity' in found_functions}}
-    global __cuCtxGetExecAffinity
-    __cuCtxGetExecAffinity = dlfcn.dlsym(handle, 'cuCtxGetExecAffinity')
-    {{endif}}
-    {{if 'cuCtxRecordEvent' in found_functions}}
-    global __cuCtxRecordEvent
-    __cuCtxRecordEvent = dlfcn.dlsym(handle, 'cuCtxRecordEvent')
-    {{endif}}
-    {{if 'cuCtxWaitEvent' in found_functions}}
-    global __cuCtxWaitEvent
-    __cuCtxWaitEvent = dlfcn.dlsym(handle, 'cuCtxWaitEvent')
-    {{endif}}
-    {{if 'cuCtxAttach' in found_functions}}
-    global __cuCtxAttach
-    __cuCtxAttach = dlfcn.dlsym(handle, 'cuCtxAttach')
-    {{endif}}
-    {{if 'cuCtxDetach' in found_functions}}
-    global __cuCtxDetach
-    __cuCtxDetach = dlfcn.dlsym(handle, 'cuCtxDetach')
-    {{endif}}
-    {{if 'cuCtxGetSharedMemConfig' in found_functions}}
-    global __cuCtxGetSharedMemConfig
-    __cuCtxGetSharedMemConfig = dlfcn.dlsym(handle, 'cuCtxGetSharedMemConfig')
-    {{endif}}
-    {{if 'cuCtxSetSharedMemConfig' in found_functions}}
-    global __cuCtxSetSharedMemConfig
-    __cuCtxSetSharedMemConfig = dlfcn.dlsym(handle, 'cuCtxSetSharedMemConfig')
-    {{endif}}
-    {{if 'cuModuleLoad' in found_functions}}
-    global __cuModuleLoad
-    __cuModuleLoad = dlfcn.dlsym(handle, 'cuModuleLoad')
-    {{endif}}
-    {{if 'cuModuleLoadData' in found_functions}}
-    global __cuModuleLoadData
-    __cuModuleLoadData = dlfcn.dlsym(handle, 'cuModuleLoadData')
-    {{endif}}
-    {{if 'cuModuleLoadDataEx' in found_functions}}
-    global __cuModuleLoadDataEx
-    __cuModuleLoadDataEx = dlfcn.dlsym(handle, 'cuModuleLoadDataEx')
-    {{endif}}
-    {{if 'cuModuleLoadFatBinary' in found_functions}}
-    global __cuModuleLoadFatBinary
-    __cuModuleLoadFatBinary = dlfcn.dlsym(handle, 'cuModuleLoadFatBinary')
-    {{endif}}
-    {{if 'cuModuleUnload' in found_functions}}
-    global __cuModuleUnload
-    __cuModuleUnload = dlfcn.dlsym(handle, 'cuModuleUnload')
-    {{endif}}
-    {{if 'cuModuleGetLoadingMode' in found_functions}}
-    global __cuModuleGetLoadingMode
-    __cuModuleGetLoadingMode = dlfcn.dlsym(handle, 'cuModuleGetLoadingMode')
-    {{endif}}
-    {{if 'cuModuleGetFunction' in found_functions}}
-    global __cuModuleGetFunction
-    __cuModuleGetFunction = dlfcn.dlsym(handle, 'cuModuleGetFunction')
-    {{endif}}
-    {{if 'cuModuleGetFunctionCount' in found_functions}}
-    global __cuModuleGetFunctionCount
-    __cuModuleGetFunctionCount = dlfcn.dlsym(handle, 'cuModuleGetFunctionCount')
-    {{endif}}
-    {{if 'cuModuleEnumerateFunctions' in found_functions}}
-    global __cuModuleEnumerateFunctions
-    __cuModuleEnumerateFunctions = dlfcn.dlsym(handle, 'cuModuleEnumerateFunctions')
-    {{endif}}
-    {{if 'cuModuleGetGlobal_v2' in found_functions}}
-    global __cuModuleGetGlobal_v2
-    __cuModuleGetGlobal_v2 = dlfcn.dlsym(handle, 'cuModuleGetGlobal_v2')
-    {{endif}}
-    {{if 'cuLinkCreate_v2' in found_functions}}
-    global __cuLinkCreate_v2
-    __cuLinkCreate_v2 = dlfcn.dlsym(handle, 'cuLinkCreate_v2')
-    {{endif}}
-    {{if 'cuLinkAddData_v2' in found_functions}}
-    global __cuLinkAddData_v2
-    __cuLinkAddData_v2 = dlfcn.dlsym(handle, 'cuLinkAddData_v2')
-    {{endif}}
-    {{if 'cuLinkAddFile_v2' in found_functions}}
-    global __cuLinkAddFile_v2
-    __cuLinkAddFile_v2 = dlfcn.dlsym(handle, 'cuLinkAddFile_v2')
-    {{endif}}
-    {{if 'cuLinkComplete' in found_functions}}
-    global __cuLinkComplete
-    __cuLinkComplete = dlfcn.dlsym(handle, 'cuLinkComplete')
-    {{endif}}
-    {{if 'cuLinkDestroy' in found_functions}}
-    global __cuLinkDestroy
-    __cuLinkDestroy = dlfcn.dlsym(handle, 'cuLinkDestroy')
-    {{endif}}
-    {{if 'cuModuleGetTexRef' in found_functions}}
-    global __cuModuleGetTexRef
-    __cuModuleGetTexRef = dlfcn.dlsym(handle, 'cuModuleGetTexRef')
-    {{endif}}
-    {{if 'cuModuleGetSurfRef' in found_functions}}
-    global __cuModuleGetSurfRef
-    __cuModuleGetSurfRef = dlfcn.dlsym(handle, 'cuModuleGetSurfRef')
-    {{endif}}
-    {{if 'cuLibraryLoadData' in found_functions}}
-    global __cuLibraryLoadData
-    __cuLibraryLoadData = dlfcn.dlsym(handle, 'cuLibraryLoadData')
-    {{endif}}
-    {{if 'cuLibraryLoadFromFile' in found_functions}}
-    global __cuLibraryLoadFromFile
-    __cuLibraryLoadFromFile = dlfcn.dlsym(handle, 'cuLibraryLoadFromFile')
-    {{endif}}
-    {{if 'cuLibraryUnload' in found_functions}}
-    global __cuLibraryUnload
-    __cuLibraryUnload = dlfcn.dlsym(handle, 'cuLibraryUnload')
-    {{endif}}
-    {{if 'cuLibraryGetKernel' in found_functions}}
-    global __cuLibraryGetKernel
-    __cuLibraryGetKernel = dlfcn.dlsym(handle, 'cuLibraryGetKernel')
-    {{endif}}
-    {{if 'cuLibraryGetKernelCount' in found_functions}}
-    global __cuLibraryGetKernelCount
-    __cuLibraryGetKernelCount = dlfcn.dlsym(handle, 'cuLibraryGetKernelCount')
-    {{endif}}
-    {{if 'cuLibraryEnumerateKernels' in found_functions}}
-    global __cuLibraryEnumerateKernels
-    __cuLibraryEnumerateKernels = dlfcn.dlsym(handle, 'cuLibraryEnumerateKernels')
-    {{endif}}
-    {{if 'cuLibraryGetModule' in found_functions}}
-    global __cuLibraryGetModule
-    __cuLibraryGetModule = dlfcn.dlsym(handle, 'cuLibraryGetModule')
-    {{endif}}
-    {{if 'cuKernelGetFunction' in found_functions}}
-    global __cuKernelGetFunction
-    __cuKernelGetFunction = dlfcn.dlsym(handle, 'cuKernelGetFunction')
-    {{endif}}
-    {{if 'cuKernelGetLibrary' in found_functions}}
-    global __cuKernelGetLibrary
-    __cuKernelGetLibrary = dlfcn.dlsym(handle, 'cuKernelGetLibrary')
-    {{endif}}
-    {{if 'cuLibraryGetGlobal' in found_functions}}
-    global __cuLibraryGetGlobal
-    __cuLibraryGetGlobal = dlfcn.dlsym(handle, 'cuLibraryGetGlobal')
-    {{endif}}
-    {{if 'cuLibraryGetManaged' in found_functions}}
-    global __cuLibraryGetManaged
-    __cuLibraryGetManaged = dlfcn.dlsym(handle, 'cuLibraryGetManaged')
-    {{endif}}
-    {{if 'cuLibraryGetUnifiedFunction' in found_functions}}
-    global __cuLibraryGetUnifiedFunction
-    __cuLibraryGetUnifiedFunction = dlfcn.dlsym(handle, 'cuLibraryGetUnifiedFunction')
-    {{endif}}
-    {{if 'cuKernelGetAttribute' in found_functions}}
-    global __cuKernelGetAttribute
-    __cuKernelGetAttribute = dlfcn.dlsym(handle, 'cuKernelGetAttribute')
-    {{endif}}
-    {{if 'cuKernelSetAttribute' in found_functions}}
-    global __cuKernelSetAttribute
-    __cuKernelSetAttribute = dlfcn.dlsym(handle, 'cuKernelSetAttribute')
-    {{endif}}
-    {{if 'cuKernelSetCacheConfig' in found_functions}}
-    global __cuKernelSetCacheConfig
-    __cuKernelSetCacheConfig = dlfcn.dlsym(handle, 'cuKernelSetCacheConfig')
-    {{endif}}
-    {{if 'cuKernelGetName' in found_functions}}
-    global __cuKernelGetName
-    __cuKernelGetName = dlfcn.dlsym(handle, 'cuKernelGetName')
-    {{endif}}
-    {{if 'cuKernelGetParamInfo' in found_functions}}
-    global __cuKernelGetParamInfo
-    __cuKernelGetParamInfo = dlfcn.dlsym(handle, 'cuKernelGetParamInfo')
-    {{endif}}
-    {{if 'cuMemGetInfo_v2' in found_functions}}
-    global __cuMemGetInfo_v2
-    __cuMemGetInfo_v2 = dlfcn.dlsym(handle, 'cuMemGetInfo_v2')
-    {{endif}}
-    {{if 'cuMemAlloc_v2' in found_functions}}
-    global __cuMemAlloc_v2
-    __cuMemAlloc_v2 = dlfcn.dlsym(handle, 'cuMemAlloc_v2')
-    {{endif}}
-    {{if 'cuMemAllocPitch_v2' in found_functions}}
-    global __cuMemAllocPitch_v2
-    __cuMemAllocPitch_v2 = dlfcn.dlsym(handle, 'cuMemAllocPitch_v2')
-    {{endif}}
-    {{if 'cuMemFree_v2' in found_functions}}
-    global __cuMemFree_v2
-    __cuMemFree_v2 = dlfcn.dlsym(handle, 'cuMemFree_v2')
-    {{endif}}
-    {{if 'cuMemGetAddressRange_v2' in found_functions}}
-    global __cuMemGetAddressRange_v2
-    __cuMemGetAddressRange_v2 = dlfcn.dlsym(handle, 'cuMemGetAddressRange_v2')
-    {{endif}}
-    {{if 'cuMemAllocHost_v2' in found_functions}}
-    global __cuMemAllocHost_v2
-    __cuMemAllocHost_v2 = dlfcn.dlsym(handle, 'cuMemAllocHost_v2')
-    {{endif}}
-    {{if 'cuMemFreeHost' in found_functions}}
-    global __cuMemFreeHost
-    __cuMemFreeHost = dlfcn.dlsym(handle, 'cuMemFreeHost')
-    {{endif}}
-    {{if 'cuMemHostAlloc' in found_functions}}
-    global __cuMemHostAlloc
-    __cuMemHostAlloc = dlfcn.dlsym(handle, 'cuMemHostAlloc')
-    {{endif}}
-    {{if 'cuMemHostGetDevicePointer_v2' in found_functions}}
-    global __cuMemHostGetDevicePointer_v2
-    __cuMemHostGetDevicePointer_v2 = dlfcn.dlsym(handle, 'cuMemHostGetDevicePointer_v2')
-    {{endif}}
-    {{if 'cuMemHostGetFlags' in found_functions}}
-    global __cuMemHostGetFlags
-    __cuMemHostGetFlags = dlfcn.dlsym(handle, 'cuMemHostGetFlags')
-    {{endif}}
-    {{if 'cuMemAllocManaged' in found_functions}}
-    global __cuMemAllocManaged
-    __cuMemAllocManaged = dlfcn.dlsym(handle, 'cuMemAllocManaged')
-    {{endif}}
-    {{if 'cuDeviceRegisterAsyncNotification' in found_functions}}
-    global __cuDeviceRegisterAsyncNotification
-    __cuDeviceRegisterAsyncNotification = dlfcn.dlsym(handle, 'cuDeviceRegisterAsyncNotification')
-    {{endif}}
-    {{if 'cuDeviceUnregisterAsyncNotification' in found_functions}}
-    global __cuDeviceUnregisterAsyncNotification
-    __cuDeviceUnregisterAsyncNotification = dlfcn.dlsym(handle, 'cuDeviceUnregisterAsyncNotification')
-    {{endif}}
-    {{if 'cuDeviceGetByPCIBusId' in found_functions}}
-    global __cuDeviceGetByPCIBusId
-    __cuDeviceGetByPCIBusId = dlfcn.dlsym(handle, 'cuDeviceGetByPCIBusId')
-    {{endif}}
-    {{if 'cuDeviceGetPCIBusId' in found_functions}}
-    global __cuDeviceGetPCIBusId
-    __cuDeviceGetPCIBusId = dlfcn.dlsym(handle, 'cuDeviceGetPCIBusId')
-    {{endif}}
-    {{if 'cuIpcGetEventHandle' in found_functions}}
-    global __cuIpcGetEventHandle
-    __cuIpcGetEventHandle = dlfcn.dlsym(handle, 'cuIpcGetEventHandle')
-    {{endif}}
-    {{if 'cuIpcOpenEventHandle' in found_functions}}
-    global __cuIpcOpenEventHandle
-    __cuIpcOpenEventHandle = dlfcn.dlsym(handle, 'cuIpcOpenEventHandle')
-    {{endif}}
-    {{if 'cuIpcGetMemHandle' in found_functions}}
-    global __cuIpcGetMemHandle
-    __cuIpcGetMemHandle = dlfcn.dlsym(handle, 'cuIpcGetMemHandle')
-    {{endif}}
-    {{if 'cuIpcOpenMemHandle_v2' in found_functions}}
-    global __cuIpcOpenMemHandle_v2
-    __cuIpcOpenMemHandle_v2 = dlfcn.dlsym(handle, 'cuIpcOpenMemHandle_v2')
-    {{endif}}
-    {{if 'cuIpcCloseMemHandle' in found_functions}}
-    global __cuIpcCloseMemHandle
-    __cuIpcCloseMemHandle = dlfcn.dlsym(handle, 'cuIpcCloseMemHandle')
-    {{endif}}
-    {{if 'cuMemHostRegister_v2' in found_functions}}
-    global __cuMemHostRegister_v2
-    __cuMemHostRegister_v2 = dlfcn.dlsym(handle, 'cuMemHostRegister_v2')
-    {{endif}}
-    {{if 'cuMemHostUnregister' in found_functions}}
-    global __cuMemHostUnregister
-    __cuMemHostUnregister = dlfcn.dlsym(handle, 'cuMemHostUnregister')
-    {{endif}}
-    {{if 'cuArrayCreate_v2' in found_functions}}
-    global __cuArrayCreate_v2
-    __cuArrayCreate_v2 = dlfcn.dlsym(handle, 'cuArrayCreate_v2')
-    {{endif}}
-    {{if 'cuArrayGetDescriptor_v2' in found_functions}}
-    global __cuArrayGetDescriptor_v2
-    __cuArrayGetDescriptor_v2 = dlfcn.dlsym(handle, 'cuArrayGetDescriptor_v2')
-    {{endif}}
-    {{if 'cuArrayGetSparseProperties' in found_functions}}
-    global __cuArrayGetSparseProperties
-    __cuArrayGetSparseProperties = dlfcn.dlsym(handle, 'cuArrayGetSparseProperties')
-    {{endif}}
-    {{if 'cuMipmappedArrayGetSparseProperties' in found_functions}}
-    global __cuMipmappedArrayGetSparseProperties
-    __cuMipmappedArrayGetSparseProperties = dlfcn.dlsym(handle, 'cuMipmappedArrayGetSparseProperties')
-    {{endif}}
-    {{if 'cuArrayGetMemoryRequirements' in found_functions}}
-    global __cuArrayGetMemoryRequirements
-    __cuArrayGetMemoryRequirements = dlfcn.dlsym(handle, 'cuArrayGetMemoryRequirements')
-    {{endif}}
-    {{if 'cuMipmappedArrayGetMemoryRequirements' in found_functions}}
-    global __cuMipmappedArrayGetMemoryRequirements
-    __cuMipmappedArrayGetMemoryRequirements = dlfcn.dlsym(handle, 'cuMipmappedArrayGetMemoryRequirements')
-    {{endif}}
-    {{if 'cuArrayGetPlane' in found_functions}}
-    global __cuArrayGetPlane
-    __cuArrayGetPlane = dlfcn.dlsym(handle, 'cuArrayGetPlane')
-    {{endif}}
-    {{if 'cuArrayDestroy' in found_functions}}
-    global __cuArrayDestroy
-    __cuArrayDestroy = dlfcn.dlsym(handle, 'cuArrayDestroy')
-    {{endif}}
-    {{if 'cuArray3DCreate_v2' in found_functions}}
-    global __cuArray3DCreate_v2
-    __cuArray3DCreate_v2 = dlfcn.dlsym(handle, 'cuArray3DCreate_v2')
-    {{endif}}
-    {{if 'cuArray3DGetDescriptor_v2' in found_functions}}
-    global __cuArray3DGetDescriptor_v2
-    __cuArray3DGetDescriptor_v2 = dlfcn.dlsym(handle, 'cuArray3DGetDescriptor_v2')
-    {{endif}}
-    {{if 'cuMipmappedArrayCreate' in found_functions}}
-    global __cuMipmappedArrayCreate
-    __cuMipmappedArrayCreate = dlfcn.dlsym(handle, 'cuMipmappedArrayCreate')
-    {{endif}}
-    {{if 'cuMipmappedArrayGetLevel' in found_functions}}
-    global __cuMipmappedArrayGetLevel
-    __cuMipmappedArrayGetLevel = dlfcn.dlsym(handle, 'cuMipmappedArrayGetLevel')
-    {{endif}}
-    {{if 'cuMipmappedArrayDestroy' in found_functions}}
-    global __cuMipmappedArrayDestroy
-    __cuMipmappedArrayDestroy = dlfcn.dlsym(handle, 'cuMipmappedArrayDestroy')
-    {{endif}}
-    {{if 'cuMemGetHandleForAddressRange' in found_functions}}
-    global __cuMemGetHandleForAddressRange
-    __cuMemGetHandleForAddressRange = dlfcn.dlsym(handle, 'cuMemGetHandleForAddressRange')
-    {{endif}}
-    {{if 'cuMemAddressReserve' in found_functions}}
-    global __cuMemAddressReserve
-    __cuMemAddressReserve = dlfcn.dlsym(handle, 'cuMemAddressReserve')
-    {{endif}}
-    {{if 'cuMemAddressFree' in found_functions}}
-    global __cuMemAddressFree
-    __cuMemAddressFree = dlfcn.dlsym(handle, 'cuMemAddressFree')
-    {{endif}}
-    {{if 'cuMemCreate' in found_functions}}
-    global __cuMemCreate
-    __cuMemCreate = dlfcn.dlsym(handle, 'cuMemCreate')
-    {{endif}}
-    {{if 'cuMemRelease' in found_functions}}
-    global __cuMemRelease
-    __cuMemRelease = dlfcn.dlsym(handle, 'cuMemRelease')
-    {{endif}}
-    {{if 'cuMemMap' in found_functions}}
-    global __cuMemMap
-    __cuMemMap = dlfcn.dlsym(handle, 'cuMemMap')
-    {{endif}}
-    {{if 'cuMemUnmap' in found_functions}}
-    global __cuMemUnmap
-    __cuMemUnmap = dlfcn.dlsym(handle, 'cuMemUnmap')
-    {{endif}}
-    {{if 'cuMemSetAccess' in found_functions}}
-    global __cuMemSetAccess
-    __cuMemSetAccess = dlfcn.dlsym(handle, 'cuMemSetAccess')
-    {{endif}}
-    {{if 'cuMemGetAccess' in found_functions}}
-    global __cuMemGetAccess
-    __cuMemGetAccess = dlfcn.dlsym(handle, 'cuMemGetAccess')
-    {{endif}}
-    {{if 'cuMemExportToShareableHandle' in found_functions}}
-    global __cuMemExportToShareableHandle
-    __cuMemExportToShareableHandle = dlfcn.dlsym(handle, 'cuMemExportToShareableHandle')
-    {{endif}}
-    {{if 'cuMemImportFromShareableHandle' in found_functions}}
-    global __cuMemImportFromShareableHandle
-    __cuMemImportFromShareableHandle = dlfcn.dlsym(handle, 'cuMemImportFromShareableHandle')
-    {{endif}}
-    {{if 'cuMemGetAllocationGranularity' in found_functions}}
-    global __cuMemGetAllocationGranularity
-    __cuMemGetAllocationGranularity = dlfcn.dlsym(handle, 'cuMemGetAllocationGranularity')
-    {{endif}}
-    {{if 'cuMemGetAllocationPropertiesFromHandle' in found_functions}}
-    global __cuMemGetAllocationPropertiesFromHandle
-    __cuMemGetAllocationPropertiesFromHandle = dlfcn.dlsym(handle, 'cuMemGetAllocationPropertiesFromHandle')
-    {{endif}}
-    {{if 'cuMemRetainAllocationHandle' in found_functions}}
-    global __cuMemRetainAllocationHandle
-    __cuMemRetainAllocationHandle = dlfcn.dlsym(handle, 'cuMemRetainAllocationHandle')
-    {{endif}}
-    {{if 'cuMemPoolTrimTo' in found_functions}}
-    global __cuMemPoolTrimTo
-    __cuMemPoolTrimTo = dlfcn.dlsym(handle, 'cuMemPoolTrimTo')
-    {{endif}}
-    {{if 'cuMemPoolSetAttribute' in found_functions}}
-    global __cuMemPoolSetAttribute
-    __cuMemPoolSetAttribute = dlfcn.dlsym(handle, 'cuMemPoolSetAttribute')
-    {{endif}}
-    {{if 'cuMemPoolGetAttribute' in found_functions}}
-    global __cuMemPoolGetAttribute
-    __cuMemPoolGetAttribute = dlfcn.dlsym(handle, 'cuMemPoolGetAttribute')
-    {{endif}}
-    {{if 'cuMemPoolSetAccess' in found_functions}}
-    global __cuMemPoolSetAccess
-    __cuMemPoolSetAccess = dlfcn.dlsym(handle, 'cuMemPoolSetAccess')
-    {{endif}}
-    {{if 'cuMemPoolGetAccess' in found_functions}}
-    global __cuMemPoolGetAccess
-    __cuMemPoolGetAccess = dlfcn.dlsym(handle, 'cuMemPoolGetAccess')
-    {{endif}}
-    {{if 'cuMemPoolCreate' in found_functions}}
-    global __cuMemPoolCreate
-    __cuMemPoolCreate = dlfcn.dlsym(handle, 'cuMemPoolCreate')
-    {{endif}}
-    {{if 'cuMemPoolDestroy' in found_functions}}
-    global __cuMemPoolDestroy
-    __cuMemPoolDestroy = dlfcn.dlsym(handle, 'cuMemPoolDestroy')
-    {{endif}}
-    {{if 'cuMemPoolExportToShareableHandle' in found_functions}}
-    global __cuMemPoolExportToShareableHandle
-    __cuMemPoolExportToShareableHandle = dlfcn.dlsym(handle, 'cuMemPoolExportToShareableHandle')
-    {{endif}}
-    {{if 'cuMemPoolImportFromShareableHandle' in found_functions}}
-    global __cuMemPoolImportFromShareableHandle
-    __cuMemPoolImportFromShareableHandle = dlfcn.dlsym(handle, 'cuMemPoolImportFromShareableHandle')
-    {{endif}}
-    {{if 'cuMemPoolExportPointer' in found_functions}}
-    global __cuMemPoolExportPointer
-    __cuMemPoolExportPointer = dlfcn.dlsym(handle, 'cuMemPoolExportPointer')
-    {{endif}}
-    {{if 'cuMemPoolImportPointer' in found_functions}}
-    global __cuMemPoolImportPointer
-    __cuMemPoolImportPointer = dlfcn.dlsym(handle, 'cuMemPoolImportPointer')
-    {{endif}}
-    {{if 'cuMulticastCreate' in found_functions}}
-    global __cuMulticastCreate
-    __cuMulticastCreate = dlfcn.dlsym(handle, 'cuMulticastCreate')
-    {{endif}}
-    {{if 'cuMulticastAddDevice' in found_functions}}
-    global __cuMulticastAddDevice
-    __cuMulticastAddDevice = dlfcn.dlsym(handle, 'cuMulticastAddDevice')
-    {{endif}}
-    {{if 'cuMulticastBindMem' in found_functions}}
-    global __cuMulticastBindMem
-    __cuMulticastBindMem = dlfcn.dlsym(handle, 'cuMulticastBindMem')
-    {{endif}}
-    {{if 'cuMulticastBindAddr' in found_functions}}
-    global __cuMulticastBindAddr
-    __cuMulticastBindAddr = dlfcn.dlsym(handle, 'cuMulticastBindAddr')
-    {{endif}}
-    {{if 'cuMulticastUnbind' in found_functions}}
-    global __cuMulticastUnbind
-    __cuMulticastUnbind = dlfcn.dlsym(handle, 'cuMulticastUnbind')
-    {{endif}}
-    {{if 'cuMulticastGetGranularity' in found_functions}}
-    global __cuMulticastGetGranularity
-    __cuMulticastGetGranularity = dlfcn.dlsym(handle, 'cuMulticastGetGranularity')
-    {{endif}}
-    {{if 'cuPointerGetAttribute' in found_functions}}
-    global __cuPointerGetAttribute
-    __cuPointerGetAttribute = dlfcn.dlsym(handle, 'cuPointerGetAttribute')
-    {{endif}}
-    {{if 'cuMemAdvise' in found_functions}}
-    global __cuMemAdvise
-    __cuMemAdvise = dlfcn.dlsym(handle, 'cuMemAdvise')
-    {{endif}}
-    {{if 'cuMemAdvise_v2' in found_functions}}
-    global __cuMemAdvise_v2
-    __cuMemAdvise_v2 = dlfcn.dlsym(handle, 'cuMemAdvise_v2')
-    {{endif}}
-    {{if 'cuMemRangeGetAttribute' in found_functions}}
-    global __cuMemRangeGetAttribute
-    __cuMemRangeGetAttribute = dlfcn.dlsym(handle, 'cuMemRangeGetAttribute')
-    {{endif}}
-    {{if 'cuMemRangeGetAttributes' in found_functions}}
-    global __cuMemRangeGetAttributes
-    __cuMemRangeGetAttributes = dlfcn.dlsym(handle, 'cuMemRangeGetAttributes')
-    {{endif}}
-    {{if 'cuPointerSetAttribute' in found_functions}}
-    global __cuPointerSetAttribute
-    __cuPointerSetAttribute = dlfcn.dlsym(handle, 'cuPointerSetAttribute')
-    {{endif}}
-    {{if 'cuPointerGetAttributes' in found_functions}}
-    global __cuPointerGetAttributes
-    __cuPointerGetAttributes = dlfcn.dlsym(handle, 'cuPointerGetAttributes')
-    {{endif}}
-    {{if 'cuStreamCreate' in found_functions}}
-    global __cuStreamCreate
-    __cuStreamCreate = dlfcn.dlsym(handle, 'cuStreamCreate')
-    {{endif}}
-    {{if 'cuStreamCreateWithPriority' in found_functions}}
-    global __cuStreamCreateWithPriority
-    __cuStreamCreateWithPriority = dlfcn.dlsym(handle, 'cuStreamCreateWithPriority')
-    {{endif}}
-    {{if 'cuThreadExchangeStreamCaptureMode' in found_functions}}
-    global __cuThreadExchangeStreamCaptureMode
-    __cuThreadExchangeStreamCaptureMode = dlfcn.dlsym(handle, 'cuThreadExchangeStreamCaptureMode')
-    {{endif}}
-    {{if 'cuStreamDestroy_v2' in found_functions}}
-    global __cuStreamDestroy_v2
-    __cuStreamDestroy_v2 = dlfcn.dlsym(handle, 'cuStreamDestroy_v2')
-    {{endif}}
-    {{if 'cuEventCreate' in found_functions}}
-    global __cuEventCreate
-    __cuEventCreate = dlfcn.dlsym(handle, 'cuEventCreate')
-    {{endif}}
-    {{if 'cuEventQuery' in found_functions}}
-    global __cuEventQuery
-    __cuEventQuery = dlfcn.dlsym(handle, 'cuEventQuery')
-    {{endif}}
-    {{if 'cuEventSynchronize' in found_functions}}
-    global __cuEventSynchronize
-    __cuEventSynchronize = dlfcn.dlsym(handle, 'cuEventSynchronize')
-    {{endif}}
-    {{if 'cuEventDestroy_v2' in found_functions}}
-    global __cuEventDestroy_v2
-    __cuEventDestroy_v2 = dlfcn.dlsym(handle, 'cuEventDestroy_v2')
-    {{endif}}
-    {{if 'cuEventElapsedTime' in found_functions}}
-    global __cuEventElapsedTime
-    __cuEventElapsedTime = dlfcn.dlsym(handle, 'cuEventElapsedTime')
-    {{endif}}
-    {{if 'cuImportExternalMemory' in found_functions}}
-    global __cuImportExternalMemory
-    __cuImportExternalMemory = dlfcn.dlsym(handle, 'cuImportExternalMemory')
-    {{endif}}
-    {{if 'cuExternalMemoryGetMappedBuffer' in found_functions}}
-    global __cuExternalMemoryGetMappedBuffer
-    __cuExternalMemoryGetMappedBuffer = dlfcn.dlsym(handle, 'cuExternalMemoryGetMappedBuffer')
-    {{endif}}
-    {{if 'cuExternalMemoryGetMappedMipmappedArray' in found_functions}}
-    global __cuExternalMemoryGetMappedMipmappedArray
-    __cuExternalMemoryGetMappedMipmappedArray = dlfcn.dlsym(handle, 'cuExternalMemoryGetMappedMipmappedArray')
-    {{endif}}
-    {{if 'cuDestroyExternalMemory' in found_functions}}
-    global __cuDestroyExternalMemory
-    __cuDestroyExternalMemory = dlfcn.dlsym(handle, 'cuDestroyExternalMemory')
-    {{endif}}
-    {{if 'cuImportExternalSemaphore' in found_functions}}
-    global __cuImportExternalSemaphore
-    __cuImportExternalSemaphore = dlfcn.dlsym(handle, 'cuImportExternalSemaphore')
-    {{endif}}
-    {{if 'cuDestroyExternalSemaphore' in found_functions}}
-    global __cuDestroyExternalSemaphore
-    __cuDestroyExternalSemaphore = dlfcn.dlsym(handle, 'cuDestroyExternalSemaphore')
-    {{endif}}
-    {{if 'cuFuncGetAttribute' in found_functions}}
-    global __cuFuncGetAttribute
-    __cuFuncGetAttribute = dlfcn.dlsym(handle, 'cuFuncGetAttribute')
-    {{endif}}
-    {{if 'cuFuncSetAttribute' in found_functions}}
-    global __cuFuncSetAttribute
-    __cuFuncSetAttribute = dlfcn.dlsym(handle, 'cuFuncSetAttribute')
-    {{endif}}
-    {{if 'cuFuncSetCacheConfig' in found_functions}}
-    global __cuFuncSetCacheConfig
-    __cuFuncSetCacheConfig = dlfcn.dlsym(handle, 'cuFuncSetCacheConfig')
-    {{endif}}
-    {{if 'cuFuncGetModule' in found_functions}}
-    global __cuFuncGetModule
-    __cuFuncGetModule = dlfcn.dlsym(handle, 'cuFuncGetModule')
-    {{endif}}
-    {{if 'cuFuncGetName' in found_functions}}
-    global __cuFuncGetName
-    __cuFuncGetName = dlfcn.dlsym(handle, 'cuFuncGetName')
-    {{endif}}
-    {{if 'cuFuncGetParamInfo' in found_functions}}
-    global __cuFuncGetParamInfo
-    __cuFuncGetParamInfo = dlfcn.dlsym(handle, 'cuFuncGetParamInfo')
-    {{endif}}
-    {{if 'cuFuncIsLoaded' in found_functions}}
-    global __cuFuncIsLoaded
-    __cuFuncIsLoaded = dlfcn.dlsym(handle, 'cuFuncIsLoaded')
-    {{endif}}
-    {{if 'cuFuncLoad' in found_functions}}
-    global __cuFuncLoad
-    __cuFuncLoad = dlfcn.dlsym(handle, 'cuFuncLoad')
-    {{endif}}
-    {{if 'cuLaunchCooperativeKernelMultiDevice' in found_functions}}
-    global __cuLaunchCooperativeKernelMultiDevice
-    __cuLaunchCooperativeKernelMultiDevice = dlfcn.dlsym(handle, 'cuLaunchCooperativeKernelMultiDevice')
-    {{endif}}
-    {{if 'cuFuncSetBlockShape' in found_functions}}
-    global __cuFuncSetBlockShape
-    __cuFuncSetBlockShape = dlfcn.dlsym(handle, 'cuFuncSetBlockShape')
-    {{endif}}
-    {{if 'cuFuncSetSharedSize' in found_functions}}
-    global __cuFuncSetSharedSize
-    __cuFuncSetSharedSize = dlfcn.dlsym(handle, 'cuFuncSetSharedSize')
-    {{endif}}
-    {{if 'cuParamSetSize' in found_functions}}
-    global __cuParamSetSize
-    __cuParamSetSize = dlfcn.dlsym(handle, 'cuParamSetSize')
-    {{endif}}
-    {{if 'cuParamSeti' in found_functions}}
-    global __cuParamSeti
-    __cuParamSeti = dlfcn.dlsym(handle, 'cuParamSeti')
-    {{endif}}
-    {{if 'cuParamSetf' in found_functions}}
-    global __cuParamSetf
-    __cuParamSetf = dlfcn.dlsym(handle, 'cuParamSetf')
-    {{endif}}
-    {{if 'cuParamSetv' in found_functions}}
-    global __cuParamSetv
-    __cuParamSetv = dlfcn.dlsym(handle, 'cuParamSetv')
-    {{endif}}
-    {{if 'cuLaunch' in found_functions}}
-    global __cuLaunch
-    __cuLaunch = dlfcn.dlsym(handle, 'cuLaunch')
-    {{endif}}
-    {{if 'cuLaunchGrid' in found_functions}}
-    global __cuLaunchGrid
-    __cuLaunchGrid = dlfcn.dlsym(handle, 'cuLaunchGrid')
-    {{endif}}
-    {{if 'cuLaunchGridAsync' in found_functions}}
-    global __cuLaunchGridAsync
-    __cuLaunchGridAsync = dlfcn.dlsym(handle, 'cuLaunchGridAsync')
-    {{endif}}
-    {{if 'cuParamSetTexRef' in found_functions}}
-    global __cuParamSetTexRef
-    __cuParamSetTexRef = dlfcn.dlsym(handle, 'cuParamSetTexRef')
-    {{endif}}
-    {{if 'cuFuncSetSharedMemConfig' in found_functions}}
-    global __cuFuncSetSharedMemConfig
-    __cuFuncSetSharedMemConfig = dlfcn.dlsym(handle, 'cuFuncSetSharedMemConfig')
-    {{endif}}
-    {{if 'cuGraphCreate' in found_functions}}
-    global __cuGraphCreate
-    __cuGraphCreate = dlfcn.dlsym(handle, 'cuGraphCreate')
-    {{endif}}
-    {{if 'cuGraphAddKernelNode_v2' in found_functions}}
-    global __cuGraphAddKernelNode_v2
-    __cuGraphAddKernelNode_v2 = dlfcn.dlsym(handle, 'cuGraphAddKernelNode_v2')
-    {{endif}}
-    {{if 'cuGraphKernelNodeGetParams_v2' in found_functions}}
-    global __cuGraphKernelNodeGetParams_v2
-    __cuGraphKernelNodeGetParams_v2 = dlfcn.dlsym(handle, 'cuGraphKernelNodeGetParams_v2')
-    {{endif}}
-    {{if 'cuGraphKernelNodeSetParams_v2' in found_functions}}
-    global __cuGraphKernelNodeSetParams_v2
-    __cuGraphKernelNodeSetParams_v2 = dlfcn.dlsym(handle, 'cuGraphKernelNodeSetParams_v2')
-    {{endif}}
-    {{if 'cuGraphAddMemcpyNode' in found_functions}}
-    global __cuGraphAddMemcpyNode
-    __cuGraphAddMemcpyNode = dlfcn.dlsym(handle, 'cuGraphAddMemcpyNode')
-    {{endif}}
-    {{if 'cuGraphMemcpyNodeGetParams' in found_functions}}
-    global __cuGraphMemcpyNodeGetParams
-    __cuGraphMemcpyNodeGetParams = dlfcn.dlsym(handle, 'cuGraphMemcpyNodeGetParams')
-    {{endif}}
-    {{if 'cuGraphMemcpyNodeSetParams' in found_functions}}
-    global __cuGraphMemcpyNodeSetParams
-    __cuGraphMemcpyNodeSetParams = dlfcn.dlsym(handle, 'cuGraphMemcpyNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphAddMemsetNode' in found_functions}}
-    global __cuGraphAddMemsetNode
-    __cuGraphAddMemsetNode = dlfcn.dlsym(handle, 'cuGraphAddMemsetNode')
-    {{endif}}
-    {{if 'cuGraphMemsetNodeGetParams' in found_functions}}
-    global __cuGraphMemsetNodeGetParams
-    __cuGraphMemsetNodeGetParams = dlfcn.dlsym(handle, 'cuGraphMemsetNodeGetParams')
-    {{endif}}
-    {{if 'cuGraphMemsetNodeSetParams' in found_functions}}
-    global __cuGraphMemsetNodeSetParams
-    __cuGraphMemsetNodeSetParams = dlfcn.dlsym(handle, 'cuGraphMemsetNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphAddHostNode' in found_functions}}
-    global __cuGraphAddHostNode
-    __cuGraphAddHostNode = dlfcn.dlsym(handle, 'cuGraphAddHostNode')
-    {{endif}}
-    {{if 'cuGraphHostNodeGetParams' in found_functions}}
-    global __cuGraphHostNodeGetParams
-    __cuGraphHostNodeGetParams = dlfcn.dlsym(handle, 'cuGraphHostNodeGetParams')
-    {{endif}}
-    {{if 'cuGraphHostNodeSetParams' in found_functions}}
-    global __cuGraphHostNodeSetParams
-    __cuGraphHostNodeSetParams = dlfcn.dlsym(handle, 'cuGraphHostNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphAddChildGraphNode' in found_functions}}
-    global __cuGraphAddChildGraphNode
-    __cuGraphAddChildGraphNode = dlfcn.dlsym(handle, 'cuGraphAddChildGraphNode')
-    {{endif}}
-    {{if 'cuGraphChildGraphNodeGetGraph' in found_functions}}
-    global __cuGraphChildGraphNodeGetGraph
-    __cuGraphChildGraphNodeGetGraph = dlfcn.dlsym(handle, 'cuGraphChildGraphNodeGetGraph')
-    {{endif}}
-    {{if 'cuGraphAddEmptyNode' in found_functions}}
-    global __cuGraphAddEmptyNode
-    __cuGraphAddEmptyNode = dlfcn.dlsym(handle, 'cuGraphAddEmptyNode')
-    {{endif}}
-    {{if 'cuGraphAddEventRecordNode' in found_functions}}
-    global __cuGraphAddEventRecordNode
-    __cuGraphAddEventRecordNode = dlfcn.dlsym(handle, 'cuGraphAddEventRecordNode')
-    {{endif}}
-    {{if 'cuGraphEventRecordNodeGetEvent' in found_functions}}
-    global __cuGraphEventRecordNodeGetEvent
-    __cuGraphEventRecordNodeGetEvent = dlfcn.dlsym(handle, 'cuGraphEventRecordNodeGetEvent')
-    {{endif}}
-    {{if 'cuGraphEventRecordNodeSetEvent' in found_functions}}
-    global __cuGraphEventRecordNodeSetEvent
-    __cuGraphEventRecordNodeSetEvent = dlfcn.dlsym(handle, 'cuGraphEventRecordNodeSetEvent')
-    {{endif}}
-    {{if 'cuGraphAddEventWaitNode' in found_functions}}
-    global __cuGraphAddEventWaitNode
-    __cuGraphAddEventWaitNode = dlfcn.dlsym(handle, 'cuGraphAddEventWaitNode')
-    {{endif}}
-    {{if 'cuGraphEventWaitNodeGetEvent' in found_functions}}
-    global __cuGraphEventWaitNodeGetEvent
-    __cuGraphEventWaitNodeGetEvent = dlfcn.dlsym(handle, 'cuGraphEventWaitNodeGetEvent')
-    {{endif}}
-    {{if 'cuGraphEventWaitNodeSetEvent' in found_functions}}
-    global __cuGraphEventWaitNodeSetEvent
-    __cuGraphEventWaitNodeSetEvent = dlfcn.dlsym(handle, 'cuGraphEventWaitNodeSetEvent')
-    {{endif}}
-    {{if 'cuGraphAddExternalSemaphoresSignalNode' in found_functions}}
-    global __cuGraphAddExternalSemaphoresSignalNode
-    __cuGraphAddExternalSemaphoresSignalNode = dlfcn.dlsym(handle, 'cuGraphAddExternalSemaphoresSignalNode')
-    {{endif}}
-    {{if 'cuGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-    global __cuGraphExternalSemaphoresSignalNodeGetParams
-    __cuGraphExternalSemaphoresSignalNodeGetParams = dlfcn.dlsym(handle, 'cuGraphExternalSemaphoresSignalNodeGetParams')
-    {{endif}}
-    {{if 'cuGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-    global __cuGraphExternalSemaphoresSignalNodeSetParams
-    __cuGraphExternalSemaphoresSignalNodeSetParams = dlfcn.dlsym(handle, 'cuGraphExternalSemaphoresSignalNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphAddExternalSemaphoresWaitNode' in found_functions}}
-    global __cuGraphAddExternalSemaphoresWaitNode
-    __cuGraphAddExternalSemaphoresWaitNode = dlfcn.dlsym(handle, 'cuGraphAddExternalSemaphoresWaitNode')
-    {{endif}}
-    {{if 'cuGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-    global __cuGraphExternalSemaphoresWaitNodeGetParams
-    __cuGraphExternalSemaphoresWaitNodeGetParams = dlfcn.dlsym(handle, 'cuGraphExternalSemaphoresWaitNodeGetParams')
-    {{endif}}
-    {{if 'cuGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-    global __cuGraphExternalSemaphoresWaitNodeSetParams
-    __cuGraphExternalSemaphoresWaitNodeSetParams = dlfcn.dlsym(handle, 'cuGraphExternalSemaphoresWaitNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphAddBatchMemOpNode' in found_functions}}
-    global __cuGraphAddBatchMemOpNode
-    __cuGraphAddBatchMemOpNode = dlfcn.dlsym(handle, 'cuGraphAddBatchMemOpNode')
-    {{endif}}
-    {{if 'cuGraphBatchMemOpNodeGetParams' in found_functions}}
-    global __cuGraphBatchMemOpNodeGetParams
-    __cuGraphBatchMemOpNodeGetParams = dlfcn.dlsym(handle, 'cuGraphBatchMemOpNodeGetParams')
-    {{endif}}
-    {{if 'cuGraphBatchMemOpNodeSetParams' in found_functions}}
-    global __cuGraphBatchMemOpNodeSetParams
-    __cuGraphBatchMemOpNodeSetParams = dlfcn.dlsym(handle, 'cuGraphBatchMemOpNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphExecBatchMemOpNodeSetParams' in found_functions}}
-    global __cuGraphExecBatchMemOpNodeSetParams
-    __cuGraphExecBatchMemOpNodeSetParams = dlfcn.dlsym(handle, 'cuGraphExecBatchMemOpNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphAddMemAllocNode' in found_functions}}
-    global __cuGraphAddMemAllocNode
-    __cuGraphAddMemAllocNode = dlfcn.dlsym(handle, 'cuGraphAddMemAllocNode')
-    {{endif}}
-    {{if 'cuGraphMemAllocNodeGetParams' in found_functions}}
-    global __cuGraphMemAllocNodeGetParams
-    __cuGraphMemAllocNodeGetParams = dlfcn.dlsym(handle, 'cuGraphMemAllocNodeGetParams')
-    {{endif}}
-    {{if 'cuGraphAddMemFreeNode' in found_functions}}
-    global __cuGraphAddMemFreeNode
-    __cuGraphAddMemFreeNode = dlfcn.dlsym(handle, 'cuGraphAddMemFreeNode')
-    {{endif}}
-    {{if 'cuGraphMemFreeNodeGetParams' in found_functions}}
-    global __cuGraphMemFreeNodeGetParams
-    __cuGraphMemFreeNodeGetParams = dlfcn.dlsym(handle, 'cuGraphMemFreeNodeGetParams')
-    {{endif}}
-    {{if 'cuDeviceGraphMemTrim' in found_functions}}
-    global __cuDeviceGraphMemTrim
-    __cuDeviceGraphMemTrim = dlfcn.dlsym(handle, 'cuDeviceGraphMemTrim')
-    {{endif}}
-    {{if 'cuDeviceGetGraphMemAttribute' in found_functions}}
-    global __cuDeviceGetGraphMemAttribute
-    __cuDeviceGetGraphMemAttribute = dlfcn.dlsym(handle, 'cuDeviceGetGraphMemAttribute')
-    {{endif}}
-    {{if 'cuDeviceSetGraphMemAttribute' in found_functions}}
-    global __cuDeviceSetGraphMemAttribute
-    __cuDeviceSetGraphMemAttribute = dlfcn.dlsym(handle, 'cuDeviceSetGraphMemAttribute')
-    {{endif}}
-    {{if 'cuGraphClone' in found_functions}}
-    global __cuGraphClone
-    __cuGraphClone = dlfcn.dlsym(handle, 'cuGraphClone')
-    {{endif}}
-    {{if 'cuGraphNodeFindInClone' in found_functions}}
-    global __cuGraphNodeFindInClone
-    __cuGraphNodeFindInClone = dlfcn.dlsym(handle, 'cuGraphNodeFindInClone')
-    {{endif}}
-    {{if 'cuGraphNodeGetType' in found_functions}}
-    global __cuGraphNodeGetType
-    __cuGraphNodeGetType = dlfcn.dlsym(handle, 'cuGraphNodeGetType')
-    {{endif}}
-    {{if 'cuGraphGetNodes' in found_functions}}
-    global __cuGraphGetNodes
-    __cuGraphGetNodes = dlfcn.dlsym(handle, 'cuGraphGetNodes')
-    {{endif}}
-    {{if 'cuGraphGetRootNodes' in found_functions}}
-    global __cuGraphGetRootNodes
-    __cuGraphGetRootNodes = dlfcn.dlsym(handle, 'cuGraphGetRootNodes')
-    {{endif}}
-    {{if 'cuGraphGetEdges' in found_functions}}
-    global __cuGraphGetEdges
-    __cuGraphGetEdges = dlfcn.dlsym(handle, 'cuGraphGetEdges')
-    {{endif}}
-    {{if 'cuGraphGetEdges_v2' in found_functions}}
-    global __cuGraphGetEdges_v2
-    __cuGraphGetEdges_v2 = dlfcn.dlsym(handle, 'cuGraphGetEdges_v2')
-    {{endif}}
-    {{if 'cuGraphNodeGetDependencies' in found_functions}}
-    global __cuGraphNodeGetDependencies
-    __cuGraphNodeGetDependencies = dlfcn.dlsym(handle, 'cuGraphNodeGetDependencies')
-    {{endif}}
-    {{if 'cuGraphNodeGetDependencies_v2' in found_functions}}
-    global __cuGraphNodeGetDependencies_v2
-    __cuGraphNodeGetDependencies_v2 = dlfcn.dlsym(handle, 'cuGraphNodeGetDependencies_v2')
-    {{endif}}
-    {{if 'cuGraphNodeGetDependentNodes' in found_functions}}
-    global __cuGraphNodeGetDependentNodes
-    __cuGraphNodeGetDependentNodes = dlfcn.dlsym(handle, 'cuGraphNodeGetDependentNodes')
-    {{endif}}
-    {{if 'cuGraphNodeGetDependentNodes_v2' in found_functions}}
-    global __cuGraphNodeGetDependentNodes_v2
-    __cuGraphNodeGetDependentNodes_v2 = dlfcn.dlsym(handle, 'cuGraphNodeGetDependentNodes_v2')
-    {{endif}}
-    {{if 'cuGraphAddDependencies' in found_functions}}
-    global __cuGraphAddDependencies
-    __cuGraphAddDependencies = dlfcn.dlsym(handle, 'cuGraphAddDependencies')
-    {{endif}}
-    {{if 'cuGraphAddDependencies_v2' in found_functions}}
-    global __cuGraphAddDependencies_v2
-    __cuGraphAddDependencies_v2 = dlfcn.dlsym(handle, 'cuGraphAddDependencies_v2')
-    {{endif}}
-    {{if 'cuGraphRemoveDependencies' in found_functions}}
-    global __cuGraphRemoveDependencies
-    __cuGraphRemoveDependencies = dlfcn.dlsym(handle, 'cuGraphRemoveDependencies')
-    {{endif}}
-    {{if 'cuGraphRemoveDependencies_v2' in found_functions}}
-    global __cuGraphRemoveDependencies_v2
-    __cuGraphRemoveDependencies_v2 = dlfcn.dlsym(handle, 'cuGraphRemoveDependencies_v2')
-    {{endif}}
-    {{if 'cuGraphDestroyNode' in found_functions}}
-    global __cuGraphDestroyNode
-    __cuGraphDestroyNode = dlfcn.dlsym(handle, 'cuGraphDestroyNode')
-    {{endif}}
-    {{if 'cuGraphInstantiateWithFlags' in found_functions}}
-    global __cuGraphInstantiateWithFlags
-    __cuGraphInstantiateWithFlags = dlfcn.dlsym(handle, 'cuGraphInstantiateWithFlags')
-    {{endif}}
-    {{if 'cuGraphExecGetFlags' in found_functions}}
-    global __cuGraphExecGetFlags
-    __cuGraphExecGetFlags = dlfcn.dlsym(handle, 'cuGraphExecGetFlags')
-    {{endif}}
-    {{if 'cuGraphExecKernelNodeSetParams_v2' in found_functions}}
-    global __cuGraphExecKernelNodeSetParams_v2
-    __cuGraphExecKernelNodeSetParams_v2 = dlfcn.dlsym(handle, 'cuGraphExecKernelNodeSetParams_v2')
-    {{endif}}
-    {{if 'cuGraphExecMemcpyNodeSetParams' in found_functions}}
-    global __cuGraphExecMemcpyNodeSetParams
-    __cuGraphExecMemcpyNodeSetParams = dlfcn.dlsym(handle, 'cuGraphExecMemcpyNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphExecMemsetNodeSetParams' in found_functions}}
-    global __cuGraphExecMemsetNodeSetParams
-    __cuGraphExecMemsetNodeSetParams = dlfcn.dlsym(handle, 'cuGraphExecMemsetNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphExecHostNodeSetParams' in found_functions}}
-    global __cuGraphExecHostNodeSetParams
-    __cuGraphExecHostNodeSetParams = dlfcn.dlsym(handle, 'cuGraphExecHostNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphExecChildGraphNodeSetParams' in found_functions}}
-    global __cuGraphExecChildGraphNodeSetParams
-    __cuGraphExecChildGraphNodeSetParams = dlfcn.dlsym(handle, 'cuGraphExecChildGraphNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphExecEventRecordNodeSetEvent' in found_functions}}
-    global __cuGraphExecEventRecordNodeSetEvent
-    __cuGraphExecEventRecordNodeSetEvent = dlfcn.dlsym(handle, 'cuGraphExecEventRecordNodeSetEvent')
-    {{endif}}
-    {{if 'cuGraphExecEventWaitNodeSetEvent' in found_functions}}
-    global __cuGraphExecEventWaitNodeSetEvent
-    __cuGraphExecEventWaitNodeSetEvent = dlfcn.dlsym(handle, 'cuGraphExecEventWaitNodeSetEvent')
-    {{endif}}
-    {{if 'cuGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-    global __cuGraphExecExternalSemaphoresSignalNodeSetParams
-    __cuGraphExecExternalSemaphoresSignalNodeSetParams = dlfcn.dlsym(handle, 'cuGraphExecExternalSemaphoresSignalNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-    global __cuGraphExecExternalSemaphoresWaitNodeSetParams
-    __cuGraphExecExternalSemaphoresWaitNodeSetParams = dlfcn.dlsym(handle, 'cuGraphExecExternalSemaphoresWaitNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphNodeSetEnabled' in found_functions}}
-    global __cuGraphNodeSetEnabled
-    __cuGraphNodeSetEnabled = dlfcn.dlsym(handle, 'cuGraphNodeSetEnabled')
-    {{endif}}
-    {{if 'cuGraphNodeGetEnabled' in found_functions}}
-    global __cuGraphNodeGetEnabled
-    __cuGraphNodeGetEnabled = dlfcn.dlsym(handle, 'cuGraphNodeGetEnabled')
-    {{endif}}
-    {{if 'cuGraphExecDestroy' in found_functions}}
-    global __cuGraphExecDestroy
-    __cuGraphExecDestroy = dlfcn.dlsym(handle, 'cuGraphExecDestroy')
-    {{endif}}
-    {{if 'cuGraphDestroy' in found_functions}}
-    global __cuGraphDestroy
-    __cuGraphDestroy = dlfcn.dlsym(handle, 'cuGraphDestroy')
-    {{endif}}
-    {{if 'cuGraphExecUpdate_v2' in found_functions}}
-    global __cuGraphExecUpdate_v2
-    __cuGraphExecUpdate_v2 = dlfcn.dlsym(handle, 'cuGraphExecUpdate_v2')
-    {{endif}}
-    {{if 'cuGraphKernelNodeCopyAttributes' in found_functions}}
-    global __cuGraphKernelNodeCopyAttributes
-    __cuGraphKernelNodeCopyAttributes = dlfcn.dlsym(handle, 'cuGraphKernelNodeCopyAttributes')
-    {{endif}}
-    {{if 'cuGraphKernelNodeGetAttribute' in found_functions}}
-    global __cuGraphKernelNodeGetAttribute
-    __cuGraphKernelNodeGetAttribute = dlfcn.dlsym(handle, 'cuGraphKernelNodeGetAttribute')
-    {{endif}}
-    {{if 'cuGraphKernelNodeSetAttribute' in found_functions}}
-    global __cuGraphKernelNodeSetAttribute
-    __cuGraphKernelNodeSetAttribute = dlfcn.dlsym(handle, 'cuGraphKernelNodeSetAttribute')
-    {{endif}}
-    {{if 'cuGraphDebugDotPrint' in found_functions}}
-    global __cuGraphDebugDotPrint
-    __cuGraphDebugDotPrint = dlfcn.dlsym(handle, 'cuGraphDebugDotPrint')
-    {{endif}}
-    {{if 'cuUserObjectCreate' in found_functions}}
-    global __cuUserObjectCreate
-    __cuUserObjectCreate = dlfcn.dlsym(handle, 'cuUserObjectCreate')
-    {{endif}}
-    {{if 'cuUserObjectRetain' in found_functions}}
-    global __cuUserObjectRetain
-    __cuUserObjectRetain = dlfcn.dlsym(handle, 'cuUserObjectRetain')
-    {{endif}}
-    {{if 'cuUserObjectRelease' in found_functions}}
-    global __cuUserObjectRelease
-    __cuUserObjectRelease = dlfcn.dlsym(handle, 'cuUserObjectRelease')
-    {{endif}}
-    {{if 'cuGraphRetainUserObject' in found_functions}}
-    global __cuGraphRetainUserObject
-    __cuGraphRetainUserObject = dlfcn.dlsym(handle, 'cuGraphRetainUserObject')
-    {{endif}}
-    {{if 'cuGraphReleaseUserObject' in found_functions}}
-    global __cuGraphReleaseUserObject
-    __cuGraphReleaseUserObject = dlfcn.dlsym(handle, 'cuGraphReleaseUserObject')
-    {{endif}}
-    {{if 'cuGraphAddNode' in found_functions}}
-    global __cuGraphAddNode
-    __cuGraphAddNode = dlfcn.dlsym(handle, 'cuGraphAddNode')
-    {{endif}}
-    {{if 'cuGraphAddNode_v2' in found_functions}}
-    global __cuGraphAddNode_v2
-    __cuGraphAddNode_v2 = dlfcn.dlsym(handle, 'cuGraphAddNode_v2')
-    {{endif}}
-    {{if 'cuGraphNodeSetParams' in found_functions}}
-    global __cuGraphNodeSetParams
-    __cuGraphNodeSetParams = dlfcn.dlsym(handle, 'cuGraphNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphExecNodeSetParams' in found_functions}}
-    global __cuGraphExecNodeSetParams
-    __cuGraphExecNodeSetParams = dlfcn.dlsym(handle, 'cuGraphExecNodeSetParams')
-    {{endif}}
-    {{if 'cuGraphConditionalHandleCreate' in found_functions}}
-    global __cuGraphConditionalHandleCreate
-    __cuGraphConditionalHandleCreate = dlfcn.dlsym(handle, 'cuGraphConditionalHandleCreate')
-    {{endif}}
-    {{if 'cuOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-    global __cuOccupancyMaxActiveBlocksPerMultiprocessor
-    __cuOccupancyMaxActiveBlocksPerMultiprocessor = dlfcn.dlsym(handle, 'cuOccupancyMaxActiveBlocksPerMultiprocessor')
-    {{endif}}
-    {{if 'cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-    global __cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
-    __cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = dlfcn.dlsym(handle, 'cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags')
-    {{endif}}
-    {{if 'cuOccupancyMaxPotentialBlockSize' in found_functions}}
-    global __cuOccupancyMaxPotentialBlockSize
-    __cuOccupancyMaxPotentialBlockSize = dlfcn.dlsym(handle, 'cuOccupancyMaxPotentialBlockSize')
-    {{endif}}
-    {{if 'cuOccupancyMaxPotentialBlockSizeWithFlags' in found_functions}}
-    global __cuOccupancyMaxPotentialBlockSizeWithFlags
-    __cuOccupancyMaxPotentialBlockSizeWithFlags = dlfcn.dlsym(handle, 'cuOccupancyMaxPotentialBlockSizeWithFlags')
-    {{endif}}
-    {{if 'cuOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-    global __cuOccupancyAvailableDynamicSMemPerBlock
-    __cuOccupancyAvailableDynamicSMemPerBlock = dlfcn.dlsym(handle, 'cuOccupancyAvailableDynamicSMemPerBlock')
-    {{endif}}
-    {{if 'cuOccupancyMaxPotentialClusterSize' in found_functions}}
-    global __cuOccupancyMaxPotentialClusterSize
-    __cuOccupancyMaxPotentialClusterSize = dlfcn.dlsym(handle, 'cuOccupancyMaxPotentialClusterSize')
-    {{endif}}
-    {{if 'cuOccupancyMaxActiveClusters' in found_functions}}
-    global __cuOccupancyMaxActiveClusters
-    __cuOccupancyMaxActiveClusters = dlfcn.dlsym(handle, 'cuOccupancyMaxActiveClusters')
-    {{endif}}
-    {{if 'cuTexRefSetArray' in found_functions}}
-    global __cuTexRefSetArray
-    __cuTexRefSetArray = dlfcn.dlsym(handle, 'cuTexRefSetArray')
-    {{endif}}
-    {{if 'cuTexRefSetMipmappedArray' in found_functions}}
-    global __cuTexRefSetMipmappedArray
-    __cuTexRefSetMipmappedArray = dlfcn.dlsym(handle, 'cuTexRefSetMipmappedArray')
-    {{endif}}
-    {{if 'cuTexRefSetAddress_v2' in found_functions}}
-    global __cuTexRefSetAddress_v2
-    __cuTexRefSetAddress_v2 = dlfcn.dlsym(handle, 'cuTexRefSetAddress_v2')
-    {{endif}}
-    {{if 'cuTexRefSetAddress2D_v3' in found_functions}}
-    global __cuTexRefSetAddress2D_v3
-    __cuTexRefSetAddress2D_v3 = dlfcn.dlsym(handle, 'cuTexRefSetAddress2D_v3')
-    {{endif}}
-    {{if 'cuTexRefSetFormat' in found_functions}}
-    global __cuTexRefSetFormat
-    __cuTexRefSetFormat = dlfcn.dlsym(handle, 'cuTexRefSetFormat')
-    {{endif}}
-    {{if 'cuTexRefSetAddressMode' in found_functions}}
-    global __cuTexRefSetAddressMode
-    __cuTexRefSetAddressMode = dlfcn.dlsym(handle, 'cuTexRefSetAddressMode')
-    {{endif}}
-    {{if 'cuTexRefSetFilterMode' in found_functions}}
-    global __cuTexRefSetFilterMode
-    __cuTexRefSetFilterMode = dlfcn.dlsym(handle, 'cuTexRefSetFilterMode')
-    {{endif}}
-    {{if 'cuTexRefSetMipmapFilterMode' in found_functions}}
-    global __cuTexRefSetMipmapFilterMode
-    __cuTexRefSetMipmapFilterMode = dlfcn.dlsym(handle, 'cuTexRefSetMipmapFilterMode')
-    {{endif}}
-    {{if 'cuTexRefSetMipmapLevelBias' in found_functions}}
-    global __cuTexRefSetMipmapLevelBias
-    __cuTexRefSetMipmapLevelBias = dlfcn.dlsym(handle, 'cuTexRefSetMipmapLevelBias')
-    {{endif}}
-    {{if 'cuTexRefSetMipmapLevelClamp' in found_functions}}
-    global __cuTexRefSetMipmapLevelClamp
-    __cuTexRefSetMipmapLevelClamp = dlfcn.dlsym(handle, 'cuTexRefSetMipmapLevelClamp')
-    {{endif}}
-    {{if 'cuTexRefSetMaxAnisotropy' in found_functions}}
-    global __cuTexRefSetMaxAnisotropy
-    __cuTexRefSetMaxAnisotropy = dlfcn.dlsym(handle, 'cuTexRefSetMaxAnisotropy')
-    {{endif}}
-    {{if 'cuTexRefSetBorderColor' in found_functions}}
-    global __cuTexRefSetBorderColor
-    __cuTexRefSetBorderColor = dlfcn.dlsym(handle, 'cuTexRefSetBorderColor')
-    {{endif}}
-    {{if 'cuTexRefSetFlags' in found_functions}}
-    global __cuTexRefSetFlags
-    __cuTexRefSetFlags = dlfcn.dlsym(handle, 'cuTexRefSetFlags')
-    {{endif}}
-    {{if 'cuTexRefGetAddress_v2' in found_functions}}
-    global __cuTexRefGetAddress_v2
-    __cuTexRefGetAddress_v2 = dlfcn.dlsym(handle, 'cuTexRefGetAddress_v2')
-    {{endif}}
-    {{if 'cuTexRefGetArray' in found_functions}}
-    global __cuTexRefGetArray
-    __cuTexRefGetArray = dlfcn.dlsym(handle, 'cuTexRefGetArray')
-    {{endif}}
-    {{if 'cuTexRefGetMipmappedArray' in found_functions}}
-    global __cuTexRefGetMipmappedArray
-    __cuTexRefGetMipmappedArray = dlfcn.dlsym(handle, 'cuTexRefGetMipmappedArray')
-    {{endif}}
-    {{if 'cuTexRefGetAddressMode' in found_functions}}
-    global __cuTexRefGetAddressMode
-    __cuTexRefGetAddressMode = dlfcn.dlsym(handle, 'cuTexRefGetAddressMode')
-    {{endif}}
-    {{if 'cuTexRefGetFilterMode' in found_functions}}
-    global __cuTexRefGetFilterMode
-    __cuTexRefGetFilterMode = dlfcn.dlsym(handle, 'cuTexRefGetFilterMode')
-    {{endif}}
-    {{if 'cuTexRefGetFormat' in found_functions}}
-    global __cuTexRefGetFormat
-    __cuTexRefGetFormat = dlfcn.dlsym(handle, 'cuTexRefGetFormat')
-    {{endif}}
-    {{if 'cuTexRefGetMipmapFilterMode' in found_functions}}
-    global __cuTexRefGetMipmapFilterMode
-    __cuTexRefGetMipmapFilterMode = dlfcn.dlsym(handle, 'cuTexRefGetMipmapFilterMode')
-    {{endif}}
-    {{if 'cuTexRefGetMipmapLevelBias' in found_functions}}
-    global __cuTexRefGetMipmapLevelBias
-    __cuTexRefGetMipmapLevelBias = dlfcn.dlsym(handle, 'cuTexRefGetMipmapLevelBias')
-    {{endif}}
-    {{if 'cuTexRefGetMipmapLevelClamp' in found_functions}}
-    global __cuTexRefGetMipmapLevelClamp
-    __cuTexRefGetMipmapLevelClamp = dlfcn.dlsym(handle, 'cuTexRefGetMipmapLevelClamp')
-    {{endif}}
-    {{if 'cuTexRefGetMaxAnisotropy' in found_functions}}
-    global __cuTexRefGetMaxAnisotropy
-    __cuTexRefGetMaxAnisotropy = dlfcn.dlsym(handle, 'cuTexRefGetMaxAnisotropy')
-    {{endif}}
-    {{if 'cuTexRefGetBorderColor' in found_functions}}
-    global __cuTexRefGetBorderColor
-    __cuTexRefGetBorderColor = dlfcn.dlsym(handle, 'cuTexRefGetBorderColor')
-    {{endif}}
-    {{if 'cuTexRefGetFlags' in found_functions}}
-    global __cuTexRefGetFlags
-    __cuTexRefGetFlags = dlfcn.dlsym(handle, 'cuTexRefGetFlags')
-    {{endif}}
-    {{if 'cuTexRefCreate' in found_functions}}
-    global __cuTexRefCreate
-    __cuTexRefCreate = dlfcn.dlsym(handle, 'cuTexRefCreate')
-    {{endif}}
-    {{if 'cuTexRefDestroy' in found_functions}}
-    global __cuTexRefDestroy
-    __cuTexRefDestroy = dlfcn.dlsym(handle, 'cuTexRefDestroy')
-    {{endif}}
-    {{if 'cuSurfRefSetArray' in found_functions}}
-    global __cuSurfRefSetArray
-    __cuSurfRefSetArray = dlfcn.dlsym(handle, 'cuSurfRefSetArray')
-    {{endif}}
-    {{if 'cuSurfRefGetArray' in found_functions}}
-    global __cuSurfRefGetArray
-    __cuSurfRefGetArray = dlfcn.dlsym(handle, 'cuSurfRefGetArray')
-    {{endif}}
-    {{if 'cuTexObjectCreate' in found_functions}}
-    global __cuTexObjectCreate
-    __cuTexObjectCreate = dlfcn.dlsym(handle, 'cuTexObjectCreate')
-    {{endif}}
-    {{if 'cuTexObjectDestroy' in found_functions}}
-    global __cuTexObjectDestroy
-    __cuTexObjectDestroy = dlfcn.dlsym(handle, 'cuTexObjectDestroy')
-    {{endif}}
-    {{if 'cuTexObjectGetResourceDesc' in found_functions}}
-    global __cuTexObjectGetResourceDesc
-    __cuTexObjectGetResourceDesc = dlfcn.dlsym(handle, 'cuTexObjectGetResourceDesc')
-    {{endif}}
-    {{if 'cuTexObjectGetTextureDesc' in found_functions}}
-    global __cuTexObjectGetTextureDesc
-    __cuTexObjectGetTextureDesc = dlfcn.dlsym(handle, 'cuTexObjectGetTextureDesc')
-    {{endif}}
-    {{if 'cuTexObjectGetResourceViewDesc' in found_functions}}
-    global __cuTexObjectGetResourceViewDesc
-    __cuTexObjectGetResourceViewDesc = dlfcn.dlsym(handle, 'cuTexObjectGetResourceViewDesc')
-    {{endif}}
-    {{if 'cuSurfObjectCreate' in found_functions}}
-    global __cuSurfObjectCreate
-    __cuSurfObjectCreate = dlfcn.dlsym(handle, 'cuSurfObjectCreate')
-    {{endif}}
-    {{if 'cuSurfObjectDestroy' in found_functions}}
-    global __cuSurfObjectDestroy
-    __cuSurfObjectDestroy = dlfcn.dlsym(handle, 'cuSurfObjectDestroy')
-    {{endif}}
-    {{if 'cuSurfObjectGetResourceDesc' in found_functions}}
-    global __cuSurfObjectGetResourceDesc
-    __cuSurfObjectGetResourceDesc = dlfcn.dlsym(handle, 'cuSurfObjectGetResourceDesc')
-    {{endif}}
-    {{if 'cuTensorMapEncodeTiled' in found_functions}}
-    global __cuTensorMapEncodeTiled
-    __cuTensorMapEncodeTiled = dlfcn.dlsym(handle, 'cuTensorMapEncodeTiled')
-    {{endif}}
-    {{if 'cuTensorMapEncodeIm2col' in found_functions}}
-    global __cuTensorMapEncodeIm2col
-    __cuTensorMapEncodeIm2col = dlfcn.dlsym(handle, 'cuTensorMapEncodeIm2col')
-    {{endif}}
-    {{if 'cuTensorMapReplaceAddress' in found_functions}}
-    global __cuTensorMapReplaceAddress
-    __cuTensorMapReplaceAddress = dlfcn.dlsym(handle, 'cuTensorMapReplaceAddress')
-    {{endif}}
-    {{if 'cuDeviceCanAccessPeer' in found_functions}}
-    global __cuDeviceCanAccessPeer
-    __cuDeviceCanAccessPeer = dlfcn.dlsym(handle, 'cuDeviceCanAccessPeer')
-    {{endif}}
-    {{if 'cuCtxEnablePeerAccess' in found_functions}}
-    global __cuCtxEnablePeerAccess
-    __cuCtxEnablePeerAccess = dlfcn.dlsym(handle, 'cuCtxEnablePeerAccess')
-    {{endif}}
-    {{if 'cuCtxDisablePeerAccess' in found_functions}}
-    global __cuCtxDisablePeerAccess
-    __cuCtxDisablePeerAccess = dlfcn.dlsym(handle, 'cuCtxDisablePeerAccess')
-    {{endif}}
-    {{if 'cuDeviceGetP2PAttribute' in found_functions}}
-    global __cuDeviceGetP2PAttribute
-    __cuDeviceGetP2PAttribute = dlfcn.dlsym(handle, 'cuDeviceGetP2PAttribute')
-    {{endif}}
-    {{if 'cuGraphicsUnregisterResource' in found_functions}}
-    global __cuGraphicsUnregisterResource
-    __cuGraphicsUnregisterResource = dlfcn.dlsym(handle, 'cuGraphicsUnregisterResource')
-    {{endif}}
-    {{if 'cuGraphicsSubResourceGetMappedArray' in found_functions}}
-    global __cuGraphicsSubResourceGetMappedArray
-    __cuGraphicsSubResourceGetMappedArray = dlfcn.dlsym(handle, 'cuGraphicsSubResourceGetMappedArray')
-    {{endif}}
-    {{if 'cuGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-    global __cuGraphicsResourceGetMappedMipmappedArray
-    __cuGraphicsResourceGetMappedMipmappedArray = dlfcn.dlsym(handle, 'cuGraphicsResourceGetMappedMipmappedArray')
-    {{endif}}
-    {{if 'cuGraphicsResourceGetMappedPointer_v2' in found_functions}}
-    global __cuGraphicsResourceGetMappedPointer_v2
-    __cuGraphicsResourceGetMappedPointer_v2 = dlfcn.dlsym(handle, 'cuGraphicsResourceGetMappedPointer_v2')
-    {{endif}}
-    {{if 'cuGraphicsResourceSetMapFlags_v2' in found_functions}}
-    global __cuGraphicsResourceSetMapFlags_v2
-    __cuGraphicsResourceSetMapFlags_v2 = dlfcn.dlsym(handle, 'cuGraphicsResourceSetMapFlags_v2')
-    {{endif}}
-    {{if 'cuGetProcAddress_v2' in found_functions}}
-    global __cuGetProcAddress_v2
-    __cuGetProcAddress_v2 = dlfcn.dlsym(handle, 'cuGetProcAddress_v2')
-    {{endif}}
-    {{if 'cuCoredumpGetAttribute' in found_functions}}
-    global __cuCoredumpGetAttribute
-    __cuCoredumpGetAttribute = dlfcn.dlsym(handle, 'cuCoredumpGetAttribute')
-    {{endif}}
-    {{if 'cuCoredumpGetAttributeGlobal' in found_functions}}
-    global __cuCoredumpGetAttributeGlobal
-    __cuCoredumpGetAttributeGlobal = dlfcn.dlsym(handle, 'cuCoredumpGetAttributeGlobal')
-    {{endif}}
-    {{if 'cuCoredumpSetAttribute' in found_functions}}
-    global __cuCoredumpSetAttribute
-    __cuCoredumpSetAttribute = dlfcn.dlsym(handle, 'cuCoredumpSetAttribute')
-    {{endif}}
-    {{if 'cuCoredumpSetAttributeGlobal' in found_functions}}
-    global __cuCoredumpSetAttributeGlobal
-    __cuCoredumpSetAttributeGlobal = dlfcn.dlsym(handle, 'cuCoredumpSetAttributeGlobal')
-    {{endif}}
-    {{if 'cuGetExportTable' in found_functions}}
-    global __cuGetExportTable
-    __cuGetExportTable = dlfcn.dlsym(handle, 'cuGetExportTable')
-    {{endif}}
-    {{if 'cuGreenCtxCreate' in found_functions}}
-    global __cuGreenCtxCreate
-    __cuGreenCtxCreate = dlfcn.dlsym(handle, 'cuGreenCtxCreate')
-    {{endif}}
-    {{if 'cuGreenCtxDestroy' in found_functions}}
-    global __cuGreenCtxDestroy
-    __cuGreenCtxDestroy = dlfcn.dlsym(handle, 'cuGreenCtxDestroy')
-    {{endif}}
-    {{if 'cuCtxFromGreenCtx' in found_functions}}
-    global __cuCtxFromGreenCtx
-    __cuCtxFromGreenCtx = dlfcn.dlsym(handle, 'cuCtxFromGreenCtx')
-    {{endif}}
-    {{if 'cuDeviceGetDevResource' in found_functions}}
-    global __cuDeviceGetDevResource
-    __cuDeviceGetDevResource = dlfcn.dlsym(handle, 'cuDeviceGetDevResource')
-    {{endif}}
-    {{if 'cuCtxGetDevResource' in found_functions}}
-    global __cuCtxGetDevResource
-    __cuCtxGetDevResource = dlfcn.dlsym(handle, 'cuCtxGetDevResource')
-    {{endif}}
-    {{if 'cuGreenCtxGetDevResource' in found_functions}}
-    global __cuGreenCtxGetDevResource
-    __cuGreenCtxGetDevResource = dlfcn.dlsym(handle, 'cuGreenCtxGetDevResource')
-    {{endif}}
-    {{if 'cuDevSmResourceSplitByCount' in found_functions}}
-    global __cuDevSmResourceSplitByCount
-    __cuDevSmResourceSplitByCount = dlfcn.dlsym(handle, 'cuDevSmResourceSplitByCount')
-    {{endif}}
-    {{if 'cuDevResourceGenerateDesc' in found_functions}}
-    global __cuDevResourceGenerateDesc
-    __cuDevResourceGenerateDesc = dlfcn.dlsym(handle, 'cuDevResourceGenerateDesc')
-    {{endif}}
-    {{if 'cuGreenCtxRecordEvent' in found_functions}}
-    global __cuGreenCtxRecordEvent
-    __cuGreenCtxRecordEvent = dlfcn.dlsym(handle, 'cuGreenCtxRecordEvent')
-    {{endif}}
-    {{if 'cuGreenCtxWaitEvent' in found_functions}}
-    global __cuGreenCtxWaitEvent
-    __cuGreenCtxWaitEvent = dlfcn.dlsym(handle, 'cuGreenCtxWaitEvent')
-    {{endif}}
-    {{if 'cuStreamGetGreenCtx' in found_functions}}
-    global __cuStreamGetGreenCtx
-    __cuStreamGetGreenCtx = dlfcn.dlsym(handle, 'cuStreamGetGreenCtx')
-    {{endif}}
-    {{if 'cuGreenCtxStreamCreate' in found_functions}}
-    global __cuGreenCtxStreamCreate
-    __cuGreenCtxStreamCreate = dlfcn.dlsym(handle, 'cuGreenCtxStreamCreate')
-    {{endif}}
-    {{if 'cuProfilerStart' in found_functions}}
-    global __cuProfilerStart
-    __cuProfilerStart = dlfcn.dlsym(handle, 'cuProfilerStart')
-    {{endif}}
-    {{if 'cuProfilerStop' in found_functions}}
-    global __cuProfilerStop
-    __cuProfilerStop = dlfcn.dlsym(handle, 'cuProfilerStop')
-    {{endif}}
-    {{if True}}
-    global __cuGraphicsEGLRegisterImage
-    __cuGraphicsEGLRegisterImage = dlfcn.dlsym(handle, 'cuGraphicsEGLRegisterImage')
-    {{endif}}
-    {{if True}}
-    global __cuEGLStreamConsumerConnect
-    __cuEGLStreamConsumerConnect = dlfcn.dlsym(handle, 'cuEGLStreamConsumerConnect')
-    {{endif}}
-    {{if True}}
-    global __cuEGLStreamConsumerConnectWithFlags
-    __cuEGLStreamConsumerConnectWithFlags = dlfcn.dlsym(handle, 'cuEGLStreamConsumerConnectWithFlags')
-    {{endif}}
-    {{if True}}
-    global __cuEGLStreamConsumerDisconnect
-    __cuEGLStreamConsumerDisconnect = dlfcn.dlsym(handle, 'cuEGLStreamConsumerDisconnect')
-    {{endif}}
-    {{if True}}
-    global __cuEGLStreamConsumerAcquireFrame
-    __cuEGLStreamConsumerAcquireFrame = dlfcn.dlsym(handle, 'cuEGLStreamConsumerAcquireFrame')
-    {{endif}}
-    {{if True}}
-    global __cuEGLStreamConsumerReleaseFrame
-    __cuEGLStreamConsumerReleaseFrame = dlfcn.dlsym(handle, 'cuEGLStreamConsumerReleaseFrame')
-    {{endif}}
-    {{if True}}
-    global __cuEGLStreamProducerConnect
-    __cuEGLStreamProducerConnect = dlfcn.dlsym(handle, 'cuEGLStreamProducerConnect')
-    {{endif}}
-    {{if True}}
-    global __cuEGLStreamProducerDisconnect
-    __cuEGLStreamProducerDisconnect = dlfcn.dlsym(handle, 'cuEGLStreamProducerDisconnect')
-    {{endif}}
-    {{if True}}
-    global __cuEGLStreamProducerPresentFrame
-    __cuEGLStreamProducerPresentFrame = dlfcn.dlsym(handle, 'cuEGLStreamProducerPresentFrame')
-    {{endif}}
-    {{if True}}
-    global __cuEGLStreamProducerReturnFrame
-    __cuEGLStreamProducerReturnFrame = dlfcn.dlsym(handle, 'cuEGLStreamProducerReturnFrame')
-    {{endif}}
-    {{if True}}
-    global __cuGraphicsResourceGetMappedEglFrame
-    __cuGraphicsResourceGetMappedEglFrame = dlfcn.dlsym(handle, 'cuGraphicsResourceGetMappedEglFrame')
-    {{endif}}
-    {{if True}}
-    global __cuEventCreateFromEGLSync
-    __cuEventCreateFromEGLSync = dlfcn.dlsym(handle, 'cuEventCreateFromEGLSync')
-    {{endif}}
-    {{if True}}
-    global __cuGraphicsGLRegisterBuffer
-    __cuGraphicsGLRegisterBuffer = dlfcn.dlsym(handle, 'cuGraphicsGLRegisterBuffer')
-    {{endif}}
-    {{if True}}
-    global __cuGraphicsGLRegisterImage
-    __cuGraphicsGLRegisterImage = dlfcn.dlsym(handle, 'cuGraphicsGLRegisterImage')
-    {{endif}}
-    {{if True}}
-    global __cuGLGetDevices_v2
-    __cuGLGetDevices_v2 = dlfcn.dlsym(handle, 'cuGLGetDevices_v2')
-    {{endif}}
-    {{if True}}
-    global __cuVDPAUGetDevice
-    __cuVDPAUGetDevice = dlfcn.dlsym(handle, 'cuVDPAUGetDevice')
-    {{endif}}
-    {{if True}}
-    global __cuVDPAUCtxCreate_v2
-    __cuVDPAUCtxCreate_v2 = dlfcn.dlsym(handle, 'cuVDPAUCtxCreate_v2')
-    {{endif}}
-    {{if True}}
-    global __cuGraphicsVDPAURegisterVideoSurface
-    __cuGraphicsVDPAURegisterVideoSurface = dlfcn.dlsym(handle, 'cuGraphicsVDPAURegisterVideoSurface')
-    {{endif}}
-    {{if True}}
-    global __cuGraphicsVDPAURegisterOutputSurface
-    __cuGraphicsVDPAURegisterOutputSurface = dlfcn.dlsym(handle, 'cuGraphicsVDPAURegisterOutputSurface')
-    {{endif}}
-    {{endif}}
-
-{{if 'cuGetErrorString' in found_functions}}
-
-cdef CUresult _cuGetErrorString(CUresult error, const char** pStr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGetErrorString
-    cuPythonInit()
-    if __cuGetErrorString == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGetErrorString" not found')
-    err = (<CUresult (*)(CUresult, const char**) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGetErrorString)(error, pStr)
-    return err
-{{endif}}
-
-{{if 'cuGetErrorName' in found_functions}}
-
-cdef CUresult _cuGetErrorName(CUresult error, const char** pStr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGetErrorName
-    cuPythonInit()
-    if __cuGetErrorName == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGetErrorName" not found')
-    err = (<CUresult (*)(CUresult, const char**) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGetErrorName)(error, pStr)
-    return err
-{{endif}}
-
-{{if 'cuInit' in found_functions}}
-
-cdef CUresult _cuInit(unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuInit
-    cuPythonInit()
-    if __cuInit == NULL:
-        with gil:
-            raise RuntimeError('Function "cuInit" not found')
-    err = (<CUresult (*)(unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuInit)(Flags)
-    return err
-{{endif}}
-
-{{if 'cuDriverGetVersion' in found_functions}}
-
-cdef CUresult _cuDriverGetVersion(int* driverVersion) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDriverGetVersion
-    cuPythonInit()
-    if __cuDriverGetVersion == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDriverGetVersion" not found')
-    err = (<CUresult (*)(int*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDriverGetVersion)(driverVersion)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGet' in found_functions}}
-
-cdef CUresult _cuDeviceGet(CUdevice* device, int ordinal) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGet
-    cuPythonInit()
-    if __cuDeviceGet == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGet" not found')
-    err = (<CUresult (*)(CUdevice*, int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGet)(device, ordinal)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetCount' in found_functions}}
-
-cdef CUresult _cuDeviceGetCount(int* count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetCount
-    cuPythonInit()
-    if __cuDeviceGetCount == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetCount" not found')
-    err = (<CUresult (*)(int*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetCount)(count)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetName' in found_functions}}
-
-cdef CUresult _cuDeviceGetName(char* name, int length, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetName
-    cuPythonInit()
-    if __cuDeviceGetName == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetName" not found')
-    err = (<CUresult (*)(char*, int, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetName)(name, length, dev)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetUuid' in found_functions}}
-
-cdef CUresult _cuDeviceGetUuid(CUuuid* uuid, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetUuid
-    cuPythonInit()
-    if __cuDeviceGetUuid == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetUuid" not found')
-    err = (<CUresult (*)(CUuuid*, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetUuid)(uuid, dev)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetUuid_v2' in found_functions}}
-
-cdef CUresult _cuDeviceGetUuid_v2(CUuuid* uuid, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetUuid_v2
-    cuPythonInit()
-    if __cuDeviceGetUuid_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetUuid_v2" not found')
-    err = (<CUresult (*)(CUuuid*, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetUuid_v2)(uuid, dev)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetLuid' in found_functions}}
-
-cdef CUresult _cuDeviceGetLuid(char* luid, unsigned int* deviceNodeMask, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetLuid
-    cuPythonInit()
-    if __cuDeviceGetLuid == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetLuid" not found')
-    err = (<CUresult (*)(char*, unsigned int*, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetLuid)(luid, deviceNodeMask, dev)
-    return err
-{{endif}}
-
-{{if 'cuDeviceTotalMem_v2' in found_functions}}
-
-cdef CUresult _cuDeviceTotalMem_v2(size_t* numbytes, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceTotalMem_v2
-    cuPythonInit()
-    if __cuDeviceTotalMem_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceTotalMem_v2" not found')
-    err = (<CUresult (*)(size_t*, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceTotalMem_v2)(numbytes, dev)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-
-cdef CUresult _cuDeviceGetTexture1DLinearMaxWidth(size_t* maxWidthInElements, CUarray_format pformat, unsigned numChannels, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetTexture1DLinearMaxWidth
-    cuPythonInit()
-    if __cuDeviceGetTexture1DLinearMaxWidth == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetTexture1DLinearMaxWidth" not found')
-    err = (<CUresult (*)(size_t*, CUarray_format, unsigned, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetTexture1DLinearMaxWidth)(maxWidthInElements, pformat, numChannels, dev)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetAttribute' in found_functions}}
-
-cdef CUresult _cuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetAttribute
-    cuPythonInit()
-    if __cuDeviceGetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetAttribute" not found')
-    err = (<CUresult (*)(int*, CUdevice_attribute, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetAttribute)(pi, attrib, dev)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetNvSciSyncAttributes' in found_functions}}
-
-cdef CUresult _cuDeviceGetNvSciSyncAttributes(void* nvSciSyncAttrList, CUdevice dev, int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetNvSciSyncAttributes
-    cuPythonInit()
-    if __cuDeviceGetNvSciSyncAttributes == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetNvSciSyncAttributes" not found')
-    err = (<CUresult (*)(void*, CUdevice, int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetNvSciSyncAttributes)(nvSciSyncAttrList, dev, flags)
-    return err
-{{endif}}
-
-{{if 'cuDeviceSetMemPool' in found_functions}}
-
-cdef CUresult _cuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceSetMemPool
-    cuPythonInit()
-    if __cuDeviceSetMemPool == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceSetMemPool" not found')
-    err = (<CUresult (*)(CUdevice, CUmemoryPool) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceSetMemPool)(dev, pool)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetMemPool' in found_functions}}
-
-cdef CUresult _cuDeviceGetMemPool(CUmemoryPool* pool, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetMemPool
-    cuPythonInit()
-    if __cuDeviceGetMemPool == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetMemPool" not found')
-    err = (<CUresult (*)(CUmemoryPool*, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetMemPool)(pool, dev)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetDefaultMemPool' in found_functions}}
-
-cdef CUresult _cuDeviceGetDefaultMemPool(CUmemoryPool* pool_out, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetDefaultMemPool
-    cuPythonInit()
-    if __cuDeviceGetDefaultMemPool == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetDefaultMemPool" not found')
-    err = (<CUresult (*)(CUmemoryPool*, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetDefaultMemPool)(pool_out, dev)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetExecAffinitySupport' in found_functions}}
-
-cdef CUresult _cuDeviceGetExecAffinitySupport(int* pi, CUexecAffinityType typename, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetExecAffinitySupport
-    cuPythonInit()
-    if __cuDeviceGetExecAffinitySupport == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetExecAffinitySupport" not found')
-    err = (<CUresult (*)(int*, CUexecAffinityType, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetExecAffinitySupport)(pi, typename, dev)
-    return err
-{{endif}}
-
-{{if 'cuFlushGPUDirectRDMAWrites' in found_functions}}
-
-cdef CUresult _cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFlushGPUDirectRDMAWrites
-    cuPythonInit()
-    if __cuFlushGPUDirectRDMAWrites == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFlushGPUDirectRDMAWrites" not found')
-    err = (<CUresult (*)(CUflushGPUDirectRDMAWritesTarget, CUflushGPUDirectRDMAWritesScope) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFlushGPUDirectRDMAWrites)(target, scope)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetProperties' in found_functions}}
-
-cdef CUresult _cuDeviceGetProperties(CUdevprop* prop, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetProperties
-    cuPythonInit()
-    if __cuDeviceGetProperties == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetProperties" not found')
-    err = (<CUresult (*)(CUdevprop*, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetProperties)(prop, dev)
-    return err
-{{endif}}
-
-{{if 'cuDeviceComputeCapability' in found_functions}}
-
-cdef CUresult _cuDeviceComputeCapability(int* major, int* minor, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceComputeCapability
-    cuPythonInit()
-    if __cuDeviceComputeCapability == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceComputeCapability" not found')
-    err = (<CUresult (*)(int*, int*, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceComputeCapability)(major, minor, dev)
-    return err
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxRetain' in found_functions}}
-
-cdef CUresult _cuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDevicePrimaryCtxRetain
-    cuPythonInit()
-    if __cuDevicePrimaryCtxRetain == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDevicePrimaryCtxRetain" not found')
-    err = (<CUresult (*)(CUcontext*, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDevicePrimaryCtxRetain)(pctx, dev)
-    return err
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxRelease_v2' in found_functions}}
-
-cdef CUresult _cuDevicePrimaryCtxRelease_v2(CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDevicePrimaryCtxRelease_v2
-    cuPythonInit()
-    if __cuDevicePrimaryCtxRelease_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDevicePrimaryCtxRelease_v2" not found')
-    err = (<CUresult (*)(CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDevicePrimaryCtxRelease_v2)(dev)
-    return err
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxSetFlags_v2' in found_functions}}
-
-cdef CUresult _cuDevicePrimaryCtxSetFlags_v2(CUdevice dev, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDevicePrimaryCtxSetFlags_v2
-    cuPythonInit()
-    if __cuDevicePrimaryCtxSetFlags_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDevicePrimaryCtxSetFlags_v2" not found')
-    err = (<CUresult (*)(CUdevice, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDevicePrimaryCtxSetFlags_v2)(dev, flags)
-    return err
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxGetState' in found_functions}}
-
-cdef CUresult _cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int* flags, int* active) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDevicePrimaryCtxGetState
-    cuPythonInit()
-    if __cuDevicePrimaryCtxGetState == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDevicePrimaryCtxGetState" not found')
-    err = (<CUresult (*)(CUdevice, unsigned int*, int*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDevicePrimaryCtxGetState)(dev, flags, active)
-    return err
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxReset_v2' in found_functions}}
-
-cdef CUresult _cuDevicePrimaryCtxReset_v2(CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDevicePrimaryCtxReset_v2
-    cuPythonInit()
-    if __cuDevicePrimaryCtxReset_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDevicePrimaryCtxReset_v2" not found')
-    err = (<CUresult (*)(CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDevicePrimaryCtxReset_v2)(dev)
-    return err
-{{endif}}
-
-{{if 'cuCtxCreate_v2' in found_functions}}
-
-cdef CUresult _cuCtxCreate_v2(CUcontext* pctx, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxCreate_v2
-    cuPythonInit()
-    if __cuCtxCreate_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxCreate_v2" not found')
-    err = (<CUresult (*)(CUcontext*, unsigned int, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxCreate_v2)(pctx, flags, dev)
-    return err
-{{endif}}
-
-{{if 'cuCtxCreate_v3' in found_functions}}
-
-cdef CUresult _cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxCreate_v3
-    cuPythonInit()
-    if __cuCtxCreate_v3 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxCreate_v3" not found')
-    err = (<CUresult (*)(CUcontext*, CUexecAffinityParam*, int, unsigned int, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxCreate_v3)(pctx, paramsArray, numParams, flags, dev)
-    return err
-{{endif}}
-
-{{if 'cuCtxCreate_v4' in found_functions}}
-
-cdef CUresult _cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxCreate_v4
-    cuPythonInit()
-    if __cuCtxCreate_v4 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxCreate_v4" not found')
-    err = (<CUresult (*)(CUcontext*, CUctxCreateParams*, unsigned int, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxCreate_v4)(pctx, ctxCreateParams, flags, dev)
-    return err
-{{endif}}
-
-{{if 'cuCtxDestroy_v2' in found_functions}}
-
-cdef CUresult _cuCtxDestroy_v2(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxDestroy_v2
-    cuPythonInit()
-    if __cuCtxDestroy_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxDestroy_v2" not found')
-    err = (<CUresult (*)(CUcontext) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxDestroy_v2)(ctx)
-    return err
-{{endif}}
-
-{{if 'cuCtxPushCurrent_v2' in found_functions}}
-
-cdef CUresult _cuCtxPushCurrent_v2(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxPushCurrent_v2
-    cuPythonInit()
-    if __cuCtxPushCurrent_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxPushCurrent_v2" not found')
-    err = (<CUresult (*)(CUcontext) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxPushCurrent_v2)(ctx)
-    return err
-{{endif}}
-
-{{if 'cuCtxPopCurrent_v2' in found_functions}}
-
-cdef CUresult _cuCtxPopCurrent_v2(CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxPopCurrent_v2
-    cuPythonInit()
-    if __cuCtxPopCurrent_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxPopCurrent_v2" not found')
-    err = (<CUresult (*)(CUcontext*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxPopCurrent_v2)(pctx)
-    return err
-{{endif}}
-
-{{if 'cuCtxSetCurrent' in found_functions}}
-
-cdef CUresult _cuCtxSetCurrent(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxSetCurrent
-    cuPythonInit()
-    if __cuCtxSetCurrent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxSetCurrent" not found')
-    err = (<CUresult (*)(CUcontext) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxSetCurrent)(ctx)
-    return err
-{{endif}}
-
-{{if 'cuCtxGetCurrent' in found_functions}}
-
-cdef CUresult _cuCtxGetCurrent(CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxGetCurrent
-    cuPythonInit()
-    if __cuCtxGetCurrent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxGetCurrent" not found')
-    err = (<CUresult (*)(CUcontext*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxGetCurrent)(pctx)
-    return err
-{{endif}}
-
-{{if 'cuCtxGetDevice' in found_functions}}
-
-cdef CUresult _cuCtxGetDevice(CUdevice* device) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxGetDevice
-    cuPythonInit()
-    if __cuCtxGetDevice == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxGetDevice" not found')
-    err = (<CUresult (*)(CUdevice*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxGetDevice)(device)
-    return err
-{{endif}}
-
-{{if 'cuCtxGetFlags' in found_functions}}
-
-cdef CUresult _cuCtxGetFlags(unsigned int* flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxGetFlags
-    cuPythonInit()
-    if __cuCtxGetFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxGetFlags" not found')
-    err = (<CUresult (*)(unsigned int*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxGetFlags)(flags)
-    return err
-{{endif}}
-
-{{if 'cuCtxSetFlags' in found_functions}}
-
-cdef CUresult _cuCtxSetFlags(unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxSetFlags
-    cuPythonInit()
-    if __cuCtxSetFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxSetFlags" not found')
-    err = (<CUresult (*)(unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxSetFlags)(flags)
-    return err
-{{endif}}
-
-{{if 'cuCtxGetId' in found_functions}}
-
-cdef CUresult _cuCtxGetId(CUcontext ctx, unsigned long long* ctxId) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxGetId
-    cuPythonInit()
-    if __cuCtxGetId == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxGetId" not found')
-    err = (<CUresult (*)(CUcontext, unsigned long long*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxGetId)(ctx, ctxId)
-    return err
-{{endif}}
-
-{{if 'cuCtxSynchronize' in found_functions}}
-
-cdef CUresult _cuCtxSynchronize() except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxSynchronize
-    cuPythonInit()
-    if __cuCtxSynchronize == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxSynchronize" not found')
-    err = (<CUresult (*)() except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxSynchronize)()
-    return err
-{{endif}}
-
-{{if 'cuCtxSetLimit' in found_functions}}
-
-cdef CUresult _cuCtxSetLimit(CUlimit limit, size_t value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxSetLimit
-    cuPythonInit()
-    if __cuCtxSetLimit == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxSetLimit" not found')
-    err = (<CUresult (*)(CUlimit, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxSetLimit)(limit, value)
-    return err
-{{endif}}
-
-{{if 'cuCtxGetLimit' in found_functions}}
-
-cdef CUresult _cuCtxGetLimit(size_t* pvalue, CUlimit limit) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxGetLimit
-    cuPythonInit()
-    if __cuCtxGetLimit == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxGetLimit" not found')
-    err = (<CUresult (*)(size_t*, CUlimit) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxGetLimit)(pvalue, limit)
-    return err
-{{endif}}
-
-{{if 'cuCtxGetCacheConfig' in found_functions}}
-
-cdef CUresult _cuCtxGetCacheConfig(CUfunc_cache* pconfig) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxGetCacheConfig
-    cuPythonInit()
-    if __cuCtxGetCacheConfig == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxGetCacheConfig" not found')
-    err = (<CUresult (*)(CUfunc_cache*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxGetCacheConfig)(pconfig)
-    return err
-{{endif}}
-
-{{if 'cuCtxSetCacheConfig' in found_functions}}
-
-cdef CUresult _cuCtxSetCacheConfig(CUfunc_cache config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxSetCacheConfig
-    cuPythonInit()
-    if __cuCtxSetCacheConfig == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxSetCacheConfig" not found')
-    err = (<CUresult (*)(CUfunc_cache) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxSetCacheConfig)(config)
-    return err
-{{endif}}
-
-{{if 'cuCtxGetApiVersion' in found_functions}}
-
-cdef CUresult _cuCtxGetApiVersion(CUcontext ctx, unsigned int* version) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxGetApiVersion
-    cuPythonInit()
-    if __cuCtxGetApiVersion == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxGetApiVersion" not found')
-    err = (<CUresult (*)(CUcontext, unsigned int*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxGetApiVersion)(ctx, version)
-    return err
-{{endif}}
-
-{{if 'cuCtxGetStreamPriorityRange' in found_functions}}
-
-cdef CUresult _cuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxGetStreamPriorityRange
-    cuPythonInit()
-    if __cuCtxGetStreamPriorityRange == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxGetStreamPriorityRange" not found')
-    err = (<CUresult (*)(int*, int*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxGetStreamPriorityRange)(leastPriority, greatestPriority)
-    return err
-{{endif}}
-
-{{if 'cuCtxResetPersistingL2Cache' in found_functions}}
-
-cdef CUresult _cuCtxResetPersistingL2Cache() except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxResetPersistingL2Cache
-    cuPythonInit()
-    if __cuCtxResetPersistingL2Cache == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxResetPersistingL2Cache" not found')
-    err = (<CUresult (*)() except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxResetPersistingL2Cache)()
-    return err
-{{endif}}
-
-{{if 'cuCtxGetExecAffinity' in found_functions}}
-
-cdef CUresult _cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAffinityType typename) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxGetExecAffinity
-    cuPythonInit()
-    if __cuCtxGetExecAffinity == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxGetExecAffinity" not found')
-    err = (<CUresult (*)(CUexecAffinityParam*, CUexecAffinityType) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxGetExecAffinity)(pExecAffinity, typename)
-    return err
-{{endif}}
-
-{{if 'cuCtxRecordEvent' in found_functions}}
-
-cdef CUresult _cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxRecordEvent
-    cuPythonInit()
-    if __cuCtxRecordEvent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxRecordEvent" not found')
-    err = (<CUresult (*)(CUcontext, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxRecordEvent)(hCtx, hEvent)
-    return err
-{{endif}}
-
-{{if 'cuCtxWaitEvent' in found_functions}}
-
-cdef CUresult _cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxWaitEvent
-    cuPythonInit()
-    if __cuCtxWaitEvent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxWaitEvent" not found')
-    err = (<CUresult (*)(CUcontext, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxWaitEvent)(hCtx, hEvent)
-    return err
-{{endif}}
-
-{{if 'cuCtxAttach' in found_functions}}
-
-cdef CUresult _cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxAttach
-    cuPythonInit()
-    if __cuCtxAttach == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxAttach" not found')
-    err = (<CUresult (*)(CUcontext*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxAttach)(pctx, flags)
-    return err
-{{endif}}
-
-{{if 'cuCtxDetach' in found_functions}}
-
-cdef CUresult _cuCtxDetach(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxDetach
-    cuPythonInit()
-    if __cuCtxDetach == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxDetach" not found')
-    err = (<CUresult (*)(CUcontext) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxDetach)(ctx)
-    return err
-{{endif}}
-
-{{if 'cuCtxGetSharedMemConfig' in found_functions}}
-
-cdef CUresult _cuCtxGetSharedMemConfig(CUsharedconfig* pConfig) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxGetSharedMemConfig
-    cuPythonInit()
-    if __cuCtxGetSharedMemConfig == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxGetSharedMemConfig" not found')
-    err = (<CUresult (*)(CUsharedconfig*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxGetSharedMemConfig)(pConfig)
-    return err
-{{endif}}
-
-{{if 'cuCtxSetSharedMemConfig' in found_functions}}
-
-cdef CUresult _cuCtxSetSharedMemConfig(CUsharedconfig config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxSetSharedMemConfig
-    cuPythonInit()
-    if __cuCtxSetSharedMemConfig == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxSetSharedMemConfig" not found')
-    err = (<CUresult (*)(CUsharedconfig) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxSetSharedMemConfig)(config)
-    return err
-{{endif}}
-
-{{if 'cuModuleLoad' in found_functions}}
-
-cdef CUresult _cuModuleLoad(CUmodule* module, const char* fname) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleLoad
-    cuPythonInit()
-    if __cuModuleLoad == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleLoad" not found')
-    err = (<CUresult (*)(CUmodule*, const char*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleLoad)(module, fname)
-    return err
-{{endif}}
-
-{{if 'cuModuleLoadData' in found_functions}}
-
-cdef CUresult _cuModuleLoadData(CUmodule* module, const void* image) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleLoadData
-    cuPythonInit()
-    if __cuModuleLoadData == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleLoadData" not found')
-    err = (<CUresult (*)(CUmodule*, const void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleLoadData)(module, image)
-    return err
-{{endif}}
-
-{{if 'cuModuleLoadDataEx' in found_functions}}
-
-cdef CUresult _cuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleLoadDataEx
-    cuPythonInit()
-    if __cuModuleLoadDataEx == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleLoadDataEx" not found')
-    err = (<CUresult (*)(CUmodule*, const void*, unsigned int, CUjit_option*, void**) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleLoadDataEx)(module, image, numOptions, options, optionValues)
-    return err
-{{endif}}
-
-{{if 'cuModuleLoadFatBinary' in found_functions}}
-
-cdef CUresult _cuModuleLoadFatBinary(CUmodule* module, const void* fatCubin) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleLoadFatBinary
-    cuPythonInit()
-    if __cuModuleLoadFatBinary == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleLoadFatBinary" not found')
-    err = (<CUresult (*)(CUmodule*, const void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleLoadFatBinary)(module, fatCubin)
-    return err
-{{endif}}
-
-{{if 'cuModuleUnload' in found_functions}}
-
-cdef CUresult _cuModuleUnload(CUmodule hmod) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleUnload
-    cuPythonInit()
-    if __cuModuleUnload == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleUnload" not found')
-    err = (<CUresult (*)(CUmodule) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleUnload)(hmod)
-    return err
-{{endif}}
-
-{{if 'cuModuleGetLoadingMode' in found_functions}}
-
-cdef CUresult _cuModuleGetLoadingMode(CUmoduleLoadingMode* mode) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleGetLoadingMode
-    cuPythonInit()
-    if __cuModuleGetLoadingMode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleGetLoadingMode" not found')
-    err = (<CUresult (*)(CUmoduleLoadingMode*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleGetLoadingMode)(mode)
-    return err
-{{endif}}
-
-{{if 'cuModuleGetFunction' in found_functions}}
-
-cdef CUresult _cuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleGetFunction
-    cuPythonInit()
-    if __cuModuleGetFunction == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleGetFunction" not found')
-    err = (<CUresult (*)(CUfunction*, CUmodule, const char*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleGetFunction)(hfunc, hmod, name)
-    return err
-{{endif}}
-
-{{if 'cuModuleGetFunctionCount' in found_functions}}
-
-cdef CUresult _cuModuleGetFunctionCount(unsigned int* count, CUmodule mod) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleGetFunctionCount
-    cuPythonInit()
-    if __cuModuleGetFunctionCount == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleGetFunctionCount" not found')
-    err = (<CUresult (*)(unsigned int*, CUmodule) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleGetFunctionCount)(count, mod)
-    return err
-{{endif}}
-
-{{if 'cuModuleEnumerateFunctions' in found_functions}}
-
-cdef CUresult _cuModuleEnumerateFunctions(CUfunction* functions, unsigned int numFunctions, CUmodule mod) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleEnumerateFunctions
-    cuPythonInit()
-    if __cuModuleEnumerateFunctions == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleEnumerateFunctions" not found')
-    err = (<CUresult (*)(CUfunction*, unsigned int, CUmodule) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleEnumerateFunctions)(functions, numFunctions, mod)
-    return err
-{{endif}}
-
-{{if 'cuModuleGetGlobal_v2' in found_functions}}
-
-cdef CUresult _cuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* numbytes, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleGetGlobal_v2
-    cuPythonInit()
-    if __cuModuleGetGlobal_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleGetGlobal_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr*, size_t*, CUmodule, const char*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleGetGlobal_v2)(dptr, numbytes, hmod, name)
-    return err
-{{endif}}
-
-{{if 'cuLinkCreate_v2' in found_functions}}
-
-cdef CUresult _cuLinkCreate_v2(unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLinkCreate_v2
-    cuPythonInit()
-    if __cuLinkCreate_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLinkCreate_v2" not found')
-    err = (<CUresult (*)(unsigned int, CUjit_option*, void**, CUlinkState*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLinkCreate_v2)(numOptions, options, optionValues, stateOut)
-    return err
-{{endif}}
-
-{{if 'cuLinkAddData_v2' in found_functions}}
-
-cdef CUresult _cuLinkAddData_v2(CUlinkState state, CUjitInputType typename, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLinkAddData_v2
-    cuPythonInit()
-    if __cuLinkAddData_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLinkAddData_v2" not found')
-    err = (<CUresult (*)(CUlinkState, CUjitInputType, void*, size_t, const char*, unsigned int, CUjit_option*, void**) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLinkAddData_v2)(state, typename, data, size, name, numOptions, options, optionValues)
-    return err
-{{endif}}
-
-{{if 'cuLinkAddFile_v2' in found_functions}}
-
-cdef CUresult _cuLinkAddFile_v2(CUlinkState state, CUjitInputType typename, const char* path, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLinkAddFile_v2
-    cuPythonInit()
-    if __cuLinkAddFile_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLinkAddFile_v2" not found')
-    err = (<CUresult (*)(CUlinkState, CUjitInputType, const char*, unsigned int, CUjit_option*, void**) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLinkAddFile_v2)(state, typename, path, numOptions, options, optionValues)
-    return err
-{{endif}}
-
-{{if 'cuLinkComplete' in found_functions}}
-
-cdef CUresult _cuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLinkComplete
-    cuPythonInit()
-    if __cuLinkComplete == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLinkComplete" not found')
-    err = (<CUresult (*)(CUlinkState, void**, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLinkComplete)(state, cubinOut, sizeOut)
-    return err
-{{endif}}
-
-{{if 'cuLinkDestroy' in found_functions}}
-
-cdef CUresult _cuLinkDestroy(CUlinkState state) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLinkDestroy
-    cuPythonInit()
-    if __cuLinkDestroy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLinkDestroy" not found')
-    err = (<CUresult (*)(CUlinkState) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLinkDestroy)(state)
-    return err
-{{endif}}
-
-{{if 'cuModuleGetTexRef' in found_functions}}
-
-cdef CUresult _cuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleGetTexRef
-    cuPythonInit()
-    if __cuModuleGetTexRef == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleGetTexRef" not found')
-    err = (<CUresult (*)(CUtexref*, CUmodule, const char*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleGetTexRef)(pTexRef, hmod, name)
-    return err
-{{endif}}
-
-{{if 'cuModuleGetSurfRef' in found_functions}}
-
-cdef CUresult _cuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuModuleGetSurfRef
-    cuPythonInit()
-    if __cuModuleGetSurfRef == NULL:
-        with gil:
-            raise RuntimeError('Function "cuModuleGetSurfRef" not found')
-    err = (<CUresult (*)(CUsurfref*, CUmodule, const char*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuModuleGetSurfRef)(pSurfRef, hmod, name)
-    return err
-{{endif}}
-
-{{if 'cuLibraryLoadData' in found_functions}}
-
-cdef CUresult _cuLibraryLoadData(CUlibrary* library, const void* code, CUjit_option* jitOptions, void** jitOptionsValues, unsigned int numJitOptions, CUlibraryOption* libraryOptions, void** libraryOptionValues, unsigned int numLibraryOptions) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLibraryLoadData
-    cuPythonInit()
-    if __cuLibraryLoadData == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLibraryLoadData" not found')
-    err = (<CUresult (*)(CUlibrary*, const void*, CUjit_option*, void**, unsigned int, CUlibraryOption*, void**, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLibraryLoadData)(library, code, jitOptions, jitOptionsValues, numJitOptions, libraryOptions, libraryOptionValues, numLibraryOptions)
-    return err
-{{endif}}
-
-{{if 'cuLibraryLoadFromFile' in found_functions}}
-
-cdef CUresult _cuLibraryLoadFromFile(CUlibrary* library, const char* fileName, CUjit_option* jitOptions, void** jitOptionsValues, unsigned int numJitOptions, CUlibraryOption* libraryOptions, void** libraryOptionValues, unsigned int numLibraryOptions) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLibraryLoadFromFile
-    cuPythonInit()
-    if __cuLibraryLoadFromFile == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLibraryLoadFromFile" not found')
-    err = (<CUresult (*)(CUlibrary*, const char*, CUjit_option*, void**, unsigned int, CUlibraryOption*, void**, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLibraryLoadFromFile)(library, fileName, jitOptions, jitOptionsValues, numJitOptions, libraryOptions, libraryOptionValues, numLibraryOptions)
-    return err
-{{endif}}
-
-{{if 'cuLibraryUnload' in found_functions}}
-
-cdef CUresult _cuLibraryUnload(CUlibrary library) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLibraryUnload
-    cuPythonInit()
-    if __cuLibraryUnload == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLibraryUnload" not found')
-    err = (<CUresult (*)(CUlibrary) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLibraryUnload)(library)
-    return err
-{{endif}}
-
-{{if 'cuLibraryGetKernel' in found_functions}}
-
-cdef CUresult _cuLibraryGetKernel(CUkernel* pKernel, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLibraryGetKernel
-    cuPythonInit()
-    if __cuLibraryGetKernel == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLibraryGetKernel" not found')
-    err = (<CUresult (*)(CUkernel*, CUlibrary, const char*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLibraryGetKernel)(pKernel, library, name)
-    return err
-{{endif}}
-
-{{if 'cuLibraryGetKernelCount' in found_functions}}
-
-cdef CUresult _cuLibraryGetKernelCount(unsigned int* count, CUlibrary lib) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLibraryGetKernelCount
-    cuPythonInit()
-    if __cuLibraryGetKernelCount == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLibraryGetKernelCount" not found')
-    err = (<CUresult (*)(unsigned int*, CUlibrary) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLibraryGetKernelCount)(count, lib)
-    return err
-{{endif}}
-
-{{if 'cuLibraryEnumerateKernels' in found_functions}}
-
-cdef CUresult _cuLibraryEnumerateKernels(CUkernel* kernels, unsigned int numKernels, CUlibrary lib) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLibraryEnumerateKernels
-    cuPythonInit()
-    if __cuLibraryEnumerateKernels == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLibraryEnumerateKernels" not found')
-    err = (<CUresult (*)(CUkernel*, unsigned int, CUlibrary) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLibraryEnumerateKernels)(kernels, numKernels, lib)
-    return err
-{{endif}}
-
-{{if 'cuLibraryGetModule' in found_functions}}
-
-cdef CUresult _cuLibraryGetModule(CUmodule* pMod, CUlibrary library) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLibraryGetModule
-    cuPythonInit()
-    if __cuLibraryGetModule == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLibraryGetModule" not found')
-    err = (<CUresult (*)(CUmodule*, CUlibrary) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLibraryGetModule)(pMod, library)
-    return err
-{{endif}}
-
-{{if 'cuKernelGetFunction' in found_functions}}
-
-cdef CUresult _cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuKernelGetFunction
-    cuPythonInit()
-    if __cuKernelGetFunction == NULL:
-        with gil:
-            raise RuntimeError('Function "cuKernelGetFunction" not found')
-    err = (<CUresult (*)(CUfunction*, CUkernel) except ?CUDA_ERROR_NOT_FOUND nogil> __cuKernelGetFunction)(pFunc, kernel)
-    return err
-{{endif}}
-
-{{if 'cuKernelGetLibrary' in found_functions}}
-
-cdef CUresult _cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuKernelGetLibrary
-    cuPythonInit()
-    if __cuKernelGetLibrary == NULL:
-        with gil:
-            raise RuntimeError('Function "cuKernelGetLibrary" not found')
-    err = (<CUresult (*)(CUlibrary*, CUkernel) except ?CUDA_ERROR_NOT_FOUND nogil> __cuKernelGetLibrary)(pLib, kernel)
-    return err
-{{endif}}
-
-{{if 'cuLibraryGetGlobal' in found_functions}}
-
-cdef CUresult _cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLibraryGetGlobal
-    cuPythonInit()
-    if __cuLibraryGetGlobal == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLibraryGetGlobal" not found')
-    err = (<CUresult (*)(CUdeviceptr*, size_t*, CUlibrary, const char*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLibraryGetGlobal)(dptr, numbytes, library, name)
-    return err
-{{endif}}
-
-{{if 'cuLibraryGetManaged' in found_functions}}
-
-cdef CUresult _cuLibraryGetManaged(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLibraryGetManaged
-    cuPythonInit()
-    if __cuLibraryGetManaged == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLibraryGetManaged" not found')
-    err = (<CUresult (*)(CUdeviceptr*, size_t*, CUlibrary, const char*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLibraryGetManaged)(dptr, numbytes, library, name)
-    return err
-{{endif}}
-
-{{if 'cuLibraryGetUnifiedFunction' in found_functions}}
-
-cdef CUresult _cuLibraryGetUnifiedFunction(void** fptr, CUlibrary library, const char* symbol) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLibraryGetUnifiedFunction
-    cuPythonInit()
-    if __cuLibraryGetUnifiedFunction == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLibraryGetUnifiedFunction" not found')
-    err = (<CUresult (*)(void**, CUlibrary, const char*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLibraryGetUnifiedFunction)(fptr, library, symbol)
-    return err
-{{endif}}
-
-{{if 'cuKernelGetAttribute' in found_functions}}
-
-cdef CUresult _cuKernelGetAttribute(int* pi, CUfunction_attribute attrib, CUkernel kernel, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuKernelGetAttribute
-    cuPythonInit()
-    if __cuKernelGetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuKernelGetAttribute" not found')
-    err = (<CUresult (*)(int*, CUfunction_attribute, CUkernel, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuKernelGetAttribute)(pi, attrib, kernel, dev)
-    return err
-{{endif}}
-
-{{if 'cuKernelSetAttribute' in found_functions}}
-
-cdef CUresult _cuKernelSetAttribute(CUfunction_attribute attrib, int val, CUkernel kernel, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuKernelSetAttribute
-    cuPythonInit()
-    if __cuKernelSetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuKernelSetAttribute" not found')
-    err = (<CUresult (*)(CUfunction_attribute, int, CUkernel, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuKernelSetAttribute)(attrib, val, kernel, dev)
-    return err
-{{endif}}
-
-{{if 'cuKernelSetCacheConfig' in found_functions}}
-
-cdef CUresult _cuKernelSetCacheConfig(CUkernel kernel, CUfunc_cache config, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuKernelSetCacheConfig
-    cuPythonInit()
-    if __cuKernelSetCacheConfig == NULL:
-        with gil:
-            raise RuntimeError('Function "cuKernelSetCacheConfig" not found')
-    err = (<CUresult (*)(CUkernel, CUfunc_cache, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuKernelSetCacheConfig)(kernel, config, dev)
-    return err
-{{endif}}
-
-{{if 'cuKernelGetName' in found_functions}}
-
-cdef CUresult _cuKernelGetName(const char** name, CUkernel hfunc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuKernelGetName
-    cuPythonInit()
-    if __cuKernelGetName == NULL:
-        with gil:
-            raise RuntimeError('Function "cuKernelGetName" not found')
-    err = (<CUresult (*)(const char**, CUkernel) except ?CUDA_ERROR_NOT_FOUND nogil> __cuKernelGetName)(name, hfunc)
-    return err
-{{endif}}
-
-{{if 'cuKernelGetParamInfo' in found_functions}}
-
-cdef CUresult _cuKernelGetParamInfo(CUkernel kernel, size_t paramIndex, size_t* paramOffset, size_t* paramSize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuKernelGetParamInfo
-    cuPythonInit()
-    if __cuKernelGetParamInfo == NULL:
-        with gil:
-            raise RuntimeError('Function "cuKernelGetParamInfo" not found')
-    err = (<CUresult (*)(CUkernel, size_t, size_t*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuKernelGetParamInfo)(kernel, paramIndex, paramOffset, paramSize)
-    return err
-{{endif}}
-
-{{if 'cuMemGetInfo_v2' in found_functions}}
-
-cdef CUresult _cuMemGetInfo_v2(size_t* free, size_t* total) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemGetInfo_v2
-    cuPythonInit()
-    if __cuMemGetInfo_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemGetInfo_v2" not found')
-    err = (<CUresult (*)(size_t*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemGetInfo_v2)(free, total)
-    return err
-{{endif}}
-
-{{if 'cuMemAlloc_v2' in found_functions}}
-
-cdef CUresult _cuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemAlloc_v2
-    cuPythonInit()
-    if __cuMemAlloc_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemAlloc_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr*, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemAlloc_v2)(dptr, bytesize)
-    return err
-{{endif}}
-
-{{if 'cuMemAllocPitch_v2' in found_functions}}
-
-cdef CUresult _cuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemAllocPitch_v2
-    cuPythonInit()
-    if __cuMemAllocPitch_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemAllocPitch_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr*, size_t*, size_t, size_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemAllocPitch_v2)(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes)
-    return err
-{{endif}}
-
-{{if 'cuMemFree_v2' in found_functions}}
-
-cdef CUresult _cuMemFree_v2(CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemFree_v2
-    cuPythonInit()
-    if __cuMemFree_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemFree_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemFree_v2)(dptr)
-    return err
-{{endif}}
-
-{{if 'cuMemGetAddressRange_v2' in found_functions}}
-
-cdef CUresult _cuMemGetAddressRange_v2(CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemGetAddressRange_v2
-    cuPythonInit()
-    if __cuMemGetAddressRange_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemGetAddressRange_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr*, size_t*, CUdeviceptr) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemGetAddressRange_v2)(pbase, psize, dptr)
-    return err
-{{endif}}
-
-{{if 'cuMemAllocHost_v2' in found_functions}}
-
-cdef CUresult _cuMemAllocHost_v2(void** pp, size_t bytesize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemAllocHost_v2
-    cuPythonInit()
-    if __cuMemAllocHost_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemAllocHost_v2" not found')
-    err = (<CUresult (*)(void**, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemAllocHost_v2)(pp, bytesize)
-    return err
-{{endif}}
-
-{{if 'cuMemFreeHost' in found_functions}}
-
-cdef CUresult _cuMemFreeHost(void* p) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemFreeHost
-    cuPythonInit()
-    if __cuMemFreeHost == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemFreeHost" not found')
-    err = (<CUresult (*)(void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemFreeHost)(p)
-    return err
-{{endif}}
-
-{{if 'cuMemHostAlloc' in found_functions}}
-
-cdef CUresult _cuMemHostAlloc(void** pp, size_t bytesize, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemHostAlloc
-    cuPythonInit()
-    if __cuMemHostAlloc == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemHostAlloc" not found')
-    err = (<CUresult (*)(void**, size_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemHostAlloc)(pp, bytesize, Flags)
-    return err
-{{endif}}
-
-{{if 'cuMemHostGetDevicePointer_v2' in found_functions}}
-
-cdef CUresult _cuMemHostGetDevicePointer_v2(CUdeviceptr* pdptr, void* p, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemHostGetDevicePointer_v2
-    cuPythonInit()
-    if __cuMemHostGetDevicePointer_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemHostGetDevicePointer_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr*, void*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemHostGetDevicePointer_v2)(pdptr, p, Flags)
-    return err
-{{endif}}
-
-{{if 'cuMemHostGetFlags' in found_functions}}
-
-cdef CUresult _cuMemHostGetFlags(unsigned int* pFlags, void* p) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemHostGetFlags
-    cuPythonInit()
-    if __cuMemHostGetFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemHostGetFlags" not found')
-    err = (<CUresult (*)(unsigned int*, void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemHostGetFlags)(pFlags, p)
-    return err
-{{endif}}
-
-{{if 'cuMemAllocManaged' in found_functions}}
-
-cdef CUresult _cuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemAllocManaged
-    cuPythonInit()
-    if __cuMemAllocManaged == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemAllocManaged" not found')
-    err = (<CUresult (*)(CUdeviceptr*, size_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemAllocManaged)(dptr, bytesize, flags)
-    return err
-{{endif}}
-
-{{if 'cuDeviceRegisterAsyncNotification' in found_functions}}
-
-cdef CUresult _cuDeviceRegisterAsyncNotification(CUdevice device, CUasyncCallback callbackFunc, void* userData, CUasyncCallbackHandle* callback) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceRegisterAsyncNotification
-    cuPythonInit()
-    if __cuDeviceRegisterAsyncNotification == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceRegisterAsyncNotification" not found')
-    err = (<CUresult (*)(CUdevice, CUasyncCallback, void*, CUasyncCallbackHandle*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceRegisterAsyncNotification)(device, callbackFunc, userData, callback)
-    return err
-{{endif}}
-
-{{if 'cuDeviceUnregisterAsyncNotification' in found_functions}}
-
-cdef CUresult _cuDeviceUnregisterAsyncNotification(CUdevice device, CUasyncCallbackHandle callback) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceUnregisterAsyncNotification
-    cuPythonInit()
-    if __cuDeviceUnregisterAsyncNotification == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceUnregisterAsyncNotification" not found')
-    err = (<CUresult (*)(CUdevice, CUasyncCallbackHandle) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceUnregisterAsyncNotification)(device, callback)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetByPCIBusId' in found_functions}}
-
-cdef CUresult _cuDeviceGetByPCIBusId(CUdevice* dev, const char* pciBusId) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetByPCIBusId
-    cuPythonInit()
-    if __cuDeviceGetByPCIBusId == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetByPCIBusId" not found')
-    err = (<CUresult (*)(CUdevice*, const char*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetByPCIBusId)(dev, pciBusId)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetPCIBusId' in found_functions}}
-
-cdef CUresult _cuDeviceGetPCIBusId(char* pciBusId, int length, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetPCIBusId
-    cuPythonInit()
-    if __cuDeviceGetPCIBusId == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetPCIBusId" not found')
-    err = (<CUresult (*)(char*, int, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetPCIBusId)(pciBusId, length, dev)
-    return err
-{{endif}}
-
-{{if 'cuIpcGetEventHandle' in found_functions}}
-
-cdef CUresult _cuIpcGetEventHandle(CUipcEventHandle* pHandle, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuIpcGetEventHandle
-    cuPythonInit()
-    if __cuIpcGetEventHandle == NULL:
-        with gil:
-            raise RuntimeError('Function "cuIpcGetEventHandle" not found')
-    err = (<CUresult (*)(CUipcEventHandle*, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuIpcGetEventHandle)(pHandle, event)
-    return err
-{{endif}}
-
-{{if 'cuIpcOpenEventHandle' in found_functions}}
-
-cdef CUresult _cuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuIpcOpenEventHandle
-    cuPythonInit()
-    if __cuIpcOpenEventHandle == NULL:
-        with gil:
-            raise RuntimeError('Function "cuIpcOpenEventHandle" not found')
-    err = (<CUresult (*)(CUevent*, CUipcEventHandle) except ?CUDA_ERROR_NOT_FOUND nogil> __cuIpcOpenEventHandle)(phEvent, handle)
-    return err
-{{endif}}
-
-{{if 'cuIpcGetMemHandle' in found_functions}}
-
-cdef CUresult _cuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuIpcGetMemHandle
-    cuPythonInit()
-    if __cuIpcGetMemHandle == NULL:
-        with gil:
-            raise RuntimeError('Function "cuIpcGetMemHandle" not found')
-    err = (<CUresult (*)(CUipcMemHandle*, CUdeviceptr) except ?CUDA_ERROR_NOT_FOUND nogil> __cuIpcGetMemHandle)(pHandle, dptr)
-    return err
-{{endif}}
-
-{{if 'cuIpcOpenMemHandle_v2' in found_functions}}
-
-cdef CUresult _cuIpcOpenMemHandle_v2(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuIpcOpenMemHandle_v2
-    cuPythonInit()
-    if __cuIpcOpenMemHandle_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuIpcOpenMemHandle_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr*, CUipcMemHandle, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuIpcOpenMemHandle_v2)(pdptr, handle, Flags)
-    return err
-{{endif}}
-
-{{if 'cuIpcCloseMemHandle' in found_functions}}
-
-cdef CUresult _cuIpcCloseMemHandle(CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuIpcCloseMemHandle
-    cuPythonInit()
-    if __cuIpcCloseMemHandle == NULL:
-        with gil:
-            raise RuntimeError('Function "cuIpcCloseMemHandle" not found')
-    err = (<CUresult (*)(CUdeviceptr) except ?CUDA_ERROR_NOT_FOUND nogil> __cuIpcCloseMemHandle)(dptr)
-    return err
-{{endif}}
-
-{{if 'cuMemHostRegister_v2' in found_functions}}
-
-cdef CUresult _cuMemHostRegister_v2(void* p, size_t bytesize, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemHostRegister_v2
-    cuPythonInit()
-    if __cuMemHostRegister_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemHostRegister_v2" not found')
-    err = (<CUresult (*)(void*, size_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemHostRegister_v2)(p, bytesize, Flags)
-    return err
-{{endif}}
-
-{{if 'cuMemHostUnregister' in found_functions}}
-
-cdef CUresult _cuMemHostUnregister(void* p) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemHostUnregister
-    cuPythonInit()
-    if __cuMemHostUnregister == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemHostUnregister" not found')
-    err = (<CUresult (*)(void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemHostUnregister)(p)
-    return err
-{{endif}}
-
-{{if 'cuMemcpy' in found_functions}}
-
-cdef CUresult _cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpy
-    cuPythonInit()
-    if __cuMemcpy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpy" not found')
-    err = (<CUresult (*)(CUdeviceptr, CUdeviceptr, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpy)(dst, src, ByteCount)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyPeer' in found_functions}}
-
-cdef CUresult _cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyPeer
-    cuPythonInit()
-    if __cuMemcpyPeer == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyPeer" not found')
-    err = (<CUresult (*)(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyPeer)(dstDevice, dstContext, srcDevice, srcContext, ByteCount)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyHtoD_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyHtoD_v2
-    cuPythonInit()
-    if __cuMemcpyHtoD_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyHtoD_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, const void*, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyHtoD_v2)(dstDevice, srcHost, ByteCount)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyDtoH_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyDtoH_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyDtoH_v2
-    cuPythonInit()
-    if __cuMemcpyDtoH_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyDtoH_v2" not found')
-    err = (<CUresult (*)(void*, CUdeviceptr, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyDtoH_v2)(dstHost, srcDevice, ByteCount)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyDtoD_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyDtoD_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyDtoD_v2
-    cuPythonInit()
-    if __cuMemcpyDtoD_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyDtoD_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, CUdeviceptr, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyDtoD_v2)(dstDevice, srcDevice, ByteCount)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyDtoA_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyDtoA_v2
-    cuPythonInit()
-    if __cuMemcpyDtoA_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyDtoA_v2" not found')
-    err = (<CUresult (*)(CUarray, size_t, CUdeviceptr, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyDtoA_v2)(dstArray, dstOffset, srcDevice, ByteCount)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyAtoD_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyAtoD_v2
-    cuPythonInit()
-    if __cuMemcpyAtoD_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyAtoD_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, CUarray, size_t, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyAtoD_v2)(dstDevice, srcArray, srcOffset, ByteCount)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyHtoA_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyHtoA_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyHtoA_v2
-    cuPythonInit()
-    if __cuMemcpyHtoA_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyHtoA_v2" not found')
-    err = (<CUresult (*)(CUarray, size_t, const void*, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyHtoA_v2)(dstArray, dstOffset, srcHost, ByteCount)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyAtoH_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyAtoH_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyAtoH_v2
-    cuPythonInit()
-    if __cuMemcpyAtoH_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyAtoH_v2" not found')
-    err = (<CUresult (*)(void*, CUarray, size_t, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyAtoH_v2)(dstHost, srcArray, srcOffset, ByteCount)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyAtoA_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyAtoA_v2(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyAtoA_v2
-    cuPythonInit()
-    if __cuMemcpyAtoA_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyAtoA_v2" not found')
-    err = (<CUresult (*)(CUarray, size_t, CUarray, size_t, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyAtoA_v2)(dstArray, dstOffset, srcArray, srcOffset, ByteCount)
-    return err
-{{endif}}
-
-{{if 'cuMemcpy2D_v2' in found_functions}}
-
-cdef CUresult _cuMemcpy2D_v2(const CUDA_MEMCPY2D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpy2D_v2
-    cuPythonInit()
-    if __cuMemcpy2D_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpy2D_v2" not found')
-    err = (<CUresult (*)(const CUDA_MEMCPY2D*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpy2D_v2)(pCopy)
-    return err
-{{endif}}
-
-{{if 'cuMemcpy2DUnaligned_v2' in found_functions}}
-
-cdef CUresult _cuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpy2DUnaligned_v2
-    cuPythonInit()
-    if __cuMemcpy2DUnaligned_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpy2DUnaligned_v2" not found')
-    err = (<CUresult (*)(const CUDA_MEMCPY2D*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpy2DUnaligned_v2)(pCopy)
-    return err
-{{endif}}
-
-{{if 'cuMemcpy3D_v2' in found_functions}}
-
-cdef CUresult _cuMemcpy3D_v2(const CUDA_MEMCPY3D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpy3D_v2
-    cuPythonInit()
-    if __cuMemcpy3D_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpy3D_v2" not found')
-    err = (<CUresult (*)(const CUDA_MEMCPY3D*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpy3D_v2)(pCopy)
-    return err
-{{endif}}
-
-{{if 'cuMemcpy3DPeer' in found_functions}}
-
-cdef CUresult _cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpy3DPeer
-    cuPythonInit()
-    if __cuMemcpy3DPeer == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpy3DPeer" not found')
-    err = (<CUresult (*)(const CUDA_MEMCPY3D_PEER*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpy3DPeer)(pCopy)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyAsync' in found_functions}}
-
-cdef CUresult _cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyAsync
-    cuPythonInit()
-    if __cuMemcpyAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyAsync" not found')
-    err = (<CUresult (*)(CUdeviceptr, CUdeviceptr, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyAsync)(dst, src, ByteCount, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyPeerAsync' in found_functions}}
-
-cdef CUresult _cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyPeerAsync
-    cuPythonInit()
-    if __cuMemcpyPeerAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyPeerAsync" not found')
-    err = (<CUresult (*)(CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyPeerAsync)(dstDevice, dstContext, srcDevice, srcContext, ByteCount, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyHtoDAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyHtoDAsync_v2
-    cuPythonInit()
-    if __cuMemcpyHtoDAsync_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyHtoDAsync_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, const void*, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyHtoDAsync_v2)(dstDevice, srcHost, ByteCount, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyDtoHAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyDtoHAsync_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyDtoHAsync_v2
-    cuPythonInit()
-    if __cuMemcpyDtoHAsync_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyDtoHAsync_v2" not found')
-    err = (<CUresult (*)(void*, CUdeviceptr, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyDtoHAsync_v2)(dstHost, srcDevice, ByteCount, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyDtoDAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyDtoDAsync_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyDtoDAsync_v2
-    cuPythonInit()
-    if __cuMemcpyDtoDAsync_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyDtoDAsync_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, CUdeviceptr, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyDtoDAsync_v2)(dstDevice, srcDevice, ByteCount, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyHtoAAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyHtoAAsync_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyHtoAAsync_v2
-    cuPythonInit()
-    if __cuMemcpyHtoAAsync_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyHtoAAsync_v2" not found')
-    err = (<CUresult (*)(CUarray, size_t, const void*, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyHtoAAsync_v2)(dstArray, dstOffset, srcHost, ByteCount, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemcpyAtoHAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpyAtoHAsync_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpyAtoHAsync_v2
-    cuPythonInit()
-    if __cuMemcpyAtoHAsync_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpyAtoHAsync_v2" not found')
-    err = (<CUresult (*)(void*, CUarray, size_t, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpyAtoHAsync_v2)(dstHost, srcArray, srcOffset, ByteCount, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemcpy2DAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpy2DAsync_v2(const CUDA_MEMCPY2D* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpy2DAsync_v2
-    cuPythonInit()
-    if __cuMemcpy2DAsync_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpy2DAsync_v2" not found')
-    err = (<CUresult (*)(const CUDA_MEMCPY2D*, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpy2DAsync_v2)(pCopy, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemcpy3DAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemcpy3DAsync_v2(const CUDA_MEMCPY3D* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpy3DAsync_v2
-    cuPythonInit()
-    if __cuMemcpy3DAsync_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpy3DAsync_v2" not found')
-    err = (<CUresult (*)(const CUDA_MEMCPY3D*, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpy3DAsync_v2)(pCopy, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemcpy3DPeerAsync' in found_functions}}
-
-cdef CUresult _cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemcpy3DPeerAsync
-    cuPythonInit()
-    if __cuMemcpy3DPeerAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemcpy3DPeerAsync" not found')
-    err = (<CUresult (*)(const CUDA_MEMCPY3D_PEER*, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemcpy3DPeerAsync)(pCopy, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD8_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD8_v2(CUdeviceptr dstDevice, unsigned char uc, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD8_v2
-    cuPythonInit()
-    if __cuMemsetD8_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD8_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, unsigned char, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD8_v2)(dstDevice, uc, N)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD16_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD16_v2(CUdeviceptr dstDevice, unsigned short us, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD16_v2
-    cuPythonInit()
-    if __cuMemsetD16_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD16_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, unsigned short, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD16_v2)(dstDevice, us, N)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD32_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD32_v2(CUdeviceptr dstDevice, unsigned int ui, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD32_v2
-    cuPythonInit()
-    if __cuMemsetD32_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD32_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, unsigned int, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD32_v2)(dstDevice, ui, N)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD2D8_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD2D8_v2
-    cuPythonInit()
-    if __cuMemsetD2D8_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD2D8_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, unsigned char, size_t, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD2D8_v2)(dstDevice, dstPitch, uc, Width, Height)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD2D16_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD2D16_v2
-    cuPythonInit()
-    if __cuMemsetD2D16_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD2D16_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, unsigned short, size_t, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD2D16_v2)(dstDevice, dstPitch, us, Width, Height)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD2D32_v2' in found_functions}}
-
-cdef CUresult _cuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD2D32_v2
-    cuPythonInit()
-    if __cuMemsetD2D32_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD2D32_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, unsigned int, size_t, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD2D32_v2)(dstDevice, dstPitch, ui, Width, Height)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD8Async' in found_functions}}
-
-cdef CUresult _cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD8Async
-    cuPythonInit()
-    if __cuMemsetD8Async == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD8Async" not found')
-    err = (<CUresult (*)(CUdeviceptr, unsigned char, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD8Async)(dstDevice, uc, N, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD16Async' in found_functions}}
-
-cdef CUresult _cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD16Async
-    cuPythonInit()
-    if __cuMemsetD16Async == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD16Async" not found')
-    err = (<CUresult (*)(CUdeviceptr, unsigned short, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD16Async)(dstDevice, us, N, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD32Async' in found_functions}}
-
-cdef CUresult _cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD32Async
-    cuPythonInit()
-    if __cuMemsetD32Async == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD32Async" not found')
-    err = (<CUresult (*)(CUdeviceptr, unsigned int, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD32Async)(dstDevice, ui, N, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD2D8Async' in found_functions}}
-
-cdef CUresult _cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD2D8Async
-    cuPythonInit()
-    if __cuMemsetD2D8Async == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD2D8Async" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, unsigned char, size_t, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD2D8Async)(dstDevice, dstPitch, uc, Width, Height, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD2D16Async' in found_functions}}
-
-cdef CUresult _cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD2D16Async
-    cuPythonInit()
-    if __cuMemsetD2D16Async == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD2D16Async" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, unsigned short, size_t, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD2D16Async)(dstDevice, dstPitch, us, Width, Height, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemsetD2D32Async' in found_functions}}
-
-cdef CUresult _cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemsetD2D32Async
-    cuPythonInit()
-    if __cuMemsetD2D32Async == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemsetD2D32Async" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, unsigned int, size_t, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemsetD2D32Async)(dstDevice, dstPitch, ui, Width, Height, hStream)
-    return err
-{{endif}}
-
-{{if 'cuArrayCreate_v2' in found_functions}}
-
-cdef CUresult _cuArrayCreate_v2(CUarray* pHandle, const CUDA_ARRAY_DESCRIPTOR* pAllocateArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuArrayCreate_v2
-    cuPythonInit()
-    if __cuArrayCreate_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuArrayCreate_v2" not found')
-    err = (<CUresult (*)(CUarray*, const CUDA_ARRAY_DESCRIPTOR*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuArrayCreate_v2)(pHandle, pAllocateArray)
-    return err
-{{endif}}
-
-{{if 'cuArrayGetDescriptor_v2' in found_functions}}
-
-cdef CUresult _cuArrayGetDescriptor_v2(CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor, CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuArrayGetDescriptor_v2
-    cuPythonInit()
-    if __cuArrayGetDescriptor_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuArrayGetDescriptor_v2" not found')
-    err = (<CUresult (*)(CUDA_ARRAY_DESCRIPTOR*, CUarray) except ?CUDA_ERROR_NOT_FOUND nogil> __cuArrayGetDescriptor_v2)(pArrayDescriptor, hArray)
-    return err
-{{endif}}
-
-{{if 'cuArrayGetSparseProperties' in found_functions}}
-
-cdef CUresult _cuArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUarray array) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuArrayGetSparseProperties
-    cuPythonInit()
-    if __cuArrayGetSparseProperties == NULL:
-        with gil:
-            raise RuntimeError('Function "cuArrayGetSparseProperties" not found')
-    err = (<CUresult (*)(CUDA_ARRAY_SPARSE_PROPERTIES*, CUarray) except ?CUDA_ERROR_NOT_FOUND nogil> __cuArrayGetSparseProperties)(sparseProperties, array)
-    return err
-{{endif}}
-
-{{if 'cuMipmappedArrayGetSparseProperties' in found_functions}}
-
-cdef CUresult _cuMipmappedArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUmipmappedArray mipmap) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMipmappedArrayGetSparseProperties
-    cuPythonInit()
-    if __cuMipmappedArrayGetSparseProperties == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMipmappedArrayGetSparseProperties" not found')
-    err = (<CUresult (*)(CUDA_ARRAY_SPARSE_PROPERTIES*, CUmipmappedArray) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMipmappedArrayGetSparseProperties)(sparseProperties, mipmap)
-    return err
-{{endif}}
-
-{{if 'cuArrayGetMemoryRequirements' in found_functions}}
-
-cdef CUresult _cuArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS* memoryRequirements, CUarray array, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuArrayGetMemoryRequirements
-    cuPythonInit()
-    if __cuArrayGetMemoryRequirements == NULL:
-        with gil:
-            raise RuntimeError('Function "cuArrayGetMemoryRequirements" not found')
-    err = (<CUresult (*)(CUDA_ARRAY_MEMORY_REQUIREMENTS*, CUarray, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuArrayGetMemoryRequirements)(memoryRequirements, array, device)
-    return err
-{{endif}}
-
-{{if 'cuMipmappedArrayGetMemoryRequirements' in found_functions}}
-
-cdef CUresult _cuMipmappedArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS* memoryRequirements, CUmipmappedArray mipmap, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMipmappedArrayGetMemoryRequirements
-    cuPythonInit()
-    if __cuMipmappedArrayGetMemoryRequirements == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMipmappedArrayGetMemoryRequirements" not found')
-    err = (<CUresult (*)(CUDA_ARRAY_MEMORY_REQUIREMENTS*, CUmipmappedArray, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMipmappedArrayGetMemoryRequirements)(memoryRequirements, mipmap, device)
-    return err
-{{endif}}
-
-{{if 'cuArrayGetPlane' in found_functions}}
-
-cdef CUresult _cuArrayGetPlane(CUarray* pPlaneArray, CUarray hArray, unsigned int planeIdx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuArrayGetPlane
-    cuPythonInit()
-    if __cuArrayGetPlane == NULL:
-        with gil:
-            raise RuntimeError('Function "cuArrayGetPlane" not found')
-    err = (<CUresult (*)(CUarray*, CUarray, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuArrayGetPlane)(pPlaneArray, hArray, planeIdx)
-    return err
-{{endif}}
-
-{{if 'cuArrayDestroy' in found_functions}}
-
-cdef CUresult _cuArrayDestroy(CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuArrayDestroy
-    cuPythonInit()
-    if __cuArrayDestroy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuArrayDestroy" not found')
-    err = (<CUresult (*)(CUarray) except ?CUDA_ERROR_NOT_FOUND nogil> __cuArrayDestroy)(hArray)
-    return err
-{{endif}}
-
-{{if 'cuArray3DCreate_v2' in found_functions}}
-
-cdef CUresult _cuArray3DCreate_v2(CUarray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuArray3DCreate_v2
-    cuPythonInit()
-    if __cuArray3DCreate_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuArray3DCreate_v2" not found')
-    err = (<CUresult (*)(CUarray*, const CUDA_ARRAY3D_DESCRIPTOR*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuArray3DCreate_v2)(pHandle, pAllocateArray)
-    return err
-{{endif}}
-
-{{if 'cuArray3DGetDescriptor_v2' in found_functions}}
-
-cdef CUresult _cuArray3DGetDescriptor_v2(CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor, CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuArray3DGetDescriptor_v2
-    cuPythonInit()
-    if __cuArray3DGetDescriptor_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuArray3DGetDescriptor_v2" not found')
-    err = (<CUresult (*)(CUDA_ARRAY3D_DESCRIPTOR*, CUarray) except ?CUDA_ERROR_NOT_FOUND nogil> __cuArray3DGetDescriptor_v2)(pArrayDescriptor, hArray)
-    return err
-{{endif}}
-
-{{if 'cuMipmappedArrayCreate' in found_functions}}
-
-cdef CUresult _cuMipmappedArrayCreate(CUmipmappedArray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, unsigned int numMipmapLevels) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMipmappedArrayCreate
-    cuPythonInit()
-    if __cuMipmappedArrayCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMipmappedArrayCreate" not found')
-    err = (<CUresult (*)(CUmipmappedArray*, const CUDA_ARRAY3D_DESCRIPTOR*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMipmappedArrayCreate)(pHandle, pMipmappedArrayDesc, numMipmapLevels)
-    return err
-{{endif}}
-
-{{if 'cuMipmappedArrayGetLevel' in found_functions}}
-
-cdef CUresult _cuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned int level) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMipmappedArrayGetLevel
-    cuPythonInit()
-    if __cuMipmappedArrayGetLevel == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMipmappedArrayGetLevel" not found')
-    err = (<CUresult (*)(CUarray*, CUmipmappedArray, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMipmappedArrayGetLevel)(pLevelArray, hMipmappedArray, level)
-    return err
-{{endif}}
-
-{{if 'cuMipmappedArrayDestroy' in found_functions}}
-
-cdef CUresult _cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMipmappedArrayDestroy
-    cuPythonInit()
-    if __cuMipmappedArrayDestroy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMipmappedArrayDestroy" not found')
-    err = (<CUresult (*)(CUmipmappedArray) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMipmappedArrayDestroy)(hMipmappedArray)
-    return err
-{{endif}}
-
-{{if 'cuMemGetHandleForAddressRange' in found_functions}}
-
-cdef CUresult _cuMemGetHandleForAddressRange(void* handle, CUdeviceptr dptr, size_t size, CUmemRangeHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemGetHandleForAddressRange
-    cuPythonInit()
-    if __cuMemGetHandleForAddressRange == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemGetHandleForAddressRange" not found')
-    err = (<CUresult (*)(void*, CUdeviceptr, size_t, CUmemRangeHandleType, unsigned long long) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemGetHandleForAddressRange)(handle, dptr, size, handleType, flags)
-    return err
-{{endif}}
-
-{{if 'cuMemAddressReserve' in found_functions}}
-
-cdef CUresult _cuMemAddressReserve(CUdeviceptr* ptr, size_t size, size_t alignment, CUdeviceptr addr, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemAddressReserve
-    cuPythonInit()
-    if __cuMemAddressReserve == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemAddressReserve" not found')
-    err = (<CUresult (*)(CUdeviceptr*, size_t, size_t, CUdeviceptr, unsigned long long) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemAddressReserve)(ptr, size, alignment, addr, flags)
-    return err
-{{endif}}
-
-{{if 'cuMemAddressFree' in found_functions}}
-
-cdef CUresult _cuMemAddressFree(CUdeviceptr ptr, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemAddressFree
-    cuPythonInit()
-    if __cuMemAddressFree == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemAddressFree" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemAddressFree)(ptr, size)
-    return err
-{{endif}}
-
-{{if 'cuMemCreate' in found_functions}}
-
-cdef CUresult _cuMemCreate(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemCreate
-    cuPythonInit()
-    if __cuMemCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemCreate" not found')
-    err = (<CUresult (*)(CUmemGenericAllocationHandle*, size_t, const CUmemAllocationProp*, unsigned long long) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemCreate)(handle, size, prop, flags)
-    return err
-{{endif}}
-
-{{if 'cuMemRelease' in found_functions}}
-
-cdef CUresult _cuMemRelease(CUmemGenericAllocationHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemRelease
-    cuPythonInit()
-    if __cuMemRelease == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemRelease" not found')
-    err = (<CUresult (*)(CUmemGenericAllocationHandle) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemRelease)(handle)
-    return err
-{{endif}}
-
-{{if 'cuMemMap' in found_functions}}
-
-cdef CUresult _cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemMap
-    cuPythonInit()
-    if __cuMemMap == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemMap" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, size_t, CUmemGenericAllocationHandle, unsigned long long) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemMap)(ptr, size, offset, handle, flags)
-    return err
-{{endif}}
-
-{{if 'cuMemMapArrayAsync' in found_functions}}
-
-cdef CUresult _cuMemMapArrayAsync(CUarrayMapInfo* mapInfoList, unsigned int count, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemMapArrayAsync
-    cuPythonInit()
-    if __cuMemMapArrayAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemMapArrayAsync" not found')
-    err = (<CUresult (*)(CUarrayMapInfo*, unsigned int, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemMapArrayAsync)(mapInfoList, count, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemUnmap' in found_functions}}
-
-cdef CUresult _cuMemUnmap(CUdeviceptr ptr, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemUnmap
-    cuPythonInit()
-    if __cuMemUnmap == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemUnmap" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemUnmap)(ptr, size)
-    return err
-{{endif}}
-
-{{if 'cuMemSetAccess' in found_functions}}
-
-cdef CUresult _cuMemSetAccess(CUdeviceptr ptr, size_t size, const CUmemAccessDesc* desc, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemSetAccess
-    cuPythonInit()
-    if __cuMemSetAccess == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemSetAccess" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, const CUmemAccessDesc*, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemSetAccess)(ptr, size, desc, count)
-    return err
-{{endif}}
-
-{{if 'cuMemGetAccess' in found_functions}}
-
-cdef CUresult _cuMemGetAccess(unsigned long long* flags, const CUmemLocation* location, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemGetAccess
-    cuPythonInit()
-    if __cuMemGetAccess == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemGetAccess" not found')
-    err = (<CUresult (*)(unsigned long long*, const CUmemLocation*, CUdeviceptr) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemGetAccess)(flags, location, ptr)
-    return err
-{{endif}}
-
-{{if 'cuMemExportToShareableHandle' in found_functions}}
-
-cdef CUresult _cuMemExportToShareableHandle(void* shareableHandle, CUmemGenericAllocationHandle handle, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemExportToShareableHandle
-    cuPythonInit()
-    if __cuMemExportToShareableHandle == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemExportToShareableHandle" not found')
-    err = (<CUresult (*)(void*, CUmemGenericAllocationHandle, CUmemAllocationHandleType, unsigned long long) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemExportToShareableHandle)(shareableHandle, handle, handleType, flags)
-    return err
-{{endif}}
-
-{{if 'cuMemImportFromShareableHandle' in found_functions}}
-
-cdef CUresult _cuMemImportFromShareableHandle(CUmemGenericAllocationHandle* handle, void* osHandle, CUmemAllocationHandleType shHandleType) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemImportFromShareableHandle
-    cuPythonInit()
-    if __cuMemImportFromShareableHandle == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemImportFromShareableHandle" not found')
-    err = (<CUresult (*)(CUmemGenericAllocationHandle*, void*, CUmemAllocationHandleType) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemImportFromShareableHandle)(handle, osHandle, shHandleType)
-    return err
-{{endif}}
-
-{{if 'cuMemGetAllocationGranularity' in found_functions}}
-
-cdef CUresult _cuMemGetAllocationGranularity(size_t* granularity, const CUmemAllocationProp* prop, CUmemAllocationGranularity_flags option) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemGetAllocationGranularity
-    cuPythonInit()
-    if __cuMemGetAllocationGranularity == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemGetAllocationGranularity" not found')
-    err = (<CUresult (*)(size_t*, const CUmemAllocationProp*, CUmemAllocationGranularity_flags) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemGetAllocationGranularity)(granularity, prop, option)
-    return err
-{{endif}}
-
-{{if 'cuMemGetAllocationPropertiesFromHandle' in found_functions}}
-
-cdef CUresult _cuMemGetAllocationPropertiesFromHandle(CUmemAllocationProp* prop, CUmemGenericAllocationHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemGetAllocationPropertiesFromHandle
-    cuPythonInit()
-    if __cuMemGetAllocationPropertiesFromHandle == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemGetAllocationPropertiesFromHandle" not found')
-    err = (<CUresult (*)(CUmemAllocationProp*, CUmemGenericAllocationHandle) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemGetAllocationPropertiesFromHandle)(prop, handle)
-    return err
-{{endif}}
-
-{{if 'cuMemRetainAllocationHandle' in found_functions}}
-
-cdef CUresult _cuMemRetainAllocationHandle(CUmemGenericAllocationHandle* handle, void* addr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemRetainAllocationHandle
-    cuPythonInit()
-    if __cuMemRetainAllocationHandle == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemRetainAllocationHandle" not found')
-    err = (<CUresult (*)(CUmemGenericAllocationHandle*, void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemRetainAllocationHandle)(handle, addr)
-    return err
-{{endif}}
-
-{{if 'cuMemFreeAsync' in found_functions}}
-
-cdef CUresult _cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemFreeAsync
-    cuPythonInit()
-    if __cuMemFreeAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemFreeAsync" not found')
-    err = (<CUresult (*)(CUdeviceptr, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemFreeAsync)(dptr, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemAllocAsync' in found_functions}}
-
-cdef CUresult _cuMemAllocAsync(CUdeviceptr* dptr, size_t bytesize, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemAllocAsync
-    cuPythonInit()
-    if __cuMemAllocAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemAllocAsync" not found')
-    err = (<CUresult (*)(CUdeviceptr*, size_t, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemAllocAsync)(dptr, bytesize, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemPoolTrimTo' in found_functions}}
-
-cdef CUresult _cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPoolTrimTo
-    cuPythonInit()
-    if __cuMemPoolTrimTo == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPoolTrimTo" not found')
-    err = (<CUresult (*)(CUmemoryPool, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPoolTrimTo)(pool, minBytesToKeep)
-    return err
-{{endif}}
-
-{{if 'cuMemPoolSetAttribute' in found_functions}}
-
-cdef CUresult _cuMemPoolSetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPoolSetAttribute
-    cuPythonInit()
-    if __cuMemPoolSetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPoolSetAttribute" not found')
-    err = (<CUresult (*)(CUmemoryPool, CUmemPool_attribute, void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPoolSetAttribute)(pool, attr, value)
-    return err
-{{endif}}
-
-{{if 'cuMemPoolGetAttribute' in found_functions}}
-
-cdef CUresult _cuMemPoolGetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPoolGetAttribute
-    cuPythonInit()
-    if __cuMemPoolGetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPoolGetAttribute" not found')
-    err = (<CUresult (*)(CUmemoryPool, CUmemPool_attribute, void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPoolGetAttribute)(pool, attr, value)
-    return err
-{{endif}}
-
-{{if 'cuMemPoolSetAccess' in found_functions}}
-
-cdef CUresult _cuMemPoolSetAccess(CUmemoryPool pool, const CUmemAccessDesc* map, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPoolSetAccess
-    cuPythonInit()
-    if __cuMemPoolSetAccess == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPoolSetAccess" not found')
-    err = (<CUresult (*)(CUmemoryPool, const CUmemAccessDesc*, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPoolSetAccess)(pool, map, count)
-    return err
-{{endif}}
-
-{{if 'cuMemPoolGetAccess' in found_functions}}
-
-cdef CUresult _cuMemPoolGetAccess(CUmemAccess_flags* flags, CUmemoryPool memPool, CUmemLocation* location) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPoolGetAccess
-    cuPythonInit()
-    if __cuMemPoolGetAccess == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPoolGetAccess" not found')
-    err = (<CUresult (*)(CUmemAccess_flags*, CUmemoryPool, CUmemLocation*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPoolGetAccess)(flags, memPool, location)
-    return err
-{{endif}}
-
-{{if 'cuMemPoolCreate' in found_functions}}
-
-cdef CUresult _cuMemPoolCreate(CUmemoryPool* pool, const CUmemPoolProps* poolProps) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPoolCreate
-    cuPythonInit()
-    if __cuMemPoolCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPoolCreate" not found')
-    err = (<CUresult (*)(CUmemoryPool*, const CUmemPoolProps*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPoolCreate)(pool, poolProps)
-    return err
-{{endif}}
-
-{{if 'cuMemPoolDestroy' in found_functions}}
-
-cdef CUresult _cuMemPoolDestroy(CUmemoryPool pool) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPoolDestroy
-    cuPythonInit()
-    if __cuMemPoolDestroy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPoolDestroy" not found')
-    err = (<CUresult (*)(CUmemoryPool) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPoolDestroy)(pool)
-    return err
-{{endif}}
-
-{{if 'cuMemAllocFromPoolAsync' in found_functions}}
-
-cdef CUresult _cuMemAllocFromPoolAsync(CUdeviceptr* dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemAllocFromPoolAsync
-    cuPythonInit()
-    if __cuMemAllocFromPoolAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemAllocFromPoolAsync" not found')
-    err = (<CUresult (*)(CUdeviceptr*, size_t, CUmemoryPool, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemAllocFromPoolAsync)(dptr, bytesize, pool, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemPoolExportToShareableHandle' in found_functions}}
-
-cdef CUresult _cuMemPoolExportToShareableHandle(void* handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPoolExportToShareableHandle
-    cuPythonInit()
-    if __cuMemPoolExportToShareableHandle == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPoolExportToShareableHandle" not found')
-    err = (<CUresult (*)(void*, CUmemoryPool, CUmemAllocationHandleType, unsigned long long) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPoolExportToShareableHandle)(handle_out, pool, handleType, flags)
-    return err
-{{endif}}
-
-{{if 'cuMemPoolImportFromShareableHandle' in found_functions}}
-
-cdef CUresult _cuMemPoolImportFromShareableHandle(CUmemoryPool* pool_out, void* handle, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPoolImportFromShareableHandle
-    cuPythonInit()
-    if __cuMemPoolImportFromShareableHandle == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPoolImportFromShareableHandle" not found')
-    err = (<CUresult (*)(CUmemoryPool*, void*, CUmemAllocationHandleType, unsigned long long) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPoolImportFromShareableHandle)(pool_out, handle, handleType, flags)
-    return err
-{{endif}}
-
-{{if 'cuMemPoolExportPointer' in found_functions}}
-
-cdef CUresult _cuMemPoolExportPointer(CUmemPoolPtrExportData* shareData_out, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPoolExportPointer
-    cuPythonInit()
-    if __cuMemPoolExportPointer == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPoolExportPointer" not found')
-    err = (<CUresult (*)(CUmemPoolPtrExportData*, CUdeviceptr) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPoolExportPointer)(shareData_out, ptr)
-    return err
-{{endif}}
-
-{{if 'cuMemPoolImportPointer' in found_functions}}
-
-cdef CUresult _cuMemPoolImportPointer(CUdeviceptr* ptr_out, CUmemoryPool pool, CUmemPoolPtrExportData* shareData) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPoolImportPointer
-    cuPythonInit()
-    if __cuMemPoolImportPointer == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPoolImportPointer" not found')
-    err = (<CUresult (*)(CUdeviceptr*, CUmemoryPool, CUmemPoolPtrExportData*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPoolImportPointer)(ptr_out, pool, shareData)
-    return err
-{{endif}}
-
-{{if 'cuMulticastCreate' in found_functions}}
-
-cdef CUresult _cuMulticastCreate(CUmemGenericAllocationHandle* mcHandle, const CUmulticastObjectProp* prop) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMulticastCreate
-    cuPythonInit()
-    if __cuMulticastCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMulticastCreate" not found')
-    err = (<CUresult (*)(CUmemGenericAllocationHandle*, const CUmulticastObjectProp*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMulticastCreate)(mcHandle, prop)
-    return err
-{{endif}}
-
-{{if 'cuMulticastAddDevice' in found_functions}}
-
-cdef CUresult _cuMulticastAddDevice(CUmemGenericAllocationHandle mcHandle, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMulticastAddDevice
-    cuPythonInit()
-    if __cuMulticastAddDevice == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMulticastAddDevice" not found')
-    err = (<CUresult (*)(CUmemGenericAllocationHandle, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMulticastAddDevice)(mcHandle, dev)
-    return err
-{{endif}}
-
-{{if 'cuMulticastBindMem' in found_functions}}
-
-cdef CUresult _cuMulticastBindMem(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUmemGenericAllocationHandle memHandle, size_t memOffset, size_t size, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMulticastBindMem
-    cuPythonInit()
-    if __cuMulticastBindMem == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMulticastBindMem" not found')
-    err = (<CUresult (*)(CUmemGenericAllocationHandle, size_t, CUmemGenericAllocationHandle, size_t, size_t, unsigned long long) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMulticastBindMem)(mcHandle, mcOffset, memHandle, memOffset, size, flags)
-    return err
-{{endif}}
-
-{{if 'cuMulticastBindAddr' in found_functions}}
-
-cdef CUresult _cuMulticastBindAddr(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUdeviceptr memptr, size_t size, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMulticastBindAddr
-    cuPythonInit()
-    if __cuMulticastBindAddr == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMulticastBindAddr" not found')
-    err = (<CUresult (*)(CUmemGenericAllocationHandle, size_t, CUdeviceptr, size_t, unsigned long long) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMulticastBindAddr)(mcHandle, mcOffset, memptr, size, flags)
-    return err
-{{endif}}
-
-{{if 'cuMulticastUnbind' in found_functions}}
-
-cdef CUresult _cuMulticastUnbind(CUmemGenericAllocationHandle mcHandle, CUdevice dev, size_t mcOffset, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMulticastUnbind
-    cuPythonInit()
-    if __cuMulticastUnbind == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMulticastUnbind" not found')
-    err = (<CUresult (*)(CUmemGenericAllocationHandle, CUdevice, size_t, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMulticastUnbind)(mcHandle, dev, mcOffset, size)
-    return err
-{{endif}}
-
-{{if 'cuMulticastGetGranularity' in found_functions}}
-
-cdef CUresult _cuMulticastGetGranularity(size_t* granularity, const CUmulticastObjectProp* prop, CUmulticastGranularity_flags option) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMulticastGetGranularity
-    cuPythonInit()
-    if __cuMulticastGetGranularity == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMulticastGetGranularity" not found')
-    err = (<CUresult (*)(size_t*, const CUmulticastObjectProp*, CUmulticastGranularity_flags) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMulticastGetGranularity)(granularity, prop, option)
-    return err
-{{endif}}
-
-{{if 'cuPointerGetAttribute' in found_functions}}
-
-cdef CUresult _cuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuPointerGetAttribute
-    cuPythonInit()
-    if __cuPointerGetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuPointerGetAttribute" not found')
-    err = (<CUresult (*)(void*, CUpointer_attribute, CUdeviceptr) except ?CUDA_ERROR_NOT_FOUND nogil> __cuPointerGetAttribute)(data, attribute, ptr)
-    return err
-{{endif}}
-
-{{if 'cuMemPrefetchAsync' in found_functions}}
-
-cdef CUresult _cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPrefetchAsync
-    cuPythonInit()
-    if __cuMemPrefetchAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPrefetchAsync" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, CUdevice, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPrefetchAsync)(devPtr, count, dstDevice, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemPrefetchAsync_v2' in found_functions}}
-
-cdef CUresult _cuMemPrefetchAsync_v2(CUdeviceptr devPtr, size_t count, CUmemLocation location, unsigned int flags, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemPrefetchAsync_v2
-    cuPythonInit()
-    if __cuMemPrefetchAsync_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemPrefetchAsync_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, CUmemLocation, unsigned int, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemPrefetchAsync_v2)(devPtr, count, location, flags, hStream)
-    return err
-{{endif}}
-
-{{if 'cuMemAdvise' in found_functions}}
-
-cdef CUresult _cuMemAdvise(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemAdvise
-    cuPythonInit()
-    if __cuMemAdvise == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemAdvise" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, CUmem_advise, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemAdvise)(devPtr, count, advice, device)
-    return err
-{{endif}}
-
-{{if 'cuMemAdvise_v2' in found_functions}}
-
-cdef CUresult _cuMemAdvise_v2(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUmemLocation location) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemAdvise_v2
-    cuPythonInit()
-    if __cuMemAdvise_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemAdvise_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr, size_t, CUmem_advise, CUmemLocation) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemAdvise_v2)(devPtr, count, advice, location)
-    return err
-{{endif}}
-
-{{if 'cuMemRangeGetAttribute' in found_functions}}
-
-cdef CUresult _cuMemRangeGetAttribute(void* data, size_t dataSize, CUmem_range_attribute attribute, CUdeviceptr devPtr, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemRangeGetAttribute
-    cuPythonInit()
-    if __cuMemRangeGetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemRangeGetAttribute" not found')
-    err = (<CUresult (*)(void*, size_t, CUmem_range_attribute, CUdeviceptr, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemRangeGetAttribute)(data, dataSize, attribute, devPtr, count)
-    return err
-{{endif}}
-
-{{if 'cuMemRangeGetAttributes' in found_functions}}
-
-cdef CUresult _cuMemRangeGetAttributes(void** data, size_t* dataSizes, CUmem_range_attribute* attributes, size_t numAttributes, CUdeviceptr devPtr, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuMemRangeGetAttributes
-    cuPythonInit()
-    if __cuMemRangeGetAttributes == NULL:
-        with gil:
-            raise RuntimeError('Function "cuMemRangeGetAttributes" not found')
-    err = (<CUresult (*)(void**, size_t*, CUmem_range_attribute*, size_t, CUdeviceptr, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuMemRangeGetAttributes)(data, dataSizes, attributes, numAttributes, devPtr, count)
-    return err
-{{endif}}
-
-{{if 'cuPointerSetAttribute' in found_functions}}
-
-cdef CUresult _cuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuPointerSetAttribute
-    cuPythonInit()
-    if __cuPointerSetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuPointerSetAttribute" not found')
-    err = (<CUresult (*)(const void*, CUpointer_attribute, CUdeviceptr) except ?CUDA_ERROR_NOT_FOUND nogil> __cuPointerSetAttribute)(value, attribute, ptr)
-    return err
-{{endif}}
-
-{{if 'cuPointerGetAttributes' in found_functions}}
-
-cdef CUresult _cuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute* attributes, void** data, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuPointerGetAttributes
-    cuPythonInit()
-    if __cuPointerGetAttributes == NULL:
-        with gil:
-            raise RuntimeError('Function "cuPointerGetAttributes" not found')
-    err = (<CUresult (*)(unsigned int, CUpointer_attribute*, void**, CUdeviceptr) except ?CUDA_ERROR_NOT_FOUND nogil> __cuPointerGetAttributes)(numAttributes, attributes, data, ptr)
-    return err
-{{endif}}
-
-{{if 'cuStreamCreate' in found_functions}}
-
-cdef CUresult _cuStreamCreate(CUstream* phStream, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamCreate
-    cuPythonInit()
-    if __cuStreamCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamCreate" not found')
-    err = (<CUresult (*)(CUstream*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamCreate)(phStream, Flags)
-    return err
-{{endif}}
-
-{{if 'cuStreamCreateWithPriority' in found_functions}}
-
-cdef CUresult _cuStreamCreateWithPriority(CUstream* phStream, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamCreateWithPriority
-    cuPythonInit()
-    if __cuStreamCreateWithPriority == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamCreateWithPriority" not found')
-    err = (<CUresult (*)(CUstream*, unsigned int, int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamCreateWithPriority)(phStream, flags, priority)
-    return err
-{{endif}}
-
-{{if 'cuStreamGetPriority' in found_functions}}
-
-cdef CUresult _cuStreamGetPriority(CUstream hStream, int* priority) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamGetPriority
-    cuPythonInit()
-    if __cuStreamGetPriority == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamGetPriority" not found')
-    err = (<CUresult (*)(CUstream, int*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamGetPriority)(hStream, priority)
-    return err
-{{endif}}
-
-{{if 'cuStreamGetFlags' in found_functions}}
-
-cdef CUresult _cuStreamGetFlags(CUstream hStream, unsigned int* flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamGetFlags
-    cuPythonInit()
-    if __cuStreamGetFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamGetFlags" not found')
-    err = (<CUresult (*)(CUstream, unsigned int*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamGetFlags)(hStream, flags)
-    return err
-{{endif}}
-
-{{if 'cuStreamGetId' in found_functions}}
-
-cdef CUresult _cuStreamGetId(CUstream hStream, unsigned long long* streamId) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamGetId
-    cuPythonInit()
-    if __cuStreamGetId == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamGetId" not found')
-    err = (<CUresult (*)(CUstream, unsigned long long*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamGetId)(hStream, streamId)
-    return err
-{{endif}}
-
-{{if 'cuStreamGetCtx' in found_functions}}
-
-cdef CUresult _cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamGetCtx
-    cuPythonInit()
-    if __cuStreamGetCtx == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamGetCtx" not found')
-    err = (<CUresult (*)(CUstream, CUcontext*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamGetCtx)(hStream, pctx)
-    return err
-{{endif}}
-
-{{if 'cuStreamGetCtx_v2' in found_functions}}
-
-cdef CUresult _cuStreamGetCtx_v2(CUstream hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamGetCtx_v2
-    cuPythonInit()
-    if __cuStreamGetCtx_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamGetCtx_v2" not found')
-    err = (<CUresult (*)(CUstream, CUcontext*, CUgreenCtx*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamGetCtx_v2)(hStream, pCtx, pGreenCtx)
-    return err
-{{endif}}
-
-{{if 'cuStreamWaitEvent' in found_functions}}
-
-cdef CUresult _cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamWaitEvent
-    cuPythonInit()
-    if __cuStreamWaitEvent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamWaitEvent" not found')
-    err = (<CUresult (*)(CUstream, CUevent, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamWaitEvent)(hStream, hEvent, Flags)
-    return err
-{{endif}}
-
-{{if 'cuStreamAddCallback' in found_functions}}
-
-cdef CUresult _cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void* userData, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamAddCallback
-    cuPythonInit()
-    if __cuStreamAddCallback == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamAddCallback" not found')
-    err = (<CUresult (*)(CUstream, CUstreamCallback, void*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamAddCallback)(hStream, callback, userData, flags)
-    return err
-{{endif}}
-
-{{if 'cuStreamBeginCapture_v2' in found_functions}}
-
-cdef CUresult _cuStreamBeginCapture_v2(CUstream hStream, CUstreamCaptureMode mode) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamBeginCapture_v2
-    cuPythonInit()
-    if __cuStreamBeginCapture_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamBeginCapture_v2" not found')
-    err = (<CUresult (*)(CUstream, CUstreamCaptureMode) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamBeginCapture_v2)(hStream, mode)
-    return err
-{{endif}}
-
-{{if 'cuStreamBeginCaptureToGraph' in found_functions}}
-
-cdef CUresult _cuStreamBeginCaptureToGraph(CUstream hStream, CUgraph hGraph, const CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, CUstreamCaptureMode mode) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamBeginCaptureToGraph
-    cuPythonInit()
-    if __cuStreamBeginCaptureToGraph == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamBeginCaptureToGraph" not found')
-    err = (<CUresult (*)(CUstream, CUgraph, const CUgraphNode*, const CUgraphEdgeData*, size_t, CUstreamCaptureMode) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamBeginCaptureToGraph)(hStream, hGraph, dependencies, dependencyData, numDependencies, mode)
-    return err
-{{endif}}
-
-{{if 'cuThreadExchangeStreamCaptureMode' in found_functions}}
-
-cdef CUresult _cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode* mode) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuThreadExchangeStreamCaptureMode
-    cuPythonInit()
-    if __cuThreadExchangeStreamCaptureMode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuThreadExchangeStreamCaptureMode" not found')
-    err = (<CUresult (*)(CUstreamCaptureMode*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuThreadExchangeStreamCaptureMode)(mode)
-    return err
-{{endif}}
-
-{{if 'cuStreamEndCapture' in found_functions}}
-
-cdef CUresult _cuStreamEndCapture(CUstream hStream, CUgraph* phGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamEndCapture
-    cuPythonInit()
-    if __cuStreamEndCapture == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamEndCapture" not found')
-    err = (<CUresult (*)(CUstream, CUgraph*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamEndCapture)(hStream, phGraph)
-    return err
-{{endif}}
-
-{{if 'cuStreamIsCapturing' in found_functions}}
-
-cdef CUresult _cuStreamIsCapturing(CUstream hStream, CUstreamCaptureStatus* captureStatus) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamIsCapturing
-    cuPythonInit()
-    if __cuStreamIsCapturing == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamIsCapturing" not found')
-    err = (<CUresult (*)(CUstream, CUstreamCaptureStatus*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamIsCapturing)(hStream, captureStatus)
-    return err
-{{endif}}
-
-{{if 'cuStreamGetCaptureInfo_v2' in found_functions}}
-
-cdef CUresult _cuStreamGetCaptureInfo_v2(CUstream hStream, CUstreamCaptureStatus* captureStatus_out, cuuint64_t* id_out, CUgraph* graph_out, const CUgraphNode** dependencies_out, size_t* numDependencies_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamGetCaptureInfo_v2
-    cuPythonInit()
-    if __cuStreamGetCaptureInfo_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamGetCaptureInfo_v2" not found')
-    err = (<CUresult (*)(CUstream, CUstreamCaptureStatus*, cuuint64_t*, CUgraph*, const CUgraphNode**, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamGetCaptureInfo_v2)(hStream, captureStatus_out, id_out, graph_out, dependencies_out, numDependencies_out)
-    return err
-{{endif}}
-
-{{if 'cuStreamGetCaptureInfo_v3' in found_functions}}
-
-cdef CUresult _cuStreamGetCaptureInfo_v3(CUstream hStream, CUstreamCaptureStatus* captureStatus_out, cuuint64_t* id_out, CUgraph* graph_out, const CUgraphNode** dependencies_out, const CUgraphEdgeData** edgeData_out, size_t* numDependencies_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamGetCaptureInfo_v3
-    cuPythonInit()
-    if __cuStreamGetCaptureInfo_v3 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamGetCaptureInfo_v3" not found')
-    err = (<CUresult (*)(CUstream, CUstreamCaptureStatus*, cuuint64_t*, CUgraph*, const CUgraphNode**, const CUgraphEdgeData**, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamGetCaptureInfo_v3)(hStream, captureStatus_out, id_out, graph_out, dependencies_out, edgeData_out, numDependencies_out)
-    return err
-{{endif}}
-
-{{if 'cuStreamUpdateCaptureDependencies' in found_functions}}
-
-cdef CUresult _cuStreamUpdateCaptureDependencies(CUstream hStream, CUgraphNode* dependencies, size_t numDependencies, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamUpdateCaptureDependencies
-    cuPythonInit()
-    if __cuStreamUpdateCaptureDependencies == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamUpdateCaptureDependencies" not found')
-    err = (<CUresult (*)(CUstream, CUgraphNode*, size_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamUpdateCaptureDependencies)(hStream, dependencies, numDependencies, flags)
-    return err
-{{endif}}
-
-{{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}
-
-cdef CUresult _cuStreamUpdateCaptureDependencies_v2(CUstream hStream, CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamUpdateCaptureDependencies_v2
-    cuPythonInit()
-    if __cuStreamUpdateCaptureDependencies_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamUpdateCaptureDependencies_v2" not found')
-    err = (<CUresult (*)(CUstream, CUgraphNode*, const CUgraphEdgeData*, size_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamUpdateCaptureDependencies_v2)(hStream, dependencies, dependencyData, numDependencies, flags)
-    return err
-{{endif}}
-
-{{if 'cuStreamAttachMemAsync' in found_functions}}
-
-cdef CUresult _cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamAttachMemAsync
-    cuPythonInit()
-    if __cuStreamAttachMemAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamAttachMemAsync" not found')
-    err = (<CUresult (*)(CUstream, CUdeviceptr, size_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamAttachMemAsync)(hStream, dptr, length, flags)
-    return err
-{{endif}}
-
-{{if 'cuStreamQuery' in found_functions}}
-
-cdef CUresult _cuStreamQuery(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamQuery
-    cuPythonInit()
-    if __cuStreamQuery == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamQuery" not found')
-    err = (<CUresult (*)(CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamQuery)(hStream)
-    return err
-{{endif}}
-
-{{if 'cuStreamSynchronize' in found_functions}}
-
-cdef CUresult _cuStreamSynchronize(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamSynchronize
-    cuPythonInit()
-    if __cuStreamSynchronize == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamSynchronize" not found')
-    err = (<CUresult (*)(CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamSynchronize)(hStream)
-    return err
-{{endif}}
-
-{{if 'cuStreamDestroy_v2' in found_functions}}
-
-cdef CUresult _cuStreamDestroy_v2(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamDestroy_v2
-    cuPythonInit()
-    if __cuStreamDestroy_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamDestroy_v2" not found')
-    err = (<CUresult (*)(CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamDestroy_v2)(hStream)
-    return err
-{{endif}}
-
-{{if 'cuStreamCopyAttributes' in found_functions}}
-
-cdef CUresult _cuStreamCopyAttributes(CUstream dst, CUstream src) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamCopyAttributes
-    cuPythonInit()
-    if __cuStreamCopyAttributes == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamCopyAttributes" not found')
-    err = (<CUresult (*)(CUstream, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamCopyAttributes)(dst, src)
-    return err
-{{endif}}
-
-{{if 'cuStreamGetAttribute' in found_functions}}
-
-cdef CUresult _cuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, CUstreamAttrValue* value_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamGetAttribute
-    cuPythonInit()
-    if __cuStreamGetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamGetAttribute" not found')
-    err = (<CUresult (*)(CUstream, CUstreamAttrID, CUstreamAttrValue*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamGetAttribute)(hStream, attr, value_out)
-    return err
-{{endif}}
-
-{{if 'cuStreamSetAttribute' in found_functions}}
-
-cdef CUresult _cuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr, const CUstreamAttrValue* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamSetAttribute
-    cuPythonInit()
-    if __cuStreamSetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamSetAttribute" not found')
-    err = (<CUresult (*)(CUstream, CUstreamAttrID, const CUstreamAttrValue*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamSetAttribute)(hStream, attr, value)
-    return err
-{{endif}}
-
-{{if 'cuEventCreate' in found_functions}}
-
-cdef CUresult _cuEventCreate(CUevent* phEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEventCreate
-    cuPythonInit()
-    if __cuEventCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEventCreate" not found')
-    err = (<CUresult (*)(CUevent*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEventCreate)(phEvent, Flags)
-    return err
-{{endif}}
-
-{{if 'cuEventRecord' in found_functions}}
-
-cdef CUresult _cuEventRecord(CUevent hEvent, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEventRecord
-    cuPythonInit()
-    if __cuEventRecord == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEventRecord" not found')
-    err = (<CUresult (*)(CUevent, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEventRecord)(hEvent, hStream)
-    return err
-{{endif}}
-
-{{if 'cuEventRecordWithFlags' in found_functions}}
-
-cdef CUresult _cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEventRecordWithFlags
-    cuPythonInit()
-    if __cuEventRecordWithFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEventRecordWithFlags" not found')
-    err = (<CUresult (*)(CUevent, CUstream, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEventRecordWithFlags)(hEvent, hStream, flags)
-    return err
-{{endif}}
-
-{{if 'cuEventQuery' in found_functions}}
-
-cdef CUresult _cuEventQuery(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEventQuery
-    cuPythonInit()
-    if __cuEventQuery == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEventQuery" not found')
-    err = (<CUresult (*)(CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEventQuery)(hEvent)
-    return err
-{{endif}}
-
-{{if 'cuEventSynchronize' in found_functions}}
-
-cdef CUresult _cuEventSynchronize(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEventSynchronize
-    cuPythonInit()
-    if __cuEventSynchronize == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEventSynchronize" not found')
-    err = (<CUresult (*)(CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEventSynchronize)(hEvent)
-    return err
-{{endif}}
-
-{{if 'cuEventDestroy_v2' in found_functions}}
-
-cdef CUresult _cuEventDestroy_v2(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEventDestroy_v2
-    cuPythonInit()
-    if __cuEventDestroy_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEventDestroy_v2" not found')
-    err = (<CUresult (*)(CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEventDestroy_v2)(hEvent)
-    return err
-{{endif}}
-
-{{if 'cuEventElapsedTime' in found_functions}}
-
-cdef CUresult _cuEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEventElapsedTime
-    cuPythonInit()
-    if __cuEventElapsedTime == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEventElapsedTime" not found')
-    err = (<CUresult (*)(float*, CUevent, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEventElapsedTime)(pMilliseconds, hStart, hEnd)
-    return err
-{{endif}}
-
-{{if 'cuImportExternalMemory' in found_functions}}
-
-cdef CUresult _cuImportExternalMemory(CUexternalMemory* extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC* memHandleDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuImportExternalMemory
-    cuPythonInit()
-    if __cuImportExternalMemory == NULL:
-        with gil:
-            raise RuntimeError('Function "cuImportExternalMemory" not found')
-    err = (<CUresult (*)(CUexternalMemory*, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuImportExternalMemory)(extMem_out, memHandleDesc)
-    return err
-{{endif}}
-
-{{if 'cuExternalMemoryGetMappedBuffer' in found_functions}}
-
-cdef CUresult _cuExternalMemoryGetMappedBuffer(CUdeviceptr* devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC* bufferDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuExternalMemoryGetMappedBuffer
-    cuPythonInit()
-    if __cuExternalMemoryGetMappedBuffer == NULL:
-        with gil:
-            raise RuntimeError('Function "cuExternalMemoryGetMappedBuffer" not found')
-    err = (<CUresult (*)(CUdeviceptr*, CUexternalMemory, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuExternalMemoryGetMappedBuffer)(devPtr, extMem, bufferDesc)
-    return err
-{{endif}}
-
-{{if 'cuExternalMemoryGetMappedMipmappedArray' in found_functions}}
-
-cdef CUresult _cuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray* mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC* mipmapDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuExternalMemoryGetMappedMipmappedArray
-    cuPythonInit()
-    if __cuExternalMemoryGetMappedMipmappedArray == NULL:
-        with gil:
-            raise RuntimeError('Function "cuExternalMemoryGetMappedMipmappedArray" not found')
-    err = (<CUresult (*)(CUmipmappedArray*, CUexternalMemory, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuExternalMemoryGetMappedMipmappedArray)(mipmap, extMem, mipmapDesc)
-    return err
-{{endif}}
-
-{{if 'cuDestroyExternalMemory' in found_functions}}
-
-cdef CUresult _cuDestroyExternalMemory(CUexternalMemory extMem) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDestroyExternalMemory
-    cuPythonInit()
-    if __cuDestroyExternalMemory == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDestroyExternalMemory" not found')
-    err = (<CUresult (*)(CUexternalMemory) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDestroyExternalMemory)(extMem)
-    return err
-{{endif}}
-
-{{if 'cuImportExternalSemaphore' in found_functions}}
-
-cdef CUresult _cuImportExternalSemaphore(CUexternalSemaphore* extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC* semHandleDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuImportExternalSemaphore
-    cuPythonInit()
-    if __cuImportExternalSemaphore == NULL:
-        with gil:
-            raise RuntimeError('Function "cuImportExternalSemaphore" not found')
-    err = (<CUresult (*)(CUexternalSemaphore*, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuImportExternalSemaphore)(extSem_out, semHandleDesc)
-    return err
-{{endif}}
-
-{{if 'cuSignalExternalSemaphoresAsync' in found_functions}}
-
-cdef CUresult _cuSignalExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuSignalExternalSemaphoresAsync
-    cuPythonInit()
-    if __cuSignalExternalSemaphoresAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuSignalExternalSemaphoresAsync" not found')
-    err = (<CUresult (*)(const CUexternalSemaphore*, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS*, unsigned int, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuSignalExternalSemaphoresAsync)(extSemArray, paramsArray, numExtSems, stream)
-    return err
-{{endif}}
-
-{{if 'cuWaitExternalSemaphoresAsync' in found_functions}}
-
-cdef CUresult _cuWaitExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuWaitExternalSemaphoresAsync
-    cuPythonInit()
-    if __cuWaitExternalSemaphoresAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuWaitExternalSemaphoresAsync" not found')
-    err = (<CUresult (*)(const CUexternalSemaphore*, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS*, unsigned int, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuWaitExternalSemaphoresAsync)(extSemArray, paramsArray, numExtSems, stream)
-    return err
-{{endif}}
-
-{{if 'cuDestroyExternalSemaphore' in found_functions}}
-
-cdef CUresult _cuDestroyExternalSemaphore(CUexternalSemaphore extSem) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDestroyExternalSemaphore
-    cuPythonInit()
-    if __cuDestroyExternalSemaphore == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDestroyExternalSemaphore" not found')
-    err = (<CUresult (*)(CUexternalSemaphore) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDestroyExternalSemaphore)(extSem)
-    return err
-{{endif}}
-
-{{if 'cuStreamWaitValue32_v2' in found_functions}}
-
-cdef CUresult _cuStreamWaitValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamWaitValue32_v2
-    cuPythonInit()
-    if __cuStreamWaitValue32_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamWaitValue32_v2" not found')
-    err = (<CUresult (*)(CUstream, CUdeviceptr, cuuint32_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamWaitValue32_v2)(stream, addr, value, flags)
-    return err
-{{endif}}
-
-{{if 'cuStreamWaitValue64_v2' in found_functions}}
-
-cdef CUresult _cuStreamWaitValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamWaitValue64_v2
-    cuPythonInit()
-    if __cuStreamWaitValue64_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamWaitValue64_v2" not found')
-    err = (<CUresult (*)(CUstream, CUdeviceptr, cuuint64_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamWaitValue64_v2)(stream, addr, value, flags)
-    return err
-{{endif}}
-
-{{if 'cuStreamWriteValue32_v2' in found_functions}}
-
-cdef CUresult _cuStreamWriteValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamWriteValue32_v2
-    cuPythonInit()
-    if __cuStreamWriteValue32_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamWriteValue32_v2" not found')
-    err = (<CUresult (*)(CUstream, CUdeviceptr, cuuint32_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamWriteValue32_v2)(stream, addr, value, flags)
-    return err
-{{endif}}
-
-{{if 'cuStreamWriteValue64_v2' in found_functions}}
-
-cdef CUresult _cuStreamWriteValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamWriteValue64_v2
-    cuPythonInit()
-    if __cuStreamWriteValue64_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamWriteValue64_v2" not found')
-    err = (<CUresult (*)(CUstream, CUdeviceptr, cuuint64_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamWriteValue64_v2)(stream, addr, value, flags)
-    return err
-{{endif}}
-
-{{if 'cuStreamBatchMemOp_v2' in found_functions}}
-
-cdef CUresult _cuStreamBatchMemOp_v2(CUstream stream, unsigned int count, CUstreamBatchMemOpParams* paramArray, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamBatchMemOp_v2
-    cuPythonInit()
-    if __cuStreamBatchMemOp_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamBatchMemOp_v2" not found')
-    err = (<CUresult (*)(CUstream, unsigned int, CUstreamBatchMemOpParams*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamBatchMemOp_v2)(stream, count, paramArray, flags)
-    return err
-{{endif}}
-
-{{if 'cuFuncGetAttribute' in found_functions}}
-
-cdef CUresult _cuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFuncGetAttribute
-    cuPythonInit()
-    if __cuFuncGetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFuncGetAttribute" not found')
-    err = (<CUresult (*)(int*, CUfunction_attribute, CUfunction) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFuncGetAttribute)(pi, attrib, hfunc)
-    return err
-{{endif}}
-
-{{if 'cuFuncSetAttribute' in found_functions}}
-
-cdef CUresult _cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFuncSetAttribute
-    cuPythonInit()
-    if __cuFuncSetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFuncSetAttribute" not found')
-    err = (<CUresult (*)(CUfunction, CUfunction_attribute, int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFuncSetAttribute)(hfunc, attrib, value)
-    return err
-{{endif}}
-
-{{if 'cuFuncSetCacheConfig' in found_functions}}
-
-cdef CUresult _cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFuncSetCacheConfig
-    cuPythonInit()
-    if __cuFuncSetCacheConfig == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFuncSetCacheConfig" not found')
-    err = (<CUresult (*)(CUfunction, CUfunc_cache) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFuncSetCacheConfig)(hfunc, config)
-    return err
-{{endif}}
-
-{{if 'cuFuncGetModule' in found_functions}}
-
-cdef CUresult _cuFuncGetModule(CUmodule* hmod, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFuncGetModule
-    cuPythonInit()
-    if __cuFuncGetModule == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFuncGetModule" not found')
-    err = (<CUresult (*)(CUmodule*, CUfunction) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFuncGetModule)(hmod, hfunc)
-    return err
-{{endif}}
-
-{{if 'cuFuncGetName' in found_functions}}
-
-cdef CUresult _cuFuncGetName(const char** name, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFuncGetName
-    cuPythonInit()
-    if __cuFuncGetName == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFuncGetName" not found')
-    err = (<CUresult (*)(const char**, CUfunction) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFuncGetName)(name, hfunc)
-    return err
-{{endif}}
-
-{{if 'cuFuncGetParamInfo' in found_functions}}
-
-cdef CUresult _cuFuncGetParamInfo(CUfunction func, size_t paramIndex, size_t* paramOffset, size_t* paramSize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFuncGetParamInfo
-    cuPythonInit()
-    if __cuFuncGetParamInfo == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFuncGetParamInfo" not found')
-    err = (<CUresult (*)(CUfunction, size_t, size_t*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFuncGetParamInfo)(func, paramIndex, paramOffset, paramSize)
-    return err
-{{endif}}
-
-{{if 'cuFuncIsLoaded' in found_functions}}
-
-cdef CUresult _cuFuncIsLoaded(CUfunctionLoadingState* state, CUfunction function) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFuncIsLoaded
-    cuPythonInit()
-    if __cuFuncIsLoaded == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFuncIsLoaded" not found')
-    err = (<CUresult (*)(CUfunctionLoadingState*, CUfunction) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFuncIsLoaded)(state, function)
-    return err
-{{endif}}
-
-{{if 'cuFuncLoad' in found_functions}}
-
-cdef CUresult _cuFuncLoad(CUfunction function) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFuncLoad
-    cuPythonInit()
-    if __cuFuncLoad == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFuncLoad" not found')
-    err = (<CUresult (*)(CUfunction) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFuncLoad)(function)
-    return err
-{{endif}}
-
-{{if 'cuLaunchKernel' in found_functions}}
-
-cdef CUresult _cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLaunchKernel
-    cuPythonInit()
-    if __cuLaunchKernel == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLaunchKernel" not found')
-    err = (<CUresult (*)(CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void**, void**) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLaunchKernel)(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra)
-    return err
-{{endif}}
-
-{{if 'cuLaunchKernelEx' in found_functions}}
-
-cdef CUresult _cuLaunchKernelEx(const CUlaunchConfig* config, CUfunction f, void** kernelParams, void** extra) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLaunchKernelEx
-    cuPythonInit()
-    if __cuLaunchKernelEx == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLaunchKernelEx" not found')
-    err = (<CUresult (*)(const CUlaunchConfig*, CUfunction, void**, void**) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLaunchKernelEx)(config, f, kernelParams, extra)
-    return err
-{{endif}}
-
-{{if 'cuLaunchCooperativeKernel' in found_functions}}
-
-cdef CUresult _cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLaunchCooperativeKernel
-    cuPythonInit()
-    if __cuLaunchCooperativeKernel == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLaunchCooperativeKernel" not found')
-    err = (<CUresult (*)(CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void**) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLaunchCooperativeKernel)(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams)
-    return err
-{{endif}}
-
-{{if 'cuLaunchCooperativeKernelMultiDevice' in found_functions}}
-
-cdef CUresult _cuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS* launchParamsList, unsigned int numDevices, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLaunchCooperativeKernelMultiDevice
-    cuPythonInit()
-    if __cuLaunchCooperativeKernelMultiDevice == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLaunchCooperativeKernelMultiDevice" not found')
-    err = (<CUresult (*)(CUDA_LAUNCH_PARAMS*, unsigned int, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLaunchCooperativeKernelMultiDevice)(launchParamsList, numDevices, flags)
-    return err
-{{endif}}
-
-{{if 'cuLaunchHostFunc' in found_functions}}
-
-cdef CUresult _cuLaunchHostFunc(CUstream hStream, CUhostFn fn, void* userData) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLaunchHostFunc
-    cuPythonInit()
-    if __cuLaunchHostFunc == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLaunchHostFunc" not found')
-    err = (<CUresult (*)(CUstream, CUhostFn, void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLaunchHostFunc)(hStream, fn, userData)
-    return err
-{{endif}}
-
-{{if 'cuFuncSetBlockShape' in found_functions}}
-
-cdef CUresult _cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFuncSetBlockShape
-    cuPythonInit()
-    if __cuFuncSetBlockShape == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFuncSetBlockShape" not found')
-    err = (<CUresult (*)(CUfunction, int, int, int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFuncSetBlockShape)(hfunc, x, y, z)
-    return err
-{{endif}}
-
-{{if 'cuFuncSetSharedSize' in found_functions}}
-
-cdef CUresult _cuFuncSetSharedSize(CUfunction hfunc, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFuncSetSharedSize
-    cuPythonInit()
-    if __cuFuncSetSharedSize == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFuncSetSharedSize" not found')
-    err = (<CUresult (*)(CUfunction, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFuncSetSharedSize)(hfunc, numbytes)
-    return err
-{{endif}}
-
-{{if 'cuParamSetSize' in found_functions}}
-
-cdef CUresult _cuParamSetSize(CUfunction hfunc, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuParamSetSize
-    cuPythonInit()
-    if __cuParamSetSize == NULL:
-        with gil:
-            raise RuntimeError('Function "cuParamSetSize" not found')
-    err = (<CUresult (*)(CUfunction, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuParamSetSize)(hfunc, numbytes)
-    return err
-{{endif}}
-
-{{if 'cuParamSeti' in found_functions}}
-
-cdef CUresult _cuParamSeti(CUfunction hfunc, int offset, unsigned int value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuParamSeti
-    cuPythonInit()
-    if __cuParamSeti == NULL:
-        with gil:
-            raise RuntimeError('Function "cuParamSeti" not found')
-    err = (<CUresult (*)(CUfunction, int, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuParamSeti)(hfunc, offset, value)
-    return err
-{{endif}}
-
-{{if 'cuParamSetf' in found_functions}}
-
-cdef CUresult _cuParamSetf(CUfunction hfunc, int offset, float value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuParamSetf
-    cuPythonInit()
-    if __cuParamSetf == NULL:
-        with gil:
-            raise RuntimeError('Function "cuParamSetf" not found')
-    err = (<CUresult (*)(CUfunction, int, float) except ?CUDA_ERROR_NOT_FOUND nogil> __cuParamSetf)(hfunc, offset, value)
-    return err
-{{endif}}
-
-{{if 'cuParamSetv' in found_functions}}
-
-cdef CUresult _cuParamSetv(CUfunction hfunc, int offset, void* ptr, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuParamSetv
-    cuPythonInit()
-    if __cuParamSetv == NULL:
-        with gil:
-            raise RuntimeError('Function "cuParamSetv" not found')
-    err = (<CUresult (*)(CUfunction, int, void*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuParamSetv)(hfunc, offset, ptr, numbytes)
-    return err
-{{endif}}
-
-{{if 'cuLaunch' in found_functions}}
-
-cdef CUresult _cuLaunch(CUfunction f) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLaunch
-    cuPythonInit()
-    if __cuLaunch == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLaunch" not found')
-    err = (<CUresult (*)(CUfunction) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLaunch)(f)
-    return err
-{{endif}}
-
-{{if 'cuLaunchGrid' in found_functions}}
-
-cdef CUresult _cuLaunchGrid(CUfunction f, int grid_width, int grid_height) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLaunchGrid
-    cuPythonInit()
-    if __cuLaunchGrid == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLaunchGrid" not found')
-    err = (<CUresult (*)(CUfunction, int, int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLaunchGrid)(f, grid_width, grid_height)
-    return err
-{{endif}}
-
-{{if 'cuLaunchGridAsync' in found_functions}}
-
-cdef CUresult _cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuLaunchGridAsync
-    cuPythonInit()
-    if __cuLaunchGridAsync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuLaunchGridAsync" not found')
-    err = (<CUresult (*)(CUfunction, int, int, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuLaunchGridAsync)(f, grid_width, grid_height, hStream)
-    return err
-{{endif}}
-
-{{if 'cuParamSetTexRef' in found_functions}}
-
-cdef CUresult _cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuParamSetTexRef
-    cuPythonInit()
-    if __cuParamSetTexRef == NULL:
-        with gil:
-            raise RuntimeError('Function "cuParamSetTexRef" not found')
-    err = (<CUresult (*)(CUfunction, int, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuParamSetTexRef)(hfunc, texunit, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuFuncSetSharedMemConfig' in found_functions}}
-
-cdef CUresult _cuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuFuncSetSharedMemConfig
-    cuPythonInit()
-    if __cuFuncSetSharedMemConfig == NULL:
-        with gil:
-            raise RuntimeError('Function "cuFuncSetSharedMemConfig" not found')
-    err = (<CUresult (*)(CUfunction, CUsharedconfig) except ?CUDA_ERROR_NOT_FOUND nogil> __cuFuncSetSharedMemConfig)(hfunc, config)
-    return err
-{{endif}}
-
-{{if 'cuGraphCreate' in found_functions}}
-
-cdef CUresult _cuGraphCreate(CUgraph* phGraph, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphCreate
-    cuPythonInit()
-    if __cuGraphCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphCreate" not found')
-    err = (<CUresult (*)(CUgraph*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphCreate)(phGraph, flags)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddKernelNode_v2' in found_functions}}
-
-cdef CUresult _cuGraphAddKernelNode_v2(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddKernelNode_v2
-    cuPythonInit()
-    if __cuGraphAddKernelNode_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddKernelNode_v2" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, const CUDA_KERNEL_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddKernelNode_v2)(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphKernelNodeGetParams_v2' in found_functions}}
-
-cdef CUresult _cuGraphKernelNodeGetParams_v2(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphKernelNodeGetParams_v2
-    cuPythonInit()
-    if __cuGraphKernelNodeGetParams_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphKernelNodeGetParams_v2" not found')
-    err = (<CUresult (*)(CUgraphNode, CUDA_KERNEL_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphKernelNodeGetParams_v2)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphKernelNodeSetParams_v2' in found_functions}}
-
-cdef CUresult _cuGraphKernelNodeSetParams_v2(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphKernelNodeSetParams_v2
-    cuPythonInit()
-    if __cuGraphKernelNodeSetParams_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphKernelNodeSetParams_v2" not found')
-    err = (<CUresult (*)(CUgraphNode, const CUDA_KERNEL_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphKernelNodeSetParams_v2)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddMemcpyNode' in found_functions}}
-
-cdef CUresult _cuGraphAddMemcpyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMCPY3D* copyParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddMemcpyNode
-    cuPythonInit()
-    if __cuGraphAddMemcpyNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddMemcpyNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, const CUDA_MEMCPY3D*, CUcontext) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddMemcpyNode)(phGraphNode, hGraph, dependencies, numDependencies, copyParams, ctx)
-    return err
-{{endif}}
-
-{{if 'cuGraphMemcpyNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphMemcpyNodeGetParams
-    cuPythonInit()
-    if __cuGraphMemcpyNodeGetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphMemcpyNodeGetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, CUDA_MEMCPY3D*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphMemcpyNodeGetParams)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphMemcpyNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphMemcpyNodeSetParams
-    cuPythonInit()
-    if __cuGraphMemcpyNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphMemcpyNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, const CUDA_MEMCPY3D*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphMemcpyNodeSetParams)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddMemsetNode' in found_functions}}
-
-cdef CUresult _cuGraphAddMemsetNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddMemsetNode
-    cuPythonInit()
-    if __cuGraphAddMemsetNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddMemsetNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, const CUDA_MEMSET_NODE_PARAMS*, CUcontext) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddMemsetNode)(phGraphNode, hGraph, dependencies, numDependencies, memsetParams, ctx)
-    return err
-{{endif}}
-
-{{if 'cuGraphMemsetNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphMemsetNodeGetParams
-    cuPythonInit()
-    if __cuGraphMemsetNodeGetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphMemsetNodeGetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, CUDA_MEMSET_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphMemsetNodeGetParams)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphMemsetNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemsetNodeSetParams(CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphMemsetNodeSetParams
-    cuPythonInit()
-    if __cuGraphMemsetNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphMemsetNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, const CUDA_MEMSET_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphMemsetNodeSetParams)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddHostNode' in found_functions}}
-
-cdef CUresult _cuGraphAddHostNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddHostNode
-    cuPythonInit()
-    if __cuGraphAddHostNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddHostNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, const CUDA_HOST_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddHostNode)(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphHostNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphHostNodeGetParams
-    cuPythonInit()
-    if __cuGraphHostNodeGetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphHostNodeGetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, CUDA_HOST_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphHostNodeGetParams)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphHostNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphHostNodeSetParams(CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphHostNodeSetParams
-    cuPythonInit()
-    if __cuGraphHostNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphHostNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, const CUDA_HOST_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphHostNodeSetParams)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddChildGraphNode' in found_functions}}
-
-cdef CUresult _cuGraphAddChildGraphNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUgraph childGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddChildGraphNode
-    cuPythonInit()
-    if __cuGraphAddChildGraphNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddChildGraphNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, CUgraph) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddChildGraphNode)(phGraphNode, hGraph, dependencies, numDependencies, childGraph)
-    return err
-{{endif}}
-
-{{if 'cuGraphChildGraphNodeGetGraph' in found_functions}}
-
-cdef CUresult _cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph* phGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphChildGraphNodeGetGraph
-    cuPythonInit()
-    if __cuGraphChildGraphNodeGetGraph == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphChildGraphNodeGetGraph" not found')
-    err = (<CUresult (*)(CUgraphNode, CUgraph*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphChildGraphNodeGetGraph)(hNode, phGraph)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddEmptyNode' in found_functions}}
-
-cdef CUresult _cuGraphAddEmptyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddEmptyNode
-    cuPythonInit()
-    if __cuGraphAddEmptyNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddEmptyNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddEmptyNode)(phGraphNode, hGraph, dependencies, numDependencies)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddEventRecordNode' in found_functions}}
-
-cdef CUresult _cuGraphAddEventRecordNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddEventRecordNode
-    cuPythonInit()
-    if __cuGraphAddEventRecordNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddEventRecordNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddEventRecordNode)(phGraphNode, hGraph, dependencies, numDependencies, event)
-    return err
-{{endif}}
-
-{{if 'cuGraphEventRecordNodeGetEvent' in found_functions}}
-
-cdef CUresult _cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent* event_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphEventRecordNodeGetEvent
-    cuPythonInit()
-    if __cuGraphEventRecordNodeGetEvent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphEventRecordNodeGetEvent" not found')
-    err = (<CUresult (*)(CUgraphNode, CUevent*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphEventRecordNodeGetEvent)(hNode, event_out)
-    return err
-{{endif}}
-
-{{if 'cuGraphEventRecordNodeSetEvent' in found_functions}}
-
-cdef CUresult _cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphEventRecordNodeSetEvent
-    cuPythonInit()
-    if __cuGraphEventRecordNodeSetEvent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphEventRecordNodeSetEvent" not found')
-    err = (<CUresult (*)(CUgraphNode, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphEventRecordNodeSetEvent)(hNode, event)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddEventWaitNode' in found_functions}}
-
-cdef CUresult _cuGraphAddEventWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddEventWaitNode
-    cuPythonInit()
-    if __cuGraphAddEventWaitNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddEventWaitNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddEventWaitNode)(phGraphNode, hGraph, dependencies, numDependencies, event)
-    return err
-{{endif}}
-
-{{if 'cuGraphEventWaitNodeGetEvent' in found_functions}}
-
-cdef CUresult _cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent* event_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphEventWaitNodeGetEvent
-    cuPythonInit()
-    if __cuGraphEventWaitNodeGetEvent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphEventWaitNodeGetEvent" not found')
-    err = (<CUresult (*)(CUgraphNode, CUevent*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphEventWaitNodeGetEvent)(hNode, event_out)
-    return err
-{{endif}}
-
-{{if 'cuGraphEventWaitNodeSetEvent' in found_functions}}
-
-cdef CUresult _cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphEventWaitNodeSetEvent
-    cuPythonInit()
-    if __cuGraphEventWaitNodeSetEvent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphEventWaitNodeSetEvent" not found')
-    err = (<CUresult (*)(CUgraphNode, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphEventWaitNodeSetEvent)(hNode, event)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddExternalSemaphoresSignalNode' in found_functions}}
-
-cdef CUresult _cuGraphAddExternalSemaphoresSignalNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddExternalSemaphoresSignalNode
-    cuPythonInit()
-    if __cuGraphAddExternalSemaphoresSignalNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddExternalSemaphoresSignalNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddExternalSemaphoresSignalNode)(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExternalSemaphoresSignalNodeGetParams
-    cuPythonInit()
-    if __cuGraphExternalSemaphoresSignalNodeGetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExternalSemaphoresSignalNodeGetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExternalSemaphoresSignalNodeGetParams)(hNode, params_out)
-    return err
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExternalSemaphoresSignalNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExternalSemaphoresSignalNodeSetParams
-    cuPythonInit()
-    if __cuGraphExternalSemaphoresSignalNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExternalSemaphoresSignalNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExternalSemaphoresSignalNodeSetParams)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddExternalSemaphoresWaitNode' in found_functions}}
-
-cdef CUresult _cuGraphAddExternalSemaphoresWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddExternalSemaphoresWaitNode
-    cuPythonInit()
-    if __cuGraphAddExternalSemaphoresWaitNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddExternalSemaphoresWaitNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, const CUDA_EXT_SEM_WAIT_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddExternalSemaphoresWaitNode)(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExternalSemaphoresWaitNodeGetParams
-    cuPythonInit()
-    if __cuGraphExternalSemaphoresWaitNodeGetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExternalSemaphoresWaitNodeGetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExternalSemaphoresWaitNodeGetParams)(hNode, params_out)
-    return err
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExternalSemaphoresWaitNodeSetParams
-    cuPythonInit()
-    if __cuGraphExternalSemaphoresWaitNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExternalSemaphoresWaitNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExternalSemaphoresWaitNodeSetParams)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddBatchMemOpNode' in found_functions}}
-
-cdef CUresult _cuGraphAddBatchMemOpNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddBatchMemOpNode
-    cuPythonInit()
-    if __cuGraphAddBatchMemOpNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddBatchMemOpNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, const CUDA_BATCH_MEM_OP_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddBatchMemOpNode)(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphBatchMemOpNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphBatchMemOpNodeGetParams(CUgraphNode hNode, CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphBatchMemOpNodeGetParams
-    cuPythonInit()
-    if __cuGraphBatchMemOpNodeGetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphBatchMemOpNodeGetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, CUDA_BATCH_MEM_OP_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphBatchMemOpNodeGetParams)(hNode, nodeParams_out)
-    return err
-{{endif}}
-
-{{if 'cuGraphBatchMemOpNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphBatchMemOpNodeSetParams(CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphBatchMemOpNodeSetParams
-    cuPythonInit()
-    if __cuGraphBatchMemOpNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphBatchMemOpNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphBatchMemOpNodeSetParams)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecBatchMemOpNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecBatchMemOpNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecBatchMemOpNodeSetParams
-    cuPythonInit()
-    if __cuGraphExecBatchMemOpNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecBatchMemOpNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecBatchMemOpNodeSetParams)(hGraphExec, hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddMemAllocNode' in found_functions}}
-
-cdef CUresult _cuGraphAddMemAllocNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUDA_MEM_ALLOC_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddMemAllocNode
-    cuPythonInit()
-    if __cuGraphAddMemAllocNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddMemAllocNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, CUDA_MEM_ALLOC_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddMemAllocNode)(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphMemAllocNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemAllocNodeGetParams(CUgraphNode hNode, CUDA_MEM_ALLOC_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphMemAllocNodeGetParams
-    cuPythonInit()
-    if __cuGraphMemAllocNodeGetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphMemAllocNodeGetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, CUDA_MEM_ALLOC_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphMemAllocNodeGetParams)(hNode, params_out)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddMemFreeNode' in found_functions}}
-
-cdef CUresult _cuGraphAddMemFreeNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddMemFreeNode
-    cuPythonInit()
-    if __cuGraphAddMemFreeNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddMemFreeNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, CUdeviceptr) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddMemFreeNode)(phGraphNode, hGraph, dependencies, numDependencies, dptr)
-    return err
-{{endif}}
-
-{{if 'cuGraphMemFreeNodeGetParams' in found_functions}}
-
-cdef CUresult _cuGraphMemFreeNodeGetParams(CUgraphNode hNode, CUdeviceptr* dptr_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphMemFreeNodeGetParams
-    cuPythonInit()
-    if __cuGraphMemFreeNodeGetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphMemFreeNodeGetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, CUdeviceptr*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphMemFreeNodeGetParams)(hNode, dptr_out)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGraphMemTrim' in found_functions}}
-
-cdef CUresult _cuDeviceGraphMemTrim(CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGraphMemTrim
-    cuPythonInit()
-    if __cuDeviceGraphMemTrim == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGraphMemTrim" not found')
-    err = (<CUresult (*)(CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGraphMemTrim)(device)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetGraphMemAttribute' in found_functions}}
-
-cdef CUresult _cuDeviceGetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetGraphMemAttribute
-    cuPythonInit()
-    if __cuDeviceGetGraphMemAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetGraphMemAttribute" not found')
-    err = (<CUresult (*)(CUdevice, CUgraphMem_attribute, void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetGraphMemAttribute)(device, attr, value)
-    return err
-{{endif}}
-
-{{if 'cuDeviceSetGraphMemAttribute' in found_functions}}
-
-cdef CUresult _cuDeviceSetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceSetGraphMemAttribute
-    cuPythonInit()
-    if __cuDeviceSetGraphMemAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceSetGraphMemAttribute" not found')
-    err = (<CUresult (*)(CUdevice, CUgraphMem_attribute, void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceSetGraphMemAttribute)(device, attr, value)
-    return err
-{{endif}}
-
-{{if 'cuGraphClone' in found_functions}}
-
-cdef CUresult _cuGraphClone(CUgraph* phGraphClone, CUgraph originalGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphClone
-    cuPythonInit()
-    if __cuGraphClone == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphClone" not found')
-    err = (<CUresult (*)(CUgraph*, CUgraph) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphClone)(phGraphClone, originalGraph)
-    return err
-{{endif}}
-
-{{if 'cuGraphNodeFindInClone' in found_functions}}
-
-cdef CUresult _cuGraphNodeFindInClone(CUgraphNode* phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphNodeFindInClone
-    cuPythonInit()
-    if __cuGraphNodeFindInClone == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphNodeFindInClone" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraphNode, CUgraph) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphNodeFindInClone)(phNode, hOriginalNode, hClonedGraph)
-    return err
-{{endif}}
-
-{{if 'cuGraphNodeGetType' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType* typename) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphNodeGetType
-    cuPythonInit()
-    if __cuGraphNodeGetType == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphNodeGetType" not found')
-    err = (<CUresult (*)(CUgraphNode, CUgraphNodeType*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphNodeGetType)(hNode, typename)
-    return err
-{{endif}}
-
-{{if 'cuGraphGetNodes' in found_functions}}
-
-cdef CUresult _cuGraphGetNodes(CUgraph hGraph, CUgraphNode* nodes, size_t* numNodes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphGetNodes
-    cuPythonInit()
-    if __cuGraphGetNodes == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphGetNodes" not found')
-    err = (<CUresult (*)(CUgraph, CUgraphNode*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphGetNodes)(hGraph, nodes, numNodes)
-    return err
-{{endif}}
-
-{{if 'cuGraphGetRootNodes' in found_functions}}
-
-cdef CUresult _cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode* rootNodes, size_t* numRootNodes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphGetRootNodes
-    cuPythonInit()
-    if __cuGraphGetRootNodes == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphGetRootNodes" not found')
-    err = (<CUresult (*)(CUgraph, CUgraphNode*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphGetRootNodes)(hGraph, rootNodes, numRootNodes)
-    return err
-{{endif}}
-
-{{if 'cuGraphGetEdges' in found_functions}}
-
-cdef CUresult _cuGraphGetEdges(CUgraph hGraph, CUgraphNode* from_, CUgraphNode* to, size_t* numEdges) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphGetEdges
-    cuPythonInit()
-    if __cuGraphGetEdges == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphGetEdges" not found')
-    err = (<CUresult (*)(CUgraph, CUgraphNode*, CUgraphNode*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphGetEdges)(hGraph, from_, to, numEdges)
-    return err
-{{endif}}
-
-{{if 'cuGraphGetEdges_v2' in found_functions}}
-
-cdef CUresult _cuGraphGetEdges_v2(CUgraph hGraph, CUgraphNode* from_, CUgraphNode* to, CUgraphEdgeData* edgeData, size_t* numEdges) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphGetEdges_v2
-    cuPythonInit()
-    if __cuGraphGetEdges_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphGetEdges_v2" not found')
-    err = (<CUresult (*)(CUgraph, CUgraphNode*, CUgraphNode*, CUgraphEdgeData*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphGetEdges_v2)(hGraph, from_, to, edgeData, numEdges)
-    return err
-{{endif}}
-
-{{if 'cuGraphNodeGetDependencies' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode* dependencies, size_t* numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphNodeGetDependencies
-    cuPythonInit()
-    if __cuGraphNodeGetDependencies == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphNodeGetDependencies" not found')
-    err = (<CUresult (*)(CUgraphNode, CUgraphNode*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphNodeGetDependencies)(hNode, dependencies, numDependencies)
-    return err
-{{endif}}
-
-{{if 'cuGraphNodeGetDependencies_v2' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetDependencies_v2(CUgraphNode hNode, CUgraphNode* dependencies, CUgraphEdgeData* edgeData, size_t* numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphNodeGetDependencies_v2
-    cuPythonInit()
-    if __cuGraphNodeGetDependencies_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphNodeGetDependencies_v2" not found')
-    err = (<CUresult (*)(CUgraphNode, CUgraphNode*, CUgraphEdgeData*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphNodeGetDependencies_v2)(hNode, dependencies, edgeData, numDependencies)
-    return err
-{{endif}}
-
-{{if 'cuGraphNodeGetDependentNodes' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode* dependentNodes, size_t* numDependentNodes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphNodeGetDependentNodes
-    cuPythonInit()
-    if __cuGraphNodeGetDependentNodes == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphNodeGetDependentNodes" not found')
-    err = (<CUresult (*)(CUgraphNode, CUgraphNode*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphNodeGetDependentNodes)(hNode, dependentNodes, numDependentNodes)
-    return err
-{{endif}}
-
-{{if 'cuGraphNodeGetDependentNodes_v2' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetDependentNodes_v2(CUgraphNode hNode, CUgraphNode* dependentNodes, CUgraphEdgeData* edgeData, size_t* numDependentNodes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphNodeGetDependentNodes_v2
-    cuPythonInit()
-    if __cuGraphNodeGetDependentNodes_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphNodeGetDependentNodes_v2" not found')
-    err = (<CUresult (*)(CUgraphNode, CUgraphNode*, CUgraphEdgeData*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphNodeGetDependentNodes_v2)(hNode, dependentNodes, edgeData, numDependentNodes)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddDependencies' in found_functions}}
-
-cdef CUresult _cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddDependencies
-    cuPythonInit()
-    if __cuGraphAddDependencies == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddDependencies" not found')
-    err = (<CUresult (*)(CUgraph, const CUgraphNode*, const CUgraphNode*, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddDependencies)(hGraph, from_, to, numDependencies)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddDependencies_v2' in found_functions}}
-
-cdef CUresult _cuGraphAddDependencies_v2(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, const CUgraphEdgeData* edgeData, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddDependencies_v2
-    cuPythonInit()
-    if __cuGraphAddDependencies_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddDependencies_v2" not found')
-    err = (<CUresult (*)(CUgraph, const CUgraphNode*, const CUgraphNode*, const CUgraphEdgeData*, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddDependencies_v2)(hGraph, from_, to, edgeData, numDependencies)
-    return err
-{{endif}}
-
-{{if 'cuGraphRemoveDependencies' in found_functions}}
-
-cdef CUresult _cuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphRemoveDependencies
-    cuPythonInit()
-    if __cuGraphRemoveDependencies == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphRemoveDependencies" not found')
-    err = (<CUresult (*)(CUgraph, const CUgraphNode*, const CUgraphNode*, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphRemoveDependencies)(hGraph, from_, to, numDependencies)
-    return err
-{{endif}}
-
-{{if 'cuGraphRemoveDependencies_v2' in found_functions}}
-
-cdef CUresult _cuGraphRemoveDependencies_v2(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, const CUgraphEdgeData* edgeData, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphRemoveDependencies_v2
-    cuPythonInit()
-    if __cuGraphRemoveDependencies_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphRemoveDependencies_v2" not found')
-    err = (<CUresult (*)(CUgraph, const CUgraphNode*, const CUgraphNode*, const CUgraphEdgeData*, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphRemoveDependencies_v2)(hGraph, from_, to, edgeData, numDependencies)
-    return err
-{{endif}}
-
-{{if 'cuGraphDestroyNode' in found_functions}}
-
-cdef CUresult _cuGraphDestroyNode(CUgraphNode hNode) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphDestroyNode
-    cuPythonInit()
-    if __cuGraphDestroyNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphDestroyNode" not found')
-    err = (<CUresult (*)(CUgraphNode) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphDestroyNode)(hNode)
-    return err
-{{endif}}
-
-{{if 'cuGraphInstantiateWithFlags' in found_functions}}
-
-cdef CUresult _cuGraphInstantiateWithFlags(CUgraphExec* phGraphExec, CUgraph hGraph, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphInstantiateWithFlags
-    cuPythonInit()
-    if __cuGraphInstantiateWithFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphInstantiateWithFlags" not found')
-    err = (<CUresult (*)(CUgraphExec*, CUgraph, unsigned long long) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphInstantiateWithFlags)(phGraphExec, hGraph, flags)
-    return err
-{{endif}}
-
-{{if 'cuGraphInstantiateWithParams' in found_functions}}
-
-cdef CUresult _cuGraphInstantiateWithParams(CUgraphExec* phGraphExec, CUgraph hGraph, CUDA_GRAPH_INSTANTIATE_PARAMS* instantiateParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphInstantiateWithParams
-    cuPythonInit()
-    if __cuGraphInstantiateWithParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphInstantiateWithParams" not found')
-    err = (<CUresult (*)(CUgraphExec*, CUgraph, CUDA_GRAPH_INSTANTIATE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphInstantiateWithParams)(phGraphExec, hGraph, instantiateParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecGetFlags' in found_functions}}
-
-cdef CUresult _cuGraphExecGetFlags(CUgraphExec hGraphExec, cuuint64_t* flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecGetFlags
-    cuPythonInit()
-    if __cuGraphExecGetFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecGetFlags" not found')
-    err = (<CUresult (*)(CUgraphExec, cuuint64_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecGetFlags)(hGraphExec, flags)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecKernelNodeSetParams_v2' in found_functions}}
-
-cdef CUresult _cuGraphExecKernelNodeSetParams_v2(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecKernelNodeSetParams_v2
-    cuPythonInit()
-    if __cuGraphExecKernelNodeSetParams_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecKernelNodeSetParams_v2" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, const CUDA_KERNEL_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecKernelNodeSetParams_v2)(hGraphExec, hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecMemcpyNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMCPY3D* copyParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecMemcpyNodeSetParams
-    cuPythonInit()
-    if __cuGraphExecMemcpyNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecMemcpyNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, const CUDA_MEMCPY3D*, CUcontext) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecMemcpyNodeSetParams)(hGraphExec, hNode, copyParams, ctx)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecMemsetNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecMemsetNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecMemsetNodeSetParams
-    cuPythonInit()
-    if __cuGraphExecMemsetNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecMemsetNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, const CUDA_MEMSET_NODE_PARAMS*, CUcontext) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecMemsetNodeSetParams)(hGraphExec, hNode, memsetParams, ctx)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecHostNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecHostNodeSetParams
-    cuPythonInit()
-    if __cuGraphExecHostNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecHostNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, const CUDA_HOST_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecHostNodeSetParams)(hGraphExec, hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecChildGraphNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecChildGraphNodeSetParams
-    cuPythonInit()
-    if __cuGraphExecChildGraphNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecChildGraphNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, CUgraph) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecChildGraphNodeSetParams)(hGraphExec, hNode, childGraph)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecEventRecordNodeSetEvent' in found_functions}}
-
-cdef CUresult _cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecEventRecordNodeSetEvent
-    cuPythonInit()
-    if __cuGraphExecEventRecordNodeSetEvent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecEventRecordNodeSetEvent" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecEventRecordNodeSetEvent)(hGraphExec, hNode, event)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecEventWaitNodeSetEvent' in found_functions}}
-
-cdef CUresult _cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecEventWaitNodeSetEvent
-    cuPythonInit()
-    if __cuGraphExecEventWaitNodeSetEvent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecEventWaitNodeSetEvent" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecEventWaitNodeSetEvent)(hGraphExec, hNode, event)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecExternalSemaphoresSignalNodeSetParams
-    cuPythonInit()
-    if __cuGraphExecExternalSemaphoresSignalNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecExternalSemaphoresSignalNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecExternalSemaphoresSignalNodeSetParams)(hGraphExec, hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecExternalSemaphoresWaitNodeSetParams
-    cuPythonInit()
-    if __cuGraphExecExternalSemaphoresWaitNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecExternalSemaphoresWaitNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecExternalSemaphoresWaitNodeSetParams)(hGraphExec, hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphNodeSetEnabled' in found_functions}}
-
-cdef CUresult _cuGraphNodeSetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int isEnabled) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphNodeSetEnabled
-    cuPythonInit()
-    if __cuGraphNodeSetEnabled == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphNodeSetEnabled" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphNodeSetEnabled)(hGraphExec, hNode, isEnabled)
-    return err
-{{endif}}
-
-{{if 'cuGraphNodeGetEnabled' in found_functions}}
-
-cdef CUresult _cuGraphNodeGetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int* isEnabled) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphNodeGetEnabled
-    cuPythonInit()
-    if __cuGraphNodeGetEnabled == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphNodeGetEnabled" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, unsigned int*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphNodeGetEnabled)(hGraphExec, hNode, isEnabled)
-    return err
-{{endif}}
-
-{{if 'cuGraphUpload' in found_functions}}
-
-cdef CUresult _cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphUpload
-    cuPythonInit()
-    if __cuGraphUpload == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphUpload" not found')
-    err = (<CUresult (*)(CUgraphExec, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphUpload)(hGraphExec, hStream)
-    return err
-{{endif}}
-
-{{if 'cuGraphLaunch' in found_functions}}
-
-cdef CUresult _cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphLaunch
-    cuPythonInit()
-    if __cuGraphLaunch == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphLaunch" not found')
-    err = (<CUresult (*)(CUgraphExec, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphLaunch)(hGraphExec, hStream)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecDestroy' in found_functions}}
-
-cdef CUresult _cuGraphExecDestroy(CUgraphExec hGraphExec) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecDestroy
-    cuPythonInit()
-    if __cuGraphExecDestroy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecDestroy" not found')
-    err = (<CUresult (*)(CUgraphExec) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecDestroy)(hGraphExec)
-    return err
-{{endif}}
-
-{{if 'cuGraphDestroy' in found_functions}}
-
-cdef CUresult _cuGraphDestroy(CUgraph hGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphDestroy
-    cuPythonInit()
-    if __cuGraphDestroy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphDestroy" not found')
-    err = (<CUresult (*)(CUgraph) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphDestroy)(hGraph)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecUpdate_v2' in found_functions}}
-
-cdef CUresult _cuGraphExecUpdate_v2(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphExecUpdateResultInfo* resultInfo) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecUpdate_v2
-    cuPythonInit()
-    if __cuGraphExecUpdate_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecUpdate_v2" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraph, CUgraphExecUpdateResultInfo*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecUpdate_v2)(hGraphExec, hGraph, resultInfo)
-    return err
-{{endif}}
-
-{{if 'cuGraphKernelNodeCopyAttributes' in found_functions}}
-
-cdef CUresult _cuGraphKernelNodeCopyAttributes(CUgraphNode dst, CUgraphNode src) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphKernelNodeCopyAttributes
-    cuPythonInit()
-    if __cuGraphKernelNodeCopyAttributes == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphKernelNodeCopyAttributes" not found')
-    err = (<CUresult (*)(CUgraphNode, CUgraphNode) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphKernelNodeCopyAttributes)(dst, src)
-    return err
-{{endif}}
-
-{{if 'cuGraphKernelNodeGetAttribute' in found_functions}}
-
-cdef CUresult _cuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, CUkernelNodeAttrValue* value_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphKernelNodeGetAttribute
-    cuPythonInit()
-    if __cuGraphKernelNodeGetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphKernelNodeGetAttribute" not found')
-    err = (<CUresult (*)(CUgraphNode, CUkernelNodeAttrID, CUkernelNodeAttrValue*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphKernelNodeGetAttribute)(hNode, attr, value_out)
-    return err
-{{endif}}
-
-{{if 'cuGraphKernelNodeSetAttribute' in found_functions}}
-
-cdef CUresult _cuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, const CUkernelNodeAttrValue* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphKernelNodeSetAttribute
-    cuPythonInit()
-    if __cuGraphKernelNodeSetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphKernelNodeSetAttribute" not found')
-    err = (<CUresult (*)(CUgraphNode, CUkernelNodeAttrID, const CUkernelNodeAttrValue*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphKernelNodeSetAttribute)(hNode, attr, value)
-    return err
-{{endif}}
-
-{{if 'cuGraphDebugDotPrint' in found_functions}}
-
-cdef CUresult _cuGraphDebugDotPrint(CUgraph hGraph, const char* path, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphDebugDotPrint
-    cuPythonInit()
-    if __cuGraphDebugDotPrint == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphDebugDotPrint" not found')
-    err = (<CUresult (*)(CUgraph, const char*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphDebugDotPrint)(hGraph, path, flags)
-    return err
-{{endif}}
-
-{{if 'cuUserObjectCreate' in found_functions}}
-
-cdef CUresult _cuUserObjectCreate(CUuserObject* object_out, void* ptr, CUhostFn destroy, unsigned int initialRefcount, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuUserObjectCreate
-    cuPythonInit()
-    if __cuUserObjectCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuUserObjectCreate" not found')
-    err = (<CUresult (*)(CUuserObject*, void*, CUhostFn, unsigned int, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuUserObjectCreate)(object_out, ptr, destroy, initialRefcount, flags)
-    return err
-{{endif}}
-
-{{if 'cuUserObjectRetain' in found_functions}}
-
-cdef CUresult _cuUserObjectRetain(CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuUserObjectRetain
-    cuPythonInit()
-    if __cuUserObjectRetain == NULL:
-        with gil:
-            raise RuntimeError('Function "cuUserObjectRetain" not found')
-    err = (<CUresult (*)(CUuserObject, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuUserObjectRetain)(object, count)
-    return err
-{{endif}}
-
-{{if 'cuUserObjectRelease' in found_functions}}
-
-cdef CUresult _cuUserObjectRelease(CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuUserObjectRelease
-    cuPythonInit()
-    if __cuUserObjectRelease == NULL:
-        with gil:
-            raise RuntimeError('Function "cuUserObjectRelease" not found')
-    err = (<CUresult (*)(CUuserObject, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuUserObjectRelease)(object, count)
-    return err
-{{endif}}
-
-{{if 'cuGraphRetainUserObject' in found_functions}}
-
-cdef CUresult _cuGraphRetainUserObject(CUgraph graph, CUuserObject object, unsigned int count, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphRetainUserObject
-    cuPythonInit()
-    if __cuGraphRetainUserObject == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphRetainUserObject" not found')
-    err = (<CUresult (*)(CUgraph, CUuserObject, unsigned int, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphRetainUserObject)(graph, object, count, flags)
-    return err
-{{endif}}
-
-{{if 'cuGraphReleaseUserObject' in found_functions}}
-
-cdef CUresult _cuGraphReleaseUserObject(CUgraph graph, CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphReleaseUserObject
-    cuPythonInit()
-    if __cuGraphReleaseUserObject == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphReleaseUserObject" not found')
-    err = (<CUresult (*)(CUgraph, CUuserObject, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphReleaseUserObject)(graph, object, count)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddNode' in found_functions}}
-
-cdef CUresult _cuGraphAddNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddNode
-    cuPythonInit()
-    if __cuGraphAddNode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddNode" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, size_t, CUgraphNodeParams*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddNode)(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphAddNode_v2' in found_functions}}
-
-cdef CUresult _cuGraphAddNode_v2(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphAddNode_v2
-    cuPythonInit()
-    if __cuGraphAddNode_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphAddNode_v2" not found')
-    err = (<CUresult (*)(CUgraphNode*, CUgraph, const CUgraphNode*, const CUgraphEdgeData*, size_t, CUgraphNodeParams*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphAddNode_v2)(phGraphNode, hGraph, dependencies, dependencyData, numDependencies, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphNodeSetParams(CUgraphNode hNode, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphNodeSetParams
-    cuPythonInit()
-    if __cuGraphNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphNode, CUgraphNodeParams*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphNodeSetParams)(hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphExecNodeSetParams' in found_functions}}
-
-cdef CUresult _cuGraphExecNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphExecNodeSetParams
-    cuPythonInit()
-    if __cuGraphExecNodeSetParams == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphExecNodeSetParams" not found')
-    err = (<CUresult (*)(CUgraphExec, CUgraphNode, CUgraphNodeParams*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphExecNodeSetParams)(hGraphExec, hNode, nodeParams)
-    return err
-{{endif}}
-
-{{if 'cuGraphConditionalHandleCreate' in found_functions}}
-
-cdef CUresult _cuGraphConditionalHandleCreate(CUgraphConditionalHandle* pHandle_out, CUgraph hGraph, CUcontext ctx, unsigned int defaultLaunchValue, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphConditionalHandleCreate
-    cuPythonInit()
-    if __cuGraphConditionalHandleCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphConditionalHandleCreate" not found')
-    err = (<CUresult (*)(CUgraphConditionalHandle*, CUgraph, CUcontext, unsigned int, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphConditionalHandleCreate)(pHandle_out, hGraph, ctx, defaultLaunchValue, flags)
-    return err
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuOccupancyMaxActiveBlocksPerMultiprocessor
-    cuPythonInit()
-    if __cuOccupancyMaxActiveBlocksPerMultiprocessor == NULL:
-        with gil:
-            raise RuntimeError('Function "cuOccupancyMaxActiveBlocksPerMultiprocessor" not found')
-    err = (<CUresult (*)(int*, CUfunction, int, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuOccupancyMaxActiveBlocksPerMultiprocessor)(numBlocks, func, blockSize, dynamicSMemSize)
-    return err
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
-    cuPythonInit()
-    if __cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags" not found')
-    err = (<CUresult (*)(int*, CUfunction, int, size_t, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags)(numBlocks, func, blockSize, dynamicSMemSize, flags)
-    return err
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialBlockSize' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuOccupancyMaxPotentialBlockSize
-    cuPythonInit()
-    if __cuOccupancyMaxPotentialBlockSize == NULL:
-        with gil:
-            raise RuntimeError('Function "cuOccupancyMaxPotentialBlockSize" not found')
-    err = (<CUresult (*)(int*, int*, CUfunction, CUoccupancyB2DSize, size_t, int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuOccupancyMaxPotentialBlockSize)(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit)
-    return err
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialBlockSizeWithFlags' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuOccupancyMaxPotentialBlockSizeWithFlags
-    cuPythonInit()
-    if __cuOccupancyMaxPotentialBlockSizeWithFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuOccupancyMaxPotentialBlockSizeWithFlags" not found')
-    err = (<CUresult (*)(int*, int*, CUfunction, CUoccupancyB2DSize, size_t, int, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuOccupancyMaxPotentialBlockSizeWithFlags)(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, flags)
-    return err
-{{endif}}
-
-{{if 'cuOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-
-cdef CUresult _cuOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, CUfunction func, int numBlocks, int blockSize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuOccupancyAvailableDynamicSMemPerBlock
-    cuPythonInit()
-    if __cuOccupancyAvailableDynamicSMemPerBlock == NULL:
-        with gil:
-            raise RuntimeError('Function "cuOccupancyAvailableDynamicSMemPerBlock" not found')
-    err = (<CUresult (*)(size_t*, CUfunction, int, int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuOccupancyAvailableDynamicSMemPerBlock)(dynamicSmemSize, func, numBlocks, blockSize)
-    return err
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialClusterSize' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxPotentialClusterSize(int* clusterSize, CUfunction func, const CUlaunchConfig* config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuOccupancyMaxPotentialClusterSize
-    cuPythonInit()
-    if __cuOccupancyMaxPotentialClusterSize == NULL:
-        with gil:
-            raise RuntimeError('Function "cuOccupancyMaxPotentialClusterSize" not found')
-    err = (<CUresult (*)(int*, CUfunction, const CUlaunchConfig*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuOccupancyMaxPotentialClusterSize)(clusterSize, func, config)
-    return err
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveClusters' in found_functions}}
-
-cdef CUresult _cuOccupancyMaxActiveClusters(int* numClusters, CUfunction func, const CUlaunchConfig* config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuOccupancyMaxActiveClusters
-    cuPythonInit()
-    if __cuOccupancyMaxActiveClusters == NULL:
-        with gil:
-            raise RuntimeError('Function "cuOccupancyMaxActiveClusters" not found')
-    err = (<CUresult (*)(int*, CUfunction, const CUlaunchConfig*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuOccupancyMaxActiveClusters)(numClusters, func, config)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetArray' in found_functions}}
-
-cdef CUresult _cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetArray
-    cuPythonInit()
-    if __cuTexRefSetArray == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetArray" not found')
-    err = (<CUresult (*)(CUtexref, CUarray, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetArray)(hTexRef, hArray, Flags)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetMipmappedArray' in found_functions}}
-
-cdef CUresult _cuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetMipmappedArray
-    cuPythonInit()
-    if __cuTexRefSetMipmappedArray == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetMipmappedArray" not found')
-    err = (<CUresult (*)(CUtexref, CUmipmappedArray, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetMipmappedArray)(hTexRef, hMipmappedArray, Flags)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetAddress_v2' in found_functions}}
-
-cdef CUresult _cuTexRefSetAddress_v2(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t numbytes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetAddress_v2
-    cuPythonInit()
-    if __cuTexRefSetAddress_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetAddress_v2" not found')
-    err = (<CUresult (*)(size_t*, CUtexref, CUdeviceptr, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetAddress_v2)(ByteOffset, hTexRef, dptr, numbytes)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetAddress2D_v3' in found_functions}}
-
-cdef CUresult _cuTexRefSetAddress2D_v3(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR* desc, CUdeviceptr dptr, size_t Pitch) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetAddress2D_v3
-    cuPythonInit()
-    if __cuTexRefSetAddress2D_v3 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetAddress2D_v3" not found')
-    err = (<CUresult (*)(CUtexref, const CUDA_ARRAY_DESCRIPTOR*, CUdeviceptr, size_t) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetAddress2D_v3)(hTexRef, desc, dptr, Pitch)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetFormat' in found_functions}}
-
-cdef CUresult _cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetFormat
-    cuPythonInit()
-    if __cuTexRefSetFormat == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetFormat" not found')
-    err = (<CUresult (*)(CUtexref, CUarray_format, int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetFormat)(hTexRef, fmt, NumPackedComponents)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetAddressMode' in found_functions}}
-
-cdef CUresult _cuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetAddressMode
-    cuPythonInit()
-    if __cuTexRefSetAddressMode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetAddressMode" not found')
-    err = (<CUresult (*)(CUtexref, int, CUaddress_mode) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetAddressMode)(hTexRef, dim, am)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetFilterMode' in found_functions}}
-
-cdef CUresult _cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetFilterMode
-    cuPythonInit()
-    if __cuTexRefSetFilterMode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetFilterMode" not found')
-    err = (<CUresult (*)(CUtexref, CUfilter_mode) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetFilterMode)(hTexRef, fm)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetMipmapFilterMode' in found_functions}}
-
-cdef CUresult _cuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetMipmapFilterMode
-    cuPythonInit()
-    if __cuTexRefSetMipmapFilterMode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetMipmapFilterMode" not found')
-    err = (<CUresult (*)(CUtexref, CUfilter_mode) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetMipmapFilterMode)(hTexRef, fm)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetMipmapLevelBias' in found_functions}}
-
-cdef CUresult _cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetMipmapLevelBias
-    cuPythonInit()
-    if __cuTexRefSetMipmapLevelBias == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetMipmapLevelBias" not found')
-    err = (<CUresult (*)(CUtexref, float) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetMipmapLevelBias)(hTexRef, bias)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetMipmapLevelClamp' in found_functions}}
-
-cdef CUresult _cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetMipmapLevelClamp
-    cuPythonInit()
-    if __cuTexRefSetMipmapLevelClamp == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetMipmapLevelClamp" not found')
-    err = (<CUresult (*)(CUtexref, float, float) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetMipmapLevelClamp)(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetMaxAnisotropy' in found_functions}}
-
-cdef CUresult _cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetMaxAnisotropy
-    cuPythonInit()
-    if __cuTexRefSetMaxAnisotropy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetMaxAnisotropy" not found')
-    err = (<CUresult (*)(CUtexref, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetMaxAnisotropy)(hTexRef, maxAniso)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetBorderColor' in found_functions}}
-
-cdef CUresult _cuTexRefSetBorderColor(CUtexref hTexRef, float* pBorderColor) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetBorderColor
-    cuPythonInit()
-    if __cuTexRefSetBorderColor == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetBorderColor" not found')
-    err = (<CUresult (*)(CUtexref, float*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetBorderColor)(hTexRef, pBorderColor)
-    return err
-{{endif}}
-
-{{if 'cuTexRefSetFlags' in found_functions}}
-
-cdef CUresult _cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefSetFlags
-    cuPythonInit()
-    if __cuTexRefSetFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefSetFlags" not found')
-    err = (<CUresult (*)(CUtexref, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefSetFlags)(hTexRef, Flags)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetAddress_v2' in found_functions}}
-
-cdef CUresult _cuTexRefGetAddress_v2(CUdeviceptr* pdptr, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetAddress_v2
-    cuPythonInit()
-    if __cuTexRefGetAddress_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetAddress_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr*, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetAddress_v2)(pdptr, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetArray' in found_functions}}
-
-cdef CUresult _cuTexRefGetArray(CUarray* phArray, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetArray
-    cuPythonInit()
-    if __cuTexRefGetArray == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetArray" not found')
-    err = (<CUresult (*)(CUarray*, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetArray)(phArray, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetMipmappedArray' in found_functions}}
-
-cdef CUresult _cuTexRefGetMipmappedArray(CUmipmappedArray* phMipmappedArray, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetMipmappedArray
-    cuPythonInit()
-    if __cuTexRefGetMipmappedArray == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetMipmappedArray" not found')
-    err = (<CUresult (*)(CUmipmappedArray*, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetMipmappedArray)(phMipmappedArray, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetAddressMode' in found_functions}}
-
-cdef CUresult _cuTexRefGetAddressMode(CUaddress_mode* pam, CUtexref hTexRef, int dim) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetAddressMode
-    cuPythonInit()
-    if __cuTexRefGetAddressMode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetAddressMode" not found')
-    err = (<CUresult (*)(CUaddress_mode*, CUtexref, int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetAddressMode)(pam, hTexRef, dim)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetFilterMode' in found_functions}}
-
-cdef CUresult _cuTexRefGetFilterMode(CUfilter_mode* pfm, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetFilterMode
-    cuPythonInit()
-    if __cuTexRefGetFilterMode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetFilterMode" not found')
-    err = (<CUresult (*)(CUfilter_mode*, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetFilterMode)(pfm, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetFormat' in found_functions}}
-
-cdef CUresult _cuTexRefGetFormat(CUarray_format* pFormat, int* pNumChannels, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetFormat
-    cuPythonInit()
-    if __cuTexRefGetFormat == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetFormat" not found')
-    err = (<CUresult (*)(CUarray_format*, int*, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetFormat)(pFormat, pNumChannels, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetMipmapFilterMode' in found_functions}}
-
-cdef CUresult _cuTexRefGetMipmapFilterMode(CUfilter_mode* pfm, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetMipmapFilterMode
-    cuPythonInit()
-    if __cuTexRefGetMipmapFilterMode == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetMipmapFilterMode" not found')
-    err = (<CUresult (*)(CUfilter_mode*, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetMipmapFilterMode)(pfm, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetMipmapLevelBias' in found_functions}}
-
-cdef CUresult _cuTexRefGetMipmapLevelBias(float* pbias, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetMipmapLevelBias
-    cuPythonInit()
-    if __cuTexRefGetMipmapLevelBias == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetMipmapLevelBias" not found')
-    err = (<CUresult (*)(float*, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetMipmapLevelBias)(pbias, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetMipmapLevelClamp' in found_functions}}
-
-cdef CUresult _cuTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetMipmapLevelClamp
-    cuPythonInit()
-    if __cuTexRefGetMipmapLevelClamp == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetMipmapLevelClamp" not found')
-    err = (<CUresult (*)(float*, float*, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetMipmapLevelClamp)(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetMaxAnisotropy' in found_functions}}
-
-cdef CUresult _cuTexRefGetMaxAnisotropy(int* pmaxAniso, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetMaxAnisotropy
-    cuPythonInit()
-    if __cuTexRefGetMaxAnisotropy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetMaxAnisotropy" not found')
-    err = (<CUresult (*)(int*, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetMaxAnisotropy)(pmaxAniso, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetBorderColor' in found_functions}}
-
-cdef CUresult _cuTexRefGetBorderColor(float* pBorderColor, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetBorderColor
-    cuPythonInit()
-    if __cuTexRefGetBorderColor == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetBorderColor" not found')
-    err = (<CUresult (*)(float*, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetBorderColor)(pBorderColor, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefGetFlags' in found_functions}}
-
-cdef CUresult _cuTexRefGetFlags(unsigned int* pFlags, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefGetFlags
-    cuPythonInit()
-    if __cuTexRefGetFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefGetFlags" not found')
-    err = (<CUresult (*)(unsigned int*, CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefGetFlags)(pFlags, hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefCreate' in found_functions}}
-
-cdef CUresult _cuTexRefCreate(CUtexref* pTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefCreate
-    cuPythonInit()
-    if __cuTexRefCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefCreate" not found')
-    err = (<CUresult (*)(CUtexref*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefCreate)(pTexRef)
-    return err
-{{endif}}
-
-{{if 'cuTexRefDestroy' in found_functions}}
-
-cdef CUresult _cuTexRefDestroy(CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexRefDestroy
-    cuPythonInit()
-    if __cuTexRefDestroy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexRefDestroy" not found')
-    err = (<CUresult (*)(CUtexref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexRefDestroy)(hTexRef)
-    return err
-{{endif}}
-
-{{if 'cuSurfRefSetArray' in found_functions}}
-
-cdef CUresult _cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuSurfRefSetArray
-    cuPythonInit()
-    if __cuSurfRefSetArray == NULL:
-        with gil:
-            raise RuntimeError('Function "cuSurfRefSetArray" not found')
-    err = (<CUresult (*)(CUsurfref, CUarray, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuSurfRefSetArray)(hSurfRef, hArray, Flags)
-    return err
-{{endif}}
-
-{{if 'cuSurfRefGetArray' in found_functions}}
-
-cdef CUresult _cuSurfRefGetArray(CUarray* phArray, CUsurfref hSurfRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuSurfRefGetArray
-    cuPythonInit()
-    if __cuSurfRefGetArray == NULL:
-        with gil:
-            raise RuntimeError('Function "cuSurfRefGetArray" not found')
-    err = (<CUresult (*)(CUarray*, CUsurfref) except ?CUDA_ERROR_NOT_FOUND nogil> __cuSurfRefGetArray)(phArray, hSurfRef)
-    return err
-{{endif}}
-
-{{if 'cuTexObjectCreate' in found_functions}}
-
-cdef CUresult _cuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const CUDA_RESOURCE_VIEW_DESC* pResViewDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexObjectCreate
-    cuPythonInit()
-    if __cuTexObjectCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexObjectCreate" not found')
-    err = (<CUresult (*)(CUtexObject*, const CUDA_RESOURCE_DESC*, const CUDA_TEXTURE_DESC*, const CUDA_RESOURCE_VIEW_DESC*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexObjectCreate)(pTexObject, pResDesc, pTexDesc, pResViewDesc)
-    return err
-{{endif}}
-
-{{if 'cuTexObjectDestroy' in found_functions}}
-
-cdef CUresult _cuTexObjectDestroy(CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexObjectDestroy
-    cuPythonInit()
-    if __cuTexObjectDestroy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexObjectDestroy" not found')
-    err = (<CUresult (*)(CUtexObject) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexObjectDestroy)(texObject)
-    return err
-{{endif}}
-
-{{if 'cuTexObjectGetResourceDesc' in found_functions}}
-
-cdef CUresult _cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexObjectGetResourceDesc
-    cuPythonInit()
-    if __cuTexObjectGetResourceDesc == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexObjectGetResourceDesc" not found')
-    err = (<CUresult (*)(CUDA_RESOURCE_DESC*, CUtexObject) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexObjectGetResourceDesc)(pResDesc, texObject)
-    return err
-{{endif}}
-
-{{if 'cuTexObjectGetTextureDesc' in found_functions}}
-
-cdef CUresult _cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC* pTexDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexObjectGetTextureDesc
-    cuPythonInit()
-    if __cuTexObjectGetTextureDesc == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexObjectGetTextureDesc" not found')
-    err = (<CUresult (*)(CUDA_TEXTURE_DESC*, CUtexObject) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexObjectGetTextureDesc)(pTexDesc, texObject)
-    return err
-{{endif}}
-
-{{if 'cuTexObjectGetResourceViewDesc' in found_functions}}
-
-cdef CUresult _cuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC* pResViewDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTexObjectGetResourceViewDesc
-    cuPythonInit()
-    if __cuTexObjectGetResourceViewDesc == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTexObjectGetResourceViewDesc" not found')
-    err = (<CUresult (*)(CUDA_RESOURCE_VIEW_DESC*, CUtexObject) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTexObjectGetResourceViewDesc)(pResViewDesc, texObject)
-    return err
-{{endif}}
-
-{{if 'cuSurfObjectCreate' in found_functions}}
-
-cdef CUresult _cuSurfObjectCreate(CUsurfObject* pSurfObject, const CUDA_RESOURCE_DESC* pResDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuSurfObjectCreate
-    cuPythonInit()
-    if __cuSurfObjectCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuSurfObjectCreate" not found')
-    err = (<CUresult (*)(CUsurfObject*, const CUDA_RESOURCE_DESC*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuSurfObjectCreate)(pSurfObject, pResDesc)
-    return err
-{{endif}}
-
-{{if 'cuSurfObjectDestroy' in found_functions}}
-
-cdef CUresult _cuSurfObjectDestroy(CUsurfObject surfObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuSurfObjectDestroy
-    cuPythonInit()
-    if __cuSurfObjectDestroy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuSurfObjectDestroy" not found')
-    err = (<CUresult (*)(CUsurfObject) except ?CUDA_ERROR_NOT_FOUND nogil> __cuSurfObjectDestroy)(surfObject)
-    return err
-{{endif}}
-
-{{if 'cuSurfObjectGetResourceDesc' in found_functions}}
-
-cdef CUresult _cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUsurfObject surfObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuSurfObjectGetResourceDesc
-    cuPythonInit()
-    if __cuSurfObjectGetResourceDesc == NULL:
-        with gil:
-            raise RuntimeError('Function "cuSurfObjectGetResourceDesc" not found')
-    err = (<CUresult (*)(CUDA_RESOURCE_DESC*, CUsurfObject) except ?CUDA_ERROR_NOT_FOUND nogil> __cuSurfObjectGetResourceDesc)(pResDesc, surfObject)
-    return err
-{{endif}}
-
-{{if 'cuTensorMapEncodeTiled' in found_functions}}
-
-cdef CUresult _cuTensorMapEncodeTiled(CUtensorMap* tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void* globalAddress, const cuuint64_t* globalDim, const cuuint64_t* globalStrides, const cuuint32_t* boxDim, const cuuint32_t* elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTensorMapEncodeTiled
-    cuPythonInit()
-    if __cuTensorMapEncodeTiled == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTensorMapEncodeTiled" not found')
-    err = (<CUresult (*)(CUtensorMap*, CUtensorMapDataType, cuuint32_t, void*, const cuuint64_t*, const cuuint64_t*, const cuuint32_t*, const cuuint32_t*, CUtensorMapInterleave, CUtensorMapSwizzle, CUtensorMapL2promotion, CUtensorMapFloatOOBfill) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTensorMapEncodeTiled)(tensorMap, tensorDataType, tensorRank, globalAddress, globalDim, globalStrides, boxDim, elementStrides, interleave, swizzle, l2Promotion, oobFill)
-    return err
-{{endif}}
-
-{{if 'cuTensorMapEncodeIm2col' in found_functions}}
-
-cdef CUresult _cuTensorMapEncodeIm2col(CUtensorMap* tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void* globalAddress, const cuuint64_t* globalDim, const cuuint64_t* globalStrides, const int* pixelBoxLowerCorner, const int* pixelBoxUpperCorner, cuuint32_t channelsPerPixel, cuuint32_t pixelsPerColumn, const cuuint32_t* elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTensorMapEncodeIm2col
-    cuPythonInit()
-    if __cuTensorMapEncodeIm2col == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTensorMapEncodeIm2col" not found')
-    err = (<CUresult (*)(CUtensorMap*, CUtensorMapDataType, cuuint32_t, void*, const cuuint64_t*, const cuuint64_t*, const int*, const int*, cuuint32_t, cuuint32_t, const cuuint32_t*, CUtensorMapInterleave, CUtensorMapSwizzle, CUtensorMapL2promotion, CUtensorMapFloatOOBfill) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTensorMapEncodeIm2col)(tensorMap, tensorDataType, tensorRank, globalAddress, globalDim, globalStrides, pixelBoxLowerCorner, pixelBoxUpperCorner, channelsPerPixel, pixelsPerColumn, elementStrides, interleave, swizzle, l2Promotion, oobFill)
-    return err
-{{endif}}
-
-{{if 'cuTensorMapReplaceAddress' in found_functions}}
-
-cdef CUresult _cuTensorMapReplaceAddress(CUtensorMap* tensorMap, void* globalAddress) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuTensorMapReplaceAddress
-    cuPythonInit()
-    if __cuTensorMapReplaceAddress == NULL:
-        with gil:
-            raise RuntimeError('Function "cuTensorMapReplaceAddress" not found')
-    err = (<CUresult (*)(CUtensorMap*, void*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuTensorMapReplaceAddress)(tensorMap, globalAddress)
-    return err
-{{endif}}
-
-{{if 'cuDeviceCanAccessPeer' in found_functions}}
-
-cdef CUresult _cuDeviceCanAccessPeer(int* canAccessPeer, CUdevice dev, CUdevice peerDev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceCanAccessPeer
-    cuPythonInit()
-    if __cuDeviceCanAccessPeer == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceCanAccessPeer" not found')
-    err = (<CUresult (*)(int*, CUdevice, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceCanAccessPeer)(canAccessPeer, dev, peerDev)
-    return err
-{{endif}}
-
-{{if 'cuCtxEnablePeerAccess' in found_functions}}
-
-cdef CUresult _cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxEnablePeerAccess
-    cuPythonInit()
-    if __cuCtxEnablePeerAccess == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxEnablePeerAccess" not found')
-    err = (<CUresult (*)(CUcontext, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxEnablePeerAccess)(peerContext, Flags)
-    return err
-{{endif}}
-
-{{if 'cuCtxDisablePeerAccess' in found_functions}}
-
-cdef CUresult _cuCtxDisablePeerAccess(CUcontext peerContext) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxDisablePeerAccess
-    cuPythonInit()
-    if __cuCtxDisablePeerAccess == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxDisablePeerAccess" not found')
-    err = (<CUresult (*)(CUcontext) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxDisablePeerAccess)(peerContext)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetP2PAttribute' in found_functions}}
-
-cdef CUresult _cuDeviceGetP2PAttribute(int* value, CUdevice_P2PAttribute attrib, CUdevice srcDevice, CUdevice dstDevice) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetP2PAttribute
-    cuPythonInit()
-    if __cuDeviceGetP2PAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetP2PAttribute" not found')
-    err = (<CUresult (*)(int*, CUdevice_P2PAttribute, CUdevice, CUdevice) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetP2PAttribute)(value, attrib, srcDevice, dstDevice)
-    return err
-{{endif}}
-
-{{if 'cuGraphicsUnregisterResource' in found_functions}}
-
-cdef CUresult _cuGraphicsUnregisterResource(CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsUnregisterResource
-    cuPythonInit()
-    if __cuGraphicsUnregisterResource == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsUnregisterResource" not found')
-    err = (<CUresult (*)(CUgraphicsResource) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsUnregisterResource)(resource)
-    return err
-{{endif}}
-
-{{if 'cuGraphicsSubResourceGetMappedArray' in found_functions}}
-
-cdef CUresult _cuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsSubResourceGetMappedArray
-    cuPythonInit()
-    if __cuGraphicsSubResourceGetMappedArray == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsSubResourceGetMappedArray" not found')
-    err = (<CUresult (*)(CUarray*, CUgraphicsResource, unsigned int, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsSubResourceGetMappedArray)(pArray, resource, arrayIndex, mipLevel)
-    return err
-{{endif}}
-
-{{if 'cuGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-
-cdef CUresult _cuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsResourceGetMappedMipmappedArray
-    cuPythonInit()
-    if __cuGraphicsResourceGetMappedMipmappedArray == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsResourceGetMappedMipmappedArray" not found')
-    err = (<CUresult (*)(CUmipmappedArray*, CUgraphicsResource) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsResourceGetMappedMipmappedArray)(pMipmappedArray, resource)
-    return err
-{{endif}}
-
-{{if 'cuGraphicsResourceGetMappedPointer_v2' in found_functions}}
-
-cdef CUresult _cuGraphicsResourceGetMappedPointer_v2(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsResourceGetMappedPointer_v2
-    cuPythonInit()
-    if __cuGraphicsResourceGetMappedPointer_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsResourceGetMappedPointer_v2" not found')
-    err = (<CUresult (*)(CUdeviceptr*, size_t*, CUgraphicsResource) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsResourceGetMappedPointer_v2)(pDevPtr, pSize, resource)
-    return err
-{{endif}}
-
-{{if 'cuGraphicsResourceSetMapFlags_v2' in found_functions}}
-
-cdef CUresult _cuGraphicsResourceSetMapFlags_v2(CUgraphicsResource resource, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsResourceSetMapFlags_v2
-    cuPythonInit()
-    if __cuGraphicsResourceSetMapFlags_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsResourceSetMapFlags_v2" not found')
-    err = (<CUresult (*)(CUgraphicsResource, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsResourceSetMapFlags_v2)(resource, flags)
-    return err
-{{endif}}
-
-{{if 'cuGraphicsMapResources' in found_functions}}
-
-cdef CUresult _cuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsMapResources
-    cuPythonInit()
-    if __cuGraphicsMapResources == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsMapResources" not found')
-    err = (<CUresult (*)(unsigned int, CUgraphicsResource*, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsMapResources)(count, resources, hStream)
-    return err
-{{endif}}
-
-{{if 'cuGraphicsUnmapResources' in found_functions}}
-
-cdef CUresult _cuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsUnmapResources
-    cuPythonInit()
-    if __cuGraphicsUnmapResources == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsUnmapResources" not found')
-    err = (<CUresult (*)(unsigned int, CUgraphicsResource*, CUstream) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsUnmapResources)(count, resources, hStream)
-    return err
-{{endif}}
-
-{{if 'cuGetProcAddress_v2' in found_functions}}
-
-cdef CUresult _cuGetProcAddress_v2(const char* symbol, void** pfn, int cudaVersion, cuuint64_t flags, CUdriverProcAddressQueryResult* symbolStatus) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGetProcAddress_v2
-    cuPythonInit()
-    if __cuGetProcAddress_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGetProcAddress_v2" not found')
-    err = (<CUresult (*)(const char*, void**, int, cuuint64_t, CUdriverProcAddressQueryResult*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGetProcAddress_v2)(symbol, pfn, cudaVersion, flags, symbolStatus)
-    return err
-{{endif}}
-
-{{if 'cuCoredumpGetAttribute' in found_functions}}
-
-cdef CUresult _cuCoredumpGetAttribute(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCoredumpGetAttribute
-    cuPythonInit()
-    if __cuCoredumpGetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCoredumpGetAttribute" not found')
-    err = (<CUresult (*)(CUcoredumpSettings, void*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCoredumpGetAttribute)(attrib, value, size)
-    return err
-{{endif}}
-
-{{if 'cuCoredumpGetAttributeGlobal' in found_functions}}
-
-cdef CUresult _cuCoredumpGetAttributeGlobal(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCoredumpGetAttributeGlobal
-    cuPythonInit()
-    if __cuCoredumpGetAttributeGlobal == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCoredumpGetAttributeGlobal" not found')
-    err = (<CUresult (*)(CUcoredumpSettings, void*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCoredumpGetAttributeGlobal)(attrib, value, size)
-    return err
-{{endif}}
-
-{{if 'cuCoredumpSetAttribute' in found_functions}}
-
-cdef CUresult _cuCoredumpSetAttribute(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCoredumpSetAttribute
-    cuPythonInit()
-    if __cuCoredumpSetAttribute == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCoredumpSetAttribute" not found')
-    err = (<CUresult (*)(CUcoredumpSettings, void*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCoredumpSetAttribute)(attrib, value, size)
-    return err
-{{endif}}
-
-{{if 'cuCoredumpSetAttributeGlobal' in found_functions}}
-
-cdef CUresult _cuCoredumpSetAttributeGlobal(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCoredumpSetAttributeGlobal
-    cuPythonInit()
-    if __cuCoredumpSetAttributeGlobal == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCoredumpSetAttributeGlobal" not found')
-    err = (<CUresult (*)(CUcoredumpSettings, void*, size_t*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCoredumpSetAttributeGlobal)(attrib, value, size)
-    return err
-{{endif}}
-
-{{if 'cuGetExportTable' in found_functions}}
-
-cdef CUresult _cuGetExportTable(const void** ppExportTable, const CUuuid* pExportTableId) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGetExportTable
-    cuPythonInit()
-    if __cuGetExportTable == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGetExportTable" not found')
-    err = (<CUresult (*)(const void**, const CUuuid*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGetExportTable)(ppExportTable, pExportTableId)
-    return err
-{{endif}}
-
-{{if 'cuGreenCtxCreate' in found_functions}}
-
-cdef CUresult _cuGreenCtxCreate(CUgreenCtx* phCtx, CUdevResourceDesc desc, CUdevice dev, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGreenCtxCreate
-    cuPythonInit()
-    if __cuGreenCtxCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGreenCtxCreate" not found')
-    err = (<CUresult (*)(CUgreenCtx*, CUdevResourceDesc, CUdevice, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGreenCtxCreate)(phCtx, desc, dev, flags)
-    return err
-{{endif}}
-
-{{if 'cuGreenCtxDestroy' in found_functions}}
-
-cdef CUresult _cuGreenCtxDestroy(CUgreenCtx hCtx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGreenCtxDestroy
-    cuPythonInit()
-    if __cuGreenCtxDestroy == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGreenCtxDestroy" not found')
-    err = (<CUresult (*)(CUgreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGreenCtxDestroy)(hCtx)
-    return err
-{{endif}}
-
-{{if 'cuCtxFromGreenCtx' in found_functions}}
-
-cdef CUresult _cuCtxFromGreenCtx(CUcontext* pContext, CUgreenCtx hCtx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxFromGreenCtx
-    cuPythonInit()
-    if __cuCtxFromGreenCtx == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxFromGreenCtx" not found')
-    err = (<CUresult (*)(CUcontext*, CUgreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxFromGreenCtx)(pContext, hCtx)
-    return err
-{{endif}}
-
-{{if 'cuDeviceGetDevResource' in found_functions}}
-
-cdef CUresult _cuDeviceGetDevResource(CUdevice device, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDeviceGetDevResource
-    cuPythonInit()
-    if __cuDeviceGetDevResource == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDeviceGetDevResource" not found')
-    err = (<CUresult (*)(CUdevice, CUdevResource*, CUdevResourceType) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDeviceGetDevResource)(device, resource, typename)
-    return err
-{{endif}}
-
-{{if 'cuCtxGetDevResource' in found_functions}}
-
-cdef CUresult _cuCtxGetDevResource(CUcontext hCtx, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuCtxGetDevResource
-    cuPythonInit()
-    if __cuCtxGetDevResource == NULL:
-        with gil:
-            raise RuntimeError('Function "cuCtxGetDevResource" not found')
-    err = (<CUresult (*)(CUcontext, CUdevResource*, CUdevResourceType) except ?CUDA_ERROR_NOT_FOUND nogil> __cuCtxGetDevResource)(hCtx, resource, typename)
-    return err
-{{endif}}
-
-{{if 'cuGreenCtxGetDevResource' in found_functions}}
-
-cdef CUresult _cuGreenCtxGetDevResource(CUgreenCtx hCtx, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGreenCtxGetDevResource
-    cuPythonInit()
-    if __cuGreenCtxGetDevResource == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGreenCtxGetDevResource" not found')
-    err = (<CUresult (*)(CUgreenCtx, CUdevResource*, CUdevResourceType) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGreenCtxGetDevResource)(hCtx, resource, typename)
-    return err
-{{endif}}
-
-{{if 'cuDevSmResourceSplitByCount' in found_functions}}
-
-cdef CUresult _cuDevSmResourceSplitByCount(CUdevResource* result, unsigned int* nbGroups, const CUdevResource* input, CUdevResource* remaining, unsigned int useFlags, unsigned int minCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDevSmResourceSplitByCount
-    cuPythonInit()
-    if __cuDevSmResourceSplitByCount == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDevSmResourceSplitByCount" not found')
-    err = (<CUresult (*)(CUdevResource*, unsigned int*, const CUdevResource*, CUdevResource*, unsigned int, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDevSmResourceSplitByCount)(result, nbGroups, input, remaining, useFlags, minCount)
-    return err
-{{endif}}
-
-{{if 'cuDevResourceGenerateDesc' in found_functions}}
-
-cdef CUresult _cuDevResourceGenerateDesc(CUdevResourceDesc* phDesc, CUdevResource* resources, unsigned int nbResources) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuDevResourceGenerateDesc
-    cuPythonInit()
-    if __cuDevResourceGenerateDesc == NULL:
-        with gil:
-            raise RuntimeError('Function "cuDevResourceGenerateDesc" not found')
-    err = (<CUresult (*)(CUdevResourceDesc*, CUdevResource*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuDevResourceGenerateDesc)(phDesc, resources, nbResources)
-    return err
-{{endif}}
-
-{{if 'cuGreenCtxRecordEvent' in found_functions}}
-
-cdef CUresult _cuGreenCtxRecordEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGreenCtxRecordEvent
-    cuPythonInit()
-    if __cuGreenCtxRecordEvent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGreenCtxRecordEvent" not found')
-    err = (<CUresult (*)(CUgreenCtx, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGreenCtxRecordEvent)(hCtx, hEvent)
-    return err
-{{endif}}
-
-{{if 'cuGreenCtxWaitEvent' in found_functions}}
-
-cdef CUresult _cuGreenCtxWaitEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGreenCtxWaitEvent
-    cuPythonInit()
-    if __cuGreenCtxWaitEvent == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGreenCtxWaitEvent" not found')
-    err = (<CUresult (*)(CUgreenCtx, CUevent) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGreenCtxWaitEvent)(hCtx, hEvent)
-    return err
-{{endif}}
-
-{{if 'cuStreamGetGreenCtx' in found_functions}}
-
-cdef CUresult _cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuStreamGetGreenCtx
-    cuPythonInit()
-    if __cuStreamGetGreenCtx == NULL:
-        with gil:
-            raise RuntimeError('Function "cuStreamGetGreenCtx" not found')
-    err = (<CUresult (*)(CUstream, CUgreenCtx*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuStreamGetGreenCtx)(hStream, phCtx)
-    return err
-{{endif}}
-
-{{if 'cuGreenCtxStreamCreate' in found_functions}}
-
-cdef CUresult _cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGreenCtxStreamCreate
-    cuPythonInit()
-    if __cuGreenCtxStreamCreate == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGreenCtxStreamCreate" not found')
-    err = (<CUresult (*)(CUstream*, CUgreenCtx, unsigned int, int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGreenCtxStreamCreate)(phStream, greenCtx, flags, priority)
-    return err
-{{endif}}
-
-{{if 'cuProfilerStart' in found_functions}}
-
-cdef CUresult _cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuProfilerStart
-    cuPythonInit()
-    if __cuProfilerStart == NULL:
-        with gil:
-            raise RuntimeError('Function "cuProfilerStart" not found')
-    err = (<CUresult (*)() except ?CUDA_ERROR_NOT_FOUND nogil> __cuProfilerStart)()
-    return err
-{{endif}}
-
-{{if 'cuProfilerStop' in found_functions}}
-
-cdef CUresult _cuProfilerStop() except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuProfilerStop
-    cuPythonInit()
-    if __cuProfilerStop == NULL:
-        with gil:
-            raise RuntimeError('Function "cuProfilerStop" not found')
-    err = (<CUresult (*)() except ?CUDA_ERROR_NOT_FOUND nogil> __cuProfilerStop)()
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsEGLRegisterImage(CUgraphicsResource* pCudaResource, EGLImageKHR image, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsEGLRegisterImage
-    cuPythonInit()
-    if __cuGraphicsEGLRegisterImage == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsEGLRegisterImage" not found')
-    err = (<CUresult (*)(CUgraphicsResource*, EGLImageKHR, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsEGLRegisterImage)(pCudaResource, image, flags)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamConsumerConnect(CUeglStreamConnection* conn, EGLStreamKHR stream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEGLStreamConsumerConnect
-    cuPythonInit()
-    if __cuEGLStreamConsumerConnect == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEGLStreamConsumerConnect" not found')
-    err = (<CUresult (*)(CUeglStreamConnection*, EGLStreamKHR) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEGLStreamConsumerConnect)(conn, stream)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamConsumerConnectWithFlags(CUeglStreamConnection* conn, EGLStreamKHR stream, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEGLStreamConsumerConnectWithFlags
-    cuPythonInit()
-    if __cuEGLStreamConsumerConnectWithFlags == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEGLStreamConsumerConnectWithFlags" not found')
-    err = (<CUresult (*)(CUeglStreamConnection*, EGLStreamKHR, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEGLStreamConsumerConnectWithFlags)(conn, stream, flags)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamConsumerDisconnect(CUeglStreamConnection* conn) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEGLStreamConsumerDisconnect
-    cuPythonInit()
-    if __cuEGLStreamConsumerDisconnect == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEGLStreamConsumerDisconnect" not found')
-    err = (<CUresult (*)(CUeglStreamConnection*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEGLStreamConsumerDisconnect)(conn)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamConsumerAcquireFrame(CUeglStreamConnection* conn, CUgraphicsResource* pCudaResource, CUstream* pStream, unsigned int timeout) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEGLStreamConsumerAcquireFrame
-    cuPythonInit()
-    if __cuEGLStreamConsumerAcquireFrame == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEGLStreamConsumerAcquireFrame" not found')
-    err = (<CUresult (*)(CUeglStreamConnection*, CUgraphicsResource*, CUstream*, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEGLStreamConsumerAcquireFrame)(conn, pCudaResource, pStream, timeout)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamConsumerReleaseFrame(CUeglStreamConnection* conn, CUgraphicsResource pCudaResource, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEGLStreamConsumerReleaseFrame
-    cuPythonInit()
-    if __cuEGLStreamConsumerReleaseFrame == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEGLStreamConsumerReleaseFrame" not found')
-    err = (<CUresult (*)(CUeglStreamConnection*, CUgraphicsResource, CUstream*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEGLStreamConsumerReleaseFrame)(conn, pCudaResource, pStream)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamProducerConnect(CUeglStreamConnection* conn, EGLStreamKHR stream, EGLint width, EGLint height) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEGLStreamProducerConnect
-    cuPythonInit()
-    if __cuEGLStreamProducerConnect == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEGLStreamProducerConnect" not found')
-    err = (<CUresult (*)(CUeglStreamConnection*, EGLStreamKHR, EGLint, EGLint) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEGLStreamProducerConnect)(conn, stream, width, height)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamProducerDisconnect(CUeglStreamConnection* conn) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEGLStreamProducerDisconnect
-    cuPythonInit()
-    if __cuEGLStreamProducerDisconnect == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEGLStreamProducerDisconnect" not found')
-    err = (<CUresult (*)(CUeglStreamConnection*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEGLStreamProducerDisconnect)(conn)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamProducerPresentFrame(CUeglStreamConnection* conn, CUeglFrame eglframe, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEGLStreamProducerPresentFrame
-    cuPythonInit()
-    if __cuEGLStreamProducerPresentFrame == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEGLStreamProducerPresentFrame" not found')
-    err = (<CUresult (*)(CUeglStreamConnection*, CUeglFrame, CUstream*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEGLStreamProducerPresentFrame)(conn, eglframe, pStream)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEGLStreamProducerReturnFrame(CUeglStreamConnection* conn, CUeglFrame* eglframe, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEGLStreamProducerReturnFrame
-    cuPythonInit()
-    if __cuEGLStreamProducerReturnFrame == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEGLStreamProducerReturnFrame" not found')
-    err = (<CUresult (*)(CUeglStreamConnection*, CUeglFrame*, CUstream*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEGLStreamProducerReturnFrame)(conn, eglframe, pStream)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsResourceGetMappedEglFrame(CUeglFrame* eglFrame, CUgraphicsResource resource, unsigned int index, unsigned int mipLevel) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsResourceGetMappedEglFrame
-    cuPythonInit()
-    if __cuGraphicsResourceGetMappedEglFrame == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsResourceGetMappedEglFrame" not found')
-    err = (<CUresult (*)(CUeglFrame*, CUgraphicsResource, unsigned int, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsResourceGetMappedEglFrame)(eglFrame, resource, index, mipLevel)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuEventCreateFromEGLSync(CUevent* phEvent, EGLSyncKHR eglSync, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuEventCreateFromEGLSync
-    cuPythonInit()
-    if __cuEventCreateFromEGLSync == NULL:
-        with gil:
-            raise RuntimeError('Function "cuEventCreateFromEGLSync" not found')
-    err = (<CUresult (*)(CUevent*, EGLSyncKHR, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuEventCreateFromEGLSync)(phEvent, eglSync, flags)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsGLRegisterBuffer
-    cuPythonInit()
-    if __cuGraphicsGLRegisterBuffer == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsGLRegisterBuffer" not found')
-    err = (<CUresult (*)(CUgraphicsResource*, GLuint, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsGLRegisterBuffer)(pCudaResource, buffer, Flags)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsGLRegisterImage
-    cuPythonInit()
-    if __cuGraphicsGLRegisterImage == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsGLRegisterImage" not found')
-    err = (<CUresult (*)(CUgraphicsResource*, GLuint, GLenum, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsGLRegisterImage)(pCudaResource, image, target, Flags)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGLGetDevices_v2(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGLGetDevices_v2
-    cuPythonInit()
-    if __cuGLGetDevices_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGLGetDevices_v2" not found')
-    err = (<CUresult (*)(unsigned int*, CUdevice*, unsigned int, CUGLDeviceList) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGLGetDevices_v2)(pCudaDeviceCount, pCudaDevices, cudaDeviceCount, deviceList)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuVDPAUGetDevice(CUdevice* pDevice, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuVDPAUGetDevice
-    cuPythonInit()
-    if __cuVDPAUGetDevice == NULL:
-        with gil:
-            raise RuntimeError('Function "cuVDPAUGetDevice" not found')
-    err = (<CUresult (*)(CUdevice*, VdpDevice, VdpGetProcAddress*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuVDPAUGetDevice)(pDevice, vdpDevice, vdpGetProcAddress)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuVDPAUCtxCreate_v2(CUcontext* pCtx, unsigned int flags, CUdevice device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuVDPAUCtxCreate_v2
-    cuPythonInit()
-    if __cuVDPAUCtxCreate_v2 == NULL:
-        with gil:
-            raise RuntimeError('Function "cuVDPAUCtxCreate_v2" not found')
-    err = (<CUresult (*)(CUcontext*, unsigned int, CUdevice, VdpDevice, VdpGetProcAddress*) except ?CUDA_ERROR_NOT_FOUND nogil> __cuVDPAUCtxCreate_v2)(pCtx, flags, device, vdpDevice, vdpGetProcAddress)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsVDPAURegisterVideoSurface(CUgraphicsResource* pCudaResource, VdpVideoSurface vdpSurface, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsVDPAURegisterVideoSurface
-    cuPythonInit()
-    if __cuGraphicsVDPAURegisterVideoSurface == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsVDPAURegisterVideoSurface" not found')
-    err = (<CUresult (*)(CUgraphicsResource*, VdpVideoSurface, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsVDPAURegisterVideoSurface)(pCudaResource, vdpSurface, flags)
-    return err
-{{endif}}
-
-{{if True}}
-
-cdef CUresult _cuGraphicsVDPAURegisterOutputSurface(CUgraphicsResource* pCudaResource, VdpOutputSurface vdpSurface, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    global __cuGraphicsVDPAURegisterOutputSurface
-    cuPythonInit()
-    if __cuGraphicsVDPAURegisterOutputSurface == NULL:
-        with gil:
-            raise RuntimeError('Function "cuGraphicsVDPAURegisterOutputSurface" not found')
-    err = (<CUresult (*)(CUgraphicsResource*, VdpOutputSurface, unsigned int) except ?CUDA_ERROR_NOT_FOUND nogil> __cuGraphicsVDPAURegisterOutputSurface)(pCudaResource, vdpSurface, flags)
-    return err
-{{endif}}
diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pxd.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pxd.in
deleted file mode 100644
index 26d3a6ff..00000000
--- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pxd.in
+++ /dev/null
@@ -1,114 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from cuda.bindings.cynvrtc cimport *
-
-{{if 'nvrtcGetErrorString' in found_functions}}
-
-cdef const char* _nvrtcGetErrorString(nvrtcResult result) except ?NULL nogil
-{{endif}}
-
-{{if 'nvrtcVersion' in found_functions}}
-
-cdef nvrtcResult _nvrtcVersion(int* major, int* minor) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetNumSupportedArchs' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetNumSupportedArchs(int* numArchs) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetSupportedArchs' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetSupportedArchs(int* supportedArchs) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcCreateProgram' in found_functions}}
-
-cdef nvrtcResult _nvrtcCreateProgram(nvrtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcDestroyProgram' in found_functions}}
-
-cdef nvrtcResult _nvrtcDestroyProgram(nvrtcProgram* prog) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcCompileProgram' in found_functions}}
-
-cdef nvrtcResult _nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char** options) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetPTXSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetPTXSize(nvrtcProgram prog, size_t* ptxSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetPTX' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetPTX(nvrtcProgram prog, char* ptx) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetCUBINSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetCUBINSize(nvrtcProgram prog, size_t* cubinSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetCUBIN' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetCUBIN(nvrtcProgram prog, char* cubin) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetNVVMSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetNVVMSize(nvrtcProgram prog, size_t* nvvmSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetNVVM' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetNVVM(nvrtcProgram prog, char* nvvm) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetLTOIRSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetLTOIRSize(nvrtcProgram prog, size_t* LTOIRSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetLTOIR' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetLTOIR(nvrtcProgram prog, char* LTOIR) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetOptiXIRSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetOptiXIRSize(nvrtcProgram prog, size_t* optixirSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetOptiXIR' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetOptiXIR(nvrtcProgram prog, char* optixir) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetProgramLogSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetProgramLogSize(nvrtcProgram prog, size_t* logSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetProgramLog' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetProgramLog(nvrtcProgram prog, char* log) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcAddNameExpression' in found_functions}}
-
-cdef nvrtcResult _nvrtcAddNameExpression(nvrtcProgram prog, const char* name_expression) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetLoweredName' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetLoweredName(nvrtcProgram prog, const char* name_expression, const char** lowered_name) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
diff --git a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
deleted file mode 100644
index bb4b4cdb..00000000
--- a/cuda_bindings/cuda/bindings/_bindings/cynvrtc.pyx.in
+++ /dev/null
@@ -1,552 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-{{if 'Windows' == platform.system()}}
-import win32api
-import struct
-from pywintypes import error
-{{else}}
-cimport cuda.bindings._lib.dlfcn as dlfcn
-{{endif}}
-
-cdef bint __cuPythonInit = False
-{{if 'nvrtcGetErrorString' in found_functions}}cdef void *__nvrtcGetErrorString = NULL{{endif}}
-{{if 'nvrtcVersion' in found_functions}}cdef void *__nvrtcVersion = NULL{{endif}}
-{{if 'nvrtcGetNumSupportedArchs' in found_functions}}cdef void *__nvrtcGetNumSupportedArchs = NULL{{endif}}
-{{if 'nvrtcGetSupportedArchs' in found_functions}}cdef void *__nvrtcGetSupportedArchs = NULL{{endif}}
-{{if 'nvrtcCreateProgram' in found_functions}}cdef void *__nvrtcCreateProgram = NULL{{endif}}
-{{if 'nvrtcDestroyProgram' in found_functions}}cdef void *__nvrtcDestroyProgram = NULL{{endif}}
-{{if 'nvrtcCompileProgram' in found_functions}}cdef void *__nvrtcCompileProgram = NULL{{endif}}
-{{if 'nvrtcGetPTXSize' in found_functions}}cdef void *__nvrtcGetPTXSize = NULL{{endif}}
-{{if 'nvrtcGetPTX' in found_functions}}cdef void *__nvrtcGetPTX = NULL{{endif}}
-{{if 'nvrtcGetCUBINSize' in found_functions}}cdef void *__nvrtcGetCUBINSize = NULL{{endif}}
-{{if 'nvrtcGetCUBIN' in found_functions}}cdef void *__nvrtcGetCUBIN = NULL{{endif}}
-{{if 'nvrtcGetNVVMSize' in found_functions}}cdef void *__nvrtcGetNVVMSize = NULL{{endif}}
-{{if 'nvrtcGetNVVM' in found_functions}}cdef void *__nvrtcGetNVVM = NULL{{endif}}
-{{if 'nvrtcGetLTOIRSize' in found_functions}}cdef void *__nvrtcGetLTOIRSize = NULL{{endif}}
-{{if 'nvrtcGetLTOIR' in found_functions}}cdef void *__nvrtcGetLTOIR = NULL{{endif}}
-{{if 'nvrtcGetOptiXIRSize' in found_functions}}cdef void *__nvrtcGetOptiXIRSize = NULL{{endif}}
-{{if 'nvrtcGetOptiXIR' in found_functions}}cdef void *__nvrtcGetOptiXIR = NULL{{endif}}
-{{if 'nvrtcGetProgramLogSize' in found_functions}}cdef void *__nvrtcGetProgramLogSize = NULL{{endif}}
-{{if 'nvrtcGetProgramLog' in found_functions}}cdef void *__nvrtcGetProgramLog = NULL{{endif}}
-{{if 'nvrtcAddNameExpression' in found_functions}}cdef void *__nvrtcAddNameExpression = NULL{{endif}}
-{{if 'nvrtcGetLoweredName' in found_functions}}cdef void *__nvrtcGetLoweredName = NULL{{endif}}
-
-cdef int cuPythonInit() except -1 nogil:
-    global __cuPythonInit
-    if __cuPythonInit:
-        return 0
-    __cuPythonInit = True
-
-    # Load library
-    {{if 'Windows' == platform.system()}}
-    LOAD_LIBRARY_SAFE_CURRENT_DIRS = 0x00002000
-    with gil:
-        try:
-            handle = win32api.LoadLibraryEx("nvrtc64_120_0.dll", 0, LOAD_LIBRARY_SAFE_CURRENT_DIRS)
-        except:
-            raise RuntimeError('Failed to LoadLibraryEx nvrtc64_120_0.dll')
-    {{else}}
-    handle = NULL
-    if handle == NULL:
-        handle = dlfcn.dlopen('libnvrtc.so.12', dlfcn.RTLD_NOW)
-    if handle == NULL:
-        with gil:
-            raise RuntimeError('Failed to dlopen libnvrtc.so.12')
-    {{endif}}
-
-
-    # Load function
-    {{if 'Windows' == platform.system()}}
-    with gil:
-        {{if 'nvrtcGetErrorString' in found_functions}}
-        try:
-            global __nvrtcGetErrorString
-            __nvrtcGetErrorString = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetErrorString')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcVersion' in found_functions}}
-        try:
-            global __nvrtcVersion
-            __nvrtcVersion = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcVersion')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetNumSupportedArchs' in found_functions}}
-        try:
-            global __nvrtcGetNumSupportedArchs
-            __nvrtcGetNumSupportedArchs = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetNumSupportedArchs')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetSupportedArchs' in found_functions}}
-        try:
-            global __nvrtcGetSupportedArchs
-            __nvrtcGetSupportedArchs = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetSupportedArchs')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcCreateProgram' in found_functions}}
-        try:
-            global __nvrtcCreateProgram
-            __nvrtcCreateProgram = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcCreateProgram')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcDestroyProgram' in found_functions}}
-        try:
-            global __nvrtcDestroyProgram
-            __nvrtcDestroyProgram = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcDestroyProgram')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcCompileProgram' in found_functions}}
-        try:
-            global __nvrtcCompileProgram
-            __nvrtcCompileProgram = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcCompileProgram')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetPTXSize' in found_functions}}
-        try:
-            global __nvrtcGetPTXSize
-            __nvrtcGetPTXSize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetPTXSize')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetPTX' in found_functions}}
-        try:
-            global __nvrtcGetPTX
-            __nvrtcGetPTX = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetPTX')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetCUBINSize' in found_functions}}
-        try:
-            global __nvrtcGetCUBINSize
-            __nvrtcGetCUBINSize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetCUBINSize')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetCUBIN' in found_functions}}
-        try:
-            global __nvrtcGetCUBIN
-            __nvrtcGetCUBIN = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetCUBIN')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetNVVMSize' in found_functions}}
-        try:
-            global __nvrtcGetNVVMSize
-            __nvrtcGetNVVMSize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetNVVMSize')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetNVVM' in found_functions}}
-        try:
-            global __nvrtcGetNVVM
-            __nvrtcGetNVVM = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetNVVM')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetLTOIRSize' in found_functions}}
-        try:
-            global __nvrtcGetLTOIRSize
-            __nvrtcGetLTOIRSize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetLTOIRSize')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetLTOIR' in found_functions}}
-        try:
-            global __nvrtcGetLTOIR
-            __nvrtcGetLTOIR = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetLTOIR')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetOptiXIRSize' in found_functions}}
-        try:
-            global __nvrtcGetOptiXIRSize
-            __nvrtcGetOptiXIRSize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetOptiXIRSize')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetOptiXIR' in found_functions}}
-        try:
-            global __nvrtcGetOptiXIR
-            __nvrtcGetOptiXIR = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetOptiXIR')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetProgramLogSize' in found_functions}}
-        try:
-            global __nvrtcGetProgramLogSize
-            __nvrtcGetProgramLogSize = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetProgramLogSize')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetProgramLog' in found_functions}}
-        try:
-            global __nvrtcGetProgramLog
-            __nvrtcGetProgramLog = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetProgramLog')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcAddNameExpression' in found_functions}}
-        try:
-            global __nvrtcAddNameExpression
-            __nvrtcAddNameExpression = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcAddNameExpression')
-        except:
-            pass
-        {{endif}}
-        {{if 'nvrtcGetLoweredName' in found_functions}}
-        try:
-            global __nvrtcGetLoweredName
-            __nvrtcGetLoweredName = <void*><unsigned long long>win32api.GetProcAddress(handle, 'nvrtcGetLoweredName')
-        except:
-            pass
-        {{endif}}
-
-    {{else}}
-    {{if 'nvrtcGetErrorString' in found_functions}}
-    global __nvrtcGetErrorString
-    __nvrtcGetErrorString = dlfcn.dlsym(handle, 'nvrtcGetErrorString')
-    {{endif}}
-    {{if 'nvrtcVersion' in found_functions}}
-    global __nvrtcVersion
-    __nvrtcVersion = dlfcn.dlsym(handle, 'nvrtcVersion')
-    {{endif}}
-    {{if 'nvrtcGetNumSupportedArchs' in found_functions}}
-    global __nvrtcGetNumSupportedArchs
-    __nvrtcGetNumSupportedArchs = dlfcn.dlsym(handle, 'nvrtcGetNumSupportedArchs')
-    {{endif}}
-    {{if 'nvrtcGetSupportedArchs' in found_functions}}
-    global __nvrtcGetSupportedArchs
-    __nvrtcGetSupportedArchs = dlfcn.dlsym(handle, 'nvrtcGetSupportedArchs')
-    {{endif}}
-    {{if 'nvrtcCreateProgram' in found_functions}}
-    global __nvrtcCreateProgram
-    __nvrtcCreateProgram = dlfcn.dlsym(handle, 'nvrtcCreateProgram')
-    {{endif}}
-    {{if 'nvrtcDestroyProgram' in found_functions}}
-    global __nvrtcDestroyProgram
-    __nvrtcDestroyProgram = dlfcn.dlsym(handle, 'nvrtcDestroyProgram')
-    {{endif}}
-    {{if 'nvrtcCompileProgram' in found_functions}}
-    global __nvrtcCompileProgram
-    __nvrtcCompileProgram = dlfcn.dlsym(handle, 'nvrtcCompileProgram')
-    {{endif}}
-    {{if 'nvrtcGetPTXSize' in found_functions}}
-    global __nvrtcGetPTXSize
-    __nvrtcGetPTXSize = dlfcn.dlsym(handle, 'nvrtcGetPTXSize')
-    {{endif}}
-    {{if 'nvrtcGetPTX' in found_functions}}
-    global __nvrtcGetPTX
-    __nvrtcGetPTX = dlfcn.dlsym(handle, 'nvrtcGetPTX')
-    {{endif}}
-    {{if 'nvrtcGetCUBINSize' in found_functions}}
-    global __nvrtcGetCUBINSize
-    __nvrtcGetCUBINSize = dlfcn.dlsym(handle, 'nvrtcGetCUBINSize')
-    {{endif}}
-    {{if 'nvrtcGetCUBIN' in found_functions}}
-    global __nvrtcGetCUBIN
-    __nvrtcGetCUBIN = dlfcn.dlsym(handle, 'nvrtcGetCUBIN')
-    {{endif}}
-    {{if 'nvrtcGetNVVMSize' in found_functions}}
-    global __nvrtcGetNVVMSize
-    __nvrtcGetNVVMSize = dlfcn.dlsym(handle, 'nvrtcGetNVVMSize')
-    {{endif}}
-    {{if 'nvrtcGetNVVM' in found_functions}}
-    global __nvrtcGetNVVM
-    __nvrtcGetNVVM = dlfcn.dlsym(handle, 'nvrtcGetNVVM')
-    {{endif}}
-    {{if 'nvrtcGetLTOIRSize' in found_functions}}
-    global __nvrtcGetLTOIRSize
-    __nvrtcGetLTOIRSize = dlfcn.dlsym(handle, 'nvrtcGetLTOIRSize')
-    {{endif}}
-    {{if 'nvrtcGetLTOIR' in found_functions}}
-    global __nvrtcGetLTOIR
-    __nvrtcGetLTOIR = dlfcn.dlsym(handle, 'nvrtcGetLTOIR')
-    {{endif}}
-    {{if 'nvrtcGetOptiXIRSize' in found_functions}}
-    global __nvrtcGetOptiXIRSize
-    __nvrtcGetOptiXIRSize = dlfcn.dlsym(handle, 'nvrtcGetOptiXIRSize')
-    {{endif}}
-    {{if 'nvrtcGetOptiXIR' in found_functions}}
-    global __nvrtcGetOptiXIR
-    __nvrtcGetOptiXIR = dlfcn.dlsym(handle, 'nvrtcGetOptiXIR')
-    {{endif}}
-    {{if 'nvrtcGetProgramLogSize' in found_functions}}
-    global __nvrtcGetProgramLogSize
-    __nvrtcGetProgramLogSize = dlfcn.dlsym(handle, 'nvrtcGetProgramLogSize')
-    {{endif}}
-    {{if 'nvrtcGetProgramLog' in found_functions}}
-    global __nvrtcGetProgramLog
-    __nvrtcGetProgramLog = dlfcn.dlsym(handle, 'nvrtcGetProgramLog')
-    {{endif}}
-    {{if 'nvrtcAddNameExpression' in found_functions}}
-    global __nvrtcAddNameExpression
-    __nvrtcAddNameExpression = dlfcn.dlsym(handle, 'nvrtcAddNameExpression')
-    {{endif}}
-    {{if 'nvrtcGetLoweredName' in found_functions}}
-    global __nvrtcGetLoweredName
-    __nvrtcGetLoweredName = dlfcn.dlsym(handle, 'nvrtcGetLoweredName')
-    {{endif}}
-
-    {{endif}}
-
-{{if 'nvrtcGetErrorString' in found_functions}}
-
-cdef const char* _nvrtcGetErrorString(nvrtcResult result) except ?NULL nogil:
-    global __nvrtcGetErrorString
-    cuPythonInit()
-    if __nvrtcGetErrorString == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetErrorString" not found')
-    err = (<const char* (*)(nvrtcResult) except ?NULL nogil> __nvrtcGetErrorString)(result)
-    return err
-{{endif}}
-
-{{if 'nvrtcVersion' in found_functions}}
-
-cdef nvrtcResult _nvrtcVersion(int* major, int* minor) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcVersion
-    cuPythonInit()
-    if __nvrtcVersion == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcVersion" not found')
-    err = (<nvrtcResult (*)(int*, int*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcVersion)(major, minor)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetNumSupportedArchs' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetNumSupportedArchs(int* numArchs) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetNumSupportedArchs
-    cuPythonInit()
-    if __nvrtcGetNumSupportedArchs == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetNumSupportedArchs" not found')
-    err = (<nvrtcResult (*)(int*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetNumSupportedArchs)(numArchs)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetSupportedArchs' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetSupportedArchs(int* supportedArchs) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetSupportedArchs
-    cuPythonInit()
-    if __nvrtcGetSupportedArchs == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetSupportedArchs" not found')
-    err = (<nvrtcResult (*)(int*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetSupportedArchs)(supportedArchs)
-    return err
-{{endif}}
-
-{{if 'nvrtcCreateProgram' in found_functions}}
-
-cdef nvrtcResult _nvrtcCreateProgram(nvrtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcCreateProgram
-    cuPythonInit()
-    if __nvrtcCreateProgram == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcCreateProgram" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram*, const char*, const char*, int, const char**, const char**) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcCreateProgram)(prog, src, name, numHeaders, headers, includeNames)
-    return err
-{{endif}}
-
-{{if 'nvrtcDestroyProgram' in found_functions}}
-
-cdef nvrtcResult _nvrtcDestroyProgram(nvrtcProgram* prog) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcDestroyProgram
-    cuPythonInit()
-    if __nvrtcDestroyProgram == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcDestroyProgram" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcDestroyProgram)(prog)
-    return err
-{{endif}}
-
-{{if 'nvrtcCompileProgram' in found_functions}}
-
-cdef nvrtcResult _nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char** options) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcCompileProgram
-    cuPythonInit()
-    if __nvrtcCompileProgram == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcCompileProgram" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, int, const char**) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcCompileProgram)(prog, numOptions, options)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetPTXSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetPTXSize(nvrtcProgram prog, size_t* ptxSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetPTXSize
-    cuPythonInit()
-    if __nvrtcGetPTXSize == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetPTXSize" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, size_t*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetPTXSize)(prog, ptxSizeRet)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetPTX' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetPTX(nvrtcProgram prog, char* ptx) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetPTX
-    cuPythonInit()
-    if __nvrtcGetPTX == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetPTX" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, char*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetPTX)(prog, ptx)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetCUBINSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetCUBINSize(nvrtcProgram prog, size_t* cubinSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetCUBINSize
-    cuPythonInit()
-    if __nvrtcGetCUBINSize == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetCUBINSize" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, size_t*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetCUBINSize)(prog, cubinSizeRet)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetCUBIN' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetCUBIN(nvrtcProgram prog, char* cubin) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetCUBIN
-    cuPythonInit()
-    if __nvrtcGetCUBIN == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetCUBIN" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, char*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetCUBIN)(prog, cubin)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetNVVMSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetNVVMSize(nvrtcProgram prog, size_t* nvvmSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetNVVMSize
-    cuPythonInit()
-    if __nvrtcGetNVVMSize == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetNVVMSize" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, size_t*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetNVVMSize)(prog, nvvmSizeRet)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetNVVM' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetNVVM(nvrtcProgram prog, char* nvvm) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetNVVM
-    cuPythonInit()
-    if __nvrtcGetNVVM == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetNVVM" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, char*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetNVVM)(prog, nvvm)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetLTOIRSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetLTOIRSize(nvrtcProgram prog, size_t* LTOIRSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetLTOIRSize
-    cuPythonInit()
-    if __nvrtcGetLTOIRSize == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetLTOIRSize" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, size_t*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetLTOIRSize)(prog, LTOIRSizeRet)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetLTOIR' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetLTOIR(nvrtcProgram prog, char* LTOIR) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetLTOIR
-    cuPythonInit()
-    if __nvrtcGetLTOIR == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetLTOIR" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, char*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetLTOIR)(prog, LTOIR)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetOptiXIRSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetOptiXIRSize(nvrtcProgram prog, size_t* optixirSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetOptiXIRSize
-    cuPythonInit()
-    if __nvrtcGetOptiXIRSize == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetOptiXIRSize" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, size_t*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetOptiXIRSize)(prog, optixirSizeRet)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetOptiXIR' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetOptiXIR(nvrtcProgram prog, char* optixir) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetOptiXIR
-    cuPythonInit()
-    if __nvrtcGetOptiXIR == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetOptiXIR" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, char*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetOptiXIR)(prog, optixir)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetProgramLogSize' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetProgramLogSize(nvrtcProgram prog, size_t* logSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetProgramLogSize
-    cuPythonInit()
-    if __nvrtcGetProgramLogSize == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetProgramLogSize" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, size_t*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetProgramLogSize)(prog, logSizeRet)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetProgramLog' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetProgramLog(nvrtcProgram prog, char* log) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetProgramLog
-    cuPythonInit()
-    if __nvrtcGetProgramLog == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetProgramLog" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, char*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetProgramLog)(prog, log)
-    return err
-{{endif}}
-
-{{if 'nvrtcAddNameExpression' in found_functions}}
-
-cdef nvrtcResult _nvrtcAddNameExpression(nvrtcProgram prog, const char* name_expression) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcAddNameExpression
-    cuPythonInit()
-    if __nvrtcAddNameExpression == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcAddNameExpression" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, const char*) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcAddNameExpression)(prog, name_expression)
-    return err
-{{endif}}
-
-{{if 'nvrtcGetLoweredName' in found_functions}}
-
-cdef nvrtcResult _nvrtcGetLoweredName(nvrtcProgram prog, const char* name_expression, const char** lowered_name) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    global __nvrtcGetLoweredName
-    cuPythonInit()
-    if __nvrtcGetLoweredName == NULL:
-        with gil:
-            raise RuntimeError('Function "nvrtcGetLoweredName" not found')
-    err = (<nvrtcResult (*)(nvrtcProgram, const char*, const char**) except ?NVRTC_ERROR_INVALID_INPUT nogil> __nvrtcGetLoweredName)(prog, name_expression, lowered_name)
-    return err
-{{endif}}
diff --git a/cuda_bindings/cuda/bindings/_bindings/loader.cpp b/cuda_bindings/cuda/bindings/_bindings/loader.cpp
deleted file mode 100644
index b2477fef..00000000
--- a/cuda_bindings/cuda/bindings/_bindings/loader.cpp
+++ /dev/null
@@ -1,354 +0,0 @@
-// Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-//
-// Please refer to the NVIDIA end user license agreement (EULA) associated
-// with this source code for terms and conditions that govern your use of
-// this software. Any use, reproduction, disclosure, or distribution of
-// this software and related documentation outside the terms of the EULA
-// is strictly prohibited.
-#include <stdio.h>
-#include <stdlib.h>
-#include <cstring>
-#include "loader.h"
-
-#define DXCORE_MAX_PATH 260
-
-#if defined(_WIN32)
-#include "windows.h"
-#define _getAddr GetProcAddress
-#define _Handle HMODULE
-static const size_t sysrootName64_length = (sizeof("System32") - 1);
-static const char* sysrootName64 = "System32";
-static const size_t libcudaName64_length = (sizeof("\\nvcuda64.dll") - 1);
-static const char* libcudaName64 = "\\nvcuda64.dll";
-static const size_t sysrootNameX86_length = (sizeof("SysWOW64") - 1);
-static const char* sysrootNameX86 = "SysWOW64";
-static const size_t libcudaNameX86_length = (sizeof("\\nvcuda32.dll") - 1);
-static const char* libcudaNameX86 = "\\nvcuda32.dll";
-static size_t sysrootName_length = NULL;
-static const char* sysrootName = NULL;
-
-#else
-#include <dlfcn.h>
-#include <unistd.h>
-#define _getAddr dlsym
-#define _Handle void*
-static const size_t libcudaNameLinux_length = (sizeof("/libcuda.so.1.1") - 1);
-static const char* libcudaNameLinux = "/libcuda.so.1.1";
-#endif
-static size_t libcudaName_length = 0;
-static const char* libcudaName = NULL;
-
-struct dxcore_enumAdapters2;
-struct dxcore_queryAdapterInfo;
-
-typedef int (*pfnDxcoreEnumAdapters2)(const dxcore_enumAdapters2 *pParams);
-typedef int (*pfnDxcoreQueryAdapterInfo)(const dxcore_queryAdapterInfo *pParams);
-
-struct dxcore_lib {
-    _Handle hDxcoreLib;
-    pfnDxcoreEnumAdapters2 pDxcoreEnumAdapters2;
-    pfnDxcoreQueryAdapterInfo pDxcoreQueryAdapterInfo;
-};
-
-struct dxcore_luid
-{
-    unsigned int lowPart;
-    int highPart;
-};
-
-struct dxcore_adapterInfo
-{
-    unsigned int              hAdapter;
-    struct dxcore_luid        AdapterLuid;
-    unsigned int              NumOfSources;
-    unsigned int              bPresentMoveRegionsPreferred;
-};
-
-struct dxcore_enumAdapters2
-{
-    unsigned int                   NumAdapters;
-    struct dxcore_adapterInfo     *pAdapters;
-};
-
-enum dxcore_kmtqueryAdapterInfoType
-{
-    DXCORE_QUERYDRIVERVERSION = 13,
-    DXCORE_QUERYREGISTRY = 48,
-};
-
-enum dxcore_queryregistry_type {
-    DXCORE_QUERYREGISTRY_DRIVERSTOREPATH = 2,
-};
-
-enum dxcore_queryregistry_status {
-    DXCORE_QUERYREGISTRY_STATUS_SUCCESS = 0,
-    DXCORE_QUERYREGISTRY_STATUS_BUFFER_OVERFLOW = 1,
-    DXCORE_QUERYREGISTRY_STATUS_FAIL = 2,
-};
-
-struct dxcore_queryregistry_info {
-    enum dxcore_queryregistry_type        QueryType;
-    unsigned int                          QueryFlags;
-    wchar_t                               ValueName[DXCORE_MAX_PATH];
-    unsigned int                          ValueType;
-    unsigned int                          PhysicalAdapterIndex;
-    unsigned int                          OutputValueSize;
-    enum dxcore_queryregistry_status      Status;
-    union {
-        unsigned long long                    OutputQword;
-        wchar_t                               Output;
-    };
-};
-
-struct dxcore_queryAdapterInfo
-{
-    unsigned int                           hAdapter;
-    enum dxcore_kmtqueryAdapterInfoType    Type;
-    void                                   *pPrivateDriverData;
-    unsigned int                           PrivateDriverDataSize;
-};
-
-static int dxcore_query_adapter_info_helper(struct dxcore_lib* pLib,
-                                            unsigned int hAdapter,
-                                            enum dxcore_kmtqueryAdapterInfoType type,
-                                            void* pPrivateDriverDate,
-                                            unsigned int privateDriverDataSize)
-{
-    struct dxcore_queryAdapterInfo queryAdapterInfo = {};
-
-    queryAdapterInfo.hAdapter = hAdapter;
-    queryAdapterInfo.Type = type;
-    queryAdapterInfo.pPrivateDriverData = pPrivateDriverDate;
-    queryAdapterInfo.PrivateDriverDataSize = privateDriverDataSize;
-
-    return pLib->pDxcoreQueryAdapterInfo(&queryAdapterInfo);
-}
-
-static int dxcore_query_adapter_wddm_version(struct dxcore_lib* pLib, unsigned int hAdapter, unsigned int* version)
-{
-        return dxcore_query_adapter_info_helper(pLib,
-                                                hAdapter,
-                                                DXCORE_QUERYDRIVERVERSION,
-                                                (void*)version,
-                                                (unsigned int)sizeof(*version));
-}
-
-static int dxcore_query_adapter_driverstore_path(struct dxcore_lib* pLib, unsigned int hAdapter, char** ppDriverStorePath)
-{
-    struct dxcore_queryregistry_info params = {};
-    struct dxcore_queryregistry_info* pValue = NULL;
-    wchar_t* pOutput;
-    size_t outputSizeInBytes;
-    size_t outputSize;
-
-    // 1. Fetch output size
-    params.QueryType = DXCORE_QUERYREGISTRY_DRIVERSTOREPATH;
-
-    if (dxcore_query_adapter_info_helper(pLib,
-                                         hAdapter,
-                                         DXCORE_QUERYREGISTRY,
-                                         (void*)&params,
-                                         (unsigned int)sizeof(struct dxcore_queryregistry_info)))
-    {
-        return (-1);
-    }
-
-    if (params.OutputValueSize > DXCORE_MAX_PATH * sizeof(wchar_t)) {
-        return (-1);
-    }
-
-    outputSizeInBytes = (size_t)params.OutputValueSize;
-    outputSize = outputSizeInBytes / sizeof(wchar_t);
-
-    // 2. Retrieve output
-    pValue = (struct dxcore_queryregistry_info*)calloc(sizeof(struct dxcore_queryregistry_info) + outputSizeInBytes + sizeof(wchar_t), 1);
-    if (!pValue) {
-        return (-1);
-    }
-
-    pValue->QueryType = DXCORE_QUERYREGISTRY_DRIVERSTOREPATH;
-    pValue->OutputValueSize = (unsigned int)outputSizeInBytes;
-
-    if (dxcore_query_adapter_info_helper(pLib,
-                                         hAdapter,
-                                         DXCORE_QUERYREGISTRY,
-                                         (void*)pValue,
-                                         (unsigned int)(sizeof(struct dxcore_queryregistry_info) + outputSizeInBytes)))
-    {
-        free(pValue);
-        return (-1);
-    }
-    pOutput = (wchar_t*)(&pValue->Output);
-
-    // Make sure no matter what happened the wchar_t string is null terminated
-    pOutput[outputSize] = L'\0';
-
-    // Convert the output into a regular c string
-    *ppDriverStorePath = (char*)calloc(outputSize + 1, sizeof(char));
-    if (!*ppDriverStorePath) {
-        free(pValue);
-        return (-1);
-    }
-    wcstombs(*ppDriverStorePath, pOutput, outputSize);
-
-    free(pValue);
-
-    return 0;
-}
-
-static char* replaceSystemPath(char* path)
-{
-    char *replacedPath = (char*)calloc(DXCORE_MAX_PATH + 1, sizeof(char));
-
-#if defined(_WIN32)
-    wchar_t *systemPath = (wchar_t*)calloc(DXCORE_MAX_PATH + 1, sizeof(wchar_t));
-    // Get system root path
-    if (GetSystemDirectoryW(systemPath, DXCORE_MAX_PATH) == 0) {
-        free(replacedPath);
-        free(systemPath);
-        return NULL;
-    }
-    wcstombs(replacedPath, systemPath, DXCORE_MAX_PATH);
-    free(systemPath);
-
-    // Replace the /SystemRoot/ part of the registry-obtained path with
-    // the actual system root path from above
-    char* sysrootPath = strstr(path, sysrootName);
-    strncat(replacedPath, sysrootPath + sysrootName_length, DXCORE_MAX_PATH - strlen(replacedPath));
-#else
-    strncat(replacedPath, path, DXCORE_MAX_PATH);
-#endif
-
-    // Append nvcuda dll
-    if (libcudaName_length < DXCORE_MAX_PATH - strlen(replacedPath)) {
-        strncat(replacedPath, libcudaName, libcudaName_length);
-    }
-    else {
-        strncat(replacedPath, libcudaName, DXCORE_MAX_PATH - strlen(replacedPath));
-    }
-
-    return replacedPath;
-}
-
-static int dxcore_check_adapter(struct dxcore_lib *pLib, char *libPath, struct dxcore_adapterInfo *pAdapterInfo)
-{
-    unsigned int wddmVersion = 0;
-    char* driverStorePath = NULL;
-
-    if (dxcore_query_adapter_wddm_version(pLib, pAdapterInfo->hAdapter, &wddmVersion)) {
-        return 1;
-    }
-
-    if (wddmVersion < 2500) {
-        return 1;
-    }
-
-    if (dxcore_query_adapter_driverstore_path(pLib, pAdapterInfo->hAdapter, &driverStorePath)) {
-        return 1;
-    }
-
-    // Replace with valid path
-    char* replacedPath = replaceSystemPath(driverStorePath);
-    if (!replacedPath) {
-        free(driverStorePath);
-        free(replacedPath);
-        return 1;
-    }
-
-    // Does file exist?
-#if defined(_WIN32)
-    if (GetFileAttributes(replacedPath) == INVALID_FILE_ATTRIBUTES) {
-        free(driverStorePath);
-        free(replacedPath);
-        return 1;
-    }
-#else
-    if (access(replacedPath, F_OK) < 0) {
-        free(driverStorePath);
-        free(replacedPath);
-        return 1;
-    }
-#endif
-
-    memcpy(libPath, replacedPath, DXCORE_MAX_PATH);
-    free(driverStorePath);
-    free(replacedPath);
-
-    return 0;
-}
-
-static int dxcore_enum_adapters(struct dxcore_lib *pLib, char *libPath)
-{
-    struct dxcore_enumAdapters2 params = {0};
-    unsigned int adapterIndex = 0;
-
-    if (pLib->pDxcoreEnumAdapters2(&params)) {
-        return 1;
-    }
-    params.pAdapters = (dxcore_adapterInfo*)calloc(params.NumAdapters, sizeof(struct dxcore_adapterInfo));
-    if (pLib->pDxcoreEnumAdapters2(&params)) {
-        free(params.pAdapters);
-        return 1;
-    }
-
-    for (adapterIndex = 0; adapterIndex < params.NumAdapters; adapterIndex++) {
-        if (!dxcore_check_adapter(pLib, libPath, &params.pAdapters[adapterIndex])) {
-            free(params.pAdapters);
-            return 0;
-        }
-    }
-
-    free(params.pAdapters);
-    return 1;
-}
-
-int getCUDALibraryPath(char *libPath, bool isBit64)
-{
-    struct dxcore_lib lib = {0};
-
-    if (!libPath) {
-        return 1;
-    }
-
-    // Configure paths based on app's bit configuration
-#if defined(_WIN32)
-    if (isBit64) {
-        sysrootName_length = sysrootName64_length;
-        sysrootName = sysrootName64;
-        libcudaName_length = libcudaName64_length;
-        libcudaName = libcudaName64;
-    }
-    else {
-        sysrootName_length = sysrootNameX86_length;
-        sysrootName = sysrootNameX86;
-        libcudaName_length = libcudaNameX86_length;
-        libcudaName = libcudaNameX86;
-    }
-#else
-    libcudaName_length = libcudaNameLinux_length;
-    libcudaName = libcudaNameLinux;
-#endif
-
-#if defined(_WIN32)
-    lib.hDxcoreLib = LoadLibraryExW(L"gdi32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
-#else
-    lib.hDxcoreLib = dlopen("libdxcore.so", RTLD_LAZY);
-#endif
-    if (!lib.hDxcoreLib) {
-        return 1;
-    }
-
-    lib.pDxcoreEnumAdapters2 = (pfnDxcoreEnumAdapters2)_getAddr(lib.hDxcoreLib, "D3DKMTEnumAdapters2");
-    if (!lib.pDxcoreEnumAdapters2) {
-        return 1;
-    }
-    lib.pDxcoreQueryAdapterInfo = (pfnDxcoreQueryAdapterInfo)_getAddr(lib.hDxcoreLib, "D3DKMTQueryAdapterInfo");
-    if (!lib.pDxcoreQueryAdapterInfo) {
-        return 1;
-    }
-
-    if (dxcore_enum_adapters(&lib, libPath)) {
-        return 1;
-    }
-    return 0;
-}
diff --git a/cuda_bindings/cuda/bindings/_bindings/loader.h b/cuda_bindings/cuda/bindings/_bindings/loader.h
deleted file mode 100644
index 2963d9e0..00000000
--- a/cuda_bindings/cuda/bindings/_bindings/loader.h
+++ /dev/null
@@ -1,8 +0,0 @@
-// Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-//
-// Please refer to the NVIDIA end user license agreement (EULA) associated
-// with this source code for terms and conditions that govern your use of
-// this software. Any use, reproduction, disclosure, or distribution of
-// this software and related documentation outside the terms of the EULA
-// is strictly prohibited.
-int getCUDALibraryPath(char *libPath, bool isBit64);
diff --git a/cuda_bindings/cuda/bindings/_bindings/loader.pxd b/cuda_bindings/cuda/bindings/_bindings/loader.pxd
deleted file mode 100644
index 17151580..00000000
--- a/cuda_bindings/cuda/bindings/_bindings/loader.pxd
+++ /dev/null
@@ -1,10 +0,0 @@
-
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-cdef extern from "loader.h":
-    int getCUDALibraryPath(char *libPath, bint isBit64)
diff --git a/cuda_bindings/cuda/bindings/_lib/__init__.py b/cuda_bindings/cuda/bindings/_lib/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/cuda_bindings/cuda/bindings/_lib/cyruntime/__init__.py b/cuda_bindings/cuda/bindings/_lib/cyruntime/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pxd.in b/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pxd.in
deleted file mode 100644
index bb42fc31..00000000
--- a/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pxd.in
+++ /dev/null
@@ -1,311 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-cimport cuda.bindings.cydriver as cydriver
-from cuda.bindings.cyruntime cimport *
-from libc.stdlib cimport malloc, free, calloc
-from libc.string cimport memset, memcpy, strncmp
-from libcpp cimport bool
-
-{{if 'cudaMemcpy' in found_functions}}cdef cudaError_t _cudaMemcpy(void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamCreate' in found_functions}}cdef cudaError_t _cudaStreamCreate(cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaEventCreate' in found_functions}}cdef cudaError_t _cudaEventCreate(cudaEvent_t* event) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaEventQuery' in found_functions}}cdef cudaError_t _cudaEventQuery(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaCreateChannelDesc' in found_functions}}cdef cudaChannelFormatDesc _cudaCreateChannelDesc(int x, int y, int z, int w, cudaChannelFormatKind f) noexcept nogil{{endif}}
-{{if 'cudaDriverGetVersion' in found_functions}}cdef cudaError_t _cudaDriverGetVersion(int* driverVersion) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaRuntimeGetVersion' in found_functions}}cdef cudaError_t _cudaRuntimeGetVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetTexture1DLinearMaxWidth' in found_functions}}cdef cudaError_t _cudaDeviceGetTexture1DLinearMaxWidth(size_t* maxWidthInElements, const cudaChannelFormatDesc* fmtDesc, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMallocHost' in found_functions}}cdef cudaError_t _cudaMallocHost(void** ptr, size_t size) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMallocPitch' in found_functions}}cdef cudaError_t _cudaMallocPitch(void** devPtr, size_t* pitch, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMallocMipmappedArray' in found_functions}}cdef cudaError_t _cudaMallocMipmappedArray(cudaMipmappedArray_t* mipmappedArray, const cudaChannelFormatDesc* desc, cudaExtent extent, unsigned int numLevels, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpy2D' in found_functions}}cdef cudaError_t _cudaMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpy2DAsync' in found_functions}}cdef cudaError_t _cudaMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpyAsync' in found_functions}}cdef cudaError_t _cudaMemcpyAsync(void* dst, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddMemcpyNode' in found_functions}}cdef cudaError_t _cudaGraphAddMemcpyNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemcpy3DParms* pCopyParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddMemcpyNode1D' in found_functions}}cdef cudaError_t _cudaGraphAddMemcpyNode1D(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphMemcpyNodeSetParams1D' in found_functions}}cdef cudaError_t _cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecMemcpyNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphExecMemcpyNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecMemcpyNodeSetParams1D' in found_functions}}cdef cudaError_t _cudaGraphExecMemcpyNodeSetParams1D(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetDriverEntryPoint' in found_functions}}cdef cudaError_t _cudaGetDriverEntryPoint(const char* symbol, void** funcPtr, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddMemsetNode' in found_functions}}cdef cudaError_t _cudaGraphAddMemsetNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemsetParams* pMemsetParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecMemsetNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphExecMemsetNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphMemcpyNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphMemcpyNodeSetParams(cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphMemcpyNodeGetParams' in found_functions}}cdef cudaError_t _cudaGraphMemcpyNodeGetParams(cudaGraphNode_t node, cudaMemcpy3DParms* p) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaFuncGetAttributes' in found_functions}}cdef cudaError_t _cudaFuncGetAttributes(cudaFuncAttributes* attr, const void* func) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMallocArray' in found_functions}}cdef cudaError_t _cudaMallocArray(cudaArray_t* arrayPtr, const cudaChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMalloc3D' in found_functions}}cdef cudaError_t _cudaMalloc3D(cudaPitchedPtr* pitchedDevPtr, cudaExtent extent) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMalloc3DArray' in found_functions}}cdef cudaError_t _cudaMalloc3DArray(cudaArray_t* arrayPtr, const cudaChannelFormatDesc* desc, cudaExtent extent, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetErrorString' in found_functions}}cdef const char* _cudaGetErrorString(cudaError_t error) except ?NULL nogil{{endif}}
-{{if 'cudaStreamAddCallback' in found_functions}}cdef cudaError_t _cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, void* userData, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamGetCaptureInfo_v2' in found_functions}}cdef cudaError_t _cudaStreamGetCaptureInfo_v2(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, cudaGraph_t* graph_out, const cudaGraphNode_t** dependencies_out, size_t* numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaImportExternalSemaphore' in found_functions}}cdef cudaError_t _cudaImportExternalSemaphore(cudaExternalSemaphore_t* extSem_out, const cudaExternalSemaphoreHandleDesc* semHandleDesc) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaSignalExternalSemaphoresAsync_v2' in found_functions}}cdef cudaError_t _cudaSignalExternalSemaphoresAsync_v2(const cudaExternalSemaphore_t* extSemArray, const cudaExternalSemaphoreSignalParams* paramsArray, unsigned int numExtSems, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaWaitExternalSemaphoresAsync_v2' in found_functions}}cdef cudaError_t _cudaWaitExternalSemaphoresAsync_v2(const cudaExternalSemaphore_t* extSemArray, const cudaExternalSemaphoreWaitParams* paramsArray, unsigned int numExtSems, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaArrayGetInfo' in found_functions}}cdef cudaError_t _cudaArrayGetInfo(cudaChannelFormatDesc* desc, cudaExtent* extent, unsigned int* flags, cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpy2DToArray' in found_functions}}cdef cudaError_t _cudaMemcpy2DToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpy2DFromArray' in found_functions}}cdef cudaError_t _cudaMemcpy2DFromArray(void* dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpy2DArrayToArray' in found_functions}}cdef cudaError_t _cudaMemcpy2DArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpy2DToArrayAsync' in found_functions}}cdef cudaError_t _cudaMemcpy2DToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpy2DFromArrayAsync' in found_functions}}cdef cudaError_t _cudaMemcpy2DFromArrayAsync(void* dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemset3D' in found_functions}}cdef cudaError_t _cudaMemset3D(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemset3DAsync' in found_functions}}cdef cudaError_t _cudaMemset3DAsync(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpyToArray' in found_functions}}cdef cudaError_t _cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpyFromArray' in found_functions}}cdef cudaError_t _cudaMemcpyFromArray(void* dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpyToArrayAsync' in found_functions}}cdef cudaError_t _cudaMemcpyToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpyFromArrayAsync' in found_functions}}cdef cudaError_t _cudaMemcpyFromArrayAsync(void* dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaPointerGetAttributes' in found_functions}}cdef cudaError_t _cudaPointerGetAttributes(cudaPointerAttributes* attributes, const void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetDeviceFlags' in found_functions}}cdef cudaError_t _cudaGetDeviceFlags(unsigned int* flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpy3D' in found_functions}}cdef cudaError_t _cudaMemcpy3D(const cudaMemcpy3DParms* p) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpy3DAsync' in found_functions}}cdef cudaError_t _cudaMemcpy3DAsync(const cudaMemcpy3DParms* p, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPoolSetAccess' in found_functions}}cdef cudaError_t _cudaMemPoolSetAccess(cudaMemPool_t memPool, const cudaMemAccessDesc* descList, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceReset' in found_functions}}cdef cudaError_t _cudaDeviceReset() except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetLastError' in found_functions}}cdef cudaError_t _cudaGetLastError() except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaPeekAtLastError' in found_functions}}cdef cudaError_t _cudaPeekAtLastError() except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetDevice' in found_functions}}cdef cudaError_t _cudaGetDevice(int* device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaSetDevice' in found_functions}}cdef cudaError_t _cudaSetDevice(int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetDeviceProperties_v2' in found_functions}}cdef cudaError_t _cudaGetDeviceProperties_v2(cudaDeviceProp* prop, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaChooseDevice' in found_functions}}cdef cudaError_t _cudaChooseDevice(int* device, const cudaDeviceProp* prop) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpyArrayToArray' in found_functions}}cdef cudaError_t _cudaMemcpyArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetChannelDesc' in found_functions}}cdef cudaError_t _cudaGetChannelDesc(cudaChannelFormatDesc* desc, cudaArray_const_t array) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaCreateTextureObject' in found_functions}}cdef cudaError_t _cudaCreateTextureObject(cudaTextureObject_t* pTexObject, const cudaResourceDesc* pResDesc, const cudaTextureDesc* pTexDesc, const cudaResourceViewDesc* pResViewDesc) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetTextureObjectTextureDesc' in found_functions}}cdef cudaError_t _cudaGetTextureObjectTextureDesc(cudaTextureDesc* pTexDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetTextureObjectResourceViewDesc' in found_functions}}cdef cudaError_t _cudaGetTextureObjectResourceViewDesc(cudaResourceViewDesc* pResViewDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetExportTable' in found_functions}}cdef cudaError_t _cudaGetExportTable(const void** ppExportTable, const cudaUUID_t* pExportTableId) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpy3DPeer' in found_functions}}cdef cudaError_t _cudaMemcpy3DPeer(const cudaMemcpy3DPeerParms* p) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpy3DPeerAsync' in found_functions}}cdef cudaError_t _cudaMemcpy3DPeerAsync(const cudaMemcpy3DPeerParms* p, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'make_cudaPitchedPtr' in found_functions}}cdef cudaPitchedPtr _make_cudaPitchedPtr(void* d, size_t p, size_t xsz, size_t ysz) noexcept nogil{{endif}}
-{{if 'make_cudaPos' in found_functions}}cdef cudaPos _make_cudaPos(size_t x, size_t y, size_t z) noexcept nogil{{endif}}
-{{if 'make_cudaExtent' in found_functions}}cdef cudaExtent _make_cudaExtent(size_t w, size_t h, size_t d) noexcept nogil{{endif}}
-{{if 'cudaSetDeviceFlags' in found_functions}}cdef cudaError_t _cudaSetDeviceFlags(unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddMemAllocNode' in found_functions}}cdef cudaError_t _cudaGraphAddMemAllocNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaMemAllocNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphMemAllocNodeGetParams' in found_functions}}cdef cudaError_t _cudaGraphMemAllocNodeGetParams(cudaGraphNode_t node, cudaMemAllocNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphMemFreeNodeGetParams' in found_functions}}cdef cudaError_t _cudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void* dptr_out) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemAdvise' in found_functions}}cdef cudaError_t _cudaMemAdvise(const void* devPtr, size_t count, cudaMemoryAdvise advice, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemRangeGetAttribute' in found_functions}}cdef cudaError_t _cudaMemRangeGetAttribute(void* data, size_t dataSize, cudaMemRangeAttribute attribute, const void* devPtr, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemRangeGetAttributes' in found_functions}}cdef cudaError_t _cudaMemRangeGetAttributes(void** data, size_t* dataSizes, cudaMemRangeAttribute* attributes, size_t numAttributes, const void* devPtr, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetDeviceCount' in found_functions}}cdef cudaError_t _cudaGetDeviceCount(int* count) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetAttribute' in found_functions}}cdef cudaError_t _cudaDeviceGetAttribute(int* value, cudaDeviceAttr attr, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceSetSharedMemConfig' in found_functions}}cdef cudaError_t _cudaDeviceSetSharedMemConfig(cudaSharedMemConfig config) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetByPCIBusId' in found_functions}}cdef cudaError_t _cudaDeviceGetByPCIBusId(int* device, const char* pciBusId) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetPCIBusId' in found_functions}}cdef cudaError_t _cudaDeviceGetPCIBusId(char* pciBusId, int length, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetP2PAttribute' in found_functions}}cdef cudaError_t _cudaDeviceGetP2PAttribute(int* value, cudaDeviceP2PAttr attr, int srcDevice, int dstDevice) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaArrayGetSparseProperties' in found_functions}}cdef cudaError_t _cudaArrayGetSparseProperties(cudaArraySparseProperties* sparseProperties, cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMipmappedArrayGetSparseProperties' in found_functions}}cdef cudaError_t _cudaMipmappedArrayGetSparseProperties(cudaArraySparseProperties* sparseProperties, cudaMipmappedArray_t mipmap) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceCanAccessPeer' in found_functions}}cdef cudaError_t _cudaDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpyPeer' in found_functions}}cdef cudaError_t _cudaMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemcpyPeerAsync' in found_functions}}cdef cudaError_t _cudaMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, size_t count, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceEnablePeerAccess' in found_functions}}cdef cudaError_t _cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceDisablePeerAccess' in found_functions}}cdef cudaError_t _cudaDeviceDisablePeerAccess(int peerDevice) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaExternalMemoryGetMappedMipmappedArray' in found_functions}}cdef cudaError_t _cudaExternalMemoryGetMappedMipmappedArray(cudaMipmappedArray_t* mipmap, cudaExternalMemory_t extMem, const cudaExternalMemoryMipmappedArrayDesc* mipmapDesc) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetSurfaceObjectResourceDesc' in found_functions}}cdef cudaError_t _cudaGetSurfaceObjectResourceDesc(cudaResourceDesc* pResDesc, cudaSurfaceObject_t surfObject) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphKernelNodeGetParams' in found_functions}}cdef cudaError_t _cudaGraphKernelNodeGetParams(cudaGraphNode_t node, cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaExternalMemoryGetMappedBuffer' in found_functions}}cdef cudaError_t _cudaExternalMemoryGetMappedBuffer(void** devPtr, cudaExternalMemory_t extMem, const cudaExternalMemoryBufferDesc* bufferDesc) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaImportExternalMemory' in found_functions}}cdef cudaError_t _cudaImportExternalMemory(cudaExternalMemory_t* extMem_out, const cudaExternalMemoryHandleDesc* memHandleDesc) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaCreateSurfaceObject' in found_functions}}cdef cudaError_t _cudaCreateSurfaceObject(cudaSurfaceObject_t* pSurfObject, const cudaResourceDesc* pResDesc) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetTextureObjectResourceDesc' in found_functions}}cdef cudaError_t _cudaGetTextureObjectResourceDesc(cudaResourceDesc* pResDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphicsEGLRegisterImage' in found_functions}}cdef cudaError_t _cudaGraphicsEGLRegisterImage(cudaGraphicsResource_t* pCudaResource, EGLImageKHR image, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaEGLStreamProducerPresentFrame' in found_functions}}cdef cudaError_t _cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* conn, cudaEglFrame eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaEGLStreamProducerReturnFrame' in found_functions}}cdef cudaError_t _cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn, cudaEglFrame* eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphicsResourceGetMappedEglFrame' in found_functions}}cdef cudaError_t _cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaVDPAUSetVDPAUDevice(int device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaArrayGetMemoryRequirements' in found_functions}}cdef cudaError_t _cudaArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaArray_t array, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMipmappedArrayGetMemoryRequirements' in found_functions}}cdef cudaError_t _cudaMipmappedArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaMipmappedArray_t mipmap, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamGetAttribute' in found_functions}}cdef cudaError_t _cudaStreamGetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, cudaStreamAttrValue* value_out) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamSetAttribute' in found_functions}}cdef cudaError_t _cudaStreamSetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, const cudaStreamAttrValue* value) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphKernelNodeGetAttribute' in found_functions}}cdef cudaError_t _cudaGraphKernelNodeGetAttribute(cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, cudaKernelNodeAttrValue* value_out) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphKernelNodeSetAttribute' in found_functions}}cdef cudaError_t _cudaGraphKernelNodeSetAttribute(cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, const cudaKernelNodeAttrValue* value) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaVDPAUGetDevice(int* device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaGraphicsVDPAURegisterVideoSurface(cudaGraphicsResource** resource, VdpVideoSurface vdpSurface, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaGraphicsVDPAURegisterOutputSurface(cudaGraphicsResource** resource, VdpOutputSurface vdpSurface, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaGLGetDevices(unsigned int* pCudaDeviceCount, int* pCudaDevices, unsigned int cudaDeviceCount, cudaGLDeviceList deviceList) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaGraphicsGLRegisterImage(cudaGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaGraphicsGLRegisterBuffer(cudaGraphicsResource** resource, GLuint buffer, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceSynchronize' in found_functions}}cdef cudaError_t _cudaDeviceSynchronize() except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceSetLimit' in found_functions}}cdef cudaError_t _cudaDeviceSetLimit(cudaLimit limit, size_t value) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetLimit' in found_functions}}cdef cudaError_t _cudaDeviceGetLimit(size_t* pValue, cudaLimit limit) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetCacheConfig' in found_functions}}cdef cudaError_t _cudaDeviceGetCacheConfig(cudaFuncCache* pCacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetStreamPriorityRange' in found_functions}}cdef cudaError_t _cudaDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceSetCacheConfig' in found_functions}}cdef cudaError_t _cudaDeviceSetCacheConfig(cudaFuncCache cacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetSharedMemConfig' in found_functions}}cdef cudaError_t _cudaDeviceGetSharedMemConfig(cudaSharedMemConfig* pConfig) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaIpcGetEventHandle' in found_functions}}cdef cudaError_t _cudaIpcGetEventHandle(cudaIpcEventHandle_t* handle, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaIpcOpenEventHandle' in found_functions}}cdef cudaError_t _cudaIpcOpenEventHandle(cudaEvent_t* event, cudaIpcEventHandle_t handle) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaIpcGetMemHandle' in found_functions}}cdef cudaError_t _cudaIpcGetMemHandle(cudaIpcMemHandle_t* handle, void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaIpcOpenMemHandle' in found_functions}}cdef cudaError_t _cudaIpcOpenMemHandle(void** devPtr, cudaIpcMemHandle_t handle, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaIpcCloseMemHandle' in found_functions}}cdef cudaError_t _cudaIpcCloseMemHandle(void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceFlushGPUDirectRDMAWrites' in found_functions}}cdef cudaError_t _cudaDeviceFlushGPUDirectRDMAWrites(cudaFlushGPUDirectRDMAWritesTarget target, cudaFlushGPUDirectRDMAWritesScope scope) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetDefaultMemPool' in found_functions}}cdef cudaError_t _cudaDeviceGetDefaultMemPool(cudaMemPool_t* memPool, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceSetMemPool' in found_functions}}cdef cudaError_t _cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetMemPool' in found_functions}}cdef cudaError_t _cudaDeviceGetMemPool(cudaMemPool_t* memPool, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetNvSciSyncAttributes' in found_functions}}cdef cudaError_t _cudaDeviceGetNvSciSyncAttributes(void* nvSciSyncAttrList, int device, int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamCreateWithFlags' in found_functions}}cdef cudaError_t _cudaStreamCreateWithFlags(cudaStream_t* pStream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamCreateWithPriority' in found_functions}}cdef cudaError_t _cudaStreamCreateWithPriority(cudaStream_t* pStream, unsigned int flags, int priority) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamGetPriority' in found_functions}}cdef cudaError_t _cudaStreamGetPriority(cudaStream_t hStream, int* priority) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamGetFlags' in found_functions}}cdef cudaError_t _cudaStreamGetFlags(cudaStream_t hStream, unsigned int* flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaCtxResetPersistingL2Cache' in found_functions}}cdef cudaError_t _cudaCtxResetPersistingL2Cache() except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamCopyAttributes' in found_functions}}cdef cudaError_t _cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamDestroy' in found_functions}}cdef cudaError_t _cudaStreamDestroy(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamWaitEvent' in found_functions}}cdef cudaError_t _cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamSynchronize' in found_functions}}cdef cudaError_t _cudaStreamSynchronize(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamQuery' in found_functions}}cdef cudaError_t _cudaStreamQuery(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamAttachMemAsync' in found_functions}}cdef cudaError_t _cudaStreamAttachMemAsync(cudaStream_t stream, void* devPtr, size_t length, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamBeginCapture' in found_functions}}cdef cudaError_t _cudaStreamBeginCapture(cudaStream_t stream, cudaStreamCaptureMode mode) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamBeginCaptureToGraph' in found_functions}}cdef cudaError_t _cudaStreamBeginCaptureToGraph(cudaStream_t stream, cudaGraph_t graph, const cudaGraphNode_t* dependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, cudaStreamCaptureMode mode) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaThreadExchangeStreamCaptureMode' in found_functions}}cdef cudaError_t _cudaThreadExchangeStreamCaptureMode(cudaStreamCaptureMode* mode) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamEndCapture' in found_functions}}cdef cudaError_t _cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t* pGraph) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamIsCapturing' in found_functions}}cdef cudaError_t _cudaStreamIsCapturing(cudaStream_t stream, cudaStreamCaptureStatus* pCaptureStatus) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamUpdateCaptureDependencies' in found_functions}}cdef cudaError_t _cudaStreamUpdateCaptureDependencies(cudaStream_t stream, cudaGraphNode_t* dependencies, size_t numDependencies, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaEventCreateWithFlags' in found_functions}}cdef cudaError_t _cudaEventCreateWithFlags(cudaEvent_t* event, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaEventRecord' in found_functions}}cdef cudaError_t _cudaEventRecord(cudaEvent_t event, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaEventRecordWithFlags' in found_functions}}cdef cudaError_t _cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaEventSynchronize' in found_functions}}cdef cudaError_t _cudaEventSynchronize(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaEventDestroy' in found_functions}}cdef cudaError_t _cudaEventDestroy(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaEventElapsedTime' in found_functions}}cdef cudaError_t _cudaEventElapsedTime(float* ms, cudaEvent_t start, cudaEvent_t end) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDestroyExternalMemory' in found_functions}}cdef cudaError_t _cudaDestroyExternalMemory(cudaExternalMemory_t extMem) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDestroyExternalSemaphore' in found_functions}}cdef cudaError_t _cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaFuncSetCacheConfig' in found_functions}}cdef cudaError_t _cudaFuncSetCacheConfig(const void* func, cudaFuncCache cacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaFuncSetSharedMemConfig' in found_functions}}cdef cudaError_t _cudaFuncSetSharedMemConfig(const void* func, cudaSharedMemConfig config) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaFuncSetAttribute' in found_functions}}cdef cudaError_t _cudaFuncSetAttribute(const void* func, cudaFuncAttribute attr, int value) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaLaunchHostFunc' in found_functions}}cdef cudaError_t _cudaLaunchHostFunc(cudaStream_t stream, cudaHostFn_t fn, void* userData) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}cdef cudaError_t _cudaOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* func, int blockSize, size_t dynamicSMemSize) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaOccupancyAvailableDynamicSMemPerBlock' in found_functions}}cdef cudaError_t _cudaOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, const void* func, int numBlocks, int blockSize) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}cdef cudaError_t _cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, const void* func, int blockSize, size_t dynamicSMemSize, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMallocManaged' in found_functions}}cdef cudaError_t _cudaMallocManaged(void** devPtr, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMalloc' in found_functions}}cdef cudaError_t _cudaMalloc(void** devPtr, size_t size) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaFree' in found_functions}}cdef cudaError_t _cudaFree(void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaFreeHost' in found_functions}}cdef cudaError_t _cudaFreeHost(void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaFreeArray' in found_functions}}cdef cudaError_t _cudaFreeArray(cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaFreeMipmappedArray' in found_functions}}cdef cudaError_t _cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaHostAlloc' in found_functions}}cdef cudaError_t _cudaHostAlloc(void** pHost, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaHostRegister' in found_functions}}cdef cudaError_t _cudaHostRegister(void* ptr, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaHostUnregister' in found_functions}}cdef cudaError_t _cudaHostUnregister(void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaHostGetDevicePointer' in found_functions}}cdef cudaError_t _cudaHostGetDevicePointer(void** pDevice, void* pHost, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaHostGetFlags' in found_functions}}cdef cudaError_t _cudaHostGetFlags(unsigned int* pFlags, void* pHost) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetMipmappedArrayLevel' in found_functions}}cdef cudaError_t _cudaGetMipmappedArrayLevel(cudaArray_t* levelArray, cudaMipmappedArray_const_t mipmappedArray, unsigned int level) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemGetInfo' in found_functions}}cdef cudaError_t _cudaMemGetInfo(size_t* free, size_t* total) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaArrayGetPlane' in found_functions}}cdef cudaError_t _cudaArrayGetPlane(cudaArray_t* pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemset' in found_functions}}cdef cudaError_t _cudaMemset(void* devPtr, int value, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemset2D' in found_functions}}cdef cudaError_t _cudaMemset2D(void* devPtr, size_t pitch, int value, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemsetAsync' in found_functions}}cdef cudaError_t _cudaMemsetAsync(void* devPtr, int value, size_t count, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemset2DAsync' in found_functions}}cdef cudaError_t _cudaMemset2DAsync(void* devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPrefetchAsync' in found_functions}}cdef cudaError_t _cudaMemPrefetchAsync(const void* devPtr, size_t count, int dstDevice, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMallocAsync' in found_functions}}cdef cudaError_t _cudaMallocAsync(void** devPtr, size_t size, cudaStream_t hStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaFreeAsync' in found_functions}}cdef cudaError_t _cudaFreeAsync(void* devPtr, cudaStream_t hStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPoolTrimTo' in found_functions}}cdef cudaError_t _cudaMemPoolTrimTo(cudaMemPool_t memPool, size_t minBytesToKeep) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPoolSetAttribute' in found_functions}}cdef cudaError_t _cudaMemPoolSetAttribute(cudaMemPool_t memPool, cudaMemPoolAttr attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPoolGetAttribute' in found_functions}}cdef cudaError_t _cudaMemPoolGetAttribute(cudaMemPool_t memPool, cudaMemPoolAttr attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPoolGetAccess' in found_functions}}cdef cudaError_t _cudaMemPoolGetAccess(cudaMemAccessFlags* flags, cudaMemPool_t memPool, cudaMemLocation* location) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPoolCreate' in found_functions}}cdef cudaError_t _cudaMemPoolCreate(cudaMemPool_t* memPool, const cudaMemPoolProps* poolProps) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPoolDestroy' in found_functions}}cdef cudaError_t _cudaMemPoolDestroy(cudaMemPool_t memPool) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMallocFromPoolAsync' in found_functions}}cdef cudaError_t _cudaMallocFromPoolAsync(void** ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPoolExportToShareableHandle' in found_functions}}cdef cudaError_t _cudaMemPoolExportToShareableHandle(void* shareableHandle, cudaMemPool_t memPool, cudaMemAllocationHandleType handleType, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPoolImportFromShareableHandle' in found_functions}}cdef cudaError_t _cudaMemPoolImportFromShareableHandle(cudaMemPool_t* memPool, void* shareableHandle, cudaMemAllocationHandleType handleType, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPoolExportPointer' in found_functions}}cdef cudaError_t _cudaMemPoolExportPointer(cudaMemPoolPtrExportData* exportData, void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPoolImportPointer' in found_functions}}cdef cudaError_t _cudaMemPoolImportPointer(void** ptr, cudaMemPool_t memPool, cudaMemPoolPtrExportData* exportData) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphicsUnregisterResource' in found_functions}}cdef cudaError_t _cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphicsResourceSetMapFlags' in found_functions}}cdef cudaError_t _cudaGraphicsResourceSetMapFlags(cudaGraphicsResource_t resource, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphicsMapResources' in found_functions}}cdef cudaError_t _cudaGraphicsMapResources(int count, cudaGraphicsResource_t* resources, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphicsUnmapResources' in found_functions}}cdef cudaError_t _cudaGraphicsUnmapResources(int count, cudaGraphicsResource_t* resources, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphicsResourceGetMappedPointer' in found_functions}}cdef cudaError_t _cudaGraphicsResourceGetMappedPointer(void** devPtr, size_t* size, cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphicsSubResourceGetMappedArray' in found_functions}}cdef cudaError_t _cudaGraphicsSubResourceGetMappedArray(cudaArray_t* array, cudaGraphicsResource_t resource, unsigned int arrayIndex, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphicsResourceGetMappedMipmappedArray' in found_functions}}cdef cudaError_t _cudaGraphicsResourceGetMappedMipmappedArray(cudaMipmappedArray_t* mipmappedArray, cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDestroyTextureObject' in found_functions}}cdef cudaError_t _cudaDestroyTextureObject(cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDestroySurfaceObject' in found_functions}}cdef cudaError_t _cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphCreate' in found_functions}}cdef cudaError_t _cudaGraphCreate(cudaGraph_t* pGraph, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddKernelNode' in found_functions}}cdef cudaError_t _cudaGraphAddKernelNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphKernelNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphKernelNodeSetParams(cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphKernelNodeCopyAttributes' in found_functions}}cdef cudaError_t _cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphMemsetNodeGetParams' in found_functions}}cdef cudaError_t _cudaGraphMemsetNodeGetParams(cudaGraphNode_t node, cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphMemsetNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphMemsetNodeSetParams(cudaGraphNode_t node, const cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddHostNode' in found_functions}}cdef cudaError_t _cudaGraphAddHostNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphHostNodeGetParams' in found_functions}}cdef cudaError_t _cudaGraphHostNodeGetParams(cudaGraphNode_t node, cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphHostNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphHostNodeSetParams(cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddChildGraphNode' in found_functions}}cdef cudaError_t _cudaGraphAddChildGraphNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraph_t childGraph) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphChildGraphNodeGetGraph' in found_functions}}cdef cudaError_t _cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t* pGraph) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddEmptyNode' in found_functions}}cdef cudaError_t _cudaGraphAddEmptyNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddEventRecordNode' in found_functions}}cdef cudaError_t _cudaGraphAddEventRecordNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphEventRecordNodeGetEvent' in found_functions}}cdef cudaError_t _cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t* event_out) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphEventRecordNodeSetEvent' in found_functions}}cdef cudaError_t _cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddEventWaitNode' in found_functions}}cdef cudaError_t _cudaGraphAddEventWaitNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphEventWaitNodeGetEvent' in found_functions}}cdef cudaError_t _cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t* event_out) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphEventWaitNodeSetEvent' in found_functions}}cdef cudaError_t _cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddExternalSemaphoresSignalNode' in found_functions}}cdef cudaError_t _cudaGraphAddExternalSemaphoresSignalNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}cdef cudaError_t _cudaGraphExternalSemaphoresSignalNodeGetParams(cudaGraphNode_t hNode, cudaExternalSemaphoreSignalNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphExternalSemaphoresSignalNodeSetParams(cudaGraphNode_t hNode, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddExternalSemaphoresWaitNode' in found_functions}}cdef cudaError_t _cudaGraphAddExternalSemaphoresWaitNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}cdef cudaError_t _cudaGraphExternalSemaphoresWaitNodeGetParams(cudaGraphNode_t hNode, cudaExternalSemaphoreWaitNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphExternalSemaphoresWaitNodeSetParams(cudaGraphNode_t hNode, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddMemFreeNode' in found_functions}}cdef cudaError_t _cudaGraphAddMemFreeNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dptr) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGraphMemTrim' in found_functions}}cdef cudaError_t _cudaDeviceGraphMemTrim(int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceGetGraphMemAttribute' in found_functions}}cdef cudaError_t _cudaDeviceGetGraphMemAttribute(int device, cudaGraphMemAttributeType attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceSetGraphMemAttribute' in found_functions}}cdef cudaError_t _cudaDeviceSetGraphMemAttribute(int device, cudaGraphMemAttributeType attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphClone' in found_functions}}cdef cudaError_t _cudaGraphClone(cudaGraph_t* pGraphClone, cudaGraph_t originalGraph) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphNodeFindInClone' in found_functions}}cdef cudaError_t _cudaGraphNodeFindInClone(cudaGraphNode_t* pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphNodeGetType' in found_functions}}cdef cudaError_t _cudaGraphNodeGetType(cudaGraphNode_t node, cudaGraphNodeType* pType) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphGetNodes' in found_functions}}cdef cudaError_t _cudaGraphGetNodes(cudaGraph_t graph, cudaGraphNode_t* nodes, size_t* numNodes) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphGetRootNodes' in found_functions}}cdef cudaError_t _cudaGraphGetRootNodes(cudaGraph_t graph, cudaGraphNode_t* pRootNodes, size_t* pNumRootNodes) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphGetEdges' in found_functions}}cdef cudaError_t _cudaGraphGetEdges(cudaGraph_t graph, cudaGraphNode_t* from_, cudaGraphNode_t* to, size_t* numEdges) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphNodeGetDependencies' in found_functions}}cdef cudaError_t _cudaGraphNodeGetDependencies(cudaGraphNode_t node, cudaGraphNode_t* pDependencies, size_t* pNumDependencies) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphNodeGetDependentNodes' in found_functions}}cdef cudaError_t _cudaGraphNodeGetDependentNodes(cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, size_t* pNumDependentNodes) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddDependencies' in found_functions}}cdef cudaError_t _cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphRemoveDependencies' in found_functions}}cdef cudaError_t _cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphDestroyNode' in found_functions}}cdef cudaError_t _cudaGraphDestroyNode(cudaGraphNode_t node) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphInstantiate' in found_functions}}cdef cudaError_t _cudaGraphInstantiate(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, unsigned long long flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphInstantiateWithFlags' in found_functions}}cdef cudaError_t _cudaGraphInstantiateWithFlags(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, unsigned long long flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecKernelNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphExecKernelNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecHostNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecChildGraphNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphExecChildGraphNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecEventRecordNodeSetEvent' in found_functions}}cdef cudaError_t _cudaGraphExecEventRecordNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecEventWaitNodeSetEvent' in found_functions}}cdef cudaError_t _cudaGraphExecEventWaitNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphExecExternalSemaphoresSignalNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphExecExternalSemaphoresWaitNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphNodeSetEnabled' in found_functions}}cdef cudaError_t _cudaGraphNodeSetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphNodeGetEnabled' in found_functions}}cdef cudaError_t _cudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int* isEnabled) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecUpdate' in found_functions}}cdef cudaError_t _cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, cudaGraphExecUpdateResultInfo* resultInfo) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphUpload' in found_functions}}cdef cudaError_t _cudaGraphUpload(cudaGraphExec_t graphExec, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphLaunch' in found_functions}}cdef cudaError_t _cudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecDestroy' in found_functions}}cdef cudaError_t _cudaGraphExecDestroy(cudaGraphExec_t graphExec) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphDestroy' in found_functions}}cdef cudaError_t _cudaGraphDestroy(cudaGraph_t graph) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphDebugDotPrint' in found_functions}}cdef cudaError_t _cudaGraphDebugDotPrint(cudaGraph_t graph, const char* path, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaUserObjectCreate' in found_functions}}cdef cudaError_t _cudaUserObjectCreate(cudaUserObject_t* object_out, void* ptr, cudaHostFn_t destroy, unsigned int initialRefcount, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaUserObjectRetain' in found_functions}}cdef cudaError_t _cudaUserObjectRetain(cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaUserObjectRelease' in found_functions}}cdef cudaError_t _cudaUserObjectRelease(cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphRetainUserObject' in found_functions}}cdef cudaError_t _cudaGraphRetainUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphReleaseUserObject' in found_functions}}cdef cudaError_t _cudaGraphReleaseUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaProfilerStart() except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaProfilerStop() except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaGraphicsEGLRegisterImage(cudaGraphicsResource_t* pCudaResource, EGLImageKHR image, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaEGLStreamConsumerConnect(cudaEglStreamConnection* conn, EGLStreamKHR eglStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaEGLStreamConsumerConnectWithFlags(cudaEglStreamConnection* conn, EGLStreamKHR eglStream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaEGLStreamConsumerDisconnect(cudaEglStreamConnection* conn) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaEGLStreamConsumerAcquireFrame(cudaEglStreamConnection* conn, cudaGraphicsResource_t* pCudaResource, cudaStream_t* pStream, unsigned int timeout) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaEGLStreamConsumerReleaseFrame(cudaEglStreamConnection* conn, cudaGraphicsResource_t pCudaResource, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaEGLStreamProducerConnect(cudaEglStreamConnection* conn, EGLStreamKHR eglStream, EGLint width, EGLint height) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaEGLStreamProducerDisconnect(cudaEglStreamConnection* conn) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _cudaEventCreateFromEGLSync(cudaEvent_t* phEvent, EGLSyncKHR eglSync, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaInitDevice' in found_functions}}cdef cudaError_t _cudaInitDevice(int device, unsigned int deviceFlags, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamGetId' in found_functions}}cdef cudaError_t _cudaStreamGetId(cudaStream_t hStream, unsigned long long* streamId) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphInstantiateWithParams' in found_functions}}cdef cudaError_t _cudaGraphInstantiateWithParams(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, cudaGraphInstantiateParams* instantiateParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecGetFlags' in found_functions}}cdef cudaError_t _cudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long long* flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetKernel' in found_functions}}cdef cudaError_t _cudaGetKernel(cudaKernel_t* kernelPtr, const void* entryFuncAddr) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddNode' in found_functions}}cdef cudaError_t _cudaGraphAddNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphNodeSetParams(cudaGraphNode_t node, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphExecNodeSetParams' in found_functions}}cdef cudaError_t _cudaGraphExecNodeSetParams(cudaGraphExec_t graphExec, cudaGraphNode_t node, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemPrefetchAsync_v2' in found_functions}}cdef cudaError_t _cudaMemPrefetchAsync_v2(const void* devPtr, size_t count, cudaMemLocation location, unsigned int flags, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaMemAdvise_v2' in found_functions}}cdef cudaError_t _cudaMemAdvise_v2(const void* devPtr, size_t count, cudaMemoryAdvise advice, cudaMemLocation location) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphConditionalHandleCreate' in found_functions}}cdef cudaError_t _cudaGraphConditionalHandleCreate(cudaGraphConditionalHandle* pHandle_out, cudaGraph_t graph, unsigned int defaultLaunchValue, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamGetCaptureInfo_v3' in found_functions}}cdef cudaError_t _cudaStreamGetCaptureInfo_v3(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, cudaGraph_t* graph_out, const cudaGraphNode_t** dependencies_out, const cudaGraphEdgeData** edgeData_out, size_t* numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaStreamUpdateCaptureDependencies_v2' in found_functions}}cdef cudaError_t _cudaStreamUpdateCaptureDependencies_v2(cudaStream_t stream, cudaGraphNode_t* dependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphGetEdges_v2' in found_functions}}cdef cudaError_t _cudaGraphGetEdges_v2(cudaGraph_t graph, cudaGraphNode_t* from_, cudaGraphNode_t* to, cudaGraphEdgeData* edgeData, size_t* numEdges) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphNodeGetDependencies_v2' in found_functions}}cdef cudaError_t _cudaGraphNodeGetDependencies_v2(cudaGraphNode_t node, cudaGraphNode_t* pDependencies, cudaGraphEdgeData* edgeData, size_t* pNumDependencies) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphNodeGetDependentNodes_v2' in found_functions}}cdef cudaError_t _cudaGraphNodeGetDependentNodes_v2(cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, cudaGraphEdgeData* edgeData, size_t* pNumDependentNodes) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddDependencies_v2' in found_functions}}cdef cudaError_t _cudaGraphAddDependencies_v2(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, const cudaGraphEdgeData* edgeData, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphRemoveDependencies_v2' in found_functions}}cdef cudaError_t _cudaGraphRemoveDependencies_v2(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, const cudaGraphEdgeData* edgeData, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGraphAddNode_v2' in found_functions}}cdef cudaError_t _cudaGraphAddNode_v2(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if True}}cdef cudaError_t _getLocalRuntimeVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceRegisterAsyncNotification' in found_functions}}cdef cudaError_t _cudaDeviceRegisterAsyncNotification(int device, cudaAsyncCallback callbackFunc, void* userData, cudaAsyncCallbackHandle_t* callback) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaDeviceUnregisterAsyncNotification' in found_functions}}cdef cudaError_t _cudaDeviceUnregisterAsyncNotification(int device, cudaAsyncCallbackHandle_t callback) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
-{{if 'cudaGetDriverEntryPointByVersion' in found_functions}}cdef cudaError_t _cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
diff --git a/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in b/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in
deleted file mode 100644
index 7fb09820..00000000
--- a/cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in
+++ /dev/null
@@ -1,4930 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from cuda.bindings.cyruntime cimport *
-from cuda.bindings._lib.cyruntime.utils cimport *
-from libc.stdlib cimport malloc, free, calloc
-from libc.string cimport memset, memcpy, strncmp
-from libcpp cimport bool
-cimport cuda.bindings._bindings.cydriver as cydriver
-
-cdef cudaPythonGlobal m_global = globalGetInstance()
-
-{{if 'cudaMemcpy' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy(void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = memcpyDispatch(dst, src, count, kind)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaStreamCreate' in found_functions}}
-
-cdef cudaError_t _cudaStreamCreate(cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamCreate(pStream, 0)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaEventCreate' in found_functions}}
-
-cdef cudaError_t _cudaEventCreate(cudaEvent_t* event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEventCreate(event, cydriver.CUevent_flags_enum.CU_EVENT_DEFAULT)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaEventQuery' in found_functions}}
-
-cdef cudaError_t _cudaEventQuery(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = <cudaError_t>cydriver._cuEventQuery(<cydriver.CUevent>event)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaCreateChannelDesc' in found_functions}}
-
-cdef cudaChannelFormatDesc _cudaCreateChannelDesc(int x, int y, int z, int w, cudaChannelFormatKind f) noexcept nogil:
-    cdef cudaChannelFormatDesc desc
-    desc.x = x
-    desc.y = y
-    desc.z = z
-    desc.w = w
-    desc.f = f
-    return desc
-
-
-{{endif}}
-{{if 'cudaDriverGetVersion' in found_functions}}
-
-cdef cudaError_t _cudaDriverGetVersion(int* driverVersion) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = <cudaError_t>cydriver._cuDriverGetVersion(driverVersion)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaRuntimeGetVersion' in found_functions}}
-
-cdef cudaError_t _cudaRuntimeGetVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    runtimeVersion[0] = m_global._CUDART_VERSION
-    return cudaSuccess
-
-
-{{endif}}
-{{if 'cudaDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetTexture1DLinearMaxWidth(size_t* maxWidthInElements, const cudaChannelFormatDesc* fmtDesc, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if fmtDesc == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUarray_format fmt
-    cdef int numChannels = 0
-
-    err = m_global.lazyInitDriver()
-    if err != cudaSuccess:
-        return err
-    err = getDescInfo(fmtDesc, &numChannels, &fmt)
-    if err == cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuDeviceGetTexture1DLinearMaxWidth(maxWidthInElements, fmt, <unsigned>numChannels, device)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaMallocHost' in found_functions}}
-
-cdef cudaError_t _cudaMallocHost(void** ptr, size_t size) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if ptr == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = mallocHost(size, ptr, 0)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaMallocPitch' in found_functions}}
-
-cdef cudaError_t _cudaMallocPitch(void** devPtr, size_t* pitch, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if devPtr == NULL or pitch == NULL:
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = mallocPitch(width, height, 1, devPtr, pitch)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaMallocMipmappedArray' in found_functions}}
-
-cdef cudaError_t _cudaMallocMipmappedArray(cudaMipmappedArray_t* mipmappedArray, const cudaChannelFormatDesc* desc, cudaExtent extent, unsigned int numLevels, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if mipmappedArray == NULL or desc == NULL:
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = mallocMipmappedArray(mipmappedArray, desc, extent.depth, extent.height, extent.width, numLevels, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaMemcpy2D' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = memcpy2DPtr(<char*>dst, dpitch, <const char*>src, spitch, width, height, kind, NULL, False)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaMemcpy2DAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = memcpy2DPtr(<char*>dst, dpitch, <const char*>src, spitch, width, height, kind, stream, True)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaMemcpyAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemcpyAsync(void* dst, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = memcpyAsyncDispatch(dst, src, count, kind, stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGraphAddMemcpyNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddMemcpyNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemcpy3DParms* pCopyParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cydriver.CUcontext context
-    cdef cydriver.CUDA_MEMCPY3D_v2 driverNodeParams
-    cdef cudaError_t err = cudaSuccess
-
-    if pCopyParams == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-
-    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = toDriverMemCopy3DParams(pCopyParams, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = <cudaError_t>cydriver._cuGraphAddMemcpyNode(pGraphNode, graph, pDependencies, numDependencies, &driverNodeParams, context)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGraphAddMemcpyNode1D' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddMemcpyNode1D(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cydriver.CUcontext context
-    cdef cydriver.CUDA_MEMCPY3D_v2 driverNodeParams
-    cdef cudaMemcpy3DParms copyParams
-    cdef cudaError_t err = cudaSuccess
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-
-    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    copy1DConvertTo3DParams(dst, src, count, kind, &copyParams)
-
-    err = toDriverMemCopy3DParams(&copyParams, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = <cudaError_t>cydriver._cuGraphAddMemcpyNode(pGraphNode, graph, pDependencies, numDependencies, &driverNodeParams, context)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGraphMemcpyNodeSetParams1D' in found_functions}}
-
-cdef cudaError_t _cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cydriver.CUDA_MEMCPY3D_v2 driverNodeParams
-    cdef cudaMemcpy3DParms copyParams
-    cdef cudaError_t err = cudaSuccess
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    copy1DConvertTo3DParams(dst, src, count, kind, &copyParams)
-
-    err = toDriverMemCopy3DParams(&copyParams, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = <cudaError_t>cydriver._cuGraphMemcpyNodeSetParams(node, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGraphExecMemcpyNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecMemcpyNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cydriver.CUcontext context
-    cdef cydriver.CUDA_MEMCPY3D_v2 driverNodeParams
-    cdef cudaError_t err = cudaSuccess
-
-    if pNodeParams == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-
-    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = toDriverMemCopy3DParams(pNodeParams, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = <cudaError_t>cydriver._cuGraphExecMemcpyNodeSetParams(hGraphExec, node, &driverNodeParams, context)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGraphExecMemcpyNodeSetParams1D' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecMemcpyNodeSetParams1D(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cydriver.CUcontext context
-    cdef cydriver.CUDA_MEMCPY3D_v2 driverNodeParams
-    cdef cudaMemcpy3DParms copyParams
-    cdef cudaError_t err = cudaSuccess
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-
-    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    copy1DConvertTo3DParams(dst, src, count, kind, &copyParams)
-
-    err = toDriverMemCopy3DParams(&copyParams, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = <cudaError_t>cydriver._cuGraphExecMemcpyNodeSetParams(hGraphExec, node, &driverNodeParams, context)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGetDriverEntryPoint' in found_functions}}
-
-cdef cudaError_t _cudaGetDriverEntryPoint(const char* symbol, void** funcPtr, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = <cudaError_t>cydriver._cuGetProcAddress_v2(symbol, funcPtr, m_global._CUDART_VERSION, flags, <cydriver.CUdriverProcAddressQueryResult*>driverStatus)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGraphAddMemsetNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddMemsetNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemsetParams* pMemsetParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cydriver.CUcontext context
-    cdef cydriver.CUDA_MEMSET_NODE_PARAMS driverParams
-    cdef cudaError_t err = cudaSuccess
-
-    if pMemsetParams == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-
-    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    toDriverMemsetNodeParams(pMemsetParams, &driverParams)
-
-    err = <cudaError_t>cydriver._cuGraphAddMemsetNode(pGraphNode, graph, pDependencies, numDependencies, &driverParams, context)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGraphExecMemsetNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecMemsetNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cydriver.CUcontext context
-    cdef cydriver.CUDA_MEMSET_NODE_PARAMS driverParams
-    cdef cudaError_t err = cudaSuccess
-
-    if pNodeParams == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-
-    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    toDriverMemsetNodeParams(pNodeParams, &driverParams)
-
-    err = <cudaError_t>cydriver._cuGraphExecMemsetNodeSetParams(hGraphExec, node, &driverParams, context)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGraphMemcpyNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphMemcpyNodeSetParams(cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cydriver.CUDA_MEMCPY3D_v2 driverNodeParams
-    cdef cudaError_t err = cudaSuccess
-
-    if pNodeParams == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = toDriverMemCopy3DParams(pNodeParams, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuGraphMemcpyNodeSetParams(node, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGraphMemcpyNodeGetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphMemcpyNodeGetParams(cudaGraphNode_t node, cudaMemcpy3DParms* p) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cydriver.CUDA_MEMCPY3D_v2 driverNodeParams
-
-    if p == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-    
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphMemcpyNodeGetParams(node, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = toCudartMemCopy3DParams(&driverNodeParams, p)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaFuncGetAttributes' in found_functions}}
-
-cdef cudaError_t _cudaFuncGetAttributes(cudaFuncAttributes* attr, const void* func) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    if NULL == attr:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-    cdef int bytes = 0
-    memset(attr, 0, sizeof(cudaFuncAttributes))
-    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].maxThreadsPerBlock,     cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, <cydriver.CUfunction>func)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].numRegs,                cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_NUM_REGS, <cydriver.CUfunction>func)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].ptxVersion,             cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_PTX_VERSION, <cydriver.CUfunction>func)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].binaryVersion,          cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_BINARY_VERSION, <cydriver.CUfunction>func)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuFuncGetAttribute(&bytes,                          cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, <cydriver.CUfunction>func)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    attr[0].sharedSizeBytes = <size_t>bytes
-    err = <cudaError_t>cydriver._cuFuncGetAttribute(&bytes,                          cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, <cydriver.CUfunction>func)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    attr[0].constSizeBytes = <size_t>bytes
-    err = <cudaError_t>cydriver._cuFuncGetAttribute(&bytes,                          cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, <cydriver.CUfunction>func)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    attr[0].localSizeBytes = <size_t>bytes
-    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].cacheModeCA,            cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_CACHE_MODE_CA, <cydriver.CUfunction>func)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuFuncGetAttribute(&bytes,                          cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, <cydriver.CUfunction>func)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].preferredShmemCarveout, cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, <cydriver.CUfunction>func)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    attr[0].maxDynamicSharedSizeBytes = <size_t>bytes
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaMallocArray' in found_functions}}
-
-cdef cudaError_t _cudaMallocArray(cudaArray_t* arrayPtr, const cudaChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if arrayPtr == NULL or desc == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = mallocArray(arrayPtr, desc, 0, height, width, 0, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaMalloc3D' in found_functions}}
-
-cdef cudaError_t _cudaMalloc3D(cudaPitchedPtr* pitchedDevPtr, cudaExtent extent) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if pitchedDevPtr == NULL:
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = mallocPitch(extent.width, extent.height, extent.depth, &pitchedDevPtr[0].ptr, &pitchedDevPtr[0].pitch)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    pitchedDevPtr[0].xsize = extent.width
-    pitchedDevPtr[0].ysize = extent.height
-    return err
-
-
-{{endif}}
-{{if 'cudaMalloc3DArray' in found_functions}}
-
-cdef cudaError_t _cudaMalloc3DArray(cudaArray_t* arrayPtr, const cudaChannelFormatDesc* desc, cudaExtent extent, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if arrayPtr == NULL or desc == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = mallocArray(arrayPtr, desc, extent.depth, extent.height, extent.width, 0, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGetErrorString' in found_functions}}
-
-cdef const char* _cudaGetErrorString(cudaError_t error) except ?NULL nogil:
-    cdef const char* pStr = NULL
-    cdef cudaError_t err = cudaSuccess
-
-    err = <cudaError_t>cydriver._cuGetErrorString(<cydriver.CUresult>error, &pStr)
-    if err != cudaSuccess:
-        _setLastError(err)
-    if err == <cudaError_t>cudaErrorInvalidValue:
-        pStr = "unrecognized error code"
-    return pStr
-
-
-{{endif}}
-{{if 'cudaStreamAddCallback' in found_functions}}
-
-cdef cudaError_t _cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, void* userData, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = streamAddCallbackCommon(stream, callback, userData, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaStreamGetCaptureInfo_v2' in found_functions}}
-
-cdef cudaError_t _cudaStreamGetCaptureInfo_v2(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, cudaGraph_t* graph_out, const cudaGraphNode_t** dependencies_out, size_t* numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = streamGetCaptureInfoCommon(stream, captureStatus_out, id_out, graph_out, dependencies_out, numDependencies_out)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaImportExternalSemaphore' in found_functions}}
-
-cdef cudaError_t _cudaImportExternalSemaphore(cudaExternalSemaphore_t* extSem_out, const cudaExternalSemaphoreHandleDesc* semHandleDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC driverSemHandleDesc
-
-    if semHandleDesc == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    memset(&driverSemHandleDesc, 0, sizeof(driverSemHandleDesc))
-
-    if semHandleDesc.type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueFd:
-        driverSemHandleDesc.type =  cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD
-        driverSemHandleDesc.handle.fd = semHandleDesc.handle.fd
-    elif semHandleDesc.type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32:
-        driverSemHandleDesc.type =  cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32
-        driverSemHandleDesc.handle.win32.handle = semHandleDesc.handle.win32.handle
-        driverSemHandleDesc.handle.win32.name = semHandleDesc.handle.win32.name
-    elif semHandleDesc.type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt:
-        driverSemHandleDesc.type =  cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT
-        driverSemHandleDesc.handle.win32.handle = semHandleDesc.handle.win32.handle
-        driverSemHandleDesc.handle.win32.name = semHandleDesc.handle.win32.name
-    elif semHandleDesc.type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D12Fence:
-        driverSemHandleDesc.type =  cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE
-        driverSemHandleDesc.handle.win32.handle = semHandleDesc.handle.win32.handle
-        driverSemHandleDesc.handle.win32.name = semHandleDesc.handle.win32.name
-    elif semHandleDesc.type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D11Fence:
-        driverSemHandleDesc.type =  cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE
-        driverSemHandleDesc.handle.win32.handle = semHandleDesc.handle.win32.handle
-        driverSemHandleDesc.handle.win32.name = semHandleDesc.handle.win32.name
-    elif semHandleDesc.type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeNvSciSync:
-        driverSemHandleDesc.type =  cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC
-        driverSemHandleDesc.handle.nvSciSyncObj = semHandleDesc.handle.nvSciSyncObj
-    elif semHandleDesc.type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutex:
-        driverSemHandleDesc.type =  cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX
-        driverSemHandleDesc.handle.win32.handle = semHandleDesc.handle.win32.handle
-        driverSemHandleDesc.handle.win32.name = semHandleDesc.handle.win32.name
-    elif semHandleDesc.type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutexKmt:
-        driverSemHandleDesc.type =  cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT
-        driverSemHandleDesc.handle.win32.handle = semHandleDesc.handle.win32.handle
-        driverSemHandleDesc.handle.win32.name = semHandleDesc.handle.win32.name
-    elif semHandleDesc.type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd:
-        driverSemHandleDesc.type =  cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD
-        driverSemHandleDesc.handle.fd = semHandleDesc.handle.fd
-    elif semHandleDesc.type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32:
-        driverSemHandleDesc.type =  cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32
-        driverSemHandleDesc.handle.win32.handle = semHandleDesc.handle.win32.handle
-        driverSemHandleDesc.handle.win32.name = semHandleDesc.handle.win32.name
-    driverSemHandleDesc.flags = semHandleDesc.flags
-
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuImportExternalSemaphore(<cydriver.CUexternalSemaphore *>extSem_out, &driverSemHandleDesc)
-    if err != <cudaError_t>cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaSignalExternalSemaphoresAsync_v2' in found_functions}}
-
-cdef cudaError_t _cudaSignalExternalSemaphoresAsync_v2(const cudaExternalSemaphore_t* extSemArray, const cudaExternalSemaphoreSignalParams* paramsArray, unsigned int numExtSems, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuSignalExternalSemaphoresAsync(<const cydriver.CUexternalSemaphore *>extSemArray, <cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *>paramsArray, numExtSems, stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    return cudaSuccess
-
-
-{{endif}}
-{{if 'cudaWaitExternalSemaphoresAsync_v2' in found_functions}}
-
-cdef cudaError_t _cudaWaitExternalSemaphoresAsync_v2(const cudaExternalSemaphore_t* extSemArray, const cudaExternalSemaphoreWaitParams* paramsArray, unsigned int numExtSems, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuWaitExternalSemaphoresAsync(<const cydriver.CUexternalSemaphore *>extSemArray, <cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *>paramsArray, numExtSems, stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    return cudaSuccess
-
-
-{{endif}}
-{{if 'cudaArrayGetInfo' in found_functions}}
-
-cdef cudaError_t _cudaArrayGetInfo(cudaChannelFormatDesc* desc, cudaExtent* extent, unsigned int* flags, cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR_v2 driverDesc
-    cdef size_t width  = 0
-    cdef size_t height = 0
-    cdef size_t depth  = 0
-
-    # Zero out parameters in case cuArray3DGetDescriptor fails
-    if flags:
-        flags[0] = 0
-
-    if desc:
-        memset(desc, 0, sizeof(desc[0]))
-
-
-    if extent:
-        memset(extent, 0, sizeof(extent[0]))
-
-    err = <cudaError_t>cydriver._cuArray3DGetDescriptor_v2(&driverDesc, <cydriver.CUarray>array)
-    if err != <cudaError_t>cudaSuccess:
-        _setLastError(err)
-        return err
-
-    # Flags are copied directly from the driver API
-    if flags:
-        flags[0] = driverDesc.Flags
-
-    # Convert from driver API types to runtime API types. extent.Depth = 0
-    # indicates a 2D array.
-    if desc:
-        width  = 0
-        height = 0
-        depth  = 0
-
-        err = getChannelFormatDescFromDriverDesc(desc, &depth, &height, &width, &driverDesc)
-        if err != <cudaError_t>cudaSuccess:
-            _setLastError(err)
-            return err
-
-    if extent:
-        extent.width  = driverDesc.Width
-        extent.height = driverDesc.Height
-        extent.depth  = driverDesc.Depth
-
-    return cudaSuccess
-
-
-{{endif}}
-{{if 'cudaMemcpy2DToArray' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy2DToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # 2D copy into a CUDA array, delegated to memcpy2DToArray with a NULL
-    # stream and the trailing flag set to False. Note that the helper is
-    # called with hOffset before wOffset.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memcpy2DToArray(dst, hOffset, wOffset, <const char*>src, spitch, width, height, kind, NULL, False)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemcpy2DFromArray' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy2DFromArray(void* dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # 2D copy out of a CUDA array, delegated to memcpy2DFromArray with a NULL
-    # stream and the trailing flag set to False. The helper is called with
-    # hOffset before wOffset.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memcpy2DFromArray(<char*>dst, dpitch, src, hOffset, wOffset, width, height, kind, NULL, False)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemcpy2DArrayToArray' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy2DArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # 2D array-to-array copy, delegated to memcpy2DArrayToArray. For both the
-    # destination and the source the helper receives the h offset before the
-    # w offset.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memcpy2DArrayToArray(dst, hOffsetDst, wOffsetDst, src, hOffsetSrc, wOffsetSrc, width, height, kind)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemcpy2DToArrayAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy2DToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Stream variant of the 2D copy into a CUDA array: memcpy2DToArray is
-    # called with the caller's stream and the trailing flag set to True. The
-    # helper is called with hOffset before wOffset.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memcpy2DToArray(dst, hOffset, wOffset, <const char*>src, spitch, width, height, kind, stream, True)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemcpy2DFromArrayAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy2DFromArrayAsync(void* dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Stream variant of the 2D copy out of a CUDA array: memcpy2DFromArray is
-    # called with the caller's stream and the trailing flag set to True. The
-    # helper is called with hOffset before wOffset.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memcpy2DFromArray(<char*>dst, dpitch, src, hOffset, wOffset, width, height, kind, stream, True)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemset3D' in found_functions}}
-
-cdef cudaError_t _cudaMemset3D(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # 3D memset, delegated to memset3DPtr with a NULL stream and the trailing
-    # flag set to False.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memset3DPtr(pitchedDevPtr, value, extent, NULL, False)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemset3DAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemset3DAsync(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Stream variant of the 3D memset: memset3DPtr is called with the caller's
-    # stream and the trailing flag set to True.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memset3DPtr(pitchedDevPtr, value, extent, stream, True)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemcpyToArray' in found_functions}}
-
-cdef cudaError_t _cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Linear copy into a CUDA array, delegated to memcpyToArray with a NULL
-    # stream and the trailing flag set to False. The helper is called with
-    # hOffset before wOffset.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memcpyToArray(dst, hOffset, wOffset, <const char*>src, count, kind, NULL, False)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemcpyFromArray' in found_functions}}
-
-cdef cudaError_t _cudaMemcpyFromArray(void* dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Linear copy out of a CUDA array, delegated to memcpyFromArray with a
-    # NULL stream and a trailing argument of 0. The helper is called with
-    # hOffset before wOffset.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memcpyFromArray(<char*>dst, src, hOffset, wOffset, count, kind, NULL, 0)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemcpyToArrayAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemcpyToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Stream variant of the linear copy into a CUDA array: memcpyToArray is
-    # called with the caller's stream and the trailing flag set to True. The
-    # helper is called with hOffset before wOffset.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memcpyToArray(dst, hOffset, wOffset, <const char*>src, count, kind, stream, True)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemcpyFromArrayAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemcpyFromArrayAsync(void* dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Stream variant of the linear copy out of a CUDA array: memcpyFromArray
-    # is called with the caller's stream and the trailing flag set to True.
-    # The helper is called with hOffset before wOffset.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memcpyFromArray(<char*>dst, src, hOffset, wOffset, count, kind, stream, True)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaPointerGetAttributes' in found_functions}}
-
-cdef cudaError_t _cudaPointerGetAttributes(cudaPointerAttributes* attributes, const void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Fill `attributes` for `ptr` using a single cuPointerGetAttributes query.
-    #
-    # Returns cudaErrorInvalidValue when `attributes` is NULL or when the
-    # driver reports a memory type this function does not map. On any failure
-    # after the NULL check, `attributes` is zeroed and its device is set to -1.
-    cdef cudaError_t status = cudaSuccess
-    cdef cudaPointerAttributes result
-    cdef cydriver.CUcontext ownerContext = NULL
-    cdef cydriver.CUmemorytype memType
-    cdef int managedFlag = 0
-    cdef cydriver.CUpointer_attribute[6] wanted
-
-    if attributes == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    # Attribute ids to query; the order must match `outputs` below.
-    wanted[0] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_CONTEXT
-    wanted[1] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MEMORY_TYPE
-    wanted[2] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_POINTER
-    wanted[3] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_HOST_POINTER
-    wanted[4] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_MANAGED
-    wanted[5] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL
-
-    memset(&result, 0, sizeof(result))
-    memset(&memType, 0, sizeof(memType))
-
-    # Destination for each queried attribute, index-aligned with `wanted`.
-    cdef void** outputs = [
-        &ownerContext,
-        &memType,
-        &result.devicePointer,
-        &result.hostPointer,
-        &managedFlag,
-        &result.device
-    ]
-
-    status = <cudaError_t>cydriver._cuPointerGetAttributes(<unsigned int>(sizeof(wanted)/sizeof(wanted[0])), wanted, outputs, <cydriver.CUdeviceptr_v2>ptr)
-    if status != cudaSuccess:
-        memset(attributes, 0, sizeof(attributes[0]))
-        attributes[0].device = -1
-        _setLastError(status)
-        return status
-
-    # Map the driver memory type (plus the managed flag) onto the runtime enum.
-    if memType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST or memType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE:
-        if managedFlag:
-            result.type = cudaMemoryTypeManaged
-        elif memType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST:
-            result.type = cudaMemoryTypeHost
-        else:
-            result.type = cudaMemoryTypeDevice
-    elif memType == 0:
-        # The memory type was left at its zeroed value.
-        result.type = cudaMemoryTypeUnregistered
-    else:
-        memset(attributes, 0, sizeof(attributes[0]))
-        attributes[0].device = -1
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    # Publish the assembled result to the caller's structure.
-    attributes[0] = result
-    return cudaSuccess
-
-
-{{endif}}
-{{if 'cudaGetDeviceFlags' in found_functions}}
-
-cdef cudaError_t _cudaGetDeviceFlags(unsigned int* flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Report device flags through `flags`.
-    # With a current context, the flags come from cuCtxGetFlags. Without one,
-    # the primary-context flags of device 0 are used, OR-ed with
-    # cudaDeviceMapHost. A NULL `flags` yields cudaErrorInvalidValue.
-    cdef cudaError_t status = m_global.lazyInitDriver()
-    if status != cudaSuccess:
-        return status
-
-    if flags == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cydriver.CUcontext currentCtx
-    status = <cudaError_t>cydriver._cuCtxGetCurrent(&currentCtx)
-    if status != cudaSuccess:
-        _setLastError(status)
-        return status
-
-    if currentCtx != NULL:
-        # A context is current: its flags are the answer.
-        status = <cudaError_t>cydriver._cuCtxGetFlags(flags)
-        if status != cudaSuccess:
-            _setLastError(status)
-        return status
-
-    # No current context: fall back to the first device's primary context state.
-    cdef unsigned int primaryFlags
-    cdef int primaryActive
-    cdef cudaPythonDevice* firstDevice = m_global.getDevice(0)
-    status = <cudaError_t>cydriver._cuDevicePrimaryCtxGetState(firstDevice[0].driverDevice, &primaryFlags, &primaryActive)
-    if status != cudaSuccess:
-        _setLastError(status)
-        return status
-
-    flags[0] = primaryFlags | cudaDeviceMapHost
-    return cudaSuccess
-
-
-{{endif}}
-{{if 'cudaMemcpy3D' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy3D(const cudaMemcpy3DParms* p) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # 3D copy described by `p`, delegated to memcpy3D with the peer flag
-    # False, device arguments 0, a NULL stream and the trailing flag False.
-    #
-    # Returns cudaErrorInvalidValue when `p` is NULL. That error is stored
-    # with _setLastError, matching the NULL handling in _cudaMemcpy3DPeer.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # memcpy3D failures are stored with _setLastError.
-    if p == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = memcpy3D(p, False, 0, 0, NULL, False)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaMemcpy3DAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy3DAsync(const cudaMemcpy3DParms* p, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Stream variant of the 3D copy described by `p`: memcpy3D is called with
-    # the peer flag False, device arguments 0, the caller's stream and the
-    # trailing flag True.
-    #
-    # Returns cudaErrorInvalidValue when `p` is NULL. That error is stored
-    # with _setLastError, matching the NULL handling in _cudaMemcpy3DPeerAsync.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # memcpy3D failures are stored with _setLastError.
-    if p == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = memcpy3D(p, False, 0, 0, stream, True)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaMemPoolSetAccess' in found_functions}}
-
-cdef cudaError_t _cudaMemPoolSetAccess(cudaMemPool_t memPool, const cudaMemAccessDesc* descList, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Convert `count` runtime access descriptors into driver descriptors and
-    # apply them to `memPool` via cuMemPoolSetAccess.
-    #
-    # Up to MAX_DEVICES descriptors are converted in a stack buffer; larger
-    # lists use a heap buffer that is released on every exit path, including
-    # when the driver call fails.
-    #
-    # Returns cudaErrorInvalidValue when `descList` is NULL but `count` is
-    # non-zero, cudaErrorMemoryAllocation when the heap buffer cannot be
-    # allocated, otherwise the result of the driver call.
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitDriver()
-    if err != cudaSuccess:
-        return err
-    cdef size_t MAX_DEVICES = 32
-    cdef cydriver.CUmemAccessDesc localList[32]
-    cdef cydriver.CUmemAccessDesc *cuDescList
-    cdef bint heapAllocated = count > MAX_DEVICES
-    cdef size_t i = 0
-
-    # The conversion loop below reads descList[i]; reject a NULL list early.
-    if descList == NULL and count > 0:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    if heapAllocated:
-        # calloc takes (element count, element size).
-        cuDescList = <cydriver.CUmemAccessDesc*>calloc(count, sizeof(cydriver.CUmemAccessDesc))
-        if cuDescList == NULL:
-            _setLastError(cudaErrorMemoryAllocation)
-            return cudaErrorMemoryAllocation
-    else:
-        cuDescList = localList
-
-    while i < count:
-        cuDescList[i].location.type = <cydriver.CUmemLocationType>descList[i].location.type
-        cuDescList[i].location.id = descList[i].location.id
-        cuDescList[i].flags = <cydriver.CUmemAccess_flags>descList[i].flags
-        i += 1
-
-    err = <cudaError_t>cydriver._cuMemPoolSetAccess(memPool, cuDescList, count)
-
-    # Release the temporary buffer before reporting the result so that a
-    # failing driver call does not leak it.
-    if heapAllocated:
-        free(cuDescList)
-
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    return cudaSuccess
-
-
-{{endif}}
-{{if 'cudaDeviceReset' in found_functions}}
-
-cdef cudaError_t _cudaDeviceReset() except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Reset the primary context of the device whose primary context is
-    # current on the calling thread.
-    #
-    # Returns cudaSuccess without doing anything when m_global._lazyInitDriver
-    # is not set, or when the current context does not map to a known device
-    # (getDeviceFromPrimaryCtx returns NULL). Driver and reset failures are
-    # stored with _setLastError and returned.
-    cdef cudaError_t err = cudaSuccess
-    if not m_global._lazyInitDriver:
-        return cudaSuccess
-
-    cdef cydriver.CUcontext context
-    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    cdef cudaPythonDevice* device
-    device = m_global.getDeviceFromPrimaryCtx(context)
-    if device != NULL:
-        err = resetPrimaryContext(device)
-        if err != cudaSuccess:
-            _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGetLastError' in found_functions}}
-
-cdef cudaError_t _cudaGetLastError() except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Return the error stored in m_global._lastError and reset that slot to
-    # cudaSuccess, so a second call reports cudaSuccess unless a new error was
-    # stored in between.
-    cdef cudaError_t pending
-    pending = m_global._lastError
-    m_global._lastError = cudaSuccess
-    return pending
-
-
-{{endif}}
-{{if 'cudaPeekAtLastError' in found_functions}}
-
-cdef cudaError_t _cudaPeekAtLastError() except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Return the error stored in m_global._lastError without resetting it.
-    cdef cudaError_t pending = m_global._lastError
-    return pending
-
-
-{{endif}}
-{{if 'cudaGetDevice' in found_functions}}
-
-cdef cudaError_t _cudaGetDevice(int* deviceOrdinal) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Write the ordinal of the device behind the current context to
-    # `deviceOrdinal`.
-    #
-    # When the driver reports cudaErrorDeviceUninitialized, ordinal 0 is
-    # reported and the call succeeds. A NULL `deviceOrdinal` yields
-    # cudaErrorInvalidValue, and a driver device with no matching entry
-    # yields cudaErrorInvalidDevice. Any other driver failure is stored with
-    # _setLastError and returned.
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUdevice driverDevice = 0
-
-    err = m_global.lazyInitDriver()
-    if err != cudaSuccess:
-        return err
-
-    if deviceOrdinal == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaPythonDevice *cudaDevice
-    err = <cudaError_t>cydriver._cuCtxGetDevice(&driverDevice)
-    if err == cudaSuccess:
-        cudaDevice = m_global.getDeviceFromDriver(driverDevice)
-        if cudaDevice == NULL:
-            # Guard the dereference below, as the sibling lookups do.
-            _setLastError(cudaErrorInvalidDevice)
-            return cudaErrorInvalidDevice
-        deviceOrdinal[0] = cudaDevice[0].deviceOrdinal
-    elif err == cudaErrorDeviceUninitialized:
-        # Like the C Runtime, default to the first device and let context
-        # creation happen in another call. By default, device 0 would be
-        # initialized.
-        deviceOrdinal[0] = 0
-        err = cudaSuccess
-    else:
-        # Record unexpected driver failures like every other entry point does.
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaSetDevice' in found_functions}}
-
-cdef cudaError_t _cudaSetDevice(int deviceOrdinal) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Make the primary context of device `deviceOrdinal` current on the
-    # calling thread, initializing that primary context first if the device
-    # entry does not have one yet.
-    #
-    # Returns cudaErrorInvalidDevice (also stored as the last error) when the
-    # ordinal does not map to a known device. A cuCtxSetCurrent failure is
-    # stored with _setLastError and returned.
-    cdef cudaError_t err = cudaSuccess
-    cdef cudaPythonDevice *device
-
-    err = m_global.lazyInitDriver()
-    if err != cudaSuccess:
-        return err
-
-    device = m_global.getDevice(deviceOrdinal)
-    if device == NULL:
-        # `err` is still cudaSuccess here, so record the error actually returned.
-        _setLastError(cudaErrorInvalidDevice)
-        return cudaErrorInvalidDevice
-
-    if device.primaryContext == NULL:
-        # NOTE(review): the result of initPrimaryContext is not inspected;
-        # confirm whether it can fail and should be propagated.
-        initPrimaryContext(device)
-
-    err = <cudaError_t>cydriver._cuCtxSetCurrent(device.primaryContext)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaGetDeviceProperties_v2' in found_functions}}
-
-cdef cudaError_t _cudaGetDeviceProperties_v2(cudaDeviceProp* prop, int deviceOrdinal) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Copy the cached properties of device `deviceOrdinal` into `prop`.
-    #
-    # Five attributes are re-queried from the driver on every call and
-    # written into the cached structure before it is copied out:
-    # kernelExecTimeoutEnabled, computeMode, clockRate, memoryClockRate and
-    # singleToDoublePrecisionPerfRatio.
-    #
-    # Returns cudaErrorInvalidValue when `prop` is NULL and
-    # cudaErrorInvalidDevice when the ordinal does not map to a known device;
-    # both are stored as the last error. Driver failures are stored with
-    # _setLastError and returned.
-    cdef cudaError_t err = cudaSuccess
-    cdef cudaPythonDevice* device
-    err = m_global.lazyInitDriver()
-    if err != cudaSuccess:
-        return err
-
-    if prop == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    device = m_global.getDevice(deviceOrdinal)
-    if device == NULL:
-        # `err` is still cudaSuccess here, so record the error actually returned.
-        _setLastError(cudaErrorInvalidDevice)
-        return cudaErrorInvalidDevice
-
-    err = <cudaError_t>cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.kernelExecTimeoutEnabled),  cydriver.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, device[0].driverDevice)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = <cudaError_t>cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.computeMode),  cydriver.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, device[0].driverDevice)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = <cudaError_t>cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.clockRate), cydriver.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device[0].driverDevice)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = <cudaError_t>cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.memoryClockRate), cydriver.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device[0].driverDevice)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = <cudaError_t>cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.singleToDoublePrecisionPerfRatio), cydriver.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, device[0].driverDevice)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    # Hand the refreshed cached structure to the caller by value.
-    prop[0] = device[0].deviceProperties
-
-    return cudaSuccess
-
-
-{{endif}}
-{{if 'cudaChooseDevice' in found_functions}}
-
-cdef cudaError_t _cudaChooseDevice(int* device, const cudaDeviceProp* prop) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Pick the device whose cached properties best match `prop` and write its
-    # ordinal to `device`.
-    #
-    # Each device earns one point per satisfied criterion; a criterion left at
-    # its "don't care" value (empty name, major/minor of -1, totalGlobalMem of
-    # 0) is skipped. The first device with the highest score wins. If
-    # m_global._numDevices is 0 the reported ordinal is -1.
-    # A NULL `device` or `prop` yields cudaErrorInvalidValue.
-    if prop == NULL or device == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t status = m_global.lazyInitDriver()
-    if status != cudaSuccess:
-        return status
-
-    # "Don't care" sentinels for each criterion.
-    cdef char* anyName = [b'\0']
-    cdef int anyMajor = -1
-    cdef int anyMinor = -1
-    cdef size_t anyTotalGlobalMem = 0
-
-    cdef int bestOrdinal = -1
-    cdef int bestScore = -1
-    cdef int score = 0
-    cdef int ordinal = 0
-    cdef cudaDeviceProp *candidate
-
-    for ordinal in range(m_global._numDevices):
-        candidate = &m_global._deviceList[ordinal].deviceProperties
-        score = 0
-
-        # Name: exact match required.
-        if strncmp(prop[0].name, anyName, sizeof(prop[0].name)) != 0:
-            if strncmp(prop[0].name, candidate[0].name, sizeof(prop[0].name)) == 0:
-                score += 1
-
-        # Major version: the candidate must be at least the requested one.
-        if prop[0].major != anyMajor:
-            if prop[0].major <= candidate[0].major:
-                score += 1
-
-        # Minor version: only compared when the major versions are equal.
-        if prop[0].major == candidate[0].major and prop[0].minor != anyMinor:
-            if prop[0].minor <= candidate[0].minor:
-                score += 1
-
-        # Global memory: the candidate must offer at least the requested amount.
-        if prop[0].totalGlobalMem != anyTotalGlobalMem:
-            if prop[0].totalGlobalMem <= candidate[0].totalGlobalMem:
-                score += 1
-
-        # Strict comparison keeps the earliest device on ties.
-        if score > bestScore:
-            bestScore = score
-            bestOrdinal = ordinal
-
-    device[0] = bestOrdinal
-    return cudaSuccess
-
-
-{{endif}}
-{{if 'cudaMemcpyArrayToArray' in found_functions}}
-
-cdef cudaError_t _cudaMemcpyArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Linear array-to-array copy, delegated to memcpyArrayToArray. For both
-    # the destination and the source the helper receives the h offset before
-    # the w offset.
-    # Lazy-init failures are returned without touching the last-error slot;
-    # helper failures are stored with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = memcpyArrayToArray(dst, hOffsetDst, wOffsetDst, src, hOffsetSrc, wOffsetSrc, count, kind)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaGetChannelDesc' in found_functions}}
-
-cdef cudaError_t _cudaGetChannelDesc(cudaChannelFormatDesc* desc, cudaArray_const_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Fetch the channel format descriptor of `array` into `desc` via
-    # getChannelDesc. A NULL `desc` is rejected with cudaErrorInvalidValue
-    # before any initialization is attempted.
-    if desc == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = getChannelDesc(array, desc)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaCreateTextureObject' in found_functions}}
-
-cdef cudaError_t _cudaCreateTextureObject(cudaTextureObject_t* pTexObject, const cudaResourceDesc* pResDesc, const cudaTextureDesc* pTexDesc, const cudaResourceViewDesc* pResViewDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Create a texture object from runtime descriptors by converting them to
-    # their driver counterparts and calling cuTexObjectCreate.
-    #
-    # `pResDesc` and `pTexDesc` are mandatory (cudaErrorInvalidValue if NULL).
-    # `pResViewDesc` is optional; when it is NULL no resource view descriptor
-    # is converted or passed to the driver.
-    if pTexDesc == NULL or pResDesc == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t status = cudaSuccess
-    cdef cydriver.CUDA_RESOURCE_DESC driverResDesc
-    cdef cydriver.CUDA_TEXTURE_DESC driverTexDesc
-    cdef cydriver.CUDA_RESOURCE_VIEW_DESC driverViewDesc
-    cdef cydriver.CUDA_RESOURCE_VIEW_DESC* driverViewDescPtr = NULL
-    cdef cudaTextureDesc localTexDesc
-
-    # Work on a private copy of the texture descriptor with seamlessCubemap
-    # forced to 0; the caller's structure is left untouched.
-    memcpy(&localTexDesc, pTexDesc, sizeof(cudaTextureDesc))
-    localTexDesc.seamlessCubemap = 0
-
-    # Only hand out the view-descriptor storage when the caller supplied one.
-    if pResViewDesc != NULL:
-        driverViewDescPtr = &driverViewDesc
-
-    status = m_global.lazyInitContextState()
-    if status != cudaSuccess:
-        return status
-
-    status = getDriverResDescFromResDesc(&driverResDesc, pResDesc, &driverTexDesc, &localTexDesc, driverViewDescPtr, pResViewDesc)
-    if status != cudaSuccess:
-        _setLastError(status)
-        return status
-
-    status = <cudaError_t>cydriver._cuTexObjectCreate(pTexObject, &driverResDesc, &driverTexDesc, driverViewDescPtr)
-    if status != cudaSuccess:
-        _setLastError(status)
-    return status
-
-{{endif}}
-{{if 'cudaGetTextureObjectTextureDesc' in found_functions}}
-
-cdef cudaError_t _cudaGetTextureObjectTextureDesc(cudaTextureDesc* pTexDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Retrieve the texture descriptor of `texObject` into `pTexDesc`.
-    # Both the driver resource descriptor and the driver texture descriptor
-    # are fetched, converted through getResDescFromDriverResDesc, and the
-    # resulting runtime texture descriptor is copied to the caller.
-    cdef cudaResourceDesc runtimeResDesc
-    cdef cudaTextureDesc runtimeTexDesc
-    cdef cydriver.CUDA_RESOURCE_DESC driverResDesc
-    cdef cydriver.CUDA_TEXTURE_DESC driverTexDesc
-
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status != cudaSuccess:
-        return status
-
-    status = <cudaError_t>cydriver._cuTexObjectGetResourceDesc(&driverResDesc, texObject)
-    if status == cudaSuccess:
-        status = <cudaError_t>cydriver._cuTexObjectGetTextureDesc(&driverTexDesc, texObject)
-    if status == cudaSuccess:
-        status = getResDescFromDriverResDesc(&runtimeResDesc, &driverResDesc, &runtimeTexDesc, &driverTexDesc, NULL, NULL)
-    if status != cudaSuccess:
-        # Whichever step failed first ends the chain; record and report it.
-        _setLastError(status)
-        return status
-
-    memcpy(pTexDesc, &runtimeTexDesc, sizeof(cudaTextureDesc))
-    return cudaSuccess
-
-{{endif}}
-{{if 'cudaGetTextureObjectResourceViewDesc' in found_functions}}
-
-cdef cudaError_t _cudaGetTextureObjectResourceViewDesc(cudaResourceViewDesc* pResViewDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Retrieve the resource view descriptor of `texObject` into
-    # `pResViewDesc`. The driver resource and resource-view descriptors are
-    # fetched and converted by getResDescFromDriverResDesc, which writes the
-    # runtime view descriptor directly into the caller's structure.
-    #
-    # NOTE(review): the two driver calls below use the un-prefixed
-    # cydriver.cuTexObject* names, whereas _cudaGetTextureObjectTextureDesc
-    # calls cydriver._cuTexObjectGetResourceDesc -- confirm this is intended.
-    cdef cudaResourceDesc runtimeResDesc
-    cdef cydriver.CUDA_RESOURCE_DESC driverResDesc
-    cdef cydriver.CUDA_RESOURCE_VIEW_DESC driverViewDesc
-
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status != cudaSuccess:
-        return status
-
-    status =  <cudaError_t>cydriver.cuTexObjectGetResourceDesc(&driverResDesc, texObject)
-    if status == cudaSuccess:
-        status =  <cudaError_t>cydriver.cuTexObjectGetResourceViewDesc(&driverViewDesc, texObject)
-    if status == cudaSuccess:
-        status = getResDescFromDriverResDesc(&runtimeResDesc, &driverResDesc, NULL, NULL, pResViewDesc, &driverViewDesc)
-    if status != cudaSuccess:
-        # Whichever step failed first ends the chain; record and report it.
-        _setLastError(status)
-        return status
-
-    return cudaSuccess
-
-
-{{endif}}
-{{if 'cudaGetExportTable' in found_functions}}
-
-cdef cudaError_t _cudaGetExportTable(const void** ppExportTable, const cudaUUID_t* pExportTableId) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Runtime-level shim over cydriver._cuGetExportTable; the table id is
-    # reinterpreted as a driver CUuuid pointer.
-    # A failure of m_global.lazyInitDriver() is returned unchanged; a failure
-    # of the driver call is stored with _setLastError and returned.
-    cdef cudaError_t status = m_global.lazyInitDriver()
-    if status == cudaSuccess:
-        status = <cudaError_t>cydriver._cuGetExportTable(ppExportTable, <const cydriver.CUuuid*>pExportTableId)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemcpy3DPeer' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy3DPeer(const cudaMemcpy3DPeerParms* p) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Peer 3D copy: the peer parameters are repackaged into a zeroed
-    # cudaMemcpy3DParms with kind cudaMemcpyDeviceToDevice, then passed to
-    # memcpy3D with the peer flag True, the source/destination devices from
-    # `p`, a NULL stream and the trailing flag False.
-    # A NULL `p` yields cudaErrorInvalidValue.
-    if p == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaMemcpy3DParms copyParams
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status != cudaSuccess:
-        return status
-
-    memset(&copyParams, 0, sizeof(copyParams))
-    # Source side.
-    copyParams.srcArray = p[0].srcArray
-    copyParams.srcPos = p[0].srcPos
-    copyParams.srcPtr = p[0].srcPtr
-    # Destination side.
-    copyParams.dstArray = p[0].dstArray
-    copyParams.dstPos = p[0].dstPos
-    copyParams.dstPtr = p[0].dstPtr
-    # Shape and direction.
-    copyParams.extent = p[0].extent
-    copyParams.kind = cudaMemcpyKind.cudaMemcpyDeviceToDevice
-
-    status = memcpy3D(&copyParams, True, p[0].srcDevice, p[0].dstDevice, NULL, False)
-    if status != cudaSuccess:
-        _setLastError(status)
-    return status
-
-{{endif}}
-{{if 'cudaMemcpy3DPeerAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemcpy3DPeerAsync(const cudaMemcpy3DPeerParms* p, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Stream variant of the peer 3D copy: the peer parameters are repackaged
-    # into a zeroed cudaMemcpy3DParms with kind cudaMemcpyDeviceToDevice, then
-    # passed to memcpy3D with the peer flag True, the source/destination
-    # devices from `p`, the caller's stream and the trailing flag True.
-    # A NULL `p` yields cudaErrorInvalidValue.
-    if p == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaMemcpy3DParms copyParams
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status != cudaSuccess:
-        return status
-
-    memset(&copyParams, 0, sizeof(copyParams))
-    # Source side.
-    copyParams.srcArray = p[0].srcArray
-    copyParams.srcPos = p[0].srcPos
-    copyParams.srcPtr = p[0].srcPtr
-    # Destination side.
-    copyParams.dstArray = p[0].dstArray
-    copyParams.dstPos = p[0].dstPos
-    copyParams.dstPtr = p[0].dstPtr
-    # Shape and direction.
-    copyParams.extent = p[0].extent
-    copyParams.kind = cudaMemcpyKind.cudaMemcpyDeviceToDevice
-
-    status = memcpy3D(&copyParams, True, p[0].srcDevice, p[0].dstDevice, stream, True)
-    if status != cudaSuccess:
-        _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'make_cudaPitchedPtr' in found_functions}}
-
-cdef cudaPitchedPtr _make_cudaPitchedPtr(void* d, size_t p, size_t xsz, size_t ysz) noexcept nogil:
-    # Build a cudaPitchedPtr by value: `d` -> ptr, `p` -> pitch,
-    # `xsz` -> xsize, `ysz` -> ysize.
-    cdef cudaPitchedPtr pitched
-    pitched.xsize = xsz
-    pitched.ysize = ysz
-    pitched.pitch = p
-    pitched.ptr   = d
-    return pitched
-
-
-{{endif}}
-{{if 'make_cudaPos' in found_functions}}
-
-cdef cudaPos _make_cudaPos(size_t x, size_t y, size_t z) noexcept nogil:
-    # Build a cudaPos by value from its three coordinates.
-    cdef cudaPos position
-    position.z = z
-    position.y = y
-    position.x = x
-    return position
-
-
-{{endif}}
-{{if 'make_cudaExtent' in found_functions}}
-
-cdef cudaExtent _make_cudaExtent(size_t w, size_t h, size_t d) noexcept nogil:
-    # Build a cudaExtent by value: `w` -> width, `h` -> height, `d` -> depth.
-    cdef cudaExtent dims
-    dims.depth  = d
-    dims.height = h
-    dims.width  = w
-    return dims
-
-
-{{endif}}
-{{if 'cudaSetDeviceFlags' in found_functions}}
-
-cdef cudaError_t _cudaSetDeviceFlags(unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Apply `flags` to the primary context of the device whose primary context
-    # is current, via cuDevicePrimaryCtxSetFlags.
-    #
-    # cudaDeviceMapHost is stripped before validation. The remaining flags
-    # must lie within cudaDeviceMask, and the scheduling bits, when non-zero,
-    # must be exactly one of Spin, Yield or BlockingSync; otherwise
-    # cudaErrorInvalidValue is returned. If the current context is not a known
-    # primary context, cudaErrorIncompatibleDriverContext is returned (without
-    # being stored as the last error).
-    cdef cudaError_t status = cudaSuccess
-    cdef cydriver.CUcontext currentCtx
-    cdef cudaPythonDevice* owner
-
-    flags = flags & ~cudaDeviceMapHost
-    if (flags & ~cudaDeviceMask) != 0:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef unsigned int scheduling = flags & cudaDeviceScheduleMask
-    if scheduling != 0:
-        if not (scheduling == cudaDeviceScheduleSpin or
-                scheduling == cudaDeviceScheduleYield or
-                scheduling == cudaDeviceScheduleBlockingSync):
-            _setLastError(cudaErrorInvalidValue)
-            return cudaErrorInvalidValue
-
-    status = m_global.lazyInitContextState()
-    if status != cudaSuccess:
-        return status
-
-    status = <cudaError_t>cydriver._cuCtxGetCurrent(&currentCtx)
-    if status != cudaSuccess:
-        _setLastError(status)
-        return status
-
-    owner = m_global.getDeviceFromPrimaryCtx(currentCtx)
-    if owner == NULL:
-        # It is unknown whether the current context is a primary one;
-        # cudaSetDevice may need to be called before retrying this call.
-        return cudaErrorIncompatibleDriverContext
-
-    status = <cudaError_t>cydriver._cuDevicePrimaryCtxSetFlags_v2(owner[0].driverDevice, flags)
-    if status != cudaSuccess:
-        _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaGraphAddMemAllocNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddMemAllocNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaMemAllocNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Runtime-level shim over cydriver._cuGraphAddMemAllocNode; `nodeParams`
-    # is reinterpreted as the driver CUDA_MEM_ALLOC_NODE_PARAMS structure.
-    # A NULL `nodeParams` yields cudaErrorInvalidValue. Lazy-init failures are
-    # returned unchanged; driver failures are stored with _setLastError.
-    if nodeParams == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = <cudaError_t>cydriver._cuGraphAddMemAllocNode(pGraphNode, graph, pDependencies, numDependencies, <cydriver.CUDA_MEM_ALLOC_NODE_PARAMS *>nodeParams)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaGraphMemAllocNodeGetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphMemAllocNodeGetParams(cudaGraphNode_t node, cudaMemAllocNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Runtime-level shim over cydriver._cuGraphMemAllocNodeGetParams;
-    # `params_out` is reinterpreted as the driver CUDA_MEM_ALLOC_NODE_PARAMS.
-    # A NULL `params_out` yields cudaErrorInvalidValue. Lazy-init failures are
-    # returned unchanged; driver failures are stored with _setLastError.
-    if params_out == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = <cudaError_t>cydriver._cuGraphMemAllocNodeGetParams(node, <cydriver.CUDA_MEM_ALLOC_NODE_PARAMS *>params_out)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaGraphMemFreeNodeGetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void* dptr_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Runtime-level shim over cydriver._cuGraphMemFreeNodeGetParams;
-    # `dptr_out` is reinterpreted as a pointer to a driver CUdeviceptr.
-    # A NULL `dptr_out` yields cudaErrorInvalidValue. Lazy-init failures are
-    # returned unchanged; driver failures are stored with _setLastError.
-    if dptr_out == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status == cudaSuccess:
-        status = <cudaError_t>cydriver._cuGraphMemFreeNodeGetParams(node, <cydriver.CUdeviceptr *>dptr_out)
-        if status != cudaSuccess:
-            _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemAdvise' in found_functions}}
-
-cdef cudaError_t _cudaMemAdvise(const void* devPtr, size_t count, cudaMemoryAdvise advice, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Runtime-level shim over cydriver._cuMemAdvise; the pointer, advice and
-    # device arguments are cast to their driver types.
-    # Lazy-init failures are returned unchanged; driver failures are stored
-    # with _setLastError.
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status != cudaSuccess:
-        return status
-
-    status = <cudaError_t>cydriver._cuMemAdvise(<cydriver.CUdeviceptr>devPtr, count, <cydriver.CUmem_advise>advice, <cydriver.CUdevice>device)
-    if status != cudaSuccess:
-        _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemAdvise_v2' in found_functions}}
-
-cdef cudaError_t _cudaMemAdvise_v2(const void* devPtr, size_t count, cudaMemoryAdvise advice, cudaMemLocation location) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Runtime-level shim over cydriver._cuMemAdvise_v2. The runtime
-    # `location` is converted field by field (type, id) into a driver
-    # CUmemLocation before the call.
-    # Lazy-init failures are returned unchanged; driver failures are stored
-    # with _setLastError.
-    cdef cydriver.CUmemLocation driverLocation
-    cdef cudaError_t status = m_global.lazyInitContextState()
-    if status != cudaSuccess:
-        return status
-
-    driverLocation.id = location.id
-    driverLocation.type = <cydriver.CUmemLocationType>location.type
-
-    status = <cudaError_t>cydriver._cuMemAdvise_v2(<cydriver.CUdeviceptr>devPtr, count, <cydriver.CUmem_advise>advice, driverLocation)
-    if status != cudaSuccess:
-        _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemRangeGetAttribute' in found_functions}}
-
-cdef cudaError_t _cudaMemRangeGetAttribute(void* data, size_t dataSize, cudaMemRangeAttribute attribute, const void* devPtr, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Runtime-level shim over cydriver._cuMemRangeGetAttribute. No lazy
-    # initialization is performed here; driver failures are stored with
-    # _setLastError and returned.
-    cdef cudaError_t status = <cudaError_t>cydriver._cuMemRangeGetAttribute(data, dataSize, <cydriver.CUmem_range_attribute>attribute, <cydriver.CUdeviceptr>devPtr, count)
-    if status == cudaSuccess:
-        return status
-    _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaMemRangeGetAttributes' in found_functions}}
-
-cdef cudaError_t _cudaMemRangeGetAttributes(void** data, size_t* dataSizes, cudaMemRangeAttribute* attributes, size_t numAttributes, const void* devPtr, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Runtime-level shim over cydriver._cuMemRangeGetAttributes. No lazy
-    # initialization is performed here; driver failures are stored with
-    # _setLastError and returned.
-    cdef cudaError_t status = <cudaError_t>cydriver._cuMemRangeGetAttributes(data, dataSizes, <cydriver.CUmem_range_attribute*>attributes, numAttributes, <cydriver.CUdeviceptr>devPtr, count)
-    if status == cudaSuccess:
-        return status
-    _setLastError(status)
-    return status
-
-
-{{endif}}
-{{if 'cudaGetDeviceCount' in found_functions}}
-
-cdef cudaError_t _cudaGetDeviceCount(int* count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitDriver()
-    if err != cudaSuccess:
-        return err
-    count[0] = m_global._numDevices
-    return cudaSuccess
-
-
-{{endif}}
-{{if 'cudaDeviceGetAttribute' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetAttribute(int* value, cudaDeviceAttr attr, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = <cudaError_t>cydriver._cuDeviceGetAttribute(value, <cydriver.CUdevice_attribute>attr, <cydriver.CUdevice>device)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaDeviceSetSharedMemConfig' in found_functions}}
-
-cdef cudaError_t _cudaDeviceSetSharedMemConfig(cudaSharedMemConfig config) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuCtxSetSharedMemConfig(<cydriver.CUsharedconfig>config)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaDeviceGetByPCIBusId' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetByPCIBusId(int* device, const char* pciBusId) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = <cudaError_t>cydriver._cuDeviceGetByPCIBusId(<cydriver.CUdevice*>device, pciBusId)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaDeviceGetPCIBusId' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetPCIBusId(char* pciBusId, int length, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = <cudaError_t>cydriver._cuDeviceGetPCIBusId(pciBusId, length, <cydriver.CUdevice>device)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaDeviceGetP2PAttribute' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetP2PAttribute(int* value, cudaDeviceP2PAttr attr, int srcDevice, int dstDevice) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = <cudaError_t>cydriver._cuDeviceGetP2PAttribute(value, <cydriver.CUdevice_P2PAttribute>attr, <cydriver.CUdevice>srcDevice, <cydriver.CUdevice>dstDevice)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaArrayGetSparseProperties' in found_functions}}
-
-cdef cudaError_t _cudaArrayGetSparseProperties(cudaArraySparseProperties* sparseProperties, cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUDA_ARRAY_SPARSE_PROPERTIES _driver_sparseProperties
-    if not sparseProperties:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaError.cudaErrorInvalidValue
-    memset(sparseProperties, 0, sizeof(cudaArraySparseProperties))
-
-    err = <cudaError_t>cydriver._cuArrayGetSparseProperties(&_driver_sparseProperties, <cydriver.CUarray>array)
-    if err == cudaSuccess:
-        sparseProperties[0].miptailFirstLevel = _driver_sparseProperties.miptailFirstLevel
-        sparseProperties[0].miptailSize       = _driver_sparseProperties.miptailSize
-        sparseProperties[0].flags             = _driver_sparseProperties.flags
-        sparseProperties[0].tileExtent.width  = _driver_sparseProperties.tileExtent.width
-        sparseProperties[0].tileExtent.height = _driver_sparseProperties.tileExtent.height
-        sparseProperties[0].tileExtent.depth  = _driver_sparseProperties.tileExtent.depth
-
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaMipmappedArrayGetSparseProperties' in found_functions}}
-
-cdef cudaError_t _cudaMipmappedArrayGetSparseProperties(cudaArraySparseProperties* sparseProperties, cudaMipmappedArray_t mipmap) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUDA_ARRAY_SPARSE_PROPERTIES _driver_sparseProperties
-    if not sparseProperties:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaError.cudaErrorInvalidValue
-    memset(sparseProperties, 0, sizeof(cudaArraySparseProperties))
-
-    err = <cudaError_t>cydriver._cuMipmappedArrayGetSparseProperties(&_driver_sparseProperties, <cydriver.CUmipmappedArray>mipmap)
-    if err == cudaSuccess:
-        sparseProperties[0].miptailFirstLevel = _driver_sparseProperties.miptailFirstLevel
-        sparseProperties[0].miptailSize       = _driver_sparseProperties.miptailSize
-        sparseProperties[0].flags             = _driver_sparseProperties.flags
-        sparseProperties[0].tileExtent.width  = _driver_sparseProperties.tileExtent.width
-        sparseProperties[0].tileExtent.height = _driver_sparseProperties.tileExtent.height
-        sparseProperties[0].tileExtent.depth  = _driver_sparseProperties.tileExtent.depth
-
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaDeviceCanAccessPeer' in found_functions}}
-
-cdef cudaError_t _cudaDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitDriver()
-    if err != cudaSuccess:
-        return err
-    cdef cudaPythonDevice *driverDevice
-    cdef cudaPythonDevice *driverPeerDevice
-    driverDevice = m_global.getDevice(device)
-    driverPeerDevice = m_global.getDevice(peerDevice)
-    if driverDevice == NULL or driverPeerDevice == NULL:
-        return cudaErrorInvalidDevice
-
-    err = <cudaError_t>cydriver._cuDeviceCanAccessPeer(canAccessPeer, driverDevice.driverDevice, driverPeerDevice.driverDevice)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    if device == peerDevice:
-        canAccessPeer[0] = 0
-    return err
-
-
-{{endif}}
-{{if 'cudaMemcpyPeer' in found_functions}}
-
-cdef cudaError_t _cudaMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cudaPythonDevice *device
-    cdef cudaPythonDevice *peerDevice
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    if count > 0:
-        peerDevice = m_global.getDevice(dstDevice)
-        device = m_global.getDevice(srcDevice)
-        if device == NULL or peerDevice == NULL:
-            _setLastError(err)
-            return cudaErrorInvalidDevice
-        err = initPrimaryContext(peerDevice)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-        err = initPrimaryContext(device)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-        err = <cudaError_t>cydriver._cuMemcpyPeer(<cydriver.CUdeviceptr>dst, peerDevice[0].primaryContext, <cydriver.CUdeviceptr>src, device[0].primaryContext, count)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-    return err
-
-
-{{endif}}
-{{if 'cudaMemcpyPeerAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, size_t count, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cudaPythonDevice *device
-    cdef cudaPythonDevice *peerDevice
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    if count > 0:
-        peerDevice = m_global.getDevice(dstDevice)
-        device = m_global.getDevice(srcDevice)
-        if device == NULL or peerDevice == NULL:
-            _setLastError(err)
-            return cudaErrorInvalidDevice
-        err = initPrimaryContext(peerDevice)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-        err = initPrimaryContext(device)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-        err = <cudaError_t>cydriver._cuMemcpyPeerAsync(<cydriver.CUdeviceptr>dst, peerDevice[0].primaryContext, <cydriver.CUdeviceptr>src, device[0].primaryContext, count, <cydriver.CUstream>stream)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-    return err
-
-
-{{endif}}
-{{if 'cudaDeviceEnablePeerAccess' in found_functions}}
-
-cdef cudaError_t _cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUcontext context
-    cdef cudaPythonDevice *dev
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    dev = m_global.getDeviceFromPrimaryCtx(context)
-    if dev == NULL:
-        # We don't know if context provided is primary or not
-        # cudaSetDevice may need to be called before retrying call
-        _setLastError(cudaErrorIncompatibleDriverContext)
-        return cudaErrorIncompatibleDriverContext
-    dev = m_global.getDevice(peerDevice)
-    if dev == NULL:
-        return cudaErrorInvalidDevice
-    err = initPrimaryContext(dev)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuCtxEnablePeerAccess(dev.primaryContext, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-
-{{endif}}
-{{if 'cudaDeviceDisablePeerAccess' in found_functions}}
-
-cdef cudaError_t _cudaDeviceDisablePeerAccess(int peerDevice) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUcontext context
-    cdef cudaPythonDevice *dev
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    dev = m_global.getDeviceFromPrimaryCtx(context)
-    if dev == NULL:
-        # We don't know if context provided is primary or not
-        # cudaSetDevice may need to be called before retrying call
-        _setLastError(cudaErrorIncompatibleDriverContext)
-        return cudaErrorIncompatibleDriverContext
-    dev = m_global.getDevice(peerDevice)
-    if dev == NULL:
-        return cudaErrorInvalidDevice
-    err = initPrimaryContext(dev)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuCtxDisablePeerAccess(dev.primaryContext)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaExternalMemoryGetMappedMipmappedArray' in found_functions}}
-
-cdef cudaError_t _cudaExternalMemoryGetMappedMipmappedArray(cudaMipmappedArray_t* mipmap, cudaExternalMemory_t extMem, const cudaExternalMemoryMipmappedArrayDesc* mipmapDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC _driver_mipmapDesc
-    memset(&_driver_mipmapDesc, 0, sizeof(_driver_mipmapDesc))
-    _driver_mipmapDesc.offset = mipmapDesc[0].offset
-    _driver_mipmapDesc.arrayDesc.Width = mipmapDesc[0].extent.width
-    _driver_mipmapDesc.arrayDesc.Height = mipmapDesc[0].extent.height
-    _driver_mipmapDesc.arrayDesc.Depth = mipmapDesc[0].extent.depth
-    err_rt = getDescInfo(&mipmapDesc[0].formatDesc, <int *>&_driver_mipmapDesc.arrayDesc.NumChannels, &_driver_mipmapDesc.arrayDesc.Format)
-    if err_rt != cudaError.cudaSuccess:
-        _setLastError(err_rt)
-        return err_rt
-    _driver_mipmapDesc.arrayDesc.Flags = mipmapDesc[0].flags
-    _driver_mipmapDesc.numLevels = mipmapDesc[0].numLevels
-
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuExternalMemoryGetMappedMipmappedArray(<cydriver.CUmipmappedArray*>mipmap, <cydriver.CUexternalMemory>extMem, &_driver_mipmapDesc)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGetSurfaceObjectResourceDesc' in found_functions}}
-
-cdef cudaError_t _cudaGetSurfaceObjectResourceDesc(cudaResourceDesc* pResDesc, cudaSurfaceObject_t surfObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    cdef cydriver.CUDA_RESOURCE_DESC _driver_pResDesc
-
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuSurfObjectGetResourceDesc(&_driver_pResDesc, <cydriver.CUsurfObject>surfObject)
-    memset(pResDesc, 0, sizeof(cudaResourceDesc))
-    if _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_ARRAY:
-        pResDesc[0].resType         = cudaResourceType.cudaResourceTypeArray
-        pResDesc[0].res.array.array = <cudaArray_t>_driver_pResDesc.res.array.hArray
-    elif _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_MIPMAPPED_ARRAY:
-        pResDesc[0].resType = cudaResourceType.cudaResourceTypeMipmappedArray
-        pResDesc[0].res.mipmap.mipmap = <cudaMipmappedArray_t>_driver_pResDesc.res.mipmap.hMipmappedArray
-    elif _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_LINEAR:
-        pResDesc[0].resType                = cudaResourceType.cudaResourceTypeLinear
-        pResDesc[0].res.linear.devPtr      = <void *>_driver_pResDesc.res.linear.devPtr
-        pResDesc[0].res.linear.sizeInBytes = _driver_pResDesc.res.linear.sizeInBytes
-    elif _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_PITCH2D:
-        pResDesc[0].resType                  = cudaResourceType.cudaResourceTypePitch2D
-        pResDesc[0].res.pitch2D.devPtr       = <void *>_driver_pResDesc.res.pitch2D.devPtr
-        pResDesc[0].res.pitch2D.pitchInBytes = _driver_pResDesc.res.pitch2D.pitchInBytes
-        pResDesc[0].res.pitch2D.width        = _driver_pResDesc.res.pitch2D.width
-        pResDesc[0].res.pitch2D.height       = _driver_pResDesc.res.pitch2D.height
-    if _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_LINEAR or _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_PITCH2D:
-        channel_size = 0
-        if _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_UNSIGNED_INT8:
-            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-            channel_size = 8
-        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_UNSIGNED_INT16:
-            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-            channel_size = 16
-        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_UNSIGNED_INT32:
-            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-            channel_size = 32
-        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_SIGNED_INT8:
-            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindSigned
-            channel_size = 8
-        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_SIGNED_INT16:
-            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindSigned
-            channel_size = 16
-        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_SIGNED_INT32:
-            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindSigned
-            channel_size = 32
-        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_HALF:
-            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
-            channel_size = 16
-        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_FLOAT:
-            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
-            channel_size = 32
-        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_NV12:
-            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindNV12
-            channel_size = 8
-        else:
-            _setLastError(cudaErrorInvalidChannelDescriptor)
-            return cudaError.cudaErrorInvalidChannelDescriptor
-        pResDesc[0].res.linear.desc.x = 0
-        pResDesc[0].res.linear.desc.y = 0
-        pResDesc[0].res.linear.desc.z = 0
-        pResDesc[0].res.linear.desc.w = 0
-        if _driver_pResDesc.res.linear.numChannels >= 4:
-            pResDesc[0].res.linear.desc.w = channel_size
-        if _driver_pResDesc.res.linear.numChannels >= 3:
-            pResDesc[0].res.linear.desc.z = channel_size
-        if _driver_pResDesc.res.linear.numChannels >= 2:
-            pResDesc[0].res.linear.desc.y = channel_size
-        if _driver_pResDesc.res.linear.numChannels >= 1:
-            pResDesc[0].res.linear.desc.x = channel_size
-        if _driver_pResDesc.res.linear.numChannels < 1 or _driver_pResDesc.res.linear.numChannels >= 5:
-            _setLastError(cudaErrorInvalidChannelDescriptor)
-            return cudaError.cudaErrorInvalidChannelDescriptor
-
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphKernelNodeGetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphKernelNodeGetParams(cudaGraphNode_t node, cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS driverNodeParams
-
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphKernelNodeGetParams_v2(<cydriver.CUgraphNode>node, &driverNodeParams)
-    pNodeParams[0].func = <void*>driverNodeParams.func
-    pNodeParams[0].gridDim.x = driverNodeParams.gridDimX
-    pNodeParams[0].gridDim.y = driverNodeParams.gridDimY
-    pNodeParams[0].gridDim.z = driverNodeParams.gridDimZ
-    pNodeParams[0].blockDim.x = driverNodeParams.blockDimX
-    pNodeParams[0].blockDim.y = driverNodeParams.blockDimY
-    pNodeParams[0].blockDim.z = driverNodeParams.blockDimZ
-    pNodeParams[0].sharedMemBytes = driverNodeParams.sharedMemBytes
-    pNodeParams[0].kernelParams = driverNodeParams.kernelParams
-    pNodeParams[0].extra = driverNodeParams.extra
-
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaExternalMemoryGetMappedBuffer' in found_functions}}
-
-cdef cudaError_t _cudaExternalMemoryGetMappedBuffer(void** devPtr, cudaExternalMemory_t extMem, const cudaExternalMemoryBufferDesc* bufferDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC _driver_bufferDesc
-    memset(&_driver_bufferDesc, 0, sizeof(_driver_bufferDesc))
-    _driver_bufferDesc.offset = bufferDesc[0].offset
-    _driver_bufferDesc.size = bufferDesc[0].size
-    _driver_bufferDesc.flags = bufferDesc[0].flags
-
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuExternalMemoryGetMappedBuffer(<cydriver.CUdeviceptr*>devPtr, <cydriver.CUexternalMemory>extMem, &_driver_bufferDesc)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaImportExternalMemory' in found_functions}}
-
-cdef cudaError_t _cudaImportExternalMemory(cudaExternalMemory_t* extMem_out, const cudaExternalMemoryHandleDesc* memHandleDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC _driver_memHandleDesc
-    memset(&_driver_memHandleDesc, 0, sizeof(_driver_memHandleDesc))
-
-    if memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueFd:
-        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD
-        _driver_memHandleDesc.handle.fd = memHandleDesc[0].handle.fd
-    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32:
-        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
-        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
-        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
-    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32Kmt:
-        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT
-        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
-        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
-    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Heap:
-        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP
-        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
-        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
-    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Resource:
-        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE
-        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
-        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
-    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11Resource:
-        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE
-        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
-        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
-    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11ResourceKmt:
-        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT
-        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
-        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
-    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeNvSciBuf:
-        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF
-        _driver_memHandleDesc.handle.nvSciBufObject = memHandleDesc[0].handle.nvSciBufObject
-    _driver_memHandleDesc.size = memHandleDesc[0].size
-    _driver_memHandleDesc.flags = memHandleDesc[0].flags
-
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuImportExternalMemory(<cydriver.CUexternalMemory*>extMem_out, &_driver_memHandleDesc)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaCreateSurfaceObject' in found_functions}}
-
-cdef cudaError_t _cudaCreateSurfaceObject(cudaSurfaceObject_t* pSurfObject, const cudaResourceDesc* pResDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_RESOURCE_DESC _driver_pResDesc
-    memset(&_driver_pResDesc, 0, sizeof(_driver_pResDesc))
-    err = toDriverCudaResourceDesc(&_driver_pResDesc, pResDesc)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = <cudaError_t>cydriver._cuSurfObjectCreate(<cydriver.CUsurfObject*>pSurfObject, &_driver_pResDesc)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGetTextureObjectResourceDesc' in found_functions}}
-
-cdef cudaError_t _cudaGetTextureObjectResourceDesc(cudaResourceDesc* pResDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_RESOURCE_DESC _driver_pResDesc
-    memset(&_driver_pResDesc, 0, sizeof(_driver_pResDesc))
-    err = toDriverCudaResourceDesc(&_driver_pResDesc, pResDesc)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    err = <cudaError_t>cydriver._cuTexObjectGetResourceDesc(&_driver_pResDesc, <cydriver.CUtexObject>texObject)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-
-cdef cudaError_t _cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* conn, cudaEglFrame eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUeglFrame cueglFrame
-    err = getDriverEglFrame(&cueglFrame, eglframe)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuEGLStreamProducerPresentFrame(<cydriver.CUeglStreamConnection*>conn, cueglFrame, pStream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-cdef cudaError_t _cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn, cudaEglFrame* eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    if eglframe == NULL:
-        err = cudaErrorInvalidResourceHandle
-        _setLastError(err)
-        return err
-    cdef cydriver.CUeglFrame cueglFrame
-    err = <cudaError_t>cydriver._cuEGLStreamProducerReturnFrame(<cydriver.CUeglStreamConnection*>conn, &cueglFrame, pStream)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = getRuntimeEglFrame(eglframe, cueglFrame)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    return err
-
-cdef cudaError_t _cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUeglFrame cueglFrame
-    memset(&cueglFrame, 0, sizeof(cueglFrame))
-    err = <cudaError_t>cydriver._cuGraphicsResourceGetMappedEglFrame(&cueglFrame, <cydriver.CUgraphicsResource>resource, index, mipLevel)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = getRuntimeEglFrame(eglFrame, cueglFrame)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    return err
-
-cdef cudaError_t _cudaVDPAUSetVDPAUDevice(int device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return cudaErrorNotSupported
-
-{{if 'cudaArrayGetMemoryRequirements' in found_functions}}
-
-cdef cudaError_t _cudaArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaArray_t array, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS driverMemoryRequirements
-    if memoryRequirements == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    memset(memoryRequirements, 0, sizeof(memoryRequirements[0]))
-    err = <cudaError_t>cydriver._cuArrayGetMemoryRequirements(&driverMemoryRequirements, <cydriver.CUarray>array, device)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    memoryRequirements[0].size = driverMemoryRequirements.size
-    memoryRequirements[0].alignment = driverMemoryRequirements.alignment
-    return cudaSuccess
-
-{{endif}}
-{{if 'cudaMipmappedArrayGetMemoryRequirements' in found_functions}}
-
-cdef cudaError_t _cudaMipmappedArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaMipmappedArray_t mipmap, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS driverMemoryRequirements
-    if memoryRequirements == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-
-    memset(memoryRequirements, 0, sizeof(memoryRequirements[0]))
-    err = <cudaError_t>cydriver._cuMipmappedArrayGetMemoryRequirements(&driverMemoryRequirements, <cydriver.CUmipmappedArray>mipmap, device)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    memoryRequirements[0].size = driverMemoryRequirements.size
-    memoryRequirements[0].alignment = driverMemoryRequirements.alignment
-    return cudaSuccess
-
-{{endif}}
-{{if 'cudaStreamGetAttribute' in found_functions}}
-
-cdef cudaError_t _cudaStreamGetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, cudaStreamAttrValue* value_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamGetAttribute(<cydriver.CUstream>hStream, <cydriver.CUstreamAttrID>attr, <cydriver.CUstreamAttrValue*>value_out)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamSetAttribute' in found_functions}}
-
-cdef cudaError_t _cudaStreamSetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, const cudaStreamAttrValue* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamSetAttribute(<cydriver.CUstream>hStream, <cydriver.CUstreamAttrID>attr, <cydriver.CUstreamAttrValue*>value)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphKernelNodeGetAttribute' in found_functions}}
-
-cdef cudaError_t _cudaGraphKernelNodeGetAttribute(cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, cudaKernelNodeAttrValue* value_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphKernelNodeGetAttribute(<cydriver.CUgraphNode>hNode, <cydriver.CUkernelNodeAttrID>attr, <cydriver.CUkernelNodeAttrValue*>value_out)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphKernelNodeSetAttribute' in found_functions}}
-
-cdef cudaError_t _cudaGraphKernelNodeSetAttribute(cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, const cudaKernelNodeAttrValue* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphKernelNodeSetAttribute(<cydriver.CUgraphNode>hNode, <cydriver.CUkernelNodeAttrID>attr, <cydriver.CUkernelNodeAttrValue*>value)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaVDPAUGetDevice(int* device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuVDPAUGetDevice(<cydriver.CUdevice*>device, vdpDevice, vdpGetProcAddress)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaGraphicsVDPAURegisterVideoSurface(cudaGraphicsResource** resource, VdpVideoSurface vdpSurface, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsVDPAURegisterVideoSurface(<cydriver.CUgraphicsResource*>resource, vdpSurface, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaGraphicsVDPAURegisterOutputSurface(cudaGraphicsResource** resource, VdpOutputSurface vdpSurface, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsVDPAURegisterOutputSurface(<cydriver.CUgraphicsResource*>resource, vdpSurface, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaGLGetDevices(unsigned int* pCudaDeviceCount, int* pCudaDevices, unsigned int cudaDeviceCount, cudaGLDeviceList deviceList) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGLGetDevices_v2(pCudaDeviceCount, <cydriver.CUdevice*>pCudaDevices, cudaDeviceCount, <cydriver.CUGLDeviceList>deviceList)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaGraphicsGLRegisterImage(cudaGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsGLRegisterImage(<cydriver.CUgraphicsResource*>resource, image, target, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaGraphicsGLRegisterBuffer(cudaGraphicsResource** resource, GLuint buffer, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsGLRegisterBuffer(<cydriver.CUgraphicsResource*>resource, buffer, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceSynchronize' in found_functions}}
-
-cdef cudaError_t _cudaDeviceSynchronize() except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuCtxSynchronize()
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceSetLimit' in found_functions}}
-
-cdef cudaError_t _cudaDeviceSetLimit(cudaLimit limit, size_t value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuCtxSetLimit(<cydriver.CUlimit>limit, value)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceGetLimit' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetLimit(size_t* pValue, cudaLimit limit) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuCtxGetLimit(pValue, <cydriver.CUlimit>limit)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceGetCacheConfig' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetCacheConfig(cudaFuncCache* pCacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuCtxGetCacheConfig(<cydriver.CUfunc_cache*>pCacheConfig)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceGetStreamPriorityRange' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuCtxGetStreamPriorityRange(leastPriority, greatestPriority)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceSetCacheConfig' in found_functions}}
-
-cdef cudaError_t _cudaDeviceSetCacheConfig(cudaFuncCache cacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuCtxSetCacheConfig(<cydriver.CUfunc_cache>cacheConfig)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceGetSharedMemConfig' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetSharedMemConfig(cudaSharedMemConfig* pConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuCtxGetSharedMemConfig(<cydriver.CUsharedconfig*>pConfig)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaIpcGetEventHandle' in found_functions}}
-
-cdef cudaError_t _cudaIpcGetEventHandle(cudaIpcEventHandle_t* handle, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuIpcGetEventHandle(<cydriver.CUipcEventHandle*>handle, <cydriver.CUevent>event)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaIpcOpenEventHandle' in found_functions}}
-
-cdef cudaError_t _cudaIpcOpenEventHandle(cudaEvent_t* event, cudaIpcEventHandle_t handle) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    cdef cydriver.CUipcEventHandle _driver_handle
-    memcpy(&_driver_handle, &handle, sizeof(_driver_handle))
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuIpcOpenEventHandle(<cydriver.CUevent*>event, _driver_handle)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaIpcGetMemHandle' in found_functions}}
-
-cdef cudaError_t _cudaIpcGetMemHandle(cudaIpcMemHandle_t* handle, void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuIpcGetMemHandle(<cydriver.CUipcMemHandle*>handle, <cydriver.CUdeviceptr>devPtr)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaIpcOpenMemHandle' in found_functions}}
-
-cdef cudaError_t _cudaIpcOpenMemHandle(void** devPtr, cudaIpcMemHandle_t handle, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    cdef cydriver.CUipcMemHandle _driver_handle
-    memcpy(&_driver_handle, &handle, sizeof(_driver_handle))
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuIpcOpenMemHandle_v2(<cydriver.CUdeviceptr*>devPtr, _driver_handle, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaIpcCloseMemHandle' in found_functions}}
-
-cdef cudaError_t _cudaIpcCloseMemHandle(void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuIpcCloseMemHandle(<cydriver.CUdeviceptr>devPtr)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceFlushGPUDirectRDMAWrites' in found_functions}}
-
-cdef cudaError_t _cudaDeviceFlushGPUDirectRDMAWrites(cudaFlushGPUDirectRDMAWritesTarget target, cudaFlushGPUDirectRDMAWritesScope scope) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuFlushGPUDirectRDMAWrites(<cydriver.CUflushGPUDirectRDMAWritesTarget>target, <cydriver.CUflushGPUDirectRDMAWritesScope>scope)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceGetDefaultMemPool' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetDefaultMemPool(cudaMemPool_t* memPool, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuDeviceGetDefaultMemPool(<cydriver.CUmemoryPool*>memPool, <cydriver.CUdevice>device)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceSetMemPool' in found_functions}}
-
-cdef cudaError_t _cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuDeviceSetMemPool(<cydriver.CUdevice>device, <cydriver.CUmemoryPool>memPool)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceGetMemPool' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetMemPool(cudaMemPool_t* memPool, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuDeviceGetMemPool(<cydriver.CUmemoryPool*>memPool, <cydriver.CUdevice>device)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceGetNvSciSyncAttributes' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetNvSciSyncAttributes(void* nvSciSyncAttrList, int device, int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, <cydriver.CUdevice>device, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamCreateWithFlags' in found_functions}}
-
-cdef cudaError_t _cudaStreamCreateWithFlags(cudaStream_t* pStream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamCreate(<cydriver.CUstream*>pStream, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamCreateWithPriority' in found_functions}}
-
-cdef cudaError_t _cudaStreamCreateWithPriority(cudaStream_t* pStream, unsigned int flags, int priority) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamCreateWithPriority(<cydriver.CUstream*>pStream, flags, priority)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamGetPriority' in found_functions}}
-
-cdef cudaError_t _cudaStreamGetPriority(cudaStream_t hStream, int* priority) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamGetPriority(<cydriver.CUstream>hStream, priority)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamGetFlags' in found_functions}}
-
-cdef cudaError_t _cudaStreamGetFlags(cudaStream_t hStream, unsigned int* flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamGetFlags(<cydriver.CUstream>hStream, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaCtxResetPersistingL2Cache' in found_functions}}
-
-cdef cudaError_t _cudaCtxResetPersistingL2Cache() except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuCtxResetPersistingL2Cache()
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamCopyAttributes' in found_functions}}
-
-cdef cudaError_t _cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamCopyAttributes(<cydriver.CUstream>dst, <cydriver.CUstream>src)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamDestroy' in found_functions}}
-
-cdef cudaError_t _cudaStreamDestroy(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamDestroy_v2(<cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamWaitEvent' in found_functions}}
-
-cdef cudaError_t _cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamWaitEvent(<cydriver.CUstream>stream, <cydriver.CUevent>event, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamSynchronize' in found_functions}}
-
-cdef cudaError_t _cudaStreamSynchronize(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamSynchronize(<cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamQuery' in found_functions}}
-
-cdef cudaError_t _cudaStreamQuery(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamQuery(<cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamAttachMemAsync' in found_functions}}
-
-cdef cudaError_t _cudaStreamAttachMemAsync(cudaStream_t stream, void* devPtr, size_t length, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamAttachMemAsync(<cydriver.CUstream>stream, <cydriver.CUdeviceptr>devPtr, length, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamBeginCapture' in found_functions}}
-
-cdef cudaError_t _cudaStreamBeginCapture(cudaStream_t stream, cudaStreamCaptureMode mode) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamBeginCapture_v2(<cydriver.CUstream>stream, <cydriver.CUstreamCaptureMode>mode)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamBeginCaptureToGraph' in found_functions}}
-
-cdef cudaError_t _cudaStreamBeginCaptureToGraph(cudaStream_t stream, cudaGraph_t graph, const cudaGraphNode_t* dependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, cudaStreamCaptureMode mode) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamBeginCaptureToGraph(<cydriver.CUstream>stream, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>dependencies, <cydriver.CUgraphEdgeData*> dependencyData, numDependencies, <cydriver.CUstreamCaptureMode>mode)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaThreadExchangeStreamCaptureMode' in found_functions}}
-
-cdef cudaError_t _cudaThreadExchangeStreamCaptureMode(cudaStreamCaptureMode* mode) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuThreadExchangeStreamCaptureMode(<cydriver.CUstreamCaptureMode*>mode)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamEndCapture' in found_functions}}
-
-cdef cudaError_t _cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t* pGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamEndCapture(<cydriver.CUstream>stream, <cydriver.CUgraph*>pGraph)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamIsCapturing' in found_functions}}
-
-cdef cudaError_t _cudaStreamIsCapturing(cudaStream_t stream, cudaStreamCaptureStatus* pCaptureStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamIsCapturing(<cydriver.CUstream>stream, <cydriver.CUstreamCaptureStatus*>pCaptureStatus)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamUpdateCaptureDependencies' in found_functions}}
-
-cdef cudaError_t _cudaStreamUpdateCaptureDependencies(cudaStream_t stream, cudaGraphNode_t* dependencies, size_t numDependencies, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamUpdateCaptureDependencies(<cydriver.CUstream>stream, <cydriver.CUgraphNode*>dependencies, numDependencies, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaEventCreateWithFlags' in found_functions}}
-
-cdef cudaError_t _cudaEventCreateWithFlags(cudaEvent_t* event, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEventCreate(<cydriver.CUevent*>event, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaEventRecord' in found_functions}}
-
-cdef cudaError_t _cudaEventRecord(cudaEvent_t event, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEventRecord(<cydriver.CUevent>event, <cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaEventRecordWithFlags' in found_functions}}
-
-cdef cudaError_t _cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEventRecordWithFlags(<cydriver.CUevent>event, <cydriver.CUstream>stream, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaEventSynchronize' in found_functions}}
-
-cdef cudaError_t _cudaEventSynchronize(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEventSynchronize(<cydriver.CUevent>event)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaEventDestroy' in found_functions}}
-
-cdef cudaError_t _cudaEventDestroy(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEventDestroy_v2(<cydriver.CUevent>event)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaEventElapsedTime' in found_functions}}
-
-cdef cudaError_t _cudaEventElapsedTime(float* ms, cudaEvent_t start, cudaEvent_t end) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEventElapsedTime(ms, <cydriver.CUevent>start, <cydriver.CUevent>end)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDestroyExternalMemory' in found_functions}}
-
-cdef cudaError_t _cudaDestroyExternalMemory(cudaExternalMemory_t extMem) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuDestroyExternalMemory(<cydriver.CUexternalMemory>extMem)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDestroyExternalSemaphore' in found_functions}}
-
-cdef cudaError_t _cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuDestroyExternalSemaphore(<cydriver.CUexternalSemaphore>extSem)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaFuncSetCacheConfig' in found_functions}}
-
-cdef cudaError_t _cudaFuncSetCacheConfig(const void* func, cudaFuncCache cacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuFuncSetCacheConfig(<cydriver.CUfunction>func, <cydriver.CUfunc_cache>cacheConfig)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaFuncSetSharedMemConfig' in found_functions}}
-
-cdef cudaError_t _cudaFuncSetSharedMemConfig(const void* func, cudaSharedMemConfig config) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuFuncSetSharedMemConfig(<cydriver.CUfunction>func, <cydriver.CUsharedconfig>config)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaFuncSetAttribute' in found_functions}}
-
-cdef cudaError_t _cudaFuncSetAttribute(const void* func, cudaFuncAttribute attr, int value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuFuncSetAttribute(<cydriver.CUfunction>func, <cydriver.CUfunction_attribute>attr, value)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaLaunchHostFunc' in found_functions}}
-
-cdef cudaError_t _cudaLaunchHostFunc(cudaStream_t stream, cudaHostFn_t fn, void* userData) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = streamAddHostCallbackCommon(stream, fn, userData)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-
-cdef cudaError_t _cudaOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* func, int blockSize, size_t dynamicSMemSize) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, <cydriver.CUfunction>func, blockSize, dynamicSMemSize)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-
-cdef cudaError_t _cudaOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, const void* func, int numBlocks, int blockSize) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuOccupancyAvailableDynamicSMemPerBlock(dynamicSmemSize, <cydriver.CUfunction>func, numBlocks, blockSize)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-
-cdef cudaError_t _cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, const void* func, int blockSize, size_t dynamicSMemSize, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, <cydriver.CUfunction>func, blockSize, dynamicSMemSize, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMallocManaged' in found_functions}}
-
-cdef cudaError_t _cudaMallocManaged(void** devPtr, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemAllocManaged(<cydriver.CUdeviceptr*>devPtr, size, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMalloc' in found_functions}}
-
-cdef cudaError_t _cudaMalloc(void** devPtr, size_t size) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemAlloc_v2(<cydriver.CUdeviceptr*>devPtr, size)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaFree' in found_functions}}
-
-cdef cudaError_t _cudaFree(void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemFree_v2(<cydriver.CUdeviceptr>devPtr)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaFreeHost' in found_functions}}
-
-cdef cudaError_t _cudaFreeHost(void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemFreeHost(ptr)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaFreeArray' in found_functions}}
-
-cdef cudaError_t _cudaFreeArray(cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuArrayDestroy(<cydriver.CUarray>array)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaFreeMipmappedArray' in found_functions}}
-
-cdef cudaError_t _cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMipmappedArrayDestroy(<cydriver.CUmipmappedArray>mipmappedArray)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaHostAlloc' in found_functions}}
-
-cdef cudaError_t _cudaHostAlloc(void** pHost, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemHostAlloc(pHost, size, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaHostRegister' in found_functions}}
-
-cdef cudaError_t _cudaHostRegister(void* ptr, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemHostRegister_v2(ptr, size, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaHostUnregister' in found_functions}}
-
-cdef cudaError_t _cudaHostUnregister(void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemHostUnregister(ptr)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaHostGetDevicePointer' in found_functions}}
-
-cdef cudaError_t _cudaHostGetDevicePointer(void** pDevice, void* pHost, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemHostGetDevicePointer_v2(<cydriver.CUdeviceptr*>pDevice, pHost, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaHostGetFlags' in found_functions}}
-
-cdef cudaError_t _cudaHostGetFlags(unsigned int* pFlags, void* pHost) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemHostGetFlags(pFlags, pHost)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGetMipmappedArrayLevel' in found_functions}}
-
-cdef cudaError_t _cudaGetMipmappedArrayLevel(cudaArray_t* levelArray, cudaMipmappedArray_const_t mipmappedArray, unsigned int level) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMipmappedArrayGetLevel(<cydriver.CUarray*>levelArray, <cydriver.CUmipmappedArray>mipmappedArray, level)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemGetInfo' in found_functions}}
-
-cdef cudaError_t _cudaMemGetInfo(size_t* free, size_t* total) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemGetInfo_v2(free, total)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaArrayGetPlane' in found_functions}}
-
-cdef cudaError_t _cudaArrayGetPlane(cudaArray_t* pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuArrayGetPlane(<cydriver.CUarray*>pPlaneArray, <cydriver.CUarray>hArray, planeIdx)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemset' in found_functions}}
-
-cdef cudaError_t _cudaMemset(void* devPtr, int value, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemsetD8_v2(<cydriver.CUdeviceptr>devPtr, <unsigned char>value, count)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemset2D' in found_functions}}
-
-cdef cudaError_t _cudaMemset2D(void* devPtr, size_t pitch, int value, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemsetD2D8_v2(<cydriver.CUdeviceptr>devPtr, pitch, <unsigned char>value, width, height)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemsetAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemsetAsync(void* devPtr, int value, size_t count, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemsetD8Async(<cydriver.CUdeviceptr>devPtr, <unsigned char>value, count, <cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemset2DAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemset2DAsync(void* devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemsetD2D8Async(<cydriver.CUdeviceptr>devPtr, pitch, <unsigned char>value, width, height, <cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPrefetchAsync' in found_functions}}
-
-cdef cudaError_t _cudaMemPrefetchAsync(const void* devPtr, size_t count, int dstDevice, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemPrefetchAsync(<cydriver.CUdeviceptr>devPtr, count, <cydriver.CUdevice>dstDevice, <cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPrefetchAsync_v2' in found_functions}}
-
-cdef cudaError_t _cudaMemPrefetchAsync_v2(const void* devPtr, size_t count, cudaMemLocation location, unsigned int flags, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUmemLocation _driver_location
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    _driver_location.type = <cydriver.CUmemLocationType>location.type
-    _driver_location.id = location.id
-    err = <cudaError_t>cydriver._cuMemPrefetchAsync_v2(<cydriver.CUdeviceptr>devPtr, count, _driver_location, flags, <cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMallocAsync' in found_functions}}
-
-cdef cudaError_t _cudaMallocAsync(void** devPtr, size_t size, cudaStream_t hStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemAllocAsync(<cydriver.CUdeviceptr*>devPtr, size, <cydriver.CUstream>hStream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaFreeAsync' in found_functions}}
-
-cdef cudaError_t _cudaFreeAsync(void* devPtr, cudaStream_t hStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemFreeAsync(<cydriver.CUdeviceptr>devPtr, <cydriver.CUstream>hStream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPoolTrimTo' in found_functions}}
-
-cdef cudaError_t _cudaMemPoolTrimTo(cudaMemPool_t memPool, size_t minBytesToKeep) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemPoolTrimTo(<cydriver.CUmemoryPool>memPool, minBytesToKeep)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPoolSetAttribute' in found_functions}}
-
-cdef cudaError_t _cudaMemPoolSetAttribute(cudaMemPool_t memPool, cudaMemPoolAttr attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemPoolSetAttribute(<cydriver.CUmemoryPool>memPool, <cydriver.CUmemPool_attribute>attr, value)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPoolGetAttribute' in found_functions}}
-
-cdef cudaError_t _cudaMemPoolGetAttribute(cudaMemPool_t memPool, cudaMemPoolAttr attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemPoolGetAttribute(<cydriver.CUmemoryPool>memPool, <cydriver.CUmemPool_attribute>attr, value)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPoolGetAccess' in found_functions}}
-
-cdef cudaError_t _cudaMemPoolGetAccess(cudaMemAccessFlags* flags, cudaMemPool_t memPool, cudaMemLocation* location) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemPoolGetAccess(<cydriver.CUmemAccess_flags*>flags, <cydriver.CUmemoryPool>memPool, <cydriver.CUmemLocation*>location)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPoolCreate' in found_functions}}
-
-cdef cudaError_t _cudaMemPoolCreate(cudaMemPool_t* memPool, const cudaMemPoolProps* poolProps) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemPoolCreate(<cydriver.CUmemoryPool*>memPool, <cydriver.CUmemPoolProps*>poolProps)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPoolDestroy' in found_functions}}
-
-cdef cudaError_t _cudaMemPoolDestroy(cudaMemPool_t memPool) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemPoolDestroy(<cydriver.CUmemoryPool>memPool)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMallocFromPoolAsync' in found_functions}}
-
-cdef cudaError_t _cudaMallocFromPoolAsync(void** ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemAllocFromPoolAsync(<cydriver.CUdeviceptr*>ptr, size, <cydriver.CUmemoryPool>memPool, <cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPoolExportToShareableHandle' in found_functions}}
-
-cdef cudaError_t _cudaMemPoolExportToShareableHandle(void* shareableHandle, cudaMemPool_t memPool, cudaMemAllocationHandleType handleType, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemPoolExportToShareableHandle(shareableHandle, <cydriver.CUmemoryPool>memPool, <cydriver.CUmemAllocationHandleType>handleType, <unsigned long long>flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPoolImportFromShareableHandle' in found_functions}}
-
-cdef cudaError_t _cudaMemPoolImportFromShareableHandle(cudaMemPool_t* memPool, void* shareableHandle, cudaMemAllocationHandleType handleType, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemPoolImportFromShareableHandle(<cydriver.CUmemoryPool*>memPool, shareableHandle, <cydriver.CUmemAllocationHandleType>handleType, <unsigned long long>flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPoolExportPointer' in found_functions}}
-
-cdef cudaError_t _cudaMemPoolExportPointer(cudaMemPoolPtrExportData* exportData, void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemPoolExportPointer(<cydriver.CUmemPoolPtrExportData*>exportData, <cydriver.CUdeviceptr>ptr)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaMemPoolImportPointer' in found_functions}}
-
-cdef cudaError_t _cudaMemPoolImportPointer(void** ptr, cudaMemPool_t memPool, cudaMemPoolPtrExportData* exportData) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuMemPoolImportPointer(<cydriver.CUdeviceptr*>ptr, <cydriver.CUmemoryPool>memPool, <cydriver.CUmemPoolPtrExportData*>exportData)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphicsUnregisterResource' in found_functions}}
-
-cdef cudaError_t _cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsUnregisterResource(<cydriver.CUgraphicsResource>resource)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphicsResourceSetMapFlags' in found_functions}}
-
-cdef cudaError_t _cudaGraphicsResourceSetMapFlags(cudaGraphicsResource_t resource, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsResourceSetMapFlags_v2(<cydriver.CUgraphicsResource>resource, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphicsMapResources' in found_functions}}
-
-cdef cudaError_t _cudaGraphicsMapResources(int count, cudaGraphicsResource_t* resources, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsMapResources(<unsigned int>count, <cydriver.CUgraphicsResource*>resources, <cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphicsUnmapResources' in found_functions}}
-
-cdef cudaError_t _cudaGraphicsUnmapResources(int count, cudaGraphicsResource_t* resources, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsUnmapResources(<unsigned int>count, <cydriver.CUgraphicsResource*>resources, <cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphicsResourceGetMappedPointer' in found_functions}}
-
-cdef cudaError_t _cudaGraphicsResourceGetMappedPointer(void** devPtr, size_t* size, cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsResourceGetMappedPointer_v2(<cydriver.CUdeviceptr*>devPtr, size, <cydriver.CUgraphicsResource>resource)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphicsSubResourceGetMappedArray' in found_functions}}
-
-cdef cudaError_t _cudaGraphicsSubResourceGetMappedArray(cudaArray_t* array, cudaGraphicsResource_t resource, unsigned int arrayIndex, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsSubResourceGetMappedArray(<cydriver.CUarray*>array, <cydriver.CUgraphicsResource>resource, arrayIndex, mipLevel)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-
-cdef cudaError_t _cudaGraphicsResourceGetMappedMipmappedArray(cudaMipmappedArray_t* mipmappedArray, cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsResourceGetMappedMipmappedArray(<cydriver.CUmipmappedArray*>mipmappedArray, <cydriver.CUgraphicsResource>resource)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDestroyTextureObject' in found_functions}}
-
-cdef cudaError_t _cudaDestroyTextureObject(cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuTexObjectDestroy(<cydriver.CUtexObject>texObject)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDestroySurfaceObject' in found_functions}}
-
-cdef cudaError_t _cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuSurfObjectDestroy(<cydriver.CUsurfObject>surfObject)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphCreate' in found_functions}}
-
-cdef cudaError_t _cudaGraphCreate(cudaGraph_t* pGraph, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphCreate(<cydriver.CUgraph*>pGraph, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddKernelNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddKernelNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS driverNodeParams
-    err = toDriverKernelNodeParams(pNodeParams, &driverNodeParams)
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddKernelNode_v2(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphKernelNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphKernelNodeSetParams(cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS driverNodeParams
-    err = toDriverKernelNodeParams(pNodeParams, &driverNodeParams)
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphKernelNodeSetParams_v2(<cydriver.CUgraphNode>node, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphKernelNodeCopyAttributes' in found_functions}}
-
-cdef cudaError_t _cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphKernelNodeCopyAttributes(<cydriver.CUgraphNode>hSrc, <cydriver.CUgraphNode>hDst)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphMemsetNodeGetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphMemsetNodeGetParams(cudaGraphNode_t node, cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphMemsetNodeGetParams(<cydriver.CUgraphNode>node, <cydriver.CUDA_MEMSET_NODE_PARAMS*>pNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphMemsetNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphMemsetNodeSetParams(cudaGraphNode_t node, const cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphMemsetNodeSetParams(<cydriver.CUgraphNode>node, <cydriver.CUDA_MEMSET_NODE_PARAMS*>pNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddHostNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddHostNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_HOST_NODE_PARAMS driverNodeParams
-    toDriverHostNodeParams(pNodeParams, &driverNodeParams)
-    err = <cudaError_t>cydriver._cuGraphAddHostNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphHostNodeGetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphHostNodeGetParams(cudaGraphNode_t node, cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphHostNodeGetParams(<cydriver.CUgraphNode>node, <cydriver.CUDA_HOST_NODE_PARAMS*>pNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphHostNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphHostNodeSetParams(cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_HOST_NODE_PARAMS driverNodeParams
-    toDriverHostNodeParams(pNodeParams, &driverNodeParams)
-    err = <cudaError_t>cydriver._cuGraphHostNodeSetParams(<cydriver.CUgraphNode>node, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddChildGraphNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddChildGraphNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraph_t childGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddChildGraphNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUgraph>childGraph)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphChildGraphNodeGetGraph' in found_functions}}
-
-cdef cudaError_t _cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t* pGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphChildGraphNodeGetGraph(<cydriver.CUgraphNode>node, <cydriver.CUgraph*>pGraph)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddEmptyNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddEmptyNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddEmptyNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddEventRecordNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddEventRecordNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddEventRecordNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUevent>event)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphEventRecordNodeGetEvent' in found_functions}}
-
-cdef cudaError_t _cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t* event_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphEventRecordNodeGetEvent(<cydriver.CUgraphNode>node, <cydriver.CUevent*>event_out)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphEventRecordNodeSetEvent' in found_functions}}
-
-cdef cudaError_t _cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphEventRecordNodeSetEvent(<cydriver.CUgraphNode>node, <cydriver.CUevent>event)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddEventWaitNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddEventWaitNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddEventWaitNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUevent>event)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphEventWaitNodeGetEvent' in found_functions}}
-
-cdef cudaError_t _cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t* event_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphEventWaitNodeGetEvent(<cydriver.CUgraphNode>node, <cydriver.CUevent*>event_out)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphEventWaitNodeSetEvent' in found_functions}}
-
-cdef cudaError_t _cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphEventWaitNodeSetEvent(<cydriver.CUgraphNode>node, <cydriver.CUevent>event)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddExternalSemaphoresSignalNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddExternalSemaphoresSignalNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddExternalSemaphoresSignalNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*>nodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExternalSemaphoresSignalNodeGetParams(cudaGraphNode_t hNode, cudaExternalSemaphoreSignalNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExternalSemaphoresSignalNodeGetParams(<cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*>params_out)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExternalSemaphoresSignalNodeSetParams(cudaGraphNode_t hNode, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExternalSemaphoresSignalNodeSetParams(<cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*>nodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddExternalSemaphoresWaitNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddExternalSemaphoresWaitNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddExternalSemaphoresWaitNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS*>nodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExternalSemaphoresWaitNodeGetParams(cudaGraphNode_t hNode, cudaExternalSemaphoreWaitNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExternalSemaphoresWaitNodeGetParams(<cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS*>params_out)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExternalSemaphoresWaitNodeSetParams(cudaGraphNode_t hNode, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExternalSemaphoresWaitNodeSetParams(<cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS*>nodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddMemFreeNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddMemFreeNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dptr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddMemFreeNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUdeviceptr>dptr)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceGraphMemTrim' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGraphMemTrim(int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuDeviceGraphMemTrim(<cydriver.CUdevice>device)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceGetGraphMemAttribute' in found_functions}}
-
-cdef cudaError_t _cudaDeviceGetGraphMemAttribute(int device, cudaGraphMemAttributeType attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuDeviceGetGraphMemAttribute(<cydriver.CUdevice>device, <cydriver.CUgraphMem_attribute>attr, value)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceSetGraphMemAttribute' in found_functions}}
-
-cdef cudaError_t _cudaDeviceSetGraphMemAttribute(int device, cudaGraphMemAttributeType attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuDeviceSetGraphMemAttribute(<cydriver.CUdevice>device, <cydriver.CUgraphMem_attribute>attr, value)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphClone' in found_functions}}
-
-cdef cudaError_t _cudaGraphClone(cudaGraph_t* pGraphClone, cudaGraph_t originalGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphClone(<cydriver.CUgraph*>pGraphClone, <cydriver.CUgraph>originalGraph)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphNodeFindInClone' in found_functions}}
-
-cdef cudaError_t _cudaGraphNodeFindInClone(cudaGraphNode_t* pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphNodeFindInClone(<cydriver.CUgraphNode*>pNode, <cydriver.CUgraphNode>originalNode, <cydriver.CUgraph>clonedGraph)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphNodeGetType' in found_functions}}
-
-cdef cudaError_t _cudaGraphNodeGetType(cudaGraphNode_t node, cudaGraphNodeType* pType) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphNodeGetType(<cydriver.CUgraphNode>node, <cydriver.CUgraphNodeType*>pType)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphGetNodes' in found_functions}}
-
-cdef cudaError_t _cudaGraphGetNodes(cudaGraph_t graph, cudaGraphNode_t* nodes, size_t* numNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphGetNodes(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>nodes, numNodes)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphGetRootNodes' in found_functions}}
-
-cdef cudaError_t _cudaGraphGetRootNodes(cudaGraph_t graph, cudaGraphNode_t* pRootNodes, size_t* pNumRootNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphGetRootNodes(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pRootNodes, pNumRootNodes)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphGetEdges' in found_functions}}
-
-cdef cudaError_t _cudaGraphGetEdges(cudaGraph_t graph, cudaGraphNode_t* from_, cudaGraphNode_t* to, size_t* numEdges) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphGetEdges(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>from_, <cydriver.CUgraphNode*>to, numEdges)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphNodeGetDependencies' in found_functions}}
-
-cdef cudaError_t _cudaGraphNodeGetDependencies(cudaGraphNode_t node, cudaGraphNode_t* pDependencies, size_t* pNumDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphNodeGetDependencies(<cydriver.CUgraphNode>node, <cydriver.CUgraphNode*>pDependencies, pNumDependencies)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphNodeGetDependentNodes' in found_functions}}
-
-cdef cudaError_t _cudaGraphNodeGetDependentNodes(cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, size_t* pNumDependentNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphNodeGetDependentNodes(<cydriver.CUgraphNode>node, <cydriver.CUgraphNode*>pDependentNodes, pNumDependentNodes)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddDependencies' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddDependencies(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>from_, <cydriver.CUgraphNode*>to, numDependencies)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphRemoveDependencies' in found_functions}}
-
-cdef cudaError_t _cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphRemoveDependencies(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>from_, <cydriver.CUgraphNode*>to, numDependencies)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphDestroyNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphDestroyNode(cudaGraphNode_t node) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphDestroyNode(<cydriver.CUgraphNode>node)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphInstantiate' in found_functions}}
-
-cdef cudaError_t _cudaGraphInstantiate(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, unsigned long long flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphInstantiateWithFlags(pGraphExec, graph, flags)
-
-{{endif}}
-{{if 'cudaGraphInstantiateWithFlags' in found_functions}}
-
-cdef cudaError_t _cudaGraphInstantiateWithFlags(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, unsigned long long flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphInstantiateWithFlags(<cydriver.CUgraphExec*>pGraphExec, <cydriver.CUgraph>graph, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExecKernelNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecKernelNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS driverNodeParams
-    err = toDriverKernelNodeParams(pNodeParams, &driverNodeParams)
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExecKernelNodeSetParams_v2(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>node, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExecHostNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_HOST_NODE_PARAMS driverNodeParams
-    toDriverHostNodeParams(pNodeParams, &driverNodeParams)
-    err = <cudaError_t>cydriver._cuGraphExecHostNodeSetParams(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>node, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExecChildGraphNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecChildGraphNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExecChildGraphNodeSetParams(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>node, <cydriver.CUgraph>childGraph)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExecEventRecordNodeSetEvent' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecEventRecordNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExecEventRecordNodeSetEvent(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, <cydriver.CUevent>event)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExecEventWaitNodeSetEvent' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecEventWaitNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExecEventWaitNodeSetEvent(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, <cydriver.CUevent>event)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecExternalSemaphoresSignalNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExecExternalSemaphoresSignalNodeSetParams(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*>nodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecExternalSemaphoresWaitNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExecExternalSemaphoresWaitNodeSetParams(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS*>nodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphNodeSetEnabled' in found_functions}}
-
-cdef cudaError_t _cudaGraphNodeSetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphNodeSetEnabled(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, isEnabled)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphNodeGetEnabled' in found_functions}}
-
-cdef cudaError_t _cudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int* isEnabled) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphNodeGetEnabled(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, isEnabled)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExecUpdate' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, cudaGraphExecUpdateResultInfo* resultInfo) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExecUpdate_v2(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraph>hGraph, <cydriver.CUgraphExecUpdateResultInfo*>resultInfo)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphUpload' in found_functions}}
-
-cdef cudaError_t _cudaGraphUpload(cudaGraphExec_t graphExec, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphUpload(<cydriver.CUgraphExec>graphExec, <cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphLaunch' in found_functions}}
-
-cdef cudaError_t _cudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphLaunch(<cydriver.CUgraphExec>graphExec, <cydriver.CUstream>stream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExecDestroy' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecDestroy(cudaGraphExec_t graphExec) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExecDestroy(<cydriver.CUgraphExec>graphExec)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphDestroy' in found_functions}}
-
-cdef cudaError_t _cudaGraphDestroy(cudaGraph_t graph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphDestroy(<cydriver.CUgraph>graph)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphDebugDotPrint' in found_functions}}
-
-cdef cudaError_t _cudaGraphDebugDotPrint(cudaGraph_t graph, const char* path, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphDebugDotPrint(<cydriver.CUgraph>graph, path, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaUserObjectCreate' in found_functions}}
-
-cdef cudaError_t _cudaUserObjectCreate(cudaUserObject_t* object_out, void* ptr, cudaHostFn_t destroy, unsigned int initialRefcount, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuUserObjectCreate(<cydriver.CUuserObject*>object_out, ptr, <cydriver.CUhostFn>destroy, initialRefcount, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaUserObjectRetain' in found_functions}}
-
-cdef cudaError_t _cudaUserObjectRetain(cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuUserObjectRetain(<cydriver.CUuserObject>object, count)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaUserObjectRelease' in found_functions}}
-
-cdef cudaError_t _cudaUserObjectRelease(cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuUserObjectRelease(<cydriver.CUuserObject>object, count)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphRetainUserObject' in found_functions}}
-
-cdef cudaError_t _cudaGraphRetainUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphRetainUserObject(<cydriver.CUgraph>graph, <cydriver.CUuserObject>object, count, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphReleaseUserObject' in found_functions}}
-
-cdef cudaError_t _cudaGraphReleaseUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphReleaseUserObject(<cydriver.CUgraph>graph, <cydriver.CUuserObject>object, count)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaProfilerStart() except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuProfilerStart()
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaProfilerStop() except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuProfilerStop()
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaGraphicsEGLRegisterImage(cudaGraphicsResource_t* pCudaResource, EGLImageKHR image, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphicsEGLRegisterImage(<cydriver.CUgraphicsResource*>pCudaResource, image, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaEGLStreamConsumerConnect(cudaEglStreamConnection* conn, EGLStreamKHR eglStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEGLStreamConsumerConnect(<cydriver.CUeglStreamConnection*>conn, eglStream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaEGLStreamConsumerConnectWithFlags(cudaEglStreamConnection* conn, EGLStreamKHR eglStream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEGLStreamConsumerConnectWithFlags(<cydriver.CUeglStreamConnection*>conn, eglStream, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaEGLStreamConsumerDisconnect(cudaEglStreamConnection* conn) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEGLStreamConsumerDisconnect(<cydriver.CUeglStreamConnection*>conn)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaEGLStreamConsumerAcquireFrame(cudaEglStreamConnection* conn, cudaGraphicsResource_t* pCudaResource, cudaStream_t* pStream, unsigned int timeout) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEGLStreamConsumerAcquireFrame(<cydriver.CUeglStreamConnection*>conn, <cydriver.CUgraphicsResource*>pCudaResource, <cydriver.CUstream*>pStream, timeout)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaEGLStreamConsumerReleaseFrame(cudaEglStreamConnection* conn, cudaGraphicsResource_t pCudaResource, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEGLStreamConsumerReleaseFrame(<cydriver.CUeglStreamConnection*>conn, <cydriver.CUgraphicsResource>pCudaResource, <cydriver.CUstream*>pStream)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaEGLStreamProducerConnect(cudaEglStreamConnection* conn, EGLStreamKHR eglStream, EGLint width, EGLint height) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEGLStreamProducerConnect(<cydriver.CUeglStreamConnection*>conn, eglStream, width, height)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaEGLStreamProducerDisconnect(cudaEglStreamConnection* conn) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEGLStreamProducerDisconnect(<cydriver.CUeglStreamConnection*>conn)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if True}}
-
-cdef cudaError_t _cudaEventCreateFromEGLSync(cudaEvent_t* phEvent, EGLSyncKHR eglSync, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuEventCreateFromEGLSync(<cydriver.CUevent*>phEvent, eglSync, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaInitDevice' in found_functions}}
-
-cdef cudaError_t _cudaInitDevice(int deviceOrdinal, unsigned int deviceFlags, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cudaPythonDevice *device
-    cdef unsigned int scheduleFlags
-
-    err = m_global.lazyInitDriver()
-    if err != cudaSuccess:
-        return err
-
-    device = m_global.getDevice(deviceOrdinal)
-    if device == NULL:
-        _setLastError(cudaErrorInvalidDevice)
-        return cudaErrorInvalidDevice
-
-    if device.primaryContext == NULL:
-        initPrimaryContext(device)
-
-    if flags & cudaInitDeviceFlagsAreValid:
-        scheduleFlags = deviceFlags & cudaDeviceScheduleMask
-        deviceFlags &= ~cudaDeviceMapHost
-        if deviceFlags & ~cudaDeviceMask:
-            _setLastError(cudaErrorInvalidValue)
-            return cudaErrorInvalidValue
-        if scheduleFlags:
-            if scheduleFlags != cudaDeviceScheduleSpin and scheduleFlags != cudaDeviceScheduleYield and scheduleFlags != cudaDeviceScheduleBlockingSync:
-                _setLastError(cudaErrorInvalidValue)
-                return cudaErrorInvalidValue
-
-        err = <cudaError_t>cydriver._cuDevicePrimaryCtxSetFlags_v2(device[0].driverDevice, deviceFlags)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-    return cudaSuccess
-
-{{endif}}
-{{if 'cudaStreamGetId' in found_functions}}
-
-cdef cudaError_t _cudaStreamGetId(cudaStream_t hStream, unsigned long long* streamId) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamGetId(<cydriver.CUstream>hStream, streamId)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphInstantiateWithParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphInstantiateWithParams(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, cudaGraphInstantiateParams* instantiateParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphInstantiateWithParams(<cydriver.CUgraphExec*>pGraphExec, <cydriver.CUgraph>graph, <cydriver.CUDA_GRAPH_INSTANTIATE_PARAMS*>instantiateParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphExecGetFlags' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long long* flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphExecGetFlags(<cydriver.CUgraphExec>graphExec, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGetKernel' in found_functions}}
-
-cdef cudaError_t _cudaGetKernel(cudaKernel_t* kernelPtr, const void* entryFuncAddr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if kernelPtr == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-    if entryFuncAddr == NULL:
-        _setLastError(cudaErrorInvalidDeviceFunction)
-        return cudaErrorInvalidDeviceFunction
-
-    kernelPtr[0] = <cudaKernel_t>entryFuncAddr;
-    return cudaSuccess
-
-{{endif}}
-{{if 'cudaGraphAddNode' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if nodeParams == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    cdef cydriver.CUgraphNodeParams driverNodeParams
-    err = toDriverGraphNodeParams(nodeParams, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddNode(pGraphNode, graph, pDependencies, numDependencies, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    toCudartGraphNodeOutParams(&driverNodeParams, nodeParams)
-    return cudaSuccess
-
-{{endif}}
-{{if 'cudaGraphNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphNodeSetParams(cudaGraphNode_t node, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if nodeParams == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    cdef cydriver.CUgraphNodeParams driverNodeParams
-    err = toDriverGraphNodeParams(nodeParams, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuGraphNodeSetParams(node, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    toCudartGraphNodeOutParams(&driverNodeParams, nodeParams);
-    return cudaSuccess
-
-{{endif}}
-{{if 'cudaGraphExecNodeSetParams' in found_functions}}
-
-cdef cudaError_t _cudaGraphExecNodeSetParams(cudaGraphExec_t graphExec, cudaGraphNode_t node, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if nodeParams == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    cdef cydriver.CUgraphNodeParams driverNodeParams
-    err = toDriverGraphNodeParams(nodeParams, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuGraphExecNodeSetParams(graphExec, node, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    toCudartGraphNodeOutParams(&driverNodeParams, nodeParams);
-    return cudaSuccess
-
-{{endif}}
-{{if 'cudaGraphConditionalHandleCreate' in found_functions}}
-
-cdef cudaError_t _cudaGraphConditionalHandleCreate(cudaGraphConditionalHandle* pHandle_out, cudaGraph_t graph, unsigned int defaultLaunchValue, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUcontext context
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuGraphConditionalHandleCreate(<cydriver.CUgraphConditionalHandle *>pHandle_out, graph, context, defaultLaunchValue, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamGetCaptureInfo_v3' in found_functions}}
-
-cdef cudaError_t _cudaStreamGetCaptureInfo_v3(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, cudaGraph_t* graph_out, const cudaGraphNode_t** dependencies_out, const cudaGraphEdgeData** edgeData_out, size_t* numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = streamGetCaptureInfoCommon_v3(stream, captureStatus_out, id_out, graph_out, dependencies_out, edgeData_out, numDependencies_out)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaStreamUpdateCaptureDependencies_v2' in found_functions}}
-
-cdef cudaError_t _cudaStreamUpdateCaptureDependencies_v2(cudaStream_t stream, cudaGraphNode_t* dependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuStreamUpdateCaptureDependencies_v2(<cydriver.CUstream>stream, <cydriver.CUgraphNode*>dependencies, <const cydriver.CUgraphEdgeData*>dependencyData, numDependencies, flags)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphGetEdges_v2' in found_functions}}
-
-cdef cudaError_t _cudaGraphGetEdges_v2(cudaGraph_t graph, cudaGraphNode_t* from_, cudaGraphNode_t* to, cudaGraphEdgeData* edgeData, size_t* numEdges) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphGetEdges_v2(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>from_, <cydriver.CUgraphNode*>to, <cydriver.CUgraphEdgeData*>edgeData, numEdges)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphNodeGetDependencies_v2' in found_functions}}
-
-cdef cudaError_t _cudaGraphNodeGetDependencies_v2(cudaGraphNode_t node, cudaGraphNode_t* pDependencies, cudaGraphEdgeData* edgeData, size_t* pNumDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphNodeGetDependencies_v2(<cydriver.CUgraphNode>node, <cydriver.CUgraphNode*>pDependencies, <cydriver.CUgraphEdgeData*>edgeData, pNumDependencies)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphNodeGetDependentNodes_v2' in found_functions}}
-
-cdef cudaError_t _cudaGraphNodeGetDependentNodes_v2(cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, cudaGraphEdgeData* edgeData, size_t* pNumDependentNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphNodeGetDependentNodes_v2(<cydriver.CUgraphNode>node, <cydriver.CUgraphNode*>pDependentNodes, <cydriver.CUgraphEdgeData*>edgeData, pNumDependentNodes)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddDependencies_v2' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddDependencies_v2(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, const cudaGraphEdgeData* edgeData, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddDependencies_v2(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>from_, <cydriver.CUgraphNode*>to, <const cydriver.CUgraphEdgeData*>edgeData, numDependencies)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphRemoveDependencies_v2' in found_functions}}
-
-cdef cudaError_t _cudaGraphRemoveDependencies_v2(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, const cudaGraphEdgeData* edgeData, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        return err
-    err = <cudaError_t>cydriver._cuGraphRemoveDependencies_v2(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>from_, <cydriver.CUgraphNode*>to, <const cydriver.CUgraphEdgeData*>edgeData, numDependencies)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGraphAddNode_v2' in found_functions}}
-
-cdef cudaError_t _cudaGraphAddNode_v2(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if nodeParams == NULL:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-    cdef cudaError_t err = cudaSuccess
-    err = m_global.lazyInitContextState()
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    cdef cydriver.CUgraphNodeParams driverNodeParams
-    err = toDriverGraphNodeParams(nodeParams, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    err = <cudaError_t>cydriver._cuGraphAddNode_v2(pGraphNode, graph, pDependencies, <const cydriver.CUgraphEdgeData*>dependencyData, numDependencies, &driverNodeParams)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    toCudartGraphNodeOutParams(&driverNodeParams, nodeParams)
-    return cudaSuccess
-
-{{endif}}
-{{if True}}
-
-{{if 'Windows' != platform.system()}}
-cimport cuda.bindings._lib.dlfcn as dlfcn
-{{endif}}
-
-cdef cudaError_t _getLocalRuntimeVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil:
-    {{if 'Windows' == platform.system()}}
-    with gil:
-        raise NotImplementedError('"getLocalRuntimeVersion" is unsupported on Windows')
-    {{else}}
-    # Load
-    handle = dlfcn.dlopen('libcudart.so.12', dlfcn.RTLD_NOW)
-    if handle == NULL:
-        with gil:
-            raise RuntimeError(f'Failed to dlopen libcudart.so.12')
-
-    __cudaRuntimeGetVersion = dlfcn.dlsym(handle, 'cudaRuntimeGetVersion')
-
-    if __cudaRuntimeGetVersion == NULL:
-        with gil:
-            raise RuntimeError(f'Function "cudaRuntimeGetVersion" not found in libcudart.so.12')
-
-    # Call
-    cdef cudaError_t err = cudaSuccess
-    err = (<cudaError_t (*)(int*) except ?cudaErrorCallRequiresNewerDriver nogil> __cudaRuntimeGetVersion)(runtimeVersion)
-
-    # Unload
-    dlfcn.dlclose(handle)
-
-    # Return
-    return err
-    {{endif}}
-{{endif}}
-{{if 'cudaDeviceRegisterAsyncNotification' in found_functions}}
-
-cdef cudaError_t _cudaDeviceRegisterAsyncNotification(int device, cudaAsyncCallback callbackFunc, void* userData, cudaAsyncCallbackHandle_t* callback) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = DeviceRegisterAsyncNotificationCommon(device, callbackFunc, userData, callback)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaDeviceUnregisterAsyncNotification' in found_functions}}
-
-cdef cudaError_t _cudaDeviceUnregisterAsyncNotification(int device, cudaAsyncCallbackHandle_t callback) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = DeviceUnregisterAsyncNotificationCommon(device, callback)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
-{{if 'cudaGetDriverEntryPointByVersion' in found_functions}}
-
-cdef cudaError_t _cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = <cudaError_t>cydriver._cuGetProcAddress_v2(symbol, funcPtr, cudaVersion, flags, <cydriver.CUdriverProcAddressQueryResult*>driverStatus)
-    if err != cudaSuccess:
-        _setLastError(err)
-    return err
-
-{{endif}}
diff --git a/cuda_bindings/cuda/bindings/_lib/cyruntime/utils.pxd.in b/cuda_bindings/cuda/bindings/_lib/cyruntime/utils.pxd.in
deleted file mode 100644
index 7b219cbf..00000000
--- a/cuda_bindings/cuda/bindings/_lib/cyruntime/utils.pxd.in
+++ /dev/null
@@ -1,121 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from cuda.bindings.cyruntime cimport *
-from libc.stdlib cimport malloc, free, calloc
-from libc.string cimport memset, memcpy, strncmp
-from libcpp cimport bool
-from libcpp.map cimport map
-cimport cuda.bindings._bindings.cydriver as cydriver
-
-ctypedef struct cudaAsyncCallbackData_st:
-    cudaAsyncCallback callback
-    void *userData
-
-ctypedef cudaAsyncCallbackData_st cudaAsyncCallbackData
-
-cdef struct cudaPythonDevice:
-    cydriver.CUdevice driverDevice
-    cydriver.CUcontext primaryContext
-    bool primaryContextRetained
-    int deviceOrdinal
-    cudaDeviceProp deviceProperties
-
-cdef class cudaPythonGlobal:
-    cdef bint _lazyInitDriver
-    cdef int _numDevices
-    cdef cudaPythonDevice* _deviceList
-    cdef cudaError_t _lastError
-    cdef int _CUDART_VERSION
-    cdef map[cudaAsyncCallbackHandle_t, cudaAsyncCallbackData*] _asyncCallbackDataMap
-
-    cdef cudaError_t lazyInitDriver(self) except ?cudaErrorCallRequiresNewerDriver nogil
-    cdef cudaError_t lazyInitContextState(self) except ?cudaErrorCallRequiresNewerDriver nogil
-    cdef cudaPythonDevice* getDevice(self, int deviceOrdinal) noexcept nogil
-    cdef cudaPythonDevice* getDeviceFromDriver(self, cydriver.CUdevice driverDevice) noexcept nogil
-    cdef cudaPythonDevice* getDeviceFromPrimaryCtx(self, cydriver.CUcontext context) noexcept nogil
-
-cdef cudaError_t initPrimaryContext(cudaPythonDevice *device) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t resetPrimaryContext(cudaPythonDevice* device) except ?cudaErrorCallRequiresNewerDriver nogil
-
-cdef cudaPythonGlobal globalGetInstance()
-cdef cudaError_t _setLastError(cudaError_t err) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t getDescInfo(const cudaChannelFormatDesc* d, int *numberOfChannels, cydriver.CUarray_format *format) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t streamAddCallbackCommon(cudaStream_t stream, cudaStreamCallback_t callback, void *userData, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t streamAddHostCallbackCommon(cudaStream_t stream, cudaHostFn_t callback, void *userData) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t streamGetCaptureInfoCommon(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long *id_out, cudaGraph_t *graph_out, const cudaGraphNode_t **dependencies_out, size_t *numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t streamGetCaptureInfoCommon_v3(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long *id_out, cudaGraph_t *graph_out, const cudaGraphNode_t **dependencies_out, const cudaGraphEdgeData** edgeData_out, size_t *numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t getChannelFormatDescFromDriverDesc(cudaChannelFormatDesc* pRuntimeDesc, size_t* pDepth, size_t* pHeight, size_t* pWidth, const cydriver.CUDA_ARRAY3D_DESCRIPTOR_v2* pDriverDesc) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t copyFromHost2D(cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, const char *src, size_t spitch, size_t width, size_t height, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t copyFromDevice2D(cydriver.CUmemorytype type, cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, const char *src, size_t srcOffset,
-                                  size_t spitch, size_t width, size_t height, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t copyToHost2D(cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, char *dst, size_t dpitch, size_t width,
-                              size_t height, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t copyToDevice2D(cydriver.CUmemorytype type, cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, const char *dst, size_t dstOffset, size_t dpitch,
-                                size_t width, size_t height, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t copyToArray2D(cudaArray_const_t thisArray, size_t hOffsetSrc, size_t wOffsetSrc, cudaArray_t dst,
-                               size_t hOffsetDst, size_t wOffsetDst, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t getChannelDesc(cudaArray_const_t thisArray, cudaChannelFormatDesc *outDesc) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t getDriverResDescFromResDesc(cydriver.CUDA_RESOURCE_DESC *rdDst, const cudaResourceDesc *rdSrc,
-                                             cydriver.CUDA_TEXTURE_DESC *tdDst, const cudaTextureDesc *tdSrc,
-                                             cydriver.CUDA_RESOURCE_VIEW_DESC *rvdDst, const cudaResourceViewDesc *rvdSrc) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t getResDescFromDriverResDesc(cudaResourceDesc *rdDst, const cydriver.CUDA_RESOURCE_DESC *rdSrc,
-                                             cudaTextureDesc *tdDst, const cydriver.CUDA_TEXTURE_DESC *tdSrc,
-                                             cudaResourceViewDesc *rvdDst, const cydriver.CUDA_RESOURCE_VIEW_DESC *rvdSrc) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memsetPtr(char *mem, int c, size_t count, cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memset2DPtr(char *mem, size_t pitch, int c, size_t width, size_t height, cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t copyFromHost(cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, const char *src, size_t count,
-                              cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t copyFromDevice(cydriver.CUmemorytype type, cudaArray_const_t thisArray, size_t hOffset, size_t wOffset,
-                                const char *src, size_t srcOffset, size_t count, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t copyToHost(cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, char *dst, size_t count, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t copyToDevice(cydriver.CUmemorytype type, cudaArray_const_t thisArray, size_t hOffset, size_t wOffset,
-                              const char *dst, size_t dstOffset, size_t count, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t copy1DConvertTo3DParams(void* dst, const void* src, size_t count, cudaMemcpyKind kind, cudaMemcpy3DParms *p) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef void toDriverMemsetNodeParams(const cudaMemsetParams *pRuntimeParams, cydriver.CUDA_MEMSET_NODE_PARAMS *pDriverParams) noexcept nogil
-cdef cudaError_t toDriverMemCopy3DParams(const cudaMemcpy3DParms *p, cydriver.CUDA_MEMCPY3D *cd) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t mallocArray(cudaArray_t *arrayPtr, const cudaChannelFormatDesc *desc, size_t depth, size_t height,
-                             size_t width, int corr2D, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memcpy2DToArray(cudaArray_t dst, size_t hOffset, size_t wOffset, const char *src,
-                                 size_t spitch, size_t width, size_t height, cudaMemcpyKind kind,
-                                 cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memcpyDispatch(void *dst, const void *src, size_t size, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t mallocHost(size_t size, void **mem, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t mallocPitch(size_t width, size_t height, size_t depth, void **mem, size_t *pitch) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t mallocMipmappedArray(cudaMipmappedArray_t *mipmappedArrayPtr, const cudaChannelFormatDesc *desc,
-                                      size_t depth, size_t height, size_t width, unsigned int numLevels, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memcpy2DPtr(char *dst, size_t dpitch, const char *src, size_t spitch, size_t width,
-                             size_t height, cudaMemcpyKind kind,
-                             cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memcpy3D(const cudaMemcpy3DParms *p, bool peer, int srcDevice, int dstDevice, cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memcpyAsyncDispatch(void *dst, const void *src, size_t size, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t toCudartMemCopy3DParams(const cydriver.CUDA_MEMCPY3D_v2 *cd, cudaMemcpy3DParms *p) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memcpy2DFromArray(char *dst, size_t dpitch, cudaArray_const_t src, size_t hOffset,
-                                   size_t wOffset, size_t width, size_t height, cudaMemcpyKind kind,
-                                   cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memcpy2DArrayToArray(cudaArray_t dst, size_t hOffsetDst, size_t wOffsetDst,
-                                      cudaArray_const_t src, size_t hOffsetSrc, size_t wOffsetSrc,
-                                      size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memset3DPtr(cudaPitchedPtr p, int val, cudaExtent e, cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memcpyToArray(cudaArray_t dst, size_t hOffset, size_t wOffset, const char *src,
-                               size_t count, cudaMemcpyKind kind,
-                               cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memcpyFromArray(char *dst, cudaArray_const_t src, size_t hOffset, size_t wOffset,
-                                 size_t count, cudaMemcpyKind kind,
-                                 cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t memcpyArrayToArray(cudaArray_t dst, size_t hOffsetDst, size_t wOffsetDst,
-                                    cudaArray_const_t src, size_t hOffsetSrc, size_t wOffsetSrc,
-                                    size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t toDriverCudaResourceDesc(cydriver.CUDA_RESOURCE_DESC *_driver_pResDesc, const cudaResourceDesc *pResDesc) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t getDriverEglFrame(cydriver.CUeglFrame *cuEglFrame, cudaEglFrame eglFrame) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t getRuntimeEglFrame(cudaEglFrame *eglFrame, cydriver.CUeglFrame cueglFrame) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t toDriverGraphNodeParams(const cudaGraphNodeParams *rtParams, cydriver.CUgraphNodeParams *driverParams) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef void toCudartGraphNodeOutParams(const cydriver.CUgraphNodeParams *driverParams, cudaGraphNodeParams *rtParams) noexcept nogil
-cdef cudaError_t toDriverKernelNodeParams(const cudaKernelNodeParams *nodeParams, cydriver.CUDA_KERNEL_NODE_PARAMS *driverNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef void toDriverHostNodeParams(const cudaHostNodeParams *pRuntimeNodeParams, cydriver.CUDA_HOST_NODE_PARAMS *pDriverNodeParams) noexcept nogil
-cdef cudaError_t DeviceRegisterAsyncNotificationCommon(int device, cudaAsyncCallback callbackFunc, void* userData, cudaAsyncCallbackHandle_t* callback) except ?cudaErrorCallRequiresNewerDriver nogil
-cdef cudaError_t DeviceUnregisterAsyncNotificationCommon(int device, cudaAsyncCallbackHandle_t callback) except ?cudaErrorCallRequiresNewerDriver nogil
diff --git a/cuda_bindings/cuda/bindings/_lib/cyruntime/utils.pyx.in b/cuda_bindings/cuda/bindings/_lib/cyruntime/utils.pyx.in
deleted file mode 100644
index ea47081d..00000000
--- a/cuda_bindings/cuda/bindings/_lib/cyruntime/utils.pyx.in
+++ /dev/null
@@ -1,3548 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import cython
-from cuda.bindings.cyruntime cimport *
-from libc.stdlib cimport malloc, free, calloc
-from libc.string cimport memset, memcpy, strncmp, memcmp
-from libcpp cimport bool
-cimport cuda.bindings._bindings.cydriver as cydriver
-
-cdef struct cudaArrayLocalState:
-    cydriver.CUarray array
-    cudaChannelFormatDesc desc
-    size_t depth
-    size_t height
-    size_t width
-    size_t elementSize
-    size_t widthInBytes
-
-ctypedef struct cudaStreamCallbackData_st:
-    cudaStreamCallback_t callback
-    void *userData
-
-ctypedef cudaStreamCallbackData_st cudaStreamCallbackData
-
-ctypedef struct cudaStreamHostCallbackData_st:
-    cudaHostFn_t callback
-    void *userData
-
-ctypedef cudaStreamHostCallbackData_st cudaStreamHostCallbackData
-
-cdef class cudaPythonGlobal:
-    def __cinit__(self):
-        self._lazyInitDriver = False
-        self._numDevices = 0
-        self._deviceList = NULL
-        self._CUDART_VERSION = CUDART_VERSION
-
-    def __dealloc__(self):
-        if self._deviceList is not NULL:
-            free(self._deviceList)
-        for item in self._asyncCallbackDataMap:
-            free(item.second)
-        self._asyncCallbackDataMap.clear()
-
-    cdef cudaError_t lazyInitDriver(self) except ?cudaErrorCallRequiresNewerDriver nogil:
-        if self._lazyInitDriver:
-            return cudaSuccess
-
-        cdef cudaError_t err = cudaSuccess
-        err = <cudaError_t>cydriver._cuInit(0)
-        if err != cudaSuccess:
-            return err
-        err = <cudaError_t>cydriver._cuDeviceGetCount(&self._numDevices)
-        if err != cudaSuccess:
-            return err
-
-        self._deviceList = <cudaPythonDevice *>calloc(self._numDevices, sizeof(cudaPythonDevice))
-        if self._deviceList == NULL:
-            return cudaErrorMemoryAllocation
-
-        for deviceOrdinal in range(self._numDevices):
-            err = initDevice(&self._deviceList[deviceOrdinal], deviceOrdinal)
-            if err != cudaSuccess:
-                free(self._deviceList)
-                return err
-
-        self._lazyInitDriver = True
-
-    cdef cudaError_t lazyInitContextState(self) except ?cudaErrorCallRequiresNewerDriver nogil:
-        cdef cudaError_t err = cudaSuccess
-        cdef cydriver.CUcontext driverContext
-        cdef cudaPythonDevice *device
-
-        err = self.lazyInitDriver()
-        if err != cudaSuccess:
-            return err
-
-        err = <cudaError_t>cydriver._cuCtxGetCurrent(&driverContext)
-        if err != cudaSuccess:
-            return err
-        device = self.getDeviceFromPrimaryCtx(driverContext)
-
-        # 1. Context + device
-        if driverContext != NULL and device != NULL:
-            err = initPrimaryContext(device)
-            if err != cudaSuccess:
-                return err
-
-        # 2. Context + no device
-        cdef unsigned int version
-        if driverContext != NULL:
-            # If the context exists, but is non-primary, make sure it can be used with the CUDA 3.2 API,
-            # then return immediately
-            err = <cudaError_t>cydriver._cuCtxGetApiVersion(driverContext, &version)
-            if err == cudaErrorContextIsDestroyed:
-                return cudaErrorIncompatibleDriverContext
-            elif err != cudaSuccess:
-                return err
-            elif version < 3020:
-                return cudaErrorIncompatibleDriverContext
-            return cudaSuccess
-
-        # 3. No context + device
-        # (impossible)
-
-        # 4. No context + no device
-        # Default to first device
-        device = self.getDevice(0)
-        err = initPrimaryContext(device)
-        if err != cudaSuccess:
-            return err
-        err = <cudaError_t> cydriver._cuCtxSetCurrent(device.primaryContext)
-        return err
-
-    cdef cudaPythonDevice* getDevice(self, int deviceOrdinal) noexcept nogil:
-        if deviceOrdinal < 0 or deviceOrdinal >= m_global._numDevices:
-            return NULL
-        return &self._deviceList[deviceOrdinal]
-
-    cdef cudaPythonDevice* getDeviceFromDriver(self, cydriver.CUdevice driverDevice) noexcept nogil:
-        for i in range(self._numDevices):
-            if self._deviceList[i].driverDevice == driverDevice:
-                return &self._deviceList[i]
-        return NULL
-
-    cdef cudaPythonDevice* getDeviceFromPrimaryCtx(self, cydriver.CUcontext context) noexcept nogil:
-        if context == NULL:
-            return NULL
-        for i in range(self._numDevices):
-            if self._deviceList[i].primaryContext == context:
-                return &self._deviceList[i]
-        return NULL
-
-cdef cudaPythonGlobal m_global = cudaPythonGlobal()
-
-
-cdef cudaError_t initDevice(cudaPythonDevice *device, int deviceOrdinal) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Populate ``device[0]`` for ``deviceOrdinal``: reset the primary-context
-    # bookkeeping, fetch the driver device handle, then fill
-    # ``deviceProperties`` one driver query at a time.
-    #
-    # Returns cudaSuccess when every query succeeds. The first driver call
-    # that does not return CUDA_SUCCESS aborts the function with
-    # cudaErrorInitializationError; fields written before that point keep the
-    # values already stored in them.
-    #
-    # NOTE(review): apart from _cuDeviceGet, every query passes the ordinal
-    # cast to CUdevice rather than device[0].driverDevice - presumably the two
-    # are interchangeable; confirm against the driver API.
-
-    # --- Primary-context bookkeeping and identity ---------------------------
-    # cydriver.CUcontext primaryContext
-    device[0].primaryContext = NULL
-    # bool primaryContextRetained
-    device[0].primaryContextRetained = False
-    # int deviceOrdinal
-    device[0].deviceOrdinal = deviceOrdinal
-
-    # cydriver.CUdevice driverDevice
-    err = cydriver._cuDeviceGet(&device[0].driverDevice, deviceOrdinal)
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    # --- cudaDeviceProp deviceProperties: name and total memory -------------
-    err = cydriver._cuDeviceGetName(device[0].deviceProperties.name, sizeof(device[0].deviceProperties.name), <cydriver.CUdevice>deviceOrdinal)
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    # Queried once; a second identical call would only rewrite the same field.
-    err = cydriver._cuDeviceTotalMem_v2(&(device[0].deviceProperties.totalGlobalMem), <cydriver.CUdevice>deviceOrdinal)
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    # --- Compute capability and general device attributes -------------------
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.major), cydriver.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.minor), cydriver.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.deviceOverlap), cydriver.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.asyncEngineCount), cydriver.CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.multiProcessorCount), cydriver.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.kernelExecTimeoutEnabled), cydriver.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.integrated), cydriver.CU_DEVICE_ATTRIBUTE_INTEGRATED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.canMapHostMemory), cydriver.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    # --- Texture limits ------------------------------------------------------
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture1D), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture1DMipmap), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture1DLinear), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2D[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2D[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2DMipmap[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2DMipmap[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2DLinear[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2DLinear[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2DLinear[2]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2DGather[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2DGather[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture3D[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture3D[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture3D[2]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture3DAlt[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture3DAlt[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture3DAlt[2]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTextureCubemap), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture1DLayered[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture1DLayered[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2DLayered[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2DLayered[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTexture2DLayered[2]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTextureCubemapLayered[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxTextureCubemapLayered[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    # --- Surface limits ------------------------------------------------------
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurface1D), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurface2D[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurface2D[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurface3D[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurface3D[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurface3D[2]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurface1DLayered[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurface1DLayered[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurface2DLayered[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurface2DLayered[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurface2DLayered[2]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurfaceCubemap), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurfaceCubemapLayered[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxSurfaceCubemapLayered[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    # --- Execution, PCI, addressing, clock and cache attributes --------------
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.concurrentKernels), cydriver.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.ECCEnabled), cydriver.CU_DEVICE_ATTRIBUTE_ECC_ENABLED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.pciBusID), cydriver.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.pciDeviceID), cydriver.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.pciDomainID), cydriver.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.tccDriver), cydriver.CU_DEVICE_ATTRIBUTE_TCC_DRIVER, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.unifiedAddressing), cydriver.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.memoryClockRate), cydriver.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.memoryBusWidth), cydriver.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.l2CacheSize), cydriver.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.persistingL2CacheMaxSize), cydriver.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxThreadsPerMultiProcessor), cydriver.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    # --- Alignment, shared memory, registers and launch limits ---------------
-    # Several values below are read into a local int and then assigned to the
-    # property field - presumably because those fields are not declared as
-    # int; confirm against the cudaDeviceProp declaration.
-    cdef int surfaceAlignment
-    err = cydriver._cuDeviceGetAttribute(&(surfaceAlignment), cydriver.CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-    device[0].deviceProperties.surfaceAlignment = surfaceAlignment
-
-    cdef int texturePitchAlignment
-    err = cydriver._cuDeviceGetAttribute(&texturePitchAlignment, cydriver.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-    device[0].deviceProperties.texturePitchAlignment = texturePitchAlignment
-
-    cdef int sharedMemPerBlock
-    err = cydriver._cuDeviceGetAttribute(&sharedMemPerBlock, cydriver.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-    device[0].deviceProperties.sharedMemPerBlock = sharedMemPerBlock
-
-    cdef int sharedMemPerBlockOptin
-    err = cydriver._cuDeviceGetAttribute(&sharedMemPerBlockOptin, cydriver.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-    device[0].deviceProperties.sharedMemPerBlockOptin = sharedMemPerBlockOptin
-
-    cdef int sharedMemPerMultiprocessor
-    err = cydriver._cuDeviceGetAttribute(&sharedMemPerMultiprocessor, cydriver.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-    device[0].deviceProperties.sharedMemPerMultiprocessor = sharedMemPerMultiprocessor
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.regsPerBlock), cydriver.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.regsPerMultiprocessor), cydriver.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.warpSize), cydriver.CU_DEVICE_ATTRIBUTE_WARP_SIZE, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    cdef int memPitch
-    err = cydriver._cuDeviceGetAttribute(&memPitch, cydriver.CU_DEVICE_ATTRIBUTE_MAX_PITCH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-    device[0].deviceProperties.memPitch = memPitch
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxThreadsPerBlock), cydriver.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxThreadsDim[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxThreadsDim[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxThreadsDim[2]), cydriver.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxGridSize[0]), cydriver.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxGridSize[1]), cydriver.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxGridSize[2]), cydriver.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    cdef int totalConstMem
-    err = cydriver._cuDeviceGetAttribute(&totalConstMem, cydriver.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-    device[0].deviceProperties.totalConstMem = totalConstMem
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.clockRate), cydriver.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    cdef int textureAlignment
-    err = cydriver._cuDeviceGetAttribute(&textureAlignment, cydriver.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-    device[0].deviceProperties.textureAlignment = textureAlignment
-
-    # --- Feature-support flags ----------------------------------------------
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.streamPrioritiesSupported), cydriver.CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.globalL1CacheSupported), cydriver.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.localL1CacheSupported), cydriver.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.managedMemory), cydriver.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.isMultiGpuBoard), cydriver.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.multiGpuBoardGroupID), cydriver.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.hostNativeAtomicSupported), cydriver.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.singleToDoublePrecisionPerfRatio), cydriver.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.pageableMemoryAccess), cydriver.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.concurrentManagedAccess), cydriver.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.computePreemptionSupported), cydriver.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.canUseHostPointerForRegisteredMem), cydriver.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.cooperativeLaunch), cydriver.CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.cooperativeMultiDeviceLaunch), cydriver.CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.pageableMemoryAccessUsesHostPageTables), cydriver.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.directManagedMemAccessFromHost), cydriver.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    # --- UUID and remaining attributes --------------------------------------
-    err = cydriver._cuDeviceGetUuid(<cydriver.CUuuid_st*>(&(device[0].deviceProperties.uuid)), <cydriver.CUdevice>deviceOrdinal)
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.maxBlocksPerMultiProcessor), cydriver.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.accessPolicyMaxWindowSize), cydriver.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.hostRegisterSupported), cydriver.CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.sparseCudaArraySupported), cydriver.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.hostRegisterReadOnlySupported), cydriver.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.timelineSemaphoreInteropSupported), cydriver.CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.memoryPoolsSupported), cydriver.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.gpuDirectRDMASupported), cydriver.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    cdef int gpuDirectRDMAFlushWritesOptions
-    err = cydriver._cuDeviceGetAttribute(&gpuDirectRDMAFlushWritesOptions, cydriver.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-    device[0].deviceProperties.gpuDirectRDMAFlushWritesOptions = gpuDirectRDMAFlushWritesOptions
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.gpuDirectRDMAWritesOrdering), cydriver.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    cdef int memoryPoolSupportedHandleTypes
-    err = cydriver._cuDeviceGetAttribute(&memoryPoolSupportedHandleTypes, cydriver.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-    device[0].deviceProperties.memoryPoolSupportedHandleTypes = memoryPoolSupportedHandleTypes
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.deferredMappingCudaArraySupported), cydriver.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.ipcEventSupported), cydriver.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    err = cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.clusterLaunch), cydriver.CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-
-    cdef int reservedSharedMemPerBlock
-    err = cydriver._cuDeviceGetAttribute(&reservedSharedMemPerBlock, cydriver.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK, <cydriver.CUdevice>(deviceOrdinal))
-    if err != cydriver.cudaError_enum.CUDA_SUCCESS:
-        return cudaErrorInitializationError
-    device[0].deviceProperties.reservedSharedMemPerBlock = reservedSharedMemPerBlock
-
-    return cudaSuccess
-
-
-cdef cudaError_t initPrimaryContext(cudaPythonDevice *device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-
-    # If we have context retained we need to check if it is not reset
-    cdef unsigned int version
-    if device[0].primaryContextRetained:
-        err = <cudaError_t>cydriver._cuCtxGetApiVersion(device[0].primaryContext, &version)
-        if err == cudaErrorDeviceUninitialized:
-            err = <cudaError_t>cydriver.cuDevicePrimaryCtxRelease(device[0].driverDevice)
-            if err != cudaSuccess:
-                return err
-            device[0].primaryContextRetained = False
-        elif err != cudaSuccess:
-            return err
-
-    # If we don't or it is invalid we need to recreate it
-    if not device[0].primaryContextRetained:
-        err = <cudaError_t>cydriver._cuDevicePrimaryCtxRetain(&device[0].primaryContext, device[0].driverDevice)
-        if err != cudaSuccess:
-            return err
-        device[0].primaryContextRetained = True
-    return err
-
-cdef cudaError_t resetPrimaryContext(cudaPythonDevice* device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef unsigned int version
-
-    err = <cudaError_t>cydriver._cuCtxGetApiVersion(device[0].primaryContext, &version)
-    if err == cudaSuccess:
-        if not device[0].primaryContextRetained:
-            err = <cudaError_t>cydriver._cuDevicePrimaryCtxRetain(&device[0].primaryContext, device[0].driverDevice)
-            if err != cudaSuccess:
-                return err
-            device[0].primaryContextRetained = True
-        cydriver._cuDevicePrimaryCtxReset_v2(device[0].driverDevice)
-        return cudaSuccess
-    elif err == cudaErrorDeviceUninitialized:
-        return cudaSuccess
-    else:
-        return err
-
-
-cdef cudaPythonGlobal globalGetInstance():
-    return m_global
-
-
-cdef cudaError_t _setLastError(cudaError_t err) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if err != cudaSuccess:
-        m_global._lastError = err
-
-
-cdef int case_desc(const cudaChannelFormatDesc* d, int x, int y, int z, int w, int f) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return d[0].x == x and d[0].y == y and d[0].z == z and d[0].w == w and d[0].f == f
-
-
-cdef cudaError_t getDescInfo(const cudaChannelFormatDesc* d, int *numberOfChannels, cydriver.CUarray_format *format) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Check validity
-    if d[0].f in (cudaChannelFormatKind.cudaChannelFormatKindSigned,
-                  cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        if (d[0].x != 8) and (d[0].x != 16) and (d[0].x != 32):
-            return cudaErrorInvalidChannelDescriptor
-    elif d[0].f in (cudaChannelFormatKind.cudaChannelFormatKindFloat,):
-        if (d[0].x != 16) and (d[0].x != 32):
-            return cudaErrorInvalidChannelDescriptor
-    elif d[0].f in (cudaChannelFormatKind.cudaChannelFormatKindNV12,):
-        if (d[0].x != 8) or (d[0].y != 8) or (d[0].z != 8) or (d[0].w != 0):
-            return cudaErrorInvalidChannelDescriptor
-    elif d[0].f in (cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X1,
-                    cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X2,
-                    cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X4,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X1,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X2,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X4,):
-        if (d[0].x != 8):
-            return cudaErrorInvalidChannelDescriptor
-    elif d[0].f in (cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X1,
-                    cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X2,
-                    cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X4,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X1,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X2,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X4,):
-        if (d[0].x != 16):
-            return cudaErrorInvalidChannelDescriptor
-    elif d[0].f in (cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1SRGB,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2SRGB,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3SRGB,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed4,
-                    cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed4,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed5,
-                    cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed5,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7,
-                    cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7SRGB,):
-        if (d[0].x != 8):
-            return cudaErrorInvalidChannelDescriptor
-    elif d[0].f in (cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed6H,
-                    cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed6H,):
-        if (d[0].x != 16) or (d[0].y != 16) or (d[0].z != 16) or (d[0].w != 0):
-            return cudaErrorInvalidChannelDescriptor
-    else:
-        return cudaErrorInvalidChannelDescriptor
-
-    # If Y is non-zero, it must match X
-    # If Z is non-zero, it must match Y
-    # If W is non-zero, it must match Z
-    if (((d[0].y != 0) and (d[0].y != d[0].x)) or
-        ((d[0].z != 0) and (d[0].z != d[0].y)) or
-        ((d[0].w != 0) and (d[0].w != d[0].z))):
-        return cudaErrorInvalidChannelDescriptor
-    if case_desc(d, 8, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 1
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT8
-    elif case_desc(d, 8, 8, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 2
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT8
-    elif case_desc(d, 8, 8, 8, 0, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 3
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT8
-    elif case_desc(d, 8, 8, 8, 8, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT8
-    elif case_desc(d, 8, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 1
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT8
-    elif case_desc(d, 8, 8, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 2
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT8
-    elif case_desc(d, 8, 8, 8, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 3
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT8
-    elif case_desc(d, 8, 8, 8, 8, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT8
-    elif case_desc(d, 16, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 1
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT16
-    elif case_desc(d, 16, 16, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 2
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT16
-    elif case_desc(d, 16, 16, 16, 0, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 3
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT16
-    elif case_desc(d, 16, 16, 16, 16, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT16
-    elif case_desc(d, 16, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 1
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT16
-    elif case_desc(d, 16, 16, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 2
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT16
-    elif case_desc(d, 16, 16, 16, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 3
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT16
-    elif case_desc(d, 16, 16, 16, 16, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT16
-    elif case_desc(d, 32, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 1
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT32
-    elif case_desc(d, 32, 32, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 2
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT32
-    elif case_desc(d, 32, 32, 32, 0, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 3
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT32
-    elif case_desc(d, 32, 32, 32, 32, cudaChannelFormatKind.cudaChannelFormatKindSigned):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT32
-    elif case_desc(d, 32, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 1
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT32
-    elif case_desc(d, 32, 32, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 2
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT32
-    elif case_desc(d, 32, 32, 32, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 3
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT32
-    elif case_desc(d, 32, 32, 32, 32, cudaChannelFormatKind.cudaChannelFormatKindUnsigned):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT32
-    elif case_desc(d, 16, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat):
-        numberOfChannels[0] = 1
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_HALF
-    elif case_desc(d, 16, 16, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat):
-        numberOfChannels[0] = 2
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_HALF
-    elif case_desc(d, 16, 16, 16, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat):
-        numberOfChannels[0] = 3
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_HALF
-    elif case_desc(d, 16, 16, 16, 16, cudaChannelFormatKind.cudaChannelFormatKindFloat):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_HALF
-    elif case_desc(d, 32, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat):
-        numberOfChannels[0] = 1
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_FLOAT
-    elif case_desc(d, 32, 32, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat):
-        numberOfChannels[0] = 2
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_FLOAT
-    elif case_desc(d, 32, 32, 32, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat):
-        numberOfChannels[0] = 3
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_FLOAT
-    elif case_desc(d, 32, 32, 32, 32, cudaChannelFormatKind.cudaChannelFormatKindFloat):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_FLOAT
-    elif case_desc(d, 8, 8, 8, 0, cudaChannelFormatKind.cudaChannelFormatKindNV12):
-        numberOfChannels[0] = 3
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_NV12
-    elif case_desc(d, 8, 8, 8, 8, cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC1_UNORM
-    elif case_desc(d, 8, 8, 8, 8, cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1SRGB):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC1_UNORM_SRGB
-    elif case_desc(d, 8, 8, 8, 8, cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC2_UNORM
-    elif case_desc(d, 8, 8, 8, 8, cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2SRGB):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC2_UNORM_SRGB
-    elif case_desc(d, 8, 8, 8, 8, cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC3_UNORM
-    elif case_desc(d, 8, 8, 8, 8, cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3SRGB):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC3_UNORM_SRGB
-    elif case_desc(d, 8, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed4):
-        numberOfChannels[0] = 1
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC4_UNORM
-    elif case_desc(d, 8, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed4):
-        numberOfChannels[0] = 1
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC4_SNORM
-    elif case_desc(d, 8, 8, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed5):
-        numberOfChannels[0] = 2
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC5_UNORM
-    elif case_desc(d, 8, 8, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed5):
-        numberOfChannels[0] = 2
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC5_SNORM
-    elif case_desc(d, 16, 16, 16, 0, cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed6H):
-        numberOfChannels[0] = 3
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC6H_UF16
-    elif case_desc(d, 16, 16, 16, 0, cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed6H):
-        numberOfChannels[0] = 3
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC6H_SF16
-    elif case_desc(d, 8, 8, 8, 8, cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC7_UNORM
-    elif case_desc(d, 8, 8, 8, 8, cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7SRGB):
-        numberOfChannels[0] = 4
-        format[0] = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC7_UNORM_SRGB
-    else:
-        return cudaErrorInvalidChannelDescriptor
-
-    if d[0].f in (cudaChannelFormatKind.cudaChannelFormatKindNV12,
-                  cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed6H,
-                  cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed6H,):
-        if numberOfChannels[0] != 3:
-            return cudaErrorInvalidChannelDescriptor
-    else:
-        if (numberOfChannels[0] != 1) and (numberOfChannels[0] != 2) and (numberOfChannels[0] != 4):
-            return cudaErrorInvalidChannelDescriptor
-    return cudaSuccess
-
-
-@cython.show_performance_hints(False)
-cdef void cudaStreamRtCallbackWrapper(cydriver.CUstream stream, cydriver.CUresult status, void *data) nogil:
-    cdef cudaStreamCallbackData *cbData = <cudaStreamCallbackData *>data
-    cdef cudaError_t err = <cudaError_t>status
-    with gil:
-        cbData.callback(stream, err, cbData.userData)
-    free(cbData)
-
-
-cdef cudaError_t streamAddCallbackCommon(
-  cudaStream_t stream,
-  cudaStreamCallback_t callback,
-  void *userData,
-  unsigned int flags
-) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if callback == NULL:
-        return cudaErrorInvalidValue
-
-    cdef cudaStreamCallbackData *cbData = NULL
-    cdef cudaError_t err = cudaSuccess
-    cbData = <cudaStreamCallbackData *>malloc(sizeof(cbData[0]))
-
-    if cbData == NULL:
-        return cudaErrorMemoryAllocation
-
-    cbData.callback = callback
-    cbData.userData = userData
-    err = <cudaError_t>cydriver._cuStreamAddCallback(stream, <cydriver.CUstreamCallback>cudaStreamRtCallbackWrapper, <void *>cbData, flags)
-    if err != cudaSuccess:
-        free(cbData)
-    return err
-
-
-@cython.show_performance_hints(False)
-cdef void cudaStreamRtHostCallbackWrapper(void *data) nogil:
-    cdef cudaStreamHostCallbackData *cbData = <cudaStreamHostCallbackData *>data
-    with gil:
-        cbData.callback(cbData.userData)
-    free(cbData)
-
-
-cdef cudaError_t streamAddHostCallbackCommon(
-  cudaStream_t stream,
-  cudaHostFn_t callback,
-  void *userData
-) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if callback == NULL:
-        return cudaErrorInvalidValue
-
-    cdef cudaStreamHostCallbackData *cbData = NULL
-    cdef cudaError_t err = cudaSuccess
-    cbData = <cudaStreamHostCallbackData *>malloc(sizeof(cbData[0]))
-
-    if cbData == NULL:
-        return cudaErrorMemoryAllocation
-
-    cbData.callback = callback
-    cbData.userData = userData
-    err = <cudaError_t>cydriver._cuLaunchHostFunc(<cydriver.CUstream>stream, <cydriver.CUhostFn>cudaStreamRtHostCallbackWrapper, <void *>cbData)
-    if err != cudaSuccess:
-        free(cbData)
-    return err
-
-
-cdef cudaError_t toRuntimeStreamCaptureStatus(cydriver.CUstreamCaptureStatus driverCaptureStatus, cudaStreamCaptureStatus *runtimeStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if driverCaptureStatus == cydriver.CUstreamCaptureStatus_enum.CU_STREAM_CAPTURE_STATUS_NONE:
-        runtimeStatus[0] = cudaStreamCaptureStatus.cudaStreamCaptureStatusNone
-    elif driverCaptureStatus == cydriver.CUstreamCaptureStatus_enum.CU_STREAM_CAPTURE_STATUS_ACTIVE:
-        runtimeStatus[0] = cudaStreamCaptureStatus.cudaStreamCaptureStatusActive
-    elif driverCaptureStatus == cydriver.CUstreamCaptureStatus_enum.CU_STREAM_CAPTURE_STATUS_INVALIDATED:
-        runtimeStatus[0] = cudaStreamCaptureStatus.cudaStreamCaptureStatusInvalidated
-    else:
-         return cudaErrorUnknown
-    return cudaSuccess
-
-
-cdef cudaError_t streamGetCaptureInfoCommon(
-        cudaStream_t stream,
-        cudaStreamCaptureStatus* captureStatus_out,
-        unsigned long long *id_out,
-        cudaGraph_t *graph_out,
-        const cudaGraphNode_t **dependencies_out,
-        size_t *numDependencies_out)  except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-
-    if captureStatus_out == NULL:
-        return cudaErrorInvalidValue
-
-    cdef cydriver.CUstreamCaptureStatus driverCaptureStatus
-
-    err = <cudaError_t>cydriver._cuStreamGetCaptureInfo_v2(stream, &driverCaptureStatus, <cydriver.cuuint64_t*>id_out,
-            graph_out, dependencies_out, numDependencies_out)
-    if err != cudaSuccess:
-        return err
-
-    return toRuntimeStreamCaptureStatus(driverCaptureStatus, captureStatus_out)
-
-
-cdef cudaError_t streamGetCaptureInfoCommon_v3(
-        cudaStream_t stream,
-        cudaStreamCaptureStatus* captureStatus_out,
-        unsigned long long *id_out,
-        cudaGraph_t *graph_out,
-        const cudaGraphNode_t **dependencies_out,
-        const cudaGraphEdgeData** edgeData_out,
-        size_t *numDependencies_out)  except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-
-    if captureStatus_out == NULL:
-        return cudaErrorInvalidValue
-
-    cdef cydriver.CUstreamCaptureStatus driverCaptureStatus
-
-    err = <cudaError_t>cydriver._cuStreamGetCaptureInfo_v3(stream, &driverCaptureStatus, <cydriver.cuuint64_t*>id_out,
-            graph_out, dependencies_out, <const cydriver.CUgraphEdgeData**>edgeData_out, numDependencies_out)
-    if err != cudaSuccess:
-        return err
-
-    return toRuntimeStreamCaptureStatus(driverCaptureStatus, captureStatus_out)
-
-
-cdef cydriver.CUDA_MEMCPY3D_v2 memCopy3DInit(cydriver.CUmemorytype_enum dstType, cydriver.CUmemorytype_enum srcType) noexcept nogil:
-    cdef cydriver.CUDA_MEMCPY3D_v2 cp
-    memset(&cp, 0, sizeof(cp))
-    cp.dstMemoryType = dstType
-    cp.srcMemoryType = srcType
-    cp.WidthInBytes = 0
-    cp.Height = 1
-    cp.Depth = 1
-    return cp
-
-
-cdef cydriver.CUDA_MEMCPY2D_v2 memCopy2DInit(cydriver.CUmemorytype_enum dstType, cydriver.CUmemorytype_enum srcType) noexcept nogil:
-    cdef cydriver.CUDA_MEMCPY2D_v2 cp
-    memset(&cp, 0, sizeof(cp))
-    cp.dstMemoryType = dstType
-    cp.srcMemoryType = srcType
-    cp.WidthInBytes = 0
-    cp.Height = 1
-    return cp
-
-
-cdef cudaError_t bytesPerElement(size_t *bytes, int numberOfChannels, cydriver.CUarray_format format) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if format in (cydriver.CU_AD_FORMAT_FLOAT,
-                  cydriver.CU_AD_FORMAT_UNSIGNED_INT32,
-                  cydriver.CU_AD_FORMAT_SIGNED_INT32):
-        bytes[0] = numberOfChannels * 4
-        return cudaSuccess
-    elif format in (cydriver.CU_AD_FORMAT_HALF,
-                    cydriver.CU_AD_FORMAT_SIGNED_INT16,
-                    cydriver.CU_AD_FORMAT_UNSIGNED_INT16):
-        bytes[0] = numberOfChannels * 2
-        return cudaSuccess
-    elif format in (cydriver.CU_AD_FORMAT_SIGNED_INT8,
-                    cydriver.CU_AD_FORMAT_UNSIGNED_INT8,
-                    cydriver.CU_AD_FORMAT_NV12):
-        bytes[0] = numberOfChannels
-        return cudaSuccess
-    elif format in (cydriver.CU_AD_FORMAT_SNORM_INT8X1,
-                    cydriver.CU_AD_FORMAT_UNORM_INT8X1):
-        bytes[0] = 1
-        return cudaSuccess
-    elif format in (cydriver.CU_AD_FORMAT_SNORM_INT8X2,
-                    cydriver.CU_AD_FORMAT_UNORM_INT8X2,
-                    cydriver.CU_AD_FORMAT_SNORM_INT16X1,
-                    cydriver.CU_AD_FORMAT_UNORM_INT16X1):
-        bytes[0] = 2
-        return cudaSuccess
-    elif format in (cydriver.CU_AD_FORMAT_SNORM_INT8X4,
-                    cydriver.CU_AD_FORMAT_UNORM_INT8X4,
-                    cydriver.CU_AD_FORMAT_SNORM_INT16X2,
-                    cydriver.CU_AD_FORMAT_UNORM_INT16X2):
-        bytes[0] = 4
-        return cudaSuccess
-    elif format in (cydriver.CU_AD_FORMAT_SNORM_INT16X4,
-                    cydriver.CU_AD_FORMAT_UNORM_INT16X4):
-        bytes[0] = 8
-        return cudaSuccess
-    elif format in (cydriver.CU_AD_FORMAT_BC2_UNORM,
-                    cydriver.CU_AD_FORMAT_BC2_UNORM_SRGB,
-                    cydriver.CU_AD_FORMAT_BC3_UNORM,
-                    cydriver.CU_AD_FORMAT_BC3_UNORM_SRGB,
-                    cydriver.CU_AD_FORMAT_BC5_UNORM,
-                    cydriver.CU_AD_FORMAT_BC5_SNORM,
-                    cydriver.CU_AD_FORMAT_BC6H_UF16,
-                    cydriver.CU_AD_FORMAT_BC6H_SF16,
-                    cydriver.CU_AD_FORMAT_BC7_UNORM,
-                    cydriver.CU_AD_FORMAT_BC7_UNORM_SRGB):
-        bytes[0] = 16
-        return cudaSuccess
-    return cudaErrorInvalidChannelDescriptor
-
-
-cdef cudaError_t getChannelFormatDescFromDriverDesc(
-    cudaChannelFormatDesc* pRuntimeDesc, size_t* pDepth, size_t* pHeight, size_t* pWidth,
-    const cydriver.CUDA_ARRAY3D_DESCRIPTOR_v2* pDriverDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-
-    cdef int channel_size = 0
-    if pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_UNSIGNED_INT8:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_UNSIGNED_INT16:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-        channel_size = 16
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_UNSIGNED_INT32:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-        channel_size = 32
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_SIGNED_INT8:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSigned
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_SIGNED_INT16:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSigned
-        channel_size = 16
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_SIGNED_INT32:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSigned
-        channel_size = 32
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_HALF:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindFloat
-        channel_size = 16
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_FLOAT:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindFloat
-        channel_size = 32
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_NV12:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindNV12
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_UNORM_INT8X1:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X1
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_UNORM_INT8X2:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X2
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_UNORM_INT8X4:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X4
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_SNORM_INT8X1:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X1
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_SNORM_INT8X2:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X2
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_SNORM_INT8X4:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X4
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_UNORM_INT16X1:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X1
-        channel_size = 16
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_UNORM_INT16X2:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X2
-        channel_size = 16
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_UNORM_INT16X4:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X4
-        channel_size = 16
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_SNORM_INT16X1:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X1
-        channel_size = 16
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_SNORM_INT16X2:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X2
-        channel_size = 16
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_SNORM_INT16X4:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X4
-        channel_size = 16
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC1_UNORM:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC1_UNORM_SRGB:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1SRGB
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC2_UNORM:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC2_UNORM_SRGB:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2SRGB
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC3_UNORM:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC3_UNORM_SRGB:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3SRGB
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC4_UNORM:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed4
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC4_SNORM:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed4
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC5_UNORM:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed5
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC5_SNORM:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed5
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC6H_UF16:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed6H
-        channel_size = 16
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC6H_SF16:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed6H
-        channel_size = 16
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC7_UNORM:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7
-        channel_size = 8
-    elif pDriverDesc[0].Format == cydriver.CU_AD_FORMAT_BC7_UNORM_SRGB:
-        pRuntimeDesc[0].f = cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7SRGB
-        channel_size = 8
-    else:
-        return cudaErrorInvalidChannelDescriptor
-
-    # populate bits per channel
-    pRuntimeDesc[0].x = 0
-    pRuntimeDesc[0].y = 0
-    pRuntimeDesc[0].z = 0
-    pRuntimeDesc[0].w = 0
-
-    if pDriverDesc[0].NumChannels >= 4:
-        pRuntimeDesc[0].w = channel_size
-    if pDriverDesc[0].NumChannels >= 3:
-        pRuntimeDesc[0].z = channel_size
-    if pDriverDesc[0].NumChannels >= 2:
-        pRuntimeDesc[0].y = channel_size
-    if pDriverDesc[0].NumChannels >= 1:
-        pRuntimeDesc[0].x = channel_size
-
-    if pDriverDesc[0].NumChannels not in (4, 3, 2, 1):
-        return cudaErrorInvalidChannelDescriptor
-
-    # populate dimensions
-    if pDepth != NULL:
-        pDepth[0]  = pDriverDesc[0].Depth
-    if pHeight != NULL:
-        pHeight[0] = pDriverDesc[0].Height
-    if pWidth != NULL:
-        pWidth[0]  = pDriverDesc[0].Width
-    return cudaSuccess
-
-
-cdef cudaError_t getArrayBlockExtent(cudaExtent *blockExtent, cydriver.CUarray_format format) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if format in (cydriver.CU_AD_FORMAT_FLOAT,
-                  cydriver.CU_AD_FORMAT_UNSIGNED_INT32,
-                  cydriver.CU_AD_FORMAT_SIGNED_INT32,
-                  cydriver.CU_AD_FORMAT_HALF,
-                  cydriver.CU_AD_FORMAT_SIGNED_INT16,
-                  cydriver.CU_AD_FORMAT_UNSIGNED_INT16,
-                  cydriver.CU_AD_FORMAT_SIGNED_INT8,
-                  cydriver.CU_AD_FORMAT_UNSIGNED_INT8,
-                  cydriver.CU_AD_FORMAT_NV12,
-                  cydriver.CU_AD_FORMAT_SNORM_INT8X1,
-                  cydriver.CU_AD_FORMAT_UNORM_INT8X1,
-                  cydriver.CU_AD_FORMAT_SNORM_INT8X2,
-                  cydriver.CU_AD_FORMAT_UNORM_INT8X2,
-                  cydriver.CU_AD_FORMAT_SNORM_INT16X1,
-                  cydriver.CU_AD_FORMAT_UNORM_INT16X1,
-                  cydriver.CU_AD_FORMAT_SNORM_INT8X4,
-                  cydriver.CU_AD_FORMAT_UNORM_INT8X4,
-                  cydriver.CU_AD_FORMAT_SNORM_INT16X2,
-                  cydriver.CU_AD_FORMAT_UNORM_INT16X2,
-                  cydriver.CU_AD_FORMAT_SNORM_INT16X4,
-                  cydriver.CU_AD_FORMAT_UNORM_INT16X4):
-        blockExtent[0].width = 1
-        blockExtent[0].height = 1
-        blockExtent[0].depth = 1
-    elif format in (cydriver.CU_AD_FORMAT_BC1_UNORM,
-                    cydriver.CU_AD_FORMAT_BC1_UNORM_SRGB,
-                    cydriver.CU_AD_FORMAT_BC4_UNORM,
-                    cydriver.CU_AD_FORMAT_BC4_SNORM,
-                    cydriver.CU_AD_FORMAT_BC2_UNORM,
-                    cydriver.CU_AD_FORMAT_BC2_UNORM_SRGB,
-                    cydriver.CU_AD_FORMAT_BC3_UNORM,
-                    cydriver.CU_AD_FORMAT_BC3_UNORM_SRGB,
-                    cydriver.CU_AD_FORMAT_BC5_UNORM,
-                    cydriver.CU_AD_FORMAT_BC5_SNORM,
-                    cydriver.CU_AD_FORMAT_BC6H_UF16,
-                    cydriver.CU_AD_FORMAT_BC6H_SF16,
-                    cydriver.CU_AD_FORMAT_BC7_UNORM,
-                    cydriver.CU_AD_FORMAT_BC7_UNORM_SRGB):
-        blockExtent[0].width = 4
-        blockExtent[0].height = 4
-        blockExtent[0].depth = 1
-    else:
-        return cudaErrorInvalidChannelDescriptor
-    return cudaSuccess
-
-
-cdef cudaError_t getLocalState(cudaArrayLocalState *state, cudaArray_const_t thisArray) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaArrayLocalState arrayState
-    memset(&arrayState, 0, sizeof(arrayState))
-    arrayState.array = <cydriver.CUarray>thisArray
-
-    cdef cudaExtent compBlockExtent
-    compBlockExtent.width = 1
-    compBlockExtent.height = 1
-    compBlockExtent.depth = 1
-    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR_v2 driverDesc
-    memset(&driverDesc, 0, sizeof(driverDesc))
-    err = <cudaError_t>cydriver._cuArray3DGetDescriptor_v2(&driverDesc, <cydriver.CUarray>arrayState.array)
-    if err != cudaSuccess:
-        return err
-    err = getChannelFormatDescFromDriverDesc(&arrayState.desc, &arrayState.depth, &arrayState.height, &arrayState.width, &driverDesc)
-    if err != cudaSuccess:
-        return err
-    err = bytesPerElement(&arrayState.elementSize, driverDesc.NumChannels, driverDesc.Format)
-    if err != cudaSuccess:
-        return err
-    err = getArrayBlockExtent(&compBlockExtent, driverDesc.Format)
-    if err != cudaSuccess:
-        return err
-    arrayState.widthInBytes = <size_t>((arrayState.width + compBlockExtent.width - 1) / compBlockExtent.width) * arrayState.elementSize
-
-    state[0] = arrayState
-    return cudaSuccess
-
-
-cdef cudaError_t copyFromHost2D(cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, const char *src, size_t spitch, size_t width, size_t height, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cudaArrayLocalState arrayState
-    memset(&arrayState, 0, sizeof(arrayState))
-    err = getLocalState(&arrayState, thisArray)
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_MEMCPY3D_v2 cp = memCopy3DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST)
-
-    cp.dstArray      = arrayState.array
-    cp.dstXInBytes   = wOffset
-    cp.dstY          = hOffset
-
-    cp.srcHost       = src
-    cp.srcPitch      = spitch
-    cp.srcXInBytes   = 0
-    cp.srcY          = 0
-
-    cp.Height        = height
-    cp.WidthInBytes  = width
-
-    err = driverMemcpy3D(&cp, stream, async)
-    return err
-
-
-cdef cudaError_t copyFromDevice2D(cydriver.CUmemorytype type, cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, const char *src, size_t srcOffset,
-        size_t spitch, size_t width, size_t height, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cudaArrayLocalState arrayState
-    memset(&arrayState, 0, sizeof(arrayState))
-    err = getLocalState(&arrayState, thisArray)
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_MEMCPY3D_v2 cp = memCopy3DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY, type)
-
-    cp.dstArray      = arrayState.array
-    cp.dstXInBytes   = wOffset
-    cp.dstY          = hOffset
-
-    cp.srcDevice     = <cydriver.CUdeviceptr_v2>src
-    cp.srcPitch      = spitch
-    cp.srcXInBytes   = srcOffset % spitch
-    cp.srcY          = <size_t>(srcOffset / spitch)
-
-    cp.Height        = height
-    cp.WidthInBytes  = width
-
-    err = driverMemcpy3D(&cp, stream, async)
-    if err != cudaSuccess:
-        return err
-
-    return cudaSuccess
-
-
-cdef cudaError_t copyToHost2D(cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, char *dst, size_t dpitch, size_t width,
-        size_t height, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaArrayLocalState arrayState
-    cdef cudaError_t err = cudaSuccess
-    memset(&arrayState, 0, sizeof(arrayState))
-    err = getLocalState(&arrayState, thisArray)
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_MEMCPY3D_v2 cp = memCopy3DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY)
-
-    cp.dstHost       = dst
-    cp.dstPitch      = dpitch
-    cp.dstXInBytes   = 0
-    cp.dstY          = 0
-
-    cp.srcArray      = arrayState.array
-    cp.srcXInBytes   = wOffset
-    cp.srcY          = hOffset
-
-    cp.Height        = height
-    cp.WidthInBytes  = width
-
-    err = driverMemcpy3D(&cp, stream, async)
-    if err != cudaSuccess:
-        return err
-
-    return cudaSuccess
-
-
-cdef cudaError_t copyToDevice2D(cydriver.CUmemorytype type, cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, const char *dst, size_t dstOffset, size_t dpitch,
-        size_t width, size_t height, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-
-    cdef cudaArrayLocalState arrayState
-    cdef cudaError_t err = cudaSuccess
-    memset(&arrayState, 0, sizeof(arrayState))
-    err = getLocalState(&arrayState, thisArray)
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_MEMCPY3D_v2 cp = memCopy3DInit(type, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY)
-
-    cp.dstDevice     = <cydriver.CUdeviceptr_v2>dst
-    cp.dstPitch      = dpitch
-    cp.dstXInBytes   = dstOffset % dpitch
-    cp.dstY          = <size_t>(dstOffset / dpitch)
-
-    cp.srcArray      = arrayState.array
-    cp.srcXInBytes   = wOffset
-    cp.srcY          = hOffset
-
-    cp.Height        = height
-    cp.WidthInBytes  = width
-
-    err = driverMemcpy3D(&cp, stream, async)
-    if err != cudaSuccess:
-        return err
-
-    return cudaSuccess
-
-
-cdef cudaError_t copyToArray2D(cudaArray_const_t thisArray, size_t hOffsetSrc, size_t wOffsetSrc, cudaArray_t dst,
-        size_t hOffsetDst, size_t wOffsetDst, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaArrayLocalState arrayState
-    cdef cudaError_t err = cudaSuccess
-    memset(&arrayState, 0, sizeof(arrayState))
-    err = getLocalState(&arrayState, thisArray)
-    if err != cudaSuccess:
-        return err
-    cdef cydriver.CUDA_MEMCPY3D_v2 cp = memCopy3DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY)
-
-    cp.dstArray      = <cydriver.CUarray>dst
-    cp.dstXInBytes   = wOffsetDst
-    cp.dstY          = hOffsetDst
-
-    cp.srcArray      = arrayState.array
-    cp.srcXInBytes   = wOffsetSrc
-    cp.srcY          = hOffsetSrc
-
-    cp.Height        = height
-    cp.WidthInBytes  = width
-
-    err = driverMemcpy3D(&cp, NULL, False)
-    if err != cudaSuccess:
-        return err
-
-    return cudaSuccess
-
-
-cdef cudaError_t copyToArray(cudaArray_const_t thisArray, size_t hOffsetSrc, size_t wOffsetSrc, cudaArray_t dst, size_t hOffsetDst,
-        size_t wOffsetDst, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef void *tmp
-    cdef cudaError_t err = cudaSuccess
-    err = cudaMalloc(&tmp, count)
-    if err != cudaSuccess:
-        return err
-
-    err = cudaMemcpyFromArray(tmp, thisArray, wOffsetSrc, hOffsetSrc, count, cudaMemcpyDeviceToDevice)
-    if err != cudaSuccess:
-        return err
-    err = cudaMemcpyToArray(dst, wOffsetDst, hOffsetDst, tmp, count, cudaMemcpyDeviceToDevice)
-    if err != cudaSuccess:
-        return err
-    err = cudaFree(tmp)
-    if err != cudaSuccess:
-        return err
-    return cudaSuccess
-
-
-cdef cudaError_t memcpyArrayToArray(cudaArray_t dst, size_t hOffsetDst, size_t wOffsetDst,
-                                    cudaArray_const_t src, size_t hOffsetSrc, size_t wOffsetSrc,
-                                    size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if count == 0:
-        return cudaSuccess
-    if kind != cudaMemcpyDeviceToDevice and kind != cudaMemcpyDefault:
-        return cudaErrorInvalidMemcpyDirection
-    return copyToArray(src, hOffsetSrc, wOffsetSrc, dst, hOffsetDst, wOffsetDst, count)
-
-
-cdef cudaError_t getChannelDesc(cudaArray_const_t thisArray, cudaChannelFormatDesc *outDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaArrayLocalState arrayState
-    cdef cudaError_t err = cudaSuccess
-    memset(&arrayState, 0, sizeof(arrayState))
-    err = getLocalState(&arrayState, thisArray)  
-    if err != cudaSuccess:
-        return err
-    outDesc[0] = arrayState.desc
-    return cudaSuccess
-
-
-cdef cudaError_t getFormat(cudaArray_const_t thisArray, int &numberOfChannels, cydriver.CUarray_format *format) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaArrayLocalState arrayState
-    cdef cudaError_t err = cudaSuccess
-    memset(&arrayState, 0, sizeof(arrayState))
-    err = getLocalState(&arrayState, thisArray)
-    if err != cudaSuccess:
-        return err
-    return getDescInfo(&arrayState.desc, <int*>&numberOfChannels, <cydriver.CUarray_format*>format)
-
-
-cdef cudaError_t getDriverResDescFromResDesc(cydriver.CUDA_RESOURCE_DESC *rdDst, const cudaResourceDesc *rdSrc,
-                                             cydriver.CUDA_TEXTURE_DESC *tdDst, const cudaTextureDesc *tdSrc,
-                                             cydriver.CUDA_RESOURCE_VIEW_DESC *rvdDst, const cudaResourceViewDesc *rvdSrc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef int i = 0
-    cdef int numChannels = 0
-    cdef cydriver.CUarray_format format
-    cdef cydriver.CUarray hArray = NULL
-    cdef cudaError_t err = cudaSuccess
-    i = 0
-
-    memset(rdDst, 0, sizeof(rdDst[0]))
-
-    if rdSrc[0].resType == cudaResourceType.cudaResourceTypeArray:
-        rdDst[0].resType          = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_ARRAY
-        rdDst[0].res.array.hArray = <cydriver.CUarray>rdSrc[0].res.array.array
-        err = getFormat(rdSrc[0].res.array.array, numChannels, &format)
-        if err != cudaSuccess:
-            return err
-    elif rdSrc[0].resType == cudaResourceType.cudaResourceTypeMipmappedArray:
-        rdDst[0].resType                    = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
-        rdDst[0].res.mipmap.hMipmappedArray = <cydriver.CUmipmappedArray>rdSrc[0].res.mipmap.mipmap
-        err = <cudaError_t>cydriver._cuMipmappedArrayGetLevel(&hArray, rdDst[0].res.mipmap.hMipmappedArray, 0)
-        if err != cudaSuccess:
-            return err
-        err = getFormat(<cudaArray_t>hArray, numChannels, &format)
-        if err != cudaSuccess:
-            return err
-    elif rdSrc[0].resType == cudaResourceType.cudaResourceTypeLinear:
-        rdDst[0].resType                = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_LINEAR
-        rdDst[0].res.linear.devPtr      = <cydriver.CUdeviceptr_v2>rdSrc[0].res.linear.devPtr
-        rdDst[0].res.linear.sizeInBytes = rdSrc[0].res.linear.sizeInBytes
-        err = getDescInfo(&rdSrc[0].res.linear.desc, <int*>&numChannels, <cydriver.CUarray_format*>&format)
-        if err != cudaSuccess:
-            return err
-        rdDst[0].res.linear.format      = format
-        rdDst[0].res.linear.numChannels = numChannels
-    elif rdSrc[0].resType == cudaResourceType.cudaResourceTypePitch2D:
-        rdDst[0].resType                  = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_PITCH2D
-        rdDst[0].res.pitch2D.devPtr       = <cydriver.CUdeviceptr_v2>rdSrc[0].res.pitch2D.devPtr
-        rdDst[0].res.pitch2D.pitchInBytes = rdSrc[0].res.pitch2D.pitchInBytes
-        rdDst[0].res.pitch2D.width        = rdSrc[0].res.pitch2D.width
-        rdDst[0].res.pitch2D.height       = rdSrc[0].res.pitch2D.height
-        err = getDescInfo(&rdSrc[0].res.linear.desc, <int*>&numChannels, <cydriver.CUarray_format*>&format)
-        if err != cudaSuccess:
-            return err
-        rdDst[0].res.pitch2D.format       = format
-        rdDst[0].res.pitch2D.numChannels  = numChannels
-    else:
-        return cudaErrorInvalidValue
-
-
-    rdDst[0].flags = 0
-
-    if tdDst and tdSrc:
-        memset(tdDst, 0, sizeof(tdDst[0]))
-
-        while (i < 3):
-            tdDst[0].addressMode[i] = <cydriver.CUaddress_mode>tdSrc[0].addressMode[i]
-            i += 1
-
-        tdDst[0].filterMode          = <cydriver.CUfilter_mode>tdSrc[0].filterMode
-        tdDst[0].mipmapFilterMode    = <cydriver.CUfilter_mode>tdSrc[0].mipmapFilterMode
-        tdDst[0].mipmapLevelBias     = tdSrc[0].mipmapLevelBias
-        tdDst[0].minMipmapLevelClamp = tdSrc[0].minMipmapLevelClamp
-        tdDst[0].maxMipmapLevelClamp = tdSrc[0].maxMipmapLevelClamp
-        tdDst[0].maxAnisotropy       = tdSrc[0].maxAnisotropy
-        i = 0
-        while (i < 4):
-            tdDst[0].borderColor[i] = tdSrc[0].borderColor[i]
-            i += 1
-
-        if tdSrc[0].sRGB:
-            tdDst[0].flags |= cydriver.CU_TRSF_SRGB
-        else:
-            tdDst[0].flags |= 0
-
-        if tdSrc[0].normalizedCoords:
-            tdDst[0].flags |= cydriver.CU_TRSF_NORMALIZED_COORDINATES
-        else:
-            tdDst[0].flags |= 0
-
-        if tdSrc[0].disableTrilinearOptimization:
-            tdDst[0].flags |= cydriver.CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION
-        else:
-            tdDst[0].flags |= 0
-
-        if tdSrc[0].seamlessCubemap:
-            tdDst[0].flags |= cydriver.CU_TRSF_SEAMLESS_CUBEMAP
-        else:
-            tdDst[0].flags |= 0
-
-        if format in (cydriver.CU_AD_FORMAT_SNORM_INT8X1,
-                      cydriver.CU_AD_FORMAT_SNORM_INT8X2,
-                      cydriver.CU_AD_FORMAT_SNORM_INT8X4,
-                      cydriver.CU_AD_FORMAT_UNORM_INT8X1,
-                      cydriver.CU_AD_FORMAT_UNORM_INT8X2,
-                      cydriver.CU_AD_FORMAT_UNORM_INT8X4,
-                      cydriver.CU_AD_FORMAT_SNORM_INT16X1,
-                      cydriver.CU_AD_FORMAT_SNORM_INT16X2,
-                      cydriver.CU_AD_FORMAT_SNORM_INT16X4,
-                      cydriver.CU_AD_FORMAT_UNORM_INT16X1,
-                      cydriver.CU_AD_FORMAT_UNORM_INT16X2,
-                      cydriver.CU_AD_FORMAT_UNORM_INT16X4,
-                      cydriver.CU_AD_FORMAT_BC1_UNORM,
-                      cydriver.CU_AD_FORMAT_BC1_UNORM_SRGB,
-                      cydriver.CU_AD_FORMAT_BC2_UNORM,
-                      cydriver.CU_AD_FORMAT_BC2_UNORM_SRGB,
-                      cydriver.CU_AD_FORMAT_BC3_UNORM,
-                      cydriver.CU_AD_FORMAT_BC3_UNORM_SRGB,
-                      cydriver.CU_AD_FORMAT_BC4_UNORM,
-                      cydriver.CU_AD_FORMAT_BC4_SNORM,
-                      cydriver.CU_AD_FORMAT_BC5_UNORM,
-                      cydriver.CU_AD_FORMAT_BC5_SNORM,
-                      cydriver.CU_AD_FORMAT_BC7_UNORM,
-                      cydriver.CU_AD_FORMAT_BC7_UNORM_SRGB):
-            if tdSrc[0].readMode != cudaTextureReadMode.cudaReadModeNormalizedFloat:
-                return cudaErrorInvalidNormSetting
-        elif format in (cydriver.CU_AD_FORMAT_SIGNED_INT8,
-                        cydriver.CU_AD_FORMAT_SIGNED_INT16,
-                        cydriver.CU_AD_FORMAT_UNSIGNED_INT8,
-                        cydriver.CU_AD_FORMAT_UNSIGNED_INT16):
-            if tdSrc[0].readMode == cudaReadModeElementType:
-                if tdSrc[0].filterMode == cudaTextureFilterMode.cudaFilterModeLinear:
-                    return cudaErrorInvalidFilterSetting
-                tdDst[0].flags |= cydriver.CU_TRSF_READ_AS_INTEGER
-        elif format == cydriver.CU_AD_FORMAT_NV12:
-            return cudaErrorInvalidValue
-        elif format == cydriver.CU_AD_FORMAT_SIGNED_INT32 or format == cydriver.CU_AD_FORMAT_UNSIGNED_INT32:
-            if tdSrc[0].filterMode == cudaTextureFilterMode.cudaFilterModeLinear:
-                return cudaErrorInvalidFilterSetting
-            if tdSrc[0].readMode == cudaTextureReadMode.cudaReadModeNormalizedFloat:
-                return cudaErrorInvalidNormSetting
-        else:
-            if tdSrc[0].readMode == cudaTextureReadMode.cudaReadModeNormalizedFloat:
-                return cudaErrorInvalidNormSetting
-
-    if rvdDst and rvdSrc:
-        memset(rvdDst, 0, sizeof(rvdDst[0]))
-
-        rvdDst[0].format           = <cydriver.CUresourceViewFormat>rvdSrc[0].format
-        rvdDst[0].width            = rvdSrc[0].width
-        rvdDst[0].height           = rvdSrc[0].height
-        rvdDst[0].depth            = rvdSrc[0].depth
-        rvdDst[0].firstMipmapLevel = rvdSrc[0].firstMipmapLevel
-        rvdDst[0].lastMipmapLevel  = rvdSrc[0].lastMipmapLevel
-        rvdDst[0].firstLayer       = rvdSrc[0].firstLayer
-        rvdDst[0].lastLayer        = rvdSrc[0].lastLayer
-
-    return cudaSuccess
-
-
-cdef cudaError_t getResDescFromDriverResDesc(cudaResourceDesc *rdDst, const cydriver.CUDA_RESOURCE_DESC *rdSrc,
-                                             cudaTextureDesc *tdDst, const cydriver.CUDA_TEXTURE_DESC *tdSrc,
-                                             cudaResourceViewDesc *rvdDst, const cydriver.CUDA_RESOURCE_VIEW_DESC *rvdSrc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef int i = 0
-    cdef int numChannels = 0
-    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR_v2 ad
-    cdef cydriver.CUarray hArray
-
-    memset(rdDst, 0, sizeof(rdDst[0]))
-    memset(&ad, 0, sizeof(ad))
-    memset(&hArray, 0, sizeof(hArray))
-
-    if rdSrc[0].resType == cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_ARRAY:
-        rdDst[0].resType         = cudaResourceType.cudaResourceTypeArray
-        rdDst[0].res.array.array = <cudaArray_t>rdSrc[0].res.array.hArray
-        err = getFormat(rdDst[0].res.array.array, numChannels, &ad.Format)
-        if err != cudaSuccess:
-            return err
-    elif rdSrc[0].resType == cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_MIPMAPPED_ARRAY:
-        rdDst[0].resType = cudaResourceType.cudaResourceTypeMipmappedArray
-        rdDst[0].res.mipmap.mipmap = <cudaMipmappedArray_t>rdSrc[0].res.mipmap.hMipmappedArray
-        err = <cudaError_t>cydriver._cuMipmappedArrayGetLevel(&hArray, rdSrc[0].res.mipmap.hMipmappedArray, 0)
-        if err != cudaSuccess:
-            return err
-        err = getFormat(<cudaArray_t>hArray, numChannels, &ad.Format)
-        if err != cudaSuccess:
-            return err
-    elif rdSrc[0].resType == cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_LINEAR:
-        rdDst[0].resType                = cudaResourceType.cudaResourceTypeLinear
-        rdDst[0].res.linear.devPtr      = <void *>rdSrc[0].res.linear.devPtr
-        rdDst[0].res.linear.sizeInBytes = rdSrc[0].res.linear.sizeInBytes
-        ad.Format      = rdSrc[0].res.linear.format
-        ad.NumChannels = rdSrc[0].res.linear.numChannels
-        err = getChannelFormatDescFromDriverDesc(&rdDst[0].res.linear.desc,
-                                                 NULL, NULL, NULL,
-                                                 &ad)
-        if err != cudaSuccess:
-            return err
-    elif rdSrc[0].resType == cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_PITCH2D:
-        rdDst[0].resType                  = cudaResourceType.cudaResourceTypePitch2D
-        rdDst[0].res.pitch2D.devPtr       = <void *>rdSrc[0].res.pitch2D.devPtr
-        rdDst[0].res.pitch2D.pitchInBytes = rdSrc[0].res.pitch2D.pitchInBytes
-        rdDst[0].res.pitch2D.width        = rdSrc[0].res.pitch2D.width
-        rdDst[0].res.pitch2D.height       = rdSrc[0].res.pitch2D.height
-        ad.Format      = rdSrc[0].res.linear.format
-        ad.NumChannels = rdSrc[0].res.linear.numChannels
-        err = getChannelFormatDescFromDriverDesc(&rdDst[0].res.linear.desc,
-                                                NULL, NULL, NULL,
-                                                &ad)
-        if err != cudaSuccess:
-            return err
-    else:
-        return cudaErrorInvalidValue
-
-    if tdDst and tdSrc:
-        memset(tdDst, 0, sizeof(tdDst[0]))
-        i = 0
-        while i < 3:
-            tdDst[0].addressMode[i] = <cudaTextureAddressMode>tdSrc[0].addressMode[i]
-            i += 1
-
-        tdDst[0].filterMode          = <cudaTextureFilterMode>tdSrc[0].filterMode
-        tdDst[0].mipmapFilterMode    = <cudaTextureFilterMode>tdSrc[0].mipmapFilterMode
-        tdDst[0].mipmapLevelBias     = tdSrc[0].mipmapLevelBias
-        tdDst[0].minMipmapLevelClamp = tdSrc[0].minMipmapLevelClamp
-        tdDst[0].maxMipmapLevelClamp = tdSrc[0].maxMipmapLevelClamp
-        tdDst[0].maxAnisotropy       = tdSrc[0].maxAnisotropy
-        i = 0
-        while i < 4:
-            tdDst[0].borderColor[i] = tdSrc[0].borderColor[i]
-            i += 1
-
-        if tdSrc[0].flags & cydriver.CU_TRSF_SRGB:
-            tdDst[0].sRGB                         = 1
-        else:
-            tdDst[0].sRGB                         = 0
-
-        if tdSrc[0].flags & cydriver.CU_TRSF_NORMALIZED_COORDINATES:
-            tdDst[0].normalizedCoords             = 1
-        else:
-            tdDst[0].normalizedCoords             = 0
-
-        if tdSrc[0].flags & cydriver.CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION:
-            tdDst[0].disableTrilinearOptimization = 1
-        else:
-            tdDst[0].disableTrilinearOptimization = 0
-
-        if tdSrc[0].flags & cydriver.CU_TRSF_SEAMLESS_CUBEMAP:
-            tdDst[0].seamlessCubemap |= 1
-        else:
-            tdDst[0].seamlessCubemap |= 0
-
-        if ad.Format in (cydriver.CU_AD_FORMAT_SNORM_INT8X1,
-                         cydriver.CU_AD_FORMAT_SNORM_INT8X2,
-                         cydriver.CU_AD_FORMAT_SNORM_INT8X4,
-                         cydriver.CU_AD_FORMAT_UNORM_INT8X1,
-                         cydriver.CU_AD_FORMAT_UNORM_INT8X2,
-                         cydriver.CU_AD_FORMAT_UNORM_INT8X4,
-                         cydriver.CU_AD_FORMAT_SNORM_INT16X1,
-                         cydriver.CU_AD_FORMAT_SNORM_INT16X2,
-                         cydriver.CU_AD_FORMAT_SNORM_INT16X4,
-                         cydriver.CU_AD_FORMAT_UNORM_INT16X1,
-                         cydriver.CU_AD_FORMAT_UNORM_INT16X2,
-                         cydriver.CU_AD_FORMAT_UNORM_INT16X4,
-                         cydriver.CU_AD_FORMAT_BC1_UNORM,
-                         cydriver.CU_AD_FORMAT_BC1_UNORM_SRGB,
-                         cydriver.CU_AD_FORMAT_BC2_UNORM,
-                         cydriver.CU_AD_FORMAT_BC2_UNORM_SRGB,
-                         cydriver.CU_AD_FORMAT_BC3_UNORM,
-                         cydriver.CU_AD_FORMAT_BC3_UNORM_SRGB,
-                         cydriver.CU_AD_FORMAT_BC4_UNORM,
-                         cydriver.CU_AD_FORMAT_BC4_SNORM,
-                         cydriver.CU_AD_FORMAT_BC5_UNORM,
-                         cydriver.CU_AD_FORMAT_BC5_SNORM,
-                         cydriver.CU_AD_FORMAT_BC7_UNORM,
-                         cydriver.CU_AD_FORMAT_BC7_UNORM_SRGB):
-            tdDst[0].readMode = cudaTextureReadMode.cudaReadModeNormalizedFloat
-        elif ad.Format in (cydriver.CU_AD_FORMAT_SIGNED_INT8,
-                           cydriver.CU_AD_FORMAT_SIGNED_INT16,
-                           cydriver.CU_AD_FORMAT_UNSIGNED_INT8,
-                           cydriver.CU_AD_FORMAT_UNSIGNED_INT16):
-            with gil:
-                if (tdSrc[0].flags & cydriver.CU_TRSF_READ_AS_INTEGER):
-                    tdDst[0].readMode = cudaTextureReadMode.cudaReadModeElementType
-                else:
-                    tdDst[0].readMode = cudaTextureReadMode.cudaReadModeNormalizedFloat
-        else:
-            tdDst[0].readMode = cudaTextureReadMode.cudaReadModeElementType
-
-    if rvdDst and rvdSrc:
-        memset(rvdDst, 0, sizeof(rvdDst[0]))
-
-        rvdDst[0].format           = <cudaResourceViewFormat>rvdSrc[0].format
-        rvdDst[0].width            = rvdSrc[0].width
-        rvdDst[0].height           = rvdSrc[0].height
-        rvdDst[0].depth            = rvdSrc[0].depth
-        rvdDst[0].firstMipmapLevel = rvdSrc[0].firstMipmapLevel
-        rvdDst[0].lastMipmapLevel  = rvdSrc[0].lastMipmapLevel
-        rvdDst[0].firstLayer       = rvdSrc[0].firstLayer
-        rvdDst[0].lastLayer        = rvdSrc[0].lastLayer
-
-    return cudaSuccess
-
-
-cdef cudaError_t memsetPtr(char *mem, int c, size_t count, cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if count == 0:
-        return cudaSuccess
-
-    if not async:
-        return <cudaError_t>cydriver._cuMemsetD8_v2(<cydriver.CUdeviceptr_v2>mem, <unsigned char>c, count)
-    else:
-        return <cudaError_t>cydriver._cuMemsetD8Async(<cydriver.CUdeviceptr_v2>mem, <unsigned char>c, count, sid)
-
-
-cdef cudaError_t memset2DPtr(char *mem, size_t pitch, int c, size_t width, size_t height, cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if width == 0 or height == 0:
-        return cudaSuccess
-
-    if not async:
-        return <cudaError_t>cydriver._cuMemsetD2D8_v2(<cydriver.CUdeviceptr_v2>mem, pitch, <unsigned char>c, width, height)
-    else:
-        return <cudaError_t>cydriver._cuMemsetD2D8Async(<cydriver.CUdeviceptr_v2>mem, pitch, <unsigned char>c, width, height, sid)
-
-
-cdef cudaError_t copyFromHost(cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, const char *src, size_t count, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaArrayLocalState arrayState
-    cdef cudaError_t err = cudaSuccess
-    memset(&arrayState, 0, sizeof(arrayState))
-    err = getLocalState(&arrayState, thisArray)
-    if err != cudaSuccess:
-        return err
-    cdef size_t copied = 0
-    cdef cydriver.CUDA_MEMCPY3D_v2 cp = memCopy3DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST)
-
-    if (wOffset > 0) and (count >= arrayState.widthInBytes - wOffset):
-        cp.dstArray      = arrayState.array
-        cp.dstXInBytes   = wOffset
-        cp.dstY          = hOffset
-
-        cp.srcHost       = src
-        cp.srcPitch      = arrayState.widthInBytes
-        cp.srcXInBytes   = 0
-        cp.srcY          = 0
-
-        cp.Height        = 1
-        cp.WidthInBytes  = arrayState.widthInBytes - wOffset
-
-        copied  += cp.Height * cp.WidthInBytes
-        hOffset += cp.Height
-        wOffset  = 0
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    if (count - copied >= arrayState.widthInBytes):
-        cp.dstArray      = arrayState.array
-        cp.dstXInBytes   = wOffset
-        cp.dstY          = hOffset
-
-        cp.srcHost       = src + copied
-        cp.srcPitch      = arrayState.widthInBytes
-        cp.srcXInBytes   = 0
-        cp.srcY          = 0
-
-        cp.Height        = <size_t>((count - copied) / arrayState.widthInBytes)
-        cp.WidthInBytes  = arrayState.widthInBytes
-
-        copied  += cp.Height * cp.WidthInBytes
-        hOffset += cp.Height
-        wOffset  = 0
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    if (count - copied > 0):
-        cp.dstArray      = arrayState.array
-        cp.dstXInBytes   = wOffset
-        cp.dstY          = hOffset
-
-        cp.srcHost       = src + copied
-        cp.srcPitch      = arrayState.widthInBytes
-        cp.srcXInBytes   = 0
-        cp.srcY          = 0
-
-        cp.Height        = 1
-        cp.WidthInBytes  = count - copied
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    return cudaSuccess
-
-
-cdef cudaError_t copyFromDevice(cydriver.CUmemorytype type, cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, const char *src, size_t srcOffset, size_t count, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Copies `count` bytes from the linear buffer `src` into the CUDA array
-    # `thisArray`, starting at row `hOffset` and byte column `wOffset`.
-    # `type` is forwarded to memCopy3DInit together with CU_MEMORYTYPE_ARRAY.
-    #
-    # Up to three driver copies are issued, in this order:
-    #   1. the remainder of the first row, when the write starts mid-row and
-    #      `count` reaches at least the end of that row;
-    #   2. as many full-width rows as the remaining byte count covers;
-    #   3. whatever is still left, as one partial row.
-    # The first non-success status is returned immediately; otherwise
-    # cudaSuccess is returned.
-    cdef cudaArrayLocalState arrayState
-    cdef cudaError_t err = cudaSuccess
-    memset(&arrayState, 0, sizeof(arrayState))
-    err = getLocalState(&arrayState, thisArray)
-    if err != cudaSuccess:
-        return err
-    cdef size_t copied = 0
-    cdef cydriver.CUDA_MEMCPY3D_v2 cp = memCopy3DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY, type)
-
-    # Step 1: complete the partially written first row.
-    if wOffset > 0 and count >= arrayState.widthInBytes - wOffset:
-        cp.srcDevice     = <cydriver.CUdeviceptr_v2>src
-        cp.srcXInBytes   = srcOffset
-        cp.srcY          = 0
-        cp.srcPitch      = arrayState.widthInBytes
-
-        cp.dstArray      = arrayState.array
-        cp.dstY          = hOffset
-        cp.dstXInBytes   = wOffset
-
-        cp.WidthInBytes  = arrayState.widthInBytes - wOffset
-        cp.Height        = 1
-
-        # One row was consumed; continue from column 0 of the next row.
-        copied  += cp.WidthInBytes
-        hOffset += 1
-        wOffset  = 0
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    # Step 2: bulk copy of every complete row that still fits in `count`.
-    if count - copied >= arrayState.widthInBytes:
-        cp.srcDevice     = <cydriver.CUdeviceptr_v2>(src + copied)
-        cp.srcXInBytes   = srcOffset
-        cp.srcY          = 0
-        cp.srcPitch      = arrayState.widthInBytes
-
-        cp.dstArray      = arrayState.array
-        cp.dstY          = hOffset
-        cp.dstXInBytes   = wOffset
-
-        cp.WidthInBytes  = arrayState.widthInBytes
-        cp.Height        = <size_t>((count - copied) / arrayState.widthInBytes)
-
-        copied  += cp.Height * cp.WidthInBytes
-        hOffset += cp.Height
-        wOffset  = 0
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    # Step 3: trailing bytes that do not fill a whole row.
-    if count - copied > 0:
-        cp.srcDevice     = <cydriver.CUdeviceptr_v2>(src + copied)
-        cp.srcXInBytes   = srcOffset
-        cp.srcY          = 0
-        cp.srcPitch      = arrayState.widthInBytes
-
-        cp.dstArray      = arrayState.array
-        cp.dstY          = hOffset
-        cp.dstXInBytes   = wOffset
-
-        cp.WidthInBytes  = count - copied
-        cp.Height        = 1
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    return cudaSuccess
-
-
-cdef cudaError_t copyToHost(cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, char *dst, size_t count, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Reads `count` bytes out of the CUDA array `thisArray`, starting at row
-    # `hOffset` and byte column `wOffset`, into the host buffer `dst`.
-    #
-    # Up to three driver copies are issued, in this order:
-    #   1. the remainder of the first row, when the read starts mid-row and
-    #      `count` reaches at least the end of that row;
-    #   2. as many full-width rows as the remaining byte count covers;
-    #   3. whatever is still left, as one partial row.
-    # The first non-success status is returned immediately; otherwise
-    # cudaSuccess is returned.
-    cdef cudaArrayLocalState arrayState
-    cdef cudaError_t err = cudaSuccess
-    memset(&arrayState, 0, sizeof(arrayState))
-    err = getLocalState(&arrayState, thisArray)
-    if err != cudaSuccess:
-        return err
-    cdef size_t copied = 0
-    cdef cydriver.CUDA_MEMCPY3D_v2 cp = memCopy3DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY)
-
-    # Step 1: drain the rest of the first, partially consumed row.
-    if wOffset > 0 and count >= arrayState.widthInBytes - wOffset:
-        cp.srcArray      = arrayState.array
-        cp.srcY          = hOffset
-        cp.srcXInBytes   = wOffset
-
-        cp.dstHost       = dst
-        cp.dstXInBytes   = 0
-        cp.dstY          = 0
-        cp.dstPitch      = arrayState.widthInBytes
-
-        cp.WidthInBytes  = arrayState.widthInBytes - wOffset
-        cp.Height        = 1
-
-        # One row was consumed; continue from column 0 of the next row.
-        copied  += cp.WidthInBytes
-        hOffset += 1
-        wOffset  = 0
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    # Step 2: bulk copy of every complete row that still fits in `count`.
-    if count - copied >= arrayState.widthInBytes:
-        cp.srcArray      = arrayState.array
-        cp.srcY          = hOffset
-        cp.srcXInBytes   = wOffset
-
-        cp.dstHost       = dst + copied
-        cp.dstXInBytes   = 0
-        cp.dstY          = 0
-        cp.dstPitch      = arrayState.widthInBytes
-
-        cp.WidthInBytes  = arrayState.widthInBytes
-        cp.Height        = <size_t>((count - copied) / arrayState.widthInBytes)
-
-        copied  += cp.Height * cp.WidthInBytes
-        hOffset += cp.Height
-        wOffset  = 0
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    # Step 3: trailing bytes that do not fill a whole row.
-    if count - copied > 0:
-        cp.srcArray      = arrayState.array
-        cp.srcY          = hOffset
-        cp.srcXInBytes   = wOffset
-
-        cp.dstHost       = dst + copied
-        cp.dstXInBytes   = 0
-        cp.dstY          = 0
-        cp.dstPitch      = arrayState.widthInBytes
-
-        cp.WidthInBytes  = count - copied
-        cp.Height        = 1
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    return cudaSuccess
-
-
-cdef cudaError_t driverMemcpy3DPeer(cydriver.CUDA_MEMCPY3D_PEER *cp, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Issues the peer 3D copy described by `cp`. The blocking driver entry
-    # point is used unless `async` is set, in which case the copy is queued
-    # on `stream`. The driver status is returned cast to cudaError_t.
-    if not async:
-        return <cudaError_t>cydriver._cuMemcpy3DPeer(cp)
-    return <cudaError_t>cydriver._cuMemcpy3DPeerAsync(cp, stream)
-
-
-cdef cudaError_t driverMemcpy3D(cydriver.CUDA_MEMCPY3D_v2 *cp, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Issues the 3D copy described by `cp`. The blocking driver entry point
-    # is used unless `async` is set, in which case the copy is queued on
-    # `stream`. The driver status is returned cast to cudaError_t.
-    if not async:
-        return <cudaError_t>cydriver._cuMemcpy3D_v2(cp)
-    return <cudaError_t>cydriver._cuMemcpy3DAsync_v2(cp, stream)
-
-
-cdef cudaError_t memcpy3D(const cudaMemcpy3DParms *p, bool peer, int srcDevice, int dstDevice, cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Converts the runtime 3D copy description `p` to its driver form and
-    # executes it. When `peer` is set, the converted description is copied
-    # into a peer descriptor that additionally carries the primary contexts
-    # of `srcDevice` and `dstDevice`; cudaErrorInvalidDevice is returned if
-    # either device lookup yields NULL.
-    cdef cydriver.CUDA_MEMCPY3D_v2 cd
-    cdef cydriver.CUDA_MEMCPY3D_PEER cdPeer
-    cdef cudaPythonDevice *srcDev
-    cdef cudaPythonDevice *dstDev
-
-    memset(&cdPeer, 0, sizeof(cdPeer))
-
-    cdef cudaError_t err = toDriverMemCopy3DParams(p, &cd)
-    if err != cudaSuccess:
-        return err
-
-    # Same-context copy: the converted descriptor is used as is.
-    if not peer:
-        return driverMemcpy3D(&cd, sid, async)
-
-    # Peer copy: both devices must resolve before anything is issued.
-    srcDev = m_global.getDevice(srcDevice)
-    dstDev = m_global.getDevice(dstDevice)
-    if srcDev == NULL or dstDev == NULL:
-        return cudaErrorInvalidDevice
-
-    # Source side.
-    cdPeer.srcMemoryType = cd.srcMemoryType
-    cdPeer.srcHost = cd.srcHost
-    cdPeer.srcDevice = cd.srcDevice
-    cdPeer.srcArray = cd.srcArray
-    cdPeer.srcContext = srcDev.primaryContext
-    cdPeer.srcXInBytes = cd.srcXInBytes
-    cdPeer.srcY = cd.srcY
-    cdPeer.srcZ = cd.srcZ
-    cdPeer.srcLOD = cd.srcLOD
-    cdPeer.srcPitch = cd.srcPitch
-    cdPeer.srcHeight = cd.srcHeight
-
-    # Destination side.
-    cdPeer.dstMemoryType = cd.dstMemoryType
-    cdPeer.dstHost = cd.dstHost
-    cdPeer.dstDevice = cd.dstDevice
-    cdPeer.dstArray = cd.dstArray
-    cdPeer.dstContext = dstDev.primaryContext
-    cdPeer.dstXInBytes = cd.dstXInBytes
-    cdPeer.dstY = cd.dstY
-    cdPeer.dstZ = cd.dstZ
-    cdPeer.dstLOD = cd.dstLOD
-    cdPeer.dstPitch = cd.dstPitch
-    cdPeer.dstHeight = cd.dstHeight
-
-    # Extent of the transfer.
-    cdPeer.WidthInBytes = cd.WidthInBytes
-    cdPeer.Height = cd.Height
-    cdPeer.Depth = cd.Depth
-
-    return driverMemcpy3DPeer(&cdPeer, sid, async)
-
-
-cdef cudaError_t copyToDevice(cydriver.CUmemorytype type, cudaArray_const_t thisArray, size_t hOffset, size_t wOffset, const char *dst, size_t dstOffset, size_t count, cydriver.CUstream stream, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Reads `count` bytes out of the CUDA array `thisArray`, starting at row
-    # `hOffset` and byte column `wOffset`, into the linear buffer `dst`.
-    # `type` is forwarded to memCopy3DInit together with CU_MEMORYTYPE_ARRAY.
-    #
-    # Up to three driver copies are issued, in this order:
-    #   1. the remainder of the first row, when the read starts mid-row and
-    #      `count` reaches at least the end of that row;
-    #   2. as many full-width rows as the remaining byte count covers;
-    #   3. whatever is still left, as one partial row.
-    # The first non-success status is returned immediately; otherwise
-    # cudaSuccess is returned.
-    cdef cudaArrayLocalState arrayState
-    cdef cudaError_t err = cudaSuccess
-    memset(&arrayState, 0, sizeof(arrayState))
-    err = getLocalState(&arrayState, thisArray)
-    if err != cudaSuccess:
-        return err
-    cdef size_t copied = 0
-    cdef cydriver.CUDA_MEMCPY3D_v2 cp = memCopy3DInit(type, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY)
-
-    # Step 1: drain the rest of the first, partially consumed row.
-    if wOffset > 0 and count >= arrayState.widthInBytes - wOffset:
-        cp.srcArray      = arrayState.array
-        cp.srcY          = hOffset
-        cp.srcXInBytes   = wOffset
-
-        cp.dstDevice     = <cydriver.CUdeviceptr_v2>dst
-        cp.dstXInBytes   = dstOffset
-        cp.dstY          = 0
-        cp.dstPitch      = arrayState.widthInBytes
-
-        cp.WidthInBytes  = arrayState.widthInBytes - wOffset
-        cp.Height        = 1
-
-        # One row was consumed; continue from column 0 of the next row.
-        copied  += cp.WidthInBytes
-        hOffset += 1
-        wOffset  = 0
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    # Step 2: bulk copy of every complete row that still fits in `count`.
-    if count - copied >= arrayState.widthInBytes:
-        cp.srcArray      = arrayState.array
-        cp.srcY          = hOffset
-        cp.srcXInBytes   = wOffset
-
-        cp.dstDevice     = <cydriver.CUdeviceptr_v2>(dst + copied)
-        cp.dstXInBytes   = dstOffset
-        cp.dstY          = 0
-        cp.dstPitch      = arrayState.widthInBytes
-
-        cp.WidthInBytes  = arrayState.widthInBytes
-        cp.Height        = <size_t>((count - copied) / arrayState.widthInBytes)
-
-        copied  += cp.Height * cp.WidthInBytes
-        hOffset += cp.Height
-        wOffset  = 0
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    # Step 3: trailing bytes that do not fill a whole row.
-    if count - copied > 0:
-        cp.srcArray      = arrayState.array
-        cp.srcY          = hOffset
-        cp.srcXInBytes   = wOffset
-
-        cp.dstDevice     = <cydriver.CUdeviceptr_v2>(dst + copied)
-        cp.dstXInBytes   = dstOffset
-        cp.dstY          = 0
-        cp.dstPitch      = arrayState.widthInBytes
-
-        cp.WidthInBytes  = count - copied
-        cp.Height        = 1
-
-        err = driverMemcpy3D(&cp, stream, async)
-        if err != cudaSuccess:
-            return err
-
-    return cudaSuccess
-
-
-cdef cudaError_t copy1DConvertTo3DParams(void* dst, const void* src, size_t count, cudaMemcpyKind kind, cudaMemcpy3DParms *p) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Describes a flat copy of `count` bytes from `src` to `dst` as a 3D copy:
-    # `p` is zeroed, then filled with an extent of `count` x 1 x 1, the two
-    # pointers and the copy `kind`. All other fields of `p` stay zero.
-    memset(p, 0, sizeof(cudaMemcpy3DParms))
-    p[0].extent.width = count
-    p[0].extent.height = 1
-    p[0].extent.depth = 1
-    p[0].dstPtr.ptr = dst
-    p[0].srcPtr.ptr = <void *>src
-    p[0].kind = kind
-    # The function is declared to return cudaError_t; report success
-    # explicitly instead of falling off the end of the body.
-    return cudaSuccess
-
-
-cdef void toDriverMemsetNodeParams(const cudaMemsetParams *pRuntimeParams, cydriver.CUDA_MEMSET_NODE_PARAMS *pDriverParams) noexcept nogil:
-    # Field-by-field copy of the runtime memset description into the driver
-    # structure; only `dst` needs a cast (to the driver device-pointer type).
-    pDriverParams.dst = <cydriver.CUdeviceptr_v2>pRuntimeParams.dst
-    pDriverParams.value = pRuntimeParams.value
-    pDriverParams.elementSize = pRuntimeParams.elementSize
-    pDriverParams.pitch = pRuntimeParams.pitch
-    pDriverParams.width = pRuntimeParams.width
-    pDriverParams.height = pRuntimeParams.height
-
-
-cdef cudaError_t getElementSize(size_t *elementSize, cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Writes the size in bytes of one element of `array` to `elementSize[0]`:
-    # the channel count from the array's driver descriptor multiplied by
-    # 4, 2 or 1 bytes depending on the descriptor format.
-    # Returns the driver status if the descriptor query fails, and
-    # cudaErrorInvalidChannelDescriptor for any format not listed below
-    # (in which case `elementSize[0]` is left untouched).
-    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR driverDesc
-    cdef cudaError_t err = <cudaError_t>cydriver._cuArray3DGetDescriptor_v2(&driverDesc, <cydriver.CUarray>array)
-    if err != cudaSuccess:
-        return err
-
-    if (driverDesc.Format == cydriver.CU_AD_FORMAT_FLOAT or
-        driverDesc.Format == cydriver.CU_AD_FORMAT_UNSIGNED_INT32 or
-        driverDesc.Format == cydriver.CU_AD_FORMAT_SIGNED_INT32):
-        # 32-bit channels
-        elementSize[0] = driverDesc.NumChannels * 4
-    elif (driverDesc.Format == cydriver.CU_AD_FORMAT_HALF or
-          driverDesc.Format == cydriver.CU_AD_FORMAT_SIGNED_INT16 or
-          driverDesc.Format == cydriver.CU_AD_FORMAT_UNSIGNED_INT16):
-        # 16-bit channels
-        elementSize[0] = driverDesc.NumChannels * 2
-    elif (driverDesc.Format == cydriver.CU_AD_FORMAT_SIGNED_INT8 or
-          driverDesc.Format == cydriver.CU_AD_FORMAT_UNSIGNED_INT8 or
-          driverDesc.Format == cydriver.CU_AD_FORMAT_NV12):
-        # one byte per channel
-        elementSize[0] = driverDesc.NumChannels
-    else:
-        return cudaErrorInvalidChannelDescriptor
-    return cudaSuccess
-
-
-cdef cudaError_t toDriverMemCopy3DParams(const cudaMemcpy3DParms *p, cydriver.CUDA_MEMCPY3D *cd) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Translates the runtime 3D copy description `p` into the driver
-    # structure `cd`, validating it along the way.
-    #
-    # Returns cudaSuccess when `cd` has been filled in, otherwise one of:
-    #   cudaErrorInvalidMemcpyDirection - `p.kind` is not a recognised kind
-    #   cudaErrorInvalidValue           - array/pointer misuse, or source and
-    #                                     destination arrays differ in element size
-    #   cudaErrorInvalidPitchValue      - extent exceeds the pitch / ysize of a
-    #                                     pointer operand
-    #   any error reported by getFormat, getArrayBlockExtent or getElementSize.
-
-    # Start from a zeroed descriptor with device-to-device defaults.
-    memset(cd, 0, sizeof(cydriver.CUDA_MEMCPY3D))
-    cd[0].dstMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE
-    cd[0].srcMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE
-    cd[0].WidthInBytes = 0
-    cd[0].Height = 1
-    cd[0].Depth = 1
-    cdef size_t srcElementSize = 0
-    cdef size_t dstElementSize = 0
-    cdef cudaError_t err = cudaSuccess
-
-    # Block extents default to 1x1x1 and are only replaced for array operands
-    # (via getArrayBlockExtent below).
-    cdef cudaExtent srcBlockExtent
-    cdef cudaExtent dstBlockExtent
-    cdef cudaExtent copyBlockExtent
-    cdef cydriver.CUarray_format srcFmt
-    cdef cydriver.CUarray_format dstFmt
-    cdef int numChannels = 0
-    srcBlockExtent.width = srcBlockExtent.height = srcBlockExtent.depth = 1
-    dstBlockExtent.width = dstBlockExtent.height = dstBlockExtent.depth = 1
-    copyBlockExtent.width = copyBlockExtent.height = copyBlockExtent.depth = 1
-
-    # An empty extent is accepted as-is: `cd` keeps the defaults set above and
-    # none of the validation below runs.
-    if p[0].extent.width == 0 or p[0].extent.height == 0 or p[0].extent.depth == 0:
-        return cudaSuccess
-
-    # Map the runtime copy kind onto a (source, destination) memory-type pair.
-    # Array operands override these further down.
-    if p[0].kind == cudaMemcpyHostToHost:
-        cd[0].srcMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST
-        cd[0].dstMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST
-    elif p[0].kind == cudaMemcpyHostToDevice:
-        cd[0].srcMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST
-        cd[0].dstMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE
-    elif p[0].kind == cudaMemcpyDeviceToHost:
-        cd[0].srcMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE
-        cd[0].dstMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST
-    elif p[0].kind == cudaMemcpyDeviceToDevice:
-        cd[0].srcMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE
-        cd[0].dstMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE
-    elif p[0].kind == cudaMemcpyDefault:
-        cd[0].srcMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED
-        cd[0].dstMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED
-    else:
-        return cudaErrorInvalidMemcpyDirection
-
-    # Block extent used for the copy: the source array's when the source is an
-    # array, otherwise the destination array's when the destination is one.
-    if p[0].srcArray:
-        err = getFormat(p[0].srcArray, numChannels, &srcFmt)
-        if err != cudaSuccess:
-            return err
-        err = getArrayBlockExtent(&srcBlockExtent, srcFmt)
-        if err != cudaSuccess:
-            return err
-        copyBlockExtent = srcBlockExtent
-    if p[0].dstArray:
-        err = getFormat(p[0].dstArray, numChannels, &dstFmt)
-        if err != cudaSuccess:
-            return err
-        err = getArrayBlockExtent(&dstBlockExtent, dstFmt)
-        if err != cudaSuccess:
-            return err
-        if not p[0].srcArray:
-            copyBlockExtent = dstBlockExtent
-
-    # Source operand: either an array or a pitched pointer, never both.
-    if p[0].srcArray:
-        # An array source may not come with a pointer, nor with a kind that
-        # declared the source to be host memory.
-        if NULL != p[0].srcPtr.ptr or cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST == cd[0].srcMemoryType:
-            return cudaErrorInvalidValue
-        cd[0].srcMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY
-        cd[0].srcArray = <cydriver.CUarray>p[0].srcArray
-        err = getElementSize(&srcElementSize, p[0].srcArray)
-        if err != cudaSuccess:
-            return err
-    else:
-        if NULL == p[0].srcPtr.ptr:
-            return cudaErrorInvalidValue
-        # With more than one row or slice, a row must fit inside the pitch.
-        if (p[0].extent.height > 1 or p[0].extent.depth > 1) and (p[0].extent.width > p[0].srcPtr.pitch):
-            return cudaErrorInvalidPitchValue
-        # With more than one slice, the rows must fit inside ysize (scaled by
-        # the copy block height).
-        if p[0].extent.depth > 1:
-            adjustedSrcHeight = p[0].srcPtr.ysize * copyBlockExtent.height
-            if p[0].extent.height > adjustedSrcHeight:
-                return cudaErrorInvalidPitchValue
-
-        if cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST == cd[0].srcMemoryType:
-            cd[0].srcHost = p[0].srcPtr.ptr
-        else:
-            cd[0].srcDevice = <cydriver.CUdeviceptr_v2>(p[0].srcPtr.ptr)
-        cd[0].srcPitch = p[0].srcPtr.pitch
-        cd[0].srcHeight = p[0].srcPtr.ysize
-
-    # Destination operand: same rules as the source, except that only the
-    # presence of a pointer is rejected for an array destination.
-    if p[0].dstArray:
-        if NULL != p[0].dstPtr.ptr:
-            return cudaErrorInvalidValue
-        cd[0].dstMemoryType = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY
-        cd[0].dstArray = <cydriver.CUarray>p[0].dstArray
-        err = getElementSize(&dstElementSize, p[0].dstArray)
-        if err != cudaSuccess:
-            return err
-    else:
-        if NULL == p[0].dstPtr.ptr:
-            return cudaErrorInvalidValue
-        if (p[0].extent.height > 1 or p[0].extent.depth > 1) and (p[0].extent.width > p[0].dstPtr.pitch):
-            return cudaErrorInvalidPitchValue
-        if p[0].extent.depth > 1:
-            adjustedDstHeight = p[0].dstPtr.ysize * copyBlockExtent.height
-            if p[0].extent.height > adjustedDstHeight:
-                return cudaErrorInvalidPitchValue
-
-        if cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST == cd[0].dstMemoryType:
-            cd[0].dstHost = p[0].dstPtr.ptr
-        else:
-            cd[0].dstDevice = <cydriver.CUdeviceptr_v2>(p[0].dstPtr.ptr)
-        cd[0].dstPitch = p[0].dstPtr.pitch
-        cd[0].dstHeight = p[0].dstPtr.ysize
-
-    # Array-to-array copies require matching element sizes.
-    if srcElementSize and dstElementSize and srcElementSize != dstElementSize:
-        return cudaErrorInvalidValue
-
-    # Element size of whichever operand is an array; one byte (sizeof(char))
-    # when neither is.
-    cdef size_t elementSize = sizeof(char)
-    if srcElementSize:
-        elementSize = srcElementSize
-    if dstElementSize:
-        elementSize = dstElementSize
-    srcElementSize = elementSize
-    dstElementSize = elementSize
-
-    # Determine the extent of the transfer
-    # Width and height are rounded up to whole blocks; width is then
-    # expressed in bytes.
-    # NOTE(review): the <size_t> casts suggest `/` is not integer division
-    # under this module's compiler settings - confirm before changing it.
-    cd[0].WidthInBytes = <size_t>((p[0].extent.width + copyBlockExtent.width - 1) / copyBlockExtent.width)  * elementSize
-    cd[0].Height       = <size_t>((p[0].extent.height + copyBlockExtent.height - 1) / copyBlockExtent.height)
-    cd[0].Depth        = p[0].extent.depth
-
-    # Populate bloated src copy origin
-    cd[0].srcXInBytes  = <size_t>(p[0].srcPos.x / srcBlockExtent.width) * elementSize
-    cd[0].srcY         = <size_t>(p[0].srcPos.y / srcBlockExtent.height)
-    cd[0].srcZ         = p[0].srcPos.z
-
-    # Populate bloated dst copy origin
-    cd[0].dstXInBytes  = <size_t>(p[0].dstPos.x / dstBlockExtent.width) * elementSize
-    cd[0].dstY         = <size_t>(p[0].dstPos.y / dstBlockExtent.height)
-    cd[0].dstZ         = p[0].dstPos.z
-
-    return cudaSuccess
-
-
-cdef cudaError_t mallocArray(cudaArray_t *arrayPtr, const cudaChannelFormatDesc *desc,
-        size_t depth, size_t height, size_t width, int corr2D, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Creates a CUDA array through the driver's 3D array creation call and
-    # stores the handle in `arrayPtr[0]`.
-    #
-    # `arrayPtr[0]` is reset to NULL before validation, so it is NULL on every
-    # failure path. cudaErrorInvalidValue is returned for a NULL `arrayPtr` or
-    # an inconsistent width/height/depth/flags combination; errors from
-    # getDescInfo and from the driver are passed through.
-    # The descriptor depth is `depth - corr2D`.
-    if arrayPtr == NULL:
-        return cudaErrorInvalidValue
-
-    cdef cydriver.CUarray array = NULL
-    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR_v2 ad
-    cdef cudaError_t err = cudaSuccess
-    cdef bint layered = (flags & cudaArrayLayered) != 0
-    cdef bint cubemap = (flags & cudaArrayCubemap) != 0
-    arrayPtr[0] = NULL
-
-    # Shape validation, one rule per guard.
-    if width == 0:
-        return cudaErrorInvalidValue
-    if height == 0 and depth != 0 and not layered:
-        return cudaErrorInvalidValue
-    if layered and depth == 0:
-        return cudaErrorInvalidValue
-    # Plain cubemap: square faces and exactly six of them.
-    if cubemap and not layered and (width != height or depth != 6):
-        return cudaErrorInvalidValue
-    # Layered cubemap: square faces and a multiple of six.
-    if layered and cubemap and (width != height or depth % 6 != 0):
-        return cudaErrorInvalidValue
-
-    memset(&ad, 0, sizeof(ad))
-    err = getDescInfo(desc, <int*>&ad.NumChannels, <cydriver.CUarray_format*>&ad.Format)
-    if err != cudaSuccess:
-        return err
-    ad.Width  = <unsigned int>width
-    ad.Height = <unsigned int>height
-    ad.Depth  = <unsigned int>(depth - corr2D)
-    ad.Flags  = flags
-    err = <cudaError_t>cydriver._cuArray3DCreate_v2(&array, &ad)
-    if err != cudaSuccess:
-        return err
-
-    arrayPtr[0] = <cudaArray_t>array
-    return cudaSuccess
-
-
-cdef cudaError_t memcpy2DToArray(cudaArray_t dst, size_t hOffset, size_t wOffset, const char *src,
-                                 size_t spitch, size_t width, size_t height, cudaMemcpyKind kind,
-                                 cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Copies a `width` x `height` region from the pitched buffer `src` into
-    # the array `dst` at (`hOffset`, `wOffset`), dispatching on `kind`.
-    # An empty region succeeds without doing anything. A multi-row copy whose
-    # width exceeds `spitch` yields cudaErrorInvalidPitchValue, and any kind
-    # other than HostToDevice, DeviceToDevice or Default yields
-    # cudaErrorInvalidMemcpyDirection.
-    if width == 0 or height == 0:
-        return cudaSuccess
-    if height > 1 and width > spitch:
-        return cudaErrorInvalidPitchValue
-
-    if kind == cudaMemcpyKind.cudaMemcpyHostToDevice:
-        return copyFromHost2D(dst, hOffset, wOffset, src, spitch, width, height, sid, async)
-    if kind == cudaMemcpyKind.cudaMemcpyDeviceToDevice:
-        return copyFromDevice2D(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE, dst, hOffset, wOffset, src, 0, spitch, width, height, sid, async)
-    if kind == cudaMemcpyKind.cudaMemcpyDefault:
-        return copyFromDevice2D(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED, dst, hOffset, wOffset, src, 0, spitch, width, height, sid, async)
-    return cudaErrorInvalidMemcpyDirection
-
-
-cdef cudaError_t memcpy2DPtr(char *dst, size_t dpitch, const char *src, size_t spitch, size_t width,
-                             size_t height, cudaMemcpyKind kind,
-                             cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Pointer-to-pointer 2D copy of `width` bytes by `height` rows.
-    # An empty region succeeds immediately. A multi-row copy whose width
-    # exceeds either pitch yields cudaErrorInvalidPitchValue, and an
-    # unrecognised `kind` yields cudaErrorInvalidMemcpyDirection.
-    # The copy is queued on `sid` when `async` is set; otherwise the blocking
-    # "unaligned" driver entry point is used.
-    if width == 0 or height == 0:
-        return cudaSuccess
-    if height > 1 and (width > dpitch or width > spitch):
-        return cudaErrorInvalidPitchValue
-
-    cdef cydriver.CUDA_MEMCPY2D_v2 cp
-    memset(&cp, 0, sizeof(cp))
-
-    # Pick memory types and pointer fields from the copy kind.
-    if kind == cudaMemcpyKind.cudaMemcpyHostToHost:
-        cp = memCopy2DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST)
-        cp.srcHost = src
-        cp.dstHost = dst
-    elif kind == cudaMemcpyKind.cudaMemcpyDeviceToHost:
-        cp = memCopy2DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE)
-        cp.srcDevice = <cydriver.CUdeviceptr_v2>src
-        cp.dstHost = dst
-    elif kind == cudaMemcpyKind.cudaMemcpyHostToDevice:
-        cp = memCopy2DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST)
-        cp.srcHost = src
-        cp.dstDevice = <cydriver.CUdeviceptr_v2>dst
-    elif kind == cudaMemcpyKind.cudaMemcpyDeviceToDevice:
-        cp = memCopy2DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE)
-        cp.srcDevice = <cydriver.CUdeviceptr_v2>src
-        cp.dstDevice = <cydriver.CUdeviceptr_v2>dst
-    elif kind == cudaMemcpyKind.cudaMemcpyDefault:
-        cp = memCopy2DInit(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED, cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED)
-        cp.srcDevice = <cydriver.CUdeviceptr_v2>src
-        cp.dstDevice = <cydriver.CUdeviceptr_v2>dst
-    else:
-        return cudaErrorInvalidMemcpyDirection
-
-    # Geometry is the same for every kind.
-    cp.srcPitch      = spitch
-    cp.dstPitch      = dpitch
-    cp.WidthInBytes  = width
-    cp.Height        = height
-
-    if async:
-        return <cudaError_t>cydriver._cuMemcpy2DAsync_v2(&cp, sid)
-    return <cudaError_t>cydriver._cuMemcpy2DUnaligned_v2(&cp)
-
-
-cdef cudaError_t memcpyDispatch(void *dst, const void *src, size_t size, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Blocking copy of `size` bytes from `src` to `dst`, routed to the driver
-    # call that matches `kind`.
-    #
-    # A zero-byte copy succeeds without touching the driver. Host-to-host
-    # copies go through memcpy2DPtr as a single row. An unrecognised `kind`
-    # yields cudaErrorInvalidMemcpyDirection. In every other case the status
-    # reported by the driver is returned.
-    if size == 0:
-        return cudaSuccess
-
-    cdef cudaError_t err = cudaSuccess
-    if kind == cudaMemcpyKind.cudaMemcpyHostToHost:
-        return memcpy2DPtr(<char*>dst, size, <const char*>src, size, size, 1, kind, NULL, 0)
-    elif kind == cudaMemcpyKind.cudaMemcpyDeviceToHost:
-        err = <cudaError_t>cydriver._cuMemcpyDtoH_v2(dst, <cydriver.CUdeviceptr_v2>src, size)
-    elif kind == cudaMemcpyKind.cudaMemcpyHostToDevice:
-        err = <cudaError_t>cydriver._cuMemcpyHtoD_v2(<cydriver.CUdeviceptr_v2>dst, src, size)
-    elif kind == cudaMemcpyKind.cudaMemcpyDeviceToDevice:
-        err = <cudaError_t>cydriver._cuMemcpyDtoD_v2(<cydriver.CUdeviceptr_v2>dst, <cydriver.CUdeviceptr_v2>src, size)
-    elif kind == cudaMemcpyKind.cudaMemcpyDefault:
-        err = <cudaError_t>cydriver._cuMemcpy(<cydriver.CUdeviceptr_v2>dst, <cydriver.CUdeviceptr_v2>src, size)
-    else:
-        return cudaErrorInvalidMemcpyDirection
-    # Propagate the driver status; without this return the value stored in
-    # `err` by the branches above would never reach the caller.
-    return err
-
-
-cdef cudaError_t mallocHost(size_t size, void **mem, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Allocates `size` bytes of host memory through the driver, forwarding
-    # `flags`. A zero-byte request never reaches the driver: it stores NULL
-    # in `mem[0]` and succeeds, or fails with cudaErrorInvalidValue when
-    # `mem` itself is NULL.
-    if size != 0:
-        return <cudaError_t>cydriver._cuMemHostAlloc(mem, size, flags)
-    if mem == NULL:
-        return cudaErrorInvalidValue
-    mem[0] = NULL
-    return cudaSuccess
-
-
-cdef cudaError_t mallocPitch(size_t width, size_t height, size_t depth, void **mem, size_t *pitch) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Allocates pitched device memory for `height * depth` rows of `width`
-    # bytes and reports the chosen pitch through `pitch`.
-    # When the width or the total row count is zero the driver is not called:
-    # `mem[0]` becomes NULL and `pitch[0]` becomes 0, or
-    # cudaErrorInvalidValue is returned if either output pointer is NULL.
-    cdef size_t totalRows = height * depth
-
-    if width != 0 and totalRows != 0:
-        return <cudaError_t>cydriver._cuMemAllocPitch_v2(<cydriver.CUdeviceptr_v2*>mem, pitch, width, totalRows, 4)
-
-    if mem == NULL or pitch == NULL:
-        return cudaErrorInvalidValue
-    mem[0]   = NULL
-    pitch[0] = 0
-    return cudaSuccess
-
-
-cdef cudaError_t mallocMipmappedArray(cudaMipmappedArray_t *mipmappedArray, const cudaChannelFormatDesc *desc,
-                                      size_t depth, size_t height, size_t width, unsigned int numLevels, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Creates a mipmapped CUDA array with `numLevels` levels through the
-    # driver and stores the handle in `mipmappedArray[0]`.
-    #
-    # `mipmappedArray[0]` is reset to NULL before validation, so it is NULL on
-    # every failure path. cudaErrorInvalidValue is returned for a NULL output
-    # pointer or an inconsistent width/height/depth/flags combination; errors
-    # from getDescInfo and from the driver are passed through.
-    if mipmappedArray == NULL:
-        return cudaErrorInvalidValue
-
-    cdef cydriver.CUmipmappedArray mipmap = NULL
-    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR_v2 ad
-    # Declared explicitly (as in mallocArray) so the status variable is a C
-    # value in this nogil function rather than relying on type inference.
-    cdef cudaError_t err = cudaSuccess
-    memset(&ad, 0, sizeof(ad))
-
-    mipmappedArray[0] = NULL
-    # Shape rules: non-zero width; a zero height only with zero depth or a
-    # layered array; layered arrays need a depth; cubemaps need square faces
-    # and a depth of 6 (or a multiple of 6 when layered).
-    if (((width == 0)) or
-        ((height == 0) and (depth != 0) and not (flags & cudaArrayLayered)) or
-        ((flags & cudaArrayLayered) and (depth == 0)) or
-        ((flags & cudaArrayCubemap) and not (flags & cudaArrayLayered) and ((width != height) or (depth != 6))) or
-        ((flags & cudaArrayLayered) and (flags & cudaArrayCubemap) and ((width != height) or (depth % 6 != 0)))):
-        return cudaErrorInvalidValue
-    else:
-        err = getDescInfo(desc, <int*>&ad.NumChannels, &ad.Format)
-        if err != cudaSuccess:
-            return err
-        ad.Height = <unsigned int>height
-        ad.Width  = <unsigned int>width
-        ad.Depth  = <unsigned int>depth
-        ad.Flags  = flags
-        err = <cudaError_t>cydriver._cuMipmappedArrayCreate(&mipmap, &ad, numLevels)
-        if err != cudaSuccess:
-            return err
-        mipmappedArray[0] = <cudaMipmappedArray_t>mipmap
-    return cudaSuccess
-
-
-cdef cudaError_t memcpyAsyncDispatch(void *dst, const void *src, size_t size, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Stream-ordered copy of `size` bytes from `src` to `dst`, routed to the
-    # asynchronous driver call that matches `kind`. A zero-byte copy succeeds
-    # without touching the driver; host-to-host copies go through memcpy2DPtr
-    # as a single row; an unrecognised `kind` yields
-    # cudaErrorInvalidMemcpyDirection.
-    if size == 0:
-        return cudaSuccess
-
-    if kind == cudaMemcpyKind.cudaMemcpyHostToHost:
-        return memcpy2DPtr(<char*>dst, size, <const char*>src, size, size, 1, kind, stream, True)
-
-    if kind == cudaMemcpyKind.cudaMemcpyDeviceToHost:
-        return <cudaError_t>cydriver._cuMemcpyDtoHAsync_v2(dst, <cydriver.CUdeviceptr_v2>src, size, stream)
-
-    if kind == cudaMemcpyKind.cudaMemcpyHostToDevice:
-        return <cudaError_t>cydriver._cuMemcpyHtoDAsync_v2(<cydriver.CUdeviceptr_v2>dst, src, size, stream)
-
-    if kind == cudaMemcpyKind.cudaMemcpyDeviceToDevice:
-        return <cudaError_t>cydriver._cuMemcpyDtoDAsync_v2(<cydriver.CUdeviceptr_v2>dst, <cydriver.CUdeviceptr_v2>src, size, stream)
-
-    if kind == cudaMemcpyKind.cudaMemcpyDefault:
-        return <cudaError_t>cydriver._cuMemcpyAsync(<cydriver.CUdeviceptr_v2>dst, <cydriver.CUdeviceptr_v2>src, size, stream)
-
-    return cudaErrorInvalidMemcpyDirection
-
-
-cdef cudaError_t toCudartMemCopy3DParams(const cydriver.CUDA_MEMCPY3D_v2 *cd, cudaMemcpy3DParms *p) except ?cudaErrorCallRequiresNewerDriver nogil:
-    # Fills the runtime 3D copy description `p` from the driver structure
-    # `cd` (the reverse direction of toDriverMemCopy3DParams).
-    #
-    # Returns cudaSuccess when `p` has been filled in, otherwise:
-    #   cudaErrorUnknown      - the source/destination memory-type pair is not
-    #                           one of the combinations handled below
-    #   cudaErrorInvalidValue - source and destination arrays differ in
-    #                           element size
-    #   any error reported by getFormat, getArrayBlockExtent or getElementSize.
-
-    # Block extents default to 1x1x1 and are only replaced for array operands.
-    cdef cudaExtent srcBlockExtent
-    cdef cudaExtent dstBlockExtent
-    cdef cudaExtent copyBlockExtent
-    cdef cydriver.CUarray_format srcFmt
-    cdef cydriver.CUarray_format dstFmt
-    cdef int numChannels = 0
-    srcBlockExtent.width = srcBlockExtent.height = srcBlockExtent.depth = 1
-    dstBlockExtent.width = dstBlockExtent.height = dstBlockExtent.depth = 1
-    copyBlockExtent.width = copyBlockExtent.height = copyBlockExtent.depth = 1
-
-    # `p` starts fully zeroed; the two xsize stores below restate that
-    # explicitly and xsize is not written again in this function.
-    memset(p, 0, sizeof(cudaMemcpy3DParms))
-    p[0].srcPtr.xsize = 0
-    p[0].dstPtr.xsize = 0
-
-    # Derive the runtime copy kind from the (source, destination) memory
-    # types, and copy over either the array handle or the pitched-pointer
-    # triple (ptr, pitch, ysize) for each side.
-    if (cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST and cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST):
-        p[0].kind = cudaMemcpyHostToHost
-
-        p[0].srcPtr.ptr = <void*>cd[0].srcHost
-        p[0].srcPtr.pitch = cd[0].srcPitch
-        p[0].srcPtr.ysize = cd[0].srcHeight
-
-        p[0].dstPtr.ptr = cd[0].dstHost
-        p[0].dstPtr.pitch = cd[0].dstPitch
-        p[0].dstPtr.ysize = cd[0].dstHeight
-    elif (cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST
-            and (cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE
-                or cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY)):
-        # host -> device pointer or array
-        p[0].kind = cudaMemcpyHostToDevice
-
-        p[0].srcPtr.ptr = <void*>cd[0].srcHost
-        p[0].srcPtr.pitch = cd[0].srcPitch
-        p[0].srcPtr.ysize = cd[0].srcHeight
-
-        if (cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY):
-            p[0].dstArray = <cudaArray_t>cd[0].dstArray
-        else:
-            p[0].dstPtr.ptr = <void*>cd[0].dstDevice
-            p[0].dstPtr.pitch = cd[0].dstPitch
-            p[0].dstPtr.ysize = cd[0].dstHeight
-    elif ((cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE or cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY)
-            and cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST):
-        # device pointer or array -> host
-        p[0].kind = cudaMemcpyDeviceToHost
-
-        if (cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY):
-            p[0].srcArray = <cudaArray_t>cd[0].srcArray
-        else:
-            p[0].srcPtr.ptr = <void*>cd[0].srcDevice
-            p[0].srcPtr.pitch = cd[0].srcPitch
-            p[0].srcPtr.ysize = cd[0].srcHeight
-
-        p[0].dstPtr.ptr = cd[0].dstHost
-        p[0].dstPtr.pitch = cd[0].dstPitch
-        p[0].dstPtr.ysize = cd[0].dstHeight
-    elif ((cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE or cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY)
-            and (cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE or cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY)):
-        # device pointer or array -> device pointer or array
-        p[0].kind = cudaMemcpyDeviceToDevice
-
-        if (cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY):
-            p[0].srcArray = <cudaArray_t>cd[0].srcArray
-        else:
-            p[0].srcPtr.ptr = <void*>cd[0].srcDevice
-            p[0].srcPtr.pitch = cd[0].srcPitch
-            p[0].srcPtr.ysize = cd[0].srcHeight
-
-        if (cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY):
-            p[0].dstArray = <cudaArray_t>cd[0].dstArray
-        else:
-            p[0].dstPtr.ptr = <void*>cd[0].dstDevice
-            p[0].dstPtr.pitch = cd[0].dstPitch
-            p[0].dstPtr.ysize = cd[0].dstHeight
-    elif (cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED and cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED):
-        # unified -> unified
-        p[0].kind = cudaMemcpyDefault
-
-        p[0].srcPtr.ptr = <void*>cd[0].srcDevice
-        p[0].srcPtr.pitch = cd[0].srcPitch
-        p[0].srcPtr.ysize = cd[0].srcHeight
-
-        p[0].dstPtr.ptr = <void*>cd[0].dstDevice
-        p[0].dstPtr.pitch = cd[0].dstPitch
-        p[0].dstPtr.ysize = cd[0].dstHeight
-    elif (cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED and cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY):
-        # unified -> array
-        p[0].kind = cudaMemcpyDefault
-
-        p[0].srcPtr.ptr = <void*>cd[0].srcDevice
-        p[0].srcPtr.pitch = cd[0].srcPitch
-        p[0].srcPtr.ysize = cd[0].srcHeight
-
-        p[0].dstArray = <cudaArray_t>cd[0].dstArray
-    elif (cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY and cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED):
-        # array -> unified
-        p[0].kind = cudaMemcpyDefault
-
-        p[0].srcArray = <cudaArray_t>cd[0].srcArray
-
-        p[0].dstPtr.ptr = <void*>cd[0].dstDevice
-        p[0].dstPtr.pitch = cd[0].dstPitch
-        p[0].dstPtr.ysize = cd[0].dstHeight
-    else:
-        # Any other pairing (for example HOST together with UNIFIED) has no
-        # runtime equivalent here.
-        return cudaErrorUnknown
-
-    cdef size_t srcElementSize = 0
-    cdef size_t dstElementSize = 0
-    cdef cudaError_t err = cudaSuccess
-
-    # Array operands contribute their block extent and element size. The
-    # source array's block extent is used for the copy when the source is an
-    # array, otherwise the destination array's.
-    if (cd[0].srcMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY):
-        err = getFormat(<cudaArray_t>cd[0].srcArray, numChannels, &srcFmt)
-        if err != cudaSuccess:
-            return err
-        err = getArrayBlockExtent(&srcBlockExtent, srcFmt)
-        if err != cudaSuccess:
-            return err
-        err = getElementSize(&srcElementSize, <cudaArray_t>cd[0].srcArray)
-        if err != cudaSuccess:
-            return err
-        copyBlockExtent = srcBlockExtent
-
-    if (cd[0].dstMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY):
-        err = getFormat(<cudaArray_t>cd[0].dstArray, numChannels, &dstFmt)
-        if err != cudaSuccess:
-            return err
-        err = getArrayBlockExtent(&dstBlockExtent, dstFmt)
-        if err != cudaSuccess:
-            return err
-        err = getElementSize(&dstElementSize, <cudaArray_t>cd[0].dstArray)
-        if err != cudaSuccess:
-            return err
-        if cd[0].srcMemoryType != cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY:
-            copyBlockExtent = dstBlockExtent
-
-    # Array-to-array copies require matching element sizes.
-    if (srcElementSize and dstElementSize and srcElementSize != dstElementSize):
-        return cudaErrorInvalidValue
-
-    # Element size of whichever operand is an array; one byte (sizeof(char))
-    # when neither is.
-    cdef size_t elementSize = sizeof(char)
-    if (srcElementSize):
-        elementSize = srcElementSize
-    if (dstElementSize):
-        elementSize = dstElementSize
-    srcElementSize = elementSize
-    dstElementSize = elementSize
-
-    # Convert byte widths / block rows back to the runtime's element units.
-    # NOTE(review): the <size_t> casts suggest `/` is not integer division
-    # under this module's compiler settings - confirm before changing it.
-    p[0].extent.width = <size_t>(cd[0].WidthInBytes / elementSize) * copyBlockExtent.width
-    p[0].extent.height = cd[0].Height * copyBlockExtent.height
-    p[0].extent.depth = cd[0].Depth
-
-    p[0].srcPos.x = <size_t>(cd[0].srcXInBytes / elementSize) * srcBlockExtent.width
-    p[0].srcPos.y = cd[0].srcY * srcBlockExtent.height
-    p[0].srcPos.z = cd[0].srcZ
-
-    p[0].dstPos.x = <size_t>(cd[0].dstXInBytes / elementSize) * dstBlockExtent.width
-    p[0].dstPos.y = cd[0].dstY * dstBlockExtent.height
-    p[0].dstPos.z = cd[0].dstZ
-    return cudaSuccess
-
-
-cdef cudaError_t memcpy2DFromArray(char *dst, size_t dpitch, cudaArray_const_t src, size_t hOffset,
-        size_t wOffset, size_t width, size_t height, cudaMemcpyKind kind,
-        cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    if width == 0 or height == 0:
-        return cudaSuccess
-    if height > 1 and width > dpitch:
-        return cudaErrorInvalidPitchValue
-
-    if kind == cudaMemcpyKind.cudaMemcpyDeviceToHost:
-        err = copyToHost2D(src, hOffset, wOffset, dst, dpitch, width, height, sid, async)
-    elif kind == cudaMemcpyKind.cudaMemcpyDeviceToDevice:
-        err = copyToDevice2D(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE, src, hOffset, wOffset, dst, 0, dpitch, width, height, sid, async)
-    elif kind == cudaMemcpyKind.cudaMemcpyDefault:
-        err = copyToDevice2D(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED, src, hOffset, wOffset, dst, 0, dpitch, width, height, sid, async)
-    else:
-        return cudaErrorInvalidMemcpyDirection
-    return err
-
-
-cdef cudaError_t memcpy2DArrayToArray(cudaArray_t dst, size_t hOffsetDst, size_t wOffsetDst,
-                                      cudaArray_const_t src, size_t hOffsetSrc, size_t wOffsetSrc,
-                                      size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if width == 0 or height == 0:
-        return cudaSuccess
-    if kind != cudaMemcpyKind.cudaMemcpyDeviceToDevice and kind != cudaMemcpyKind.cudaMemcpyDefault:
-        return cudaErrorInvalidMemcpyDirection
-    return copyToArray2D(src, hOffsetSrc, wOffsetSrc, dst, hOffsetDst, wOffsetDst, width, height)
-
-
-cdef cudaError_t memset3DPtr(cudaPitchedPtr p, int val, cudaExtent e, cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if e.width == 0 or e.height == 0 or e.depth == 0:
-        return cudaSuccess
-
-    if (e.height > 1 or e.depth > 1) and e.width > p.pitch:
-        return cudaErrorInvalidValue
-
-    if e.depth > 0 and e.height > p.ysize:
-        return cudaErrorInvalidValue
-
-    cdef char *ptr = <char*>p.ptr
-    cdef size_t d
-    cdef cudaError_t err = cudaSuccess
-
-    if e.width >= p.xsize and e.height == p.ysize and e.width == p.pitch:
-        return memsetPtr(ptr, val, e.width * e.height * e.depth, sid, async)
-    elif e.height == p.ysize:
-        return memset2DPtr(ptr, p.pitch, val, e.width, e.height * e.depth, sid, async)
-    else:
-        d = 0
-        while (d != e.depth):
-            err = memset2DPtr(ptr, p.pitch, val, e.width, e.height, sid, async)
-            if err != cudaSuccess:
-                return err
-            ptr += p.pitch * p.ysize
-            d += 1
-    return cudaSuccess
-
-
-cdef cudaError_t memcpyToArray(cudaArray_t dst, size_t hOffset, size_t wOffset, const char *src,
-                               size_t count, cudaMemcpyKind kind,
-                               cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if count == 0:
-        return cudaSuccess
-
-    if kind == cudaMemcpyKind.cudaMemcpyHostToDevice:
-        return copyFromHost(dst, hOffset, wOffset, src, count, sid, async)
-    elif kind == cudaMemcpyKind.cudaMemcpyDeviceToDevice:
-        return copyFromDevice(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE, dst, hOffset, wOffset, src, 0, count, sid, async)
-    elif kind == cudaMemcpyKind.cudaMemcpyDefault:
-        return copyFromDevice(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED, dst, hOffset, wOffset, src, 0, count, sid, async)
-    elif kind == cudaMemcpyKind.cudaMemcpyHostToHost or kind == cudaMemcpyKind.cudaMemcpyDeviceToHost:
-        return cudaErrorInvalidMemcpyDirection
-    return cudaSuccess
-
-
-cdef cudaError_t memcpyFromArray(char *dst, cudaArray_const_t src, size_t hOffset, size_t wOffset,
-                                 size_t count, cudaMemcpyKind kind,
-                                 cudaStream_t sid, bool async) except ?cudaErrorCallRequiresNewerDriver nogil:
-    if count == 0:
-        return cudaSuccess
-
-    if kind == cudaMemcpyKind.cudaMemcpyDeviceToHost:
-        return copyToHost(src, hOffset, wOffset, dst, count, sid, async)
-    elif kind == cudaMemcpyKind.cudaMemcpyDeviceToDevice:
-        return copyToDevice(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE, src, hOffset, wOffset, dst, 0, count, sid, async)
-    elif kind == cudaMemcpyKind.cudaMemcpyDefault:
-        return copyToDevice(cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED, src, hOffset, wOffset, dst, 0, count, sid, async)
-    elif kind == cudaMemcpyKind.cudaMemcpyHostToDevice or kind == cudaMemcpyKind.cudaMemcpyHostToHost:
-        return cudaErrorInvalidMemcpyDirection
-    return cudaSuccess
-
-
-cdef cudaError_t toDriverCudaResourceDesc(cydriver.CUDA_RESOURCE_DESC *_driver_pResDesc, const cudaResourceDesc *pResDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef int numChannels
-    cdef cydriver.CUarray_format format
-
-    if pResDesc[0].resType == cudaResourceType.cudaResourceTypeArray:
-        _driver_pResDesc[0].resType          = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_ARRAY
-        _driver_pResDesc[0].res.array.hArray = <cydriver.CUarray>pResDesc[0].res.array.array
-    elif pResDesc[0].resType == cudaResourceType.cudaResourceTypeMipmappedArray:
-        _driver_pResDesc[0].resType                    = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
-        _driver_pResDesc[0].res.mipmap.hMipmappedArray = <cydriver.CUmipmappedArray>pResDesc[0].res.mipmap.mipmap
-    elif pResDesc[0].resType == cudaResourceType.cudaResourceTypeLinear:
-        _driver_pResDesc[0].resType                = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_LINEAR
-        _driver_pResDesc[0].res.linear.devPtr      = <cydriver.CUdeviceptr>pResDesc[0].res.linear.devPtr
-        _driver_pResDesc[0].res.linear.sizeInBytes = pResDesc[0].res.linear.sizeInBytes
-        err = getDescInfo(&pResDesc[0].res.linear.desc, &numChannels, &format)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-        _driver_pResDesc[0].res.linear.format      = format
-        _driver_pResDesc[0].res.linear.numChannels = numChannels
-    elif pResDesc[0].resType == cudaResourceType.cudaResourceTypePitch2D:
-        _driver_pResDesc[0].resType                  = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_PITCH2D
-        _driver_pResDesc[0].res.pitch2D.devPtr       = <cydriver.CUdeviceptr>pResDesc[0].res.pitch2D.devPtr
-        _driver_pResDesc[0].res.pitch2D.pitchInBytes = pResDesc[0].res.pitch2D.pitchInBytes
-        _driver_pResDesc[0].res.pitch2D.width        = pResDesc[0].res.pitch2D.width
-        _driver_pResDesc[0].res.pitch2D.height       = pResDesc[0].res.pitch2D.height
-        err = getDescInfo(&pResDesc[0].res.linear.desc, &numChannels, &format)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-        _driver_pResDesc[0].res.pitch2D.format       = format
-        _driver_pResDesc[0].res.pitch2D.numChannels  = numChannels
-    else:
-        _setLastError(cudaErrorInvalidValue)
-        return cudaErrorInvalidValue
-    _driver_pResDesc[0].flags = 0
-
-    return err
-
-
-cdef cudaError_t getDriverEglFrame(cydriver.CUeglFrame *cuEglFrame, cudaEglFrame eglFrame) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef unsigned int i = 0
-
-    err = getDescInfo(&eglFrame.planeDesc[0].channelDesc, <int*>&cuEglFrame[0].numChannels, &cuEglFrame[0].cuFormat)
-    if err != cudaSuccess:
-        return err
-    for i in range(eglFrame.planeCount):
-        if eglFrame.frameType == cudaEglFrameTypeArray:
-            cuEglFrame[0].frame.pArray[i] = <cydriver.CUarray>eglFrame.frame.pArray[i]
-        else:
-            cuEglFrame[0].frame.pPitch[i] = eglFrame.frame.pPitch[i].ptr
-    cuEglFrame[0].width = eglFrame.planeDesc[0].width
-    cuEglFrame[0].height = eglFrame.planeDesc[0].height
-    cuEglFrame[0].depth = eglFrame.planeDesc[0].depth
-    cuEglFrame[0].pitch = eglFrame.planeDesc[0].pitch
-    cuEglFrame[0].planeCount = eglFrame.planeCount
-    if eglFrame.eglColorFormat == cudaEglColorFormatYUV420Planar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV420SemiPlanar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV422Planar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_PLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV422SemiPlanar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV444Planar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_PLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV444SemiPlanar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUYV422:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUYV_422
-    elif eglFrame.eglColorFormat == cudaEglColorFormatUYVY422:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_UYVY_422
-    elif eglFrame.eglColorFormat == cudaEglColorFormatARGB:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_ARGB
-    elif eglFrame.eglColorFormat == cudaEglColorFormatRGBA:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_RGBA
-    elif eglFrame.eglColorFormat == cudaEglColorFormatABGR:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_ABGR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBGRA:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BGRA
-    elif eglFrame.eglColorFormat == cudaEglColorFormatL:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_L
-    elif eglFrame.eglColorFormat == cudaEglColorFormatR:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_R
-    elif eglFrame.eglColorFormat == cudaEglColorFormatA:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_A
-    elif eglFrame.eglColorFormat == cudaEglColorFormatRG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_RG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatAYUV:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_AYUV
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU444SemiPlanar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU422SemiPlanar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU420SemiPlanar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10V10U10_444SemiPlanar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10V10U10_420SemiPlanar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY12V12U12_444SemiPlanar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY12V12U12_420SemiPlanar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatVYUY_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_VYUY_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatUYVY_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_UYVY_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUYV_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUYV_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVYU_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVYU_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUVA_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUVA_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatAYUV_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_AYUV_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV444Planar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV422Planar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV420Planar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV444SemiPlanar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV422SemiPlanar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV420SemiPlanar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU444Planar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU422Planar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU420Planar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU444SemiPlanar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU422SemiPlanar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU420SemiPlanar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerRGGB:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_RGGB
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerBGGR:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_BGGR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerGRBG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_GRBG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerGBRG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_GBRG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer10RGGB:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_RGGB
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer10BGGR:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_BGGR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer10GRBG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_GRBG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer10GBRG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_GBRG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer12RGGB:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_RGGB
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer12BGGR:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_BGGR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer12GRBG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_GRBG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer12GBRG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_GBRG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer14RGGB:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_RGGB
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer14BGGR:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_BGGR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer14GRBG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_GRBG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer14GBRG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_GBRG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer20RGGB:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_RGGB
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer20BGGR:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_BGGR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer20GRBG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_GRBG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer20GBRG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_GBRG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerIspRGGB:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_RGGB
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerIspBGGR:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_BGGR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerIspGRBG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_GRBG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerIspGBRG:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_GBRG
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU444Planar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_PLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU422Planar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_PLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU420Planar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerBCCR:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_BCCR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerRCCB:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_RCCB
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerCRBC:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_CRBC
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayerCBRC:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_CBRC
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer10CCCC:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_CCCC
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer12BCCR:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_BCCR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer12RCCB:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_RCCB
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer12CRBC:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_CRBC
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer12CBRC:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_CBRC
-    elif eglFrame.eglColorFormat == cudaEglColorFormatBayer12CCCC:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_CCCC
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV420SemiPlanar_2020:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_2020
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU420SemiPlanar_2020:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_2020
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV420Planar_2020:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_2020
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU420Planar_2020:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_2020
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV420SemiPlanar_709:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_709
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU420SemiPlanar_709:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_709
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUV420Planar_709:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_709
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVU420Planar_709:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_709
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10V10U10_420SemiPlanar_709:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10V10U10_420SemiPlanar_2020:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_2020
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10V10U10_422SemiPlanar_2020:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_2020
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10V10U10_422SemiPlanar:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10V10U10_422SemiPlanar_709:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_709
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY_709_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y_709_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10_709_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10_709_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY12_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY12_709_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12_709_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYUVA:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUVA
-    elif eglFrame.eglColorFormat == cudaEglColorFormatYVYU:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVYU
-    elif eglFrame.eglColorFormat == cudaEglColorFormatVYUY:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_VYUY
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10V10U10_420SemiPlanar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10V10U10_444SemiPlanar_ER:
-        cuEglFrame[0].eglColorFormat =  cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_709_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY12V12U12_420SemiPlanar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_709_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY12V12U12_444SemiPlanar_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_ER
-    elif eglFrame.eglColorFormat == cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER:
-        cuEglFrame[0].eglColorFormat = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_709_ER
-    else:
-        return cudaErrorInvalidValue
-    if eglFrame.frameType == cudaEglFrameTypeArray:
-        cuEglFrame[0].frameType = cydriver.CUeglFrameType_enum.CU_EGL_FRAME_TYPE_ARRAY
-    elif eglFrame.frameType == cudaEglFrameTypePitch:
-        cuEglFrame[0].frameType = cydriver.CUeglFrameType_enum.CU_EGL_FRAME_TYPE_PITCH
-    else:
-        return cudaErrorInvalidValue
-
-
-@cython.show_performance_hints(False)
-cdef cudaError_t getRuntimeEglFrame(cudaEglFrame *eglFrame, cydriver.CUeglFrame cueglFrame) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef unsigned int i
-    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR_v2 ad
-    cdef cudaPitchedPtr pPtr
-    memset(eglFrame, 0, sizeof(eglFrame[0]))
-    memset(&ad, 0, sizeof(ad))
-    for i in range(cueglFrame.planeCount):
-        ad.Depth = cueglFrame.depth
-        ad.Flags = 0
-        ad.Format = cueglFrame.cuFormat
-        ad.Height = cueglFrame.height
-        ad.NumChannels = cueglFrame.numChannels
-        ad.Width = cueglFrame.width
-
-        err = getChannelFormatDescFromDriverDesc(&eglFrame[0].planeDesc[i].channelDesc, NULL, NULL, NULL, &ad)
-        if err != cudaSuccess:
-            return err
-
-        eglFrame[0].planeDesc[i].depth = cueglFrame.depth
-        eglFrame[0].planeDesc[i].numChannels = cueglFrame.numChannels
-        if i == 0:
-            eglFrame[0].planeDesc[i].width = cueglFrame.width
-            eglFrame[0].planeDesc[i].height = cueglFrame.height
-            eglFrame[0].planeDesc[i].pitch = cueglFrame.pitch
-        elif (cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_2020 or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_2020 or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_709 or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_709):
-            eglFrame[0].planeDesc[i].width = <unsigned int>(cueglFrame.width / 2)
-            eglFrame[0].planeDesc[i].height = <unsigned int>(cueglFrame.height / 2)
-            eglFrame[0].planeDesc[i].pitch = <unsigned int>(cueglFrame.pitch / 2)
-        elif (cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_2020 or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_2020 or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_709 or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_709 or 
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709 or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_2020 or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_709_ER):
-            eglFrame[0].planeDesc[i].width = <unsigned int>(cueglFrame.width / 2)
-            eglFrame[0].planeDesc[i].height = <unsigned int>(cueglFrame.height / 2)
-            eglFrame[0].planeDesc[i].pitch = <unsigned int>(cueglFrame.pitch / 2)
-            eglFrame[0].planeDesc[1].channelDesc.y = 8
-            if (cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709 or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_2020 or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_ER or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709_ER or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_ER or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_709_ER):
-                eglFrame[0].planeDesc[1].channelDesc.y = 16
-        elif (cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_PLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_PLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER):
-            eglFrame[0].planeDesc[i].height = cueglFrame.height
-            eglFrame[0].planeDesc[i].width = <unsigned int>(cueglFrame.width / 2)
-            eglFrame[0].planeDesc[i].pitch = <unsigned int>(cueglFrame.pitch / 2)
-        elif (cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_2020 or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_709):
-            eglFrame[0].planeDesc[i].width = <unsigned int>(cueglFrame.width / 2)
-            eglFrame[0].planeDesc[i].height = cueglFrame.height
-            eglFrame[0].planeDesc[i].pitch = <unsigned int>(cueglFrame.pitch / 2)
-            eglFrame[0].planeDesc[1].channelDesc.y = 8
-            if (cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_2020 or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_709):
-                eglFrame[0].planeDesc[1].channelDesc.y = 16
-        elif (cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_PLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_PLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER):
-            eglFrame[0].planeDesc[i].height = cueglFrame.height
-            eglFrame[0].planeDesc[i].width = cueglFrame.width
-            eglFrame[0].planeDesc[i].pitch = cueglFrame.pitch
-        elif (cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_709_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_ER or
-              cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_709_ER):
-            eglFrame[0].planeDesc[i].height = cueglFrame.height
-            eglFrame[0].planeDesc[i].width = cueglFrame.width
-            eglFrame[0].planeDesc[i].pitch = cueglFrame.pitch
-            eglFrame[0].planeDesc[1].channelDesc.y = 8
-            if (cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_ER or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_709_ER or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_ER or
-                cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_709_ER):
-                eglFrame[0].planeDesc[1].channelDesc.y = 16
-        if cueglFrame.frameType == cydriver.CUeglFrameType_enum.CU_EGL_FRAME_TYPE_ARRAY:
-            eglFrame[0].frame.pArray[i] = <cudaArray_t>cueglFrame.frame.pArray[i]
-        else:
-            pPtr = make_cudaPitchedPtr(cueglFrame.frame.pPitch[i], eglFrame[0].planeDesc[i].pitch,
-                    eglFrame[0].planeDesc[i].width, eglFrame[0].planeDesc[i].height)
-            eglFrame[0].frame.pPitch[i] = pPtr
-
-    eglFrame[0].planeCount = cueglFrame.planeCount
-    if cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV420Planar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV420SemiPlanar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_PLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV422Planar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV422SemiPlanar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_PLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV444Planar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV444SemiPlanar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUYV_422:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUYV422
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_UYVY_422:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatUYVY422
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_ARGB:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatARGB
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_RGBA:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatRGBA
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_ABGR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatABGR
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BGRA:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBGRA
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_L:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatL
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_R:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatR
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_A:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatA
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_RG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatRG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_AYUV:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatAYUV
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU444SemiPlanar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU422SemiPlanar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU420SemiPlanar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10V10U10_444SemiPlanar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10V10U10_420SemiPlanar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY12V12U12_444SemiPlanar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY12V12U12_420SemiPlanar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_VYUY_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatVYUY_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_UYVY_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatUYVY_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUYV_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUYV_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVYU_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVYU_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUVA_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUVA_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_AYUV_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatAYUV_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV444Planar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV422Planar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV420Planar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV444SemiPlanar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV422SemiPlanar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV420SemiPlanar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU444Planar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU422Planar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU420Planar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU444SemiPlanar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU422SemiPlanar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU420SemiPlanar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_RGGB:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerRGGB
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_BGGR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerBGGR
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_GRBG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerGRBG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_GBRG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerGBRG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_RGGB:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer10RGGB
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_BGGR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer10BGGR
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_GRBG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer10GRBG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_GBRG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer10GBRG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_RGGB:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer12RGGB
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_BGGR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer12BGGR
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_GRBG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer12GRBG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_GBRG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer12GBRG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_RGGB:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer14RGGB
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_BGGR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer14BGGR
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_GRBG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer14GRBG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_GBRG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer14GBRG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_RGGB:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer20RGGB
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_BGGR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer20BGGR
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_GRBG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer20GRBG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_GBRG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer20GBRG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_RGGB:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerIspRGGB
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_BGGR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerIspBGGR
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_GRBG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerIspGRBG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_GBRG:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerIspGBRG
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_PLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU444Planar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_PLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU422Planar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU420Planar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_BCCR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerBCCR
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_RCCB:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerRCCB
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_CRBC:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerCRBC
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_CBRC:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayerCBRC
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_CCCC:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer10CCCC
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_BCCR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer12BCCR
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_RCCB:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer12RCCB
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_CRBC:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer12CRBC
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_CBRC:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer12CBRC
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_CCCC:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatBayer12CCCC
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_2020:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV420SemiPlanar_2020
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_2020:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU420SemiPlanar_2020
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_2020:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV420Planar_2020
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_2020:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU420Planar_2020
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_709:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV420SemiPlanar_709
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_709:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU420SemiPlanar_709
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_709:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUV420Planar_709
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_709:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVU420Planar_709
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10V10U10_420SemiPlanar_709
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_2020:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10V10U10_420SemiPlanar_2020
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_2020:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10V10U10_422SemiPlanar_2020
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10V10U10_422SemiPlanar
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_709:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10V10U10_422SemiPlanar_709
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y_709_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY_709_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10_709_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10_709_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY12_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12_709_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY12_709_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUVA:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYUVA
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVYU:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatYVYU
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_VYUY:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatVYUY
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10V10U10_420SemiPlanar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10V10U10_444SemiPlanar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_709_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY12V12U12_420SemiPlanar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_709_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY12V12U12_444SemiPlanar_ER
-    elif cueglFrame.eglColorFormat == cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_709_ER:
-        eglFrame[0].eglColorFormat = cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER
-    else:
-        return cudaErrorInvalidValue
-    if cueglFrame.frameType == cydriver.CUeglFrameType_enum.CU_EGL_FRAME_TYPE_ARRAY:
-        eglFrame[0].frameType = cudaEglFrameTypeArray
-    elif cueglFrame.frameType == cydriver.CUeglFrameType_enum.CU_EGL_FRAME_TYPE_PITCH:
-        eglFrame[0].frameType = cudaEglFrameTypePitch
-    else:
-        return cudaErrorInvalidValue
-
-
-# Translate a runtime-API graph node description (`rtParams`) into the
-# driver-API equivalent (`driverParams`).
-#
-# `driverParams` is zeroed first, then filled in according to
-# `rtParams[0].type`. Returns `cudaSuccess` when the node type is handled and
-# every sub-conversion succeeds, the failing helper's error code otherwise,
-# and `cudaErrorInvalidValue` for a node type not listed below.
-#
-# Failures of `_cuCtxGetCurrent` are additionally recorded via `_setLastError`;
-# errors returned by the `toDriver*` helpers are returned directly.
-#
-# The `except ?cudaErrorCallRequiresNewerDriver` clause is Cython's declaration
-# that this return value *may* indicate a pending Python exception.
-cdef cudaError_t toDriverGraphNodeParams(const cudaGraphNodeParams *rtParams, cydriver.CUgraphNodeParams *driverParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err
-    cdef cydriver.CUcontext context
-    # Start from an all-zero driver struct so fields not set below stay zero.
-    memset(driverParams, 0, sizeof(driverParams[0]))
-
-    if rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeKernel:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_KERNEL
-        # Kernel params need a field-by-field conversion done by a sibling helper.
-        err = toDriverKernelNodeParams(<const cudaKernelNodeParams *>&rtParams[0].kernel, <cydriver.CUDA_KERNEL_NODE_PARAMS *>&driverParams[0].kernel)
-        if err != cudaSuccess:
-            return err
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeMemcpy:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_MEMCPY
-        # The driver memcpy node carries an explicit context; use the current one.
-        err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-        err = toDriverMemCopy3DParams(&rtParams[0].memcpy.copyParams, &driverParams[0].memcpy.copyParams)
-        if err != cudaSuccess:
-            return err
-        driverParams[0].memcpy.copyCtx = context
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeMemset:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_MEMSET
-        # As for memcpy, the driver memset node is tied to the current context.
-        err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-        # NOTE(review): any return value of this helper is not inspected here;
-        # presumably it cannot fail -- confirm against its definition.
-        toDriverMemsetNodeParams(<const cudaMemsetParams *>&rtParams[0].memset, <cydriver.CUDA_MEMSET_NODE_PARAMS *>&driverParams[0].memset)
-        driverParams[0].memset.ctx = context
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeHost:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_HOST
-        # NOTE(review): result (if any) of the host-node helper is likewise not checked.
-        toDriverHostNodeParams(<const cudaHostNodeParams *>&rtParams[0].host, <cydriver.CUDA_HOST_NODE_PARAMS *>&driverParams[0].host)
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeGraph:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_GRAPH
-        # Child-graph node: only the graph handle is carried over.
-        driverParams[0].graph.graph = rtParams[0].graph.graph
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeEmpty:
-        # Empty node: the type tag is the only thing set.
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_EMPTY
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeWaitEvent:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_WAIT_EVENT
-        driverParams[0].eventWait.event = rtParams[0].eventWait.event
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeEventRecord:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_EVENT_RECORD
-        driverParams[0].eventRecord.event = rtParams[0].eventRecord.event
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeExtSemaphoreSignal:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL
-        # Struct copy through a pointer reinterpretation: assumes the runtime
-        # struct has the same layout as the driver v2 struct -- TODO confirm.
-        driverParams[0].extSemSignal = (<cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2 *>(&rtParams[0].extSemSignal))[0]
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeExtSemaphoreWait:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT
-        # Same reinterpret-and-copy approach as the semaphore-signal case.
-        driverParams[0].extSemWait = (<cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2 *>&rtParams[0].extSemWait)[0]
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeMemAlloc:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_MEM_ALLOC
-        # Same reinterpret-and-copy approach; layout compatibility is assumed.
-        driverParams[0].alloc = (<cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v2 *>&rtParams[0].alloc)[0]
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeMemFree:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_MEM_FREE
-        # The runtime pointer is cast to the driver's device-pointer type.
-        driverParams[0].free.dptr = <cydriver.CUdeviceptr>rtParams[0].free.dptr
-    elif rtParams[0].type == cudaGraphNodeType.cudaGraphNodeTypeConditional:
-        driverParams[0].type = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_CONDITIONAL
-        # RT params mirror the driver params except the RT struct lacks the ctx at the end.
-        # Only sizeof(runtime struct) bytes are copied, so the trailing driver
-        # `ctx` field is left as zeroed above until it is assigned below.
-        memcpy(&driverParams[0].conditional, &rtParams[0].conditional, sizeof(rtParams[0].conditional))
-        err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-        if err != cudaSuccess:
-            _setLastError(err)
-            return err
-        driverParams[0].conditional.ctx = context
-    else:
-        # Node type not handled by any branch above.
-        return cudaErrorInvalidValue
-    return cudaSuccess
-
-
-cdef void toCudartGraphNodeOutParams(const cydriver.CUgraphNodeParams *driverParams, cudaGraphNodeParams *rtParams) noexcept nogil:
-    if driverParams[0].type == cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_MEM_ALLOC:
-        rtParams[0].alloc.dptr = <void *>driverParams[0].alloc.dptr
-    elif driverParams[0].type == cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_CONDITIONAL:
-        rtParams[0].conditional.phGraph_out = <cudaGraph_t *>driverParams[0].conditional.phGraph_out
-
-
-cdef cudaError_t toDriverKernelNodeParams(const cudaKernelNodeParams nodeParams[0], cydriver.CUDA_KERNEL_NODE_PARAMS *driverNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    cdef cydriver.CUcontext context
-    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-    driverNodeParams[0].func = <cydriver.CUfunction>nodeParams[0].func
-    driverNodeParams[0].kern = NULL
-    driverNodeParams[0].ctx = context
-    driverNodeParams[0].gridDimX = nodeParams[0].gridDim.x
-    driverNodeParams[0].gridDimY = nodeParams[0].gridDim.y
-    driverNodeParams[0].gridDimZ = nodeParams[0].gridDim.z
-    driverNodeParams[0].blockDimX = nodeParams[0].blockDim.x
-    driverNodeParams[0].blockDimY = nodeParams[0].blockDim.y
-    driverNodeParams[0].blockDimZ = nodeParams[0].blockDim.z
-    driverNodeParams[0].sharedMemBytes = nodeParams[0].sharedMemBytes
-    driverNodeParams[0].kernelParams = nodeParams[0].kernelParams
-    driverNodeParams[0].extra = nodeParams[0].extra
-    return err
-
-
-cdef void toDriverHostNodeParams(const cudaHostNodeParams *pRuntimeNodeParams, cydriver.CUDA_HOST_NODE_PARAMS *pDriverNodeParams) noexcept nogil:
-    pDriverNodeParams[0].fn = pRuntimeNodeParams[0].fn
-    pDriverNodeParams[0].userData = pRuntimeNodeParams[0].userData
-
-
-@cython.show_performance_hints(False)
-cdef void cudaAsyncNotificationCallbackWrapper(cudaAsyncNotificationInfo_t *info, void *data, cudaAsyncCallbackHandle_t handle) nogil:
-    cdef cudaAsyncCallbackData *cbData = <cudaAsyncCallbackData *>data
-    with gil:
-        cbData.callback(info, cbData.userData, handle)
-
-
-cdef cudaError_t DeviceRegisterAsyncNotificationCommon(int device, cudaAsyncCallback callbackFunc, void* userData, cudaAsyncCallbackHandle_t* callback) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaAsyncCallbackData *cbData = NULL
-    cdef cudaError_t err = cudaSuccess
-    cbData = <cudaAsyncCallbackData *>malloc(sizeof(cbData[0]))
-
-    if cbData == NULL:
-        return cudaErrorMemoryAllocation
-
-    cbData.callback = callbackFunc
-    cbData.userData = userData
-    err = <cudaError_t>cydriver._cuDeviceRegisterAsyncNotification(<cydriver.CUdevice>device, <cydriver.CUasyncCallback>cudaAsyncNotificationCallbackWrapper, <void*>cbData, <cydriver.CUasyncCallbackHandle*>callback)
-    if err != cudaSuccess:
-        free(cbData)
-
-    m_global._asyncCallbackDataMap[callback[0]] = cbData
-
-    return err
-
-cdef cudaError_t DeviceUnregisterAsyncNotificationCommon(int device, cudaAsyncCallbackHandle_t callback) except ?cudaErrorCallRequiresNewerDriver nogil:
-    cdef cudaError_t err = cudaSuccess
-    err = <cudaError_t>cydriver._cuDeviceUnregisterAsyncNotification(<cydriver.CUdevice>device, <cydriver.CUasyncCallbackHandle>callback)
-    if err != cudaSuccess:
-        _setLastError(err)
-        return err
-
-    free(m_global._asyncCallbackDataMap[callback])
-    m_global._asyncCallbackDataMap.erase(callback)
-
-    return err
diff --git a/cuda_bindings/cuda/bindings/_lib/dlfcn.pxd b/cuda_bindings/cuda/bindings/_lib/dlfcn.pxd
deleted file mode 100644
index 808c7186..00000000
--- a/cuda_bindings/cuda/bindings/_lib/dlfcn.pxd
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-cdef extern from "<dlfcn.h>" nogil:
-    void *dlopen(const char *, int)
-    char *dlerror()
-    void *dlsym(void *, const char *)
-    int dlclose(void *)
-
-    enum:
-        RTLD_LAZY
-        RTLD_NOW
-        RTLD_GLOBAL
-        RTLD_LOCAL
\ No newline at end of file
diff --git a/cuda_bindings/cuda/bindings/_lib/param_packer.cpp b/cuda_bindings/cuda/bindings/_lib/param_packer.cpp
deleted file mode 100644
index 63386a17..00000000
--- a/cuda_bindings/cuda/bindings/_lib/param_packer.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-//
-// Please refer to the NVIDIA end user license agreement (EULA) associated
-// with this source code for terms and conditions that govern your use of
-// this software. Any use, reproduction, disclosure, or distribution of
-// this software and related documentation outside the terms of the EULA
-// is strictly prohibited.
-#include <Python.h>
-#include "param_packer.h"
-
-#include <map>
-#include <functional>
-#include <stdexcept>
-#include <string>
-
-PyObject* enum_module = nullptr;
-PyTypeObject* enum_Enum = nullptr;
-
-PyObject* ctypes_module = nullptr;
-PyObject* ctypes_addressof = nullptr;
-PyObject* addressof_param_tuple = nullptr;
-
-PyTypeObject* ctypes_c_char = nullptr;
-PyTypeObject* ctypes_c_bool = nullptr;
-PyTypeObject* ctypes_c_wchar = nullptr;
-PyTypeObject* ctypes_c_byte = nullptr;
-PyTypeObject* ctypes_c_ubyte = nullptr;
-PyTypeObject* ctypes_c_short = nullptr;
-PyTypeObject* ctypes_c_ushort = nullptr;
-PyTypeObject* ctypes_c_int = nullptr;
-PyTypeObject* ctypes_c_uint = nullptr;
-PyTypeObject* ctypes_c_long = nullptr;
-PyTypeObject* ctypes_c_ulong = nullptr;
-PyTypeObject* ctypes_c_longlong = nullptr;
-PyTypeObject* ctypes_c_ulonglong = nullptr;
-PyTypeObject* ctypes_c_size_t = nullptr;
-PyTypeObject* ctypes_c_float = nullptr;
-PyTypeObject* ctypes_c_double = nullptr;
-PyTypeObject* ctypes_c_void_p = nullptr;
-
-PyTypeObject* ctypes_c_ssize_t = nullptr;
-PyTypeObject* ctypes_c_longdouble = nullptr;
-PyTypeObject* ctypes_c_char_p = nullptr;
-PyTypeObject* ctypes_c_wchar_p = nullptr;
-PyTypeObject* ctypes_c_structure = nullptr;
-
-void fetch_ctypes()
-{
-    ctypes_module = PyImport_ImportModule("ctypes");
-    if (ctypes_module == nullptr)
-        throw std::runtime_error("Cannot import ctypes module");
-    // get method addressof
-    PyObject* ctypes_dict = PyModule_GetDict(ctypes_module);
-    if (ctypes_dict == nullptr)
-        throw std::runtime_error(std::string("FAILURE @ ") + std::string(__FILE__) + " : " + std::to_string(__LINE__));
-    // supportedtypes
-    ctypes_c_int = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_int");
-    ctypes_c_char = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_char");
-    ctypes_c_bool = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_bool");
-    ctypes_c_wchar = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_wchar");
-    ctypes_c_byte = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_byte");
-    ctypes_c_ubyte = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_ubyte");
-    ctypes_c_short = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_short");
-    ctypes_c_ushort = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_ushort");
-    ctypes_c_int = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_int");
-    ctypes_c_uint = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_uint");
-    ctypes_c_long = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_long");
-    ctypes_c_ulong = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_ulong");
-    ctypes_c_longlong = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_longlong");
-    ctypes_c_ulonglong = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_ulonglong");
-    ctypes_c_size_t = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_size_t");
-    ctypes_c_float = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_float");
-    ctypes_c_double = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_double");
-    ctypes_c_void_p = (PyTypeObject*) PyDict_GetItemString(ctypes_dict, "c_void_p"); // == c_voidp
-}
-
-
-// (target type, source type)
-std::map<std::pair<PyTypeObject*,PyTypeObject*>, std::function<int(void*, PyObject*)>> m_feeders;
-
-void populate_feeders(PyTypeObject* target_t, PyTypeObject* source_t)
-{
-    if (target_t == ctypes_c_int)
-    {
-        if (source_t == &PyLong_Type)
-        {
-            m_feeders[{target_t,source_t}] = [](void* ptr, PyObject* value) -> int
-            {
-                *((int*)ptr) = (int)PyLong_AsLong(value);
-                return sizeof(int);
-            };
-            return;
-        }
-    } else if (target_t == ctypes_c_bool) {
-        if (source_t == &PyBool_Type)
-        {
-            m_feeders[{target_t,source_t}] = [](void* ptr, PyObject* value) -> int
-            {
-                *((bool*)ptr) = (value == Py_True);
-                return sizeof(bool);
-            };
-            return;
-        }
-    } else if (target_t == ctypes_c_byte) {
-        if (source_t == &PyLong_Type)
-        {
-            m_feeders[{target_t,source_t}] = [](void* ptr, PyObject* value) -> int
-            {
-                *((int8_t*)ptr) = (int8_t)PyLong_AsLong(value);
-                return sizeof(int8_t);
-            };
-            return;
-        }
-    } else if (target_t == ctypes_c_double) {
-        if (source_t == &PyFloat_Type)
-        {
-            m_feeders[{target_t,source_t}] = [](void* ptr, PyObject* value) -> int
-            {
-                *((double*)ptr) = (double)PyFloat_AsDouble(value);
-                return sizeof(double);
-            };
-            return;
-        }
-    } else if (target_t == ctypes_c_float) {
-        if (source_t == &PyFloat_Type)
-        {
-            m_feeders[{target_t,source_t}] = [](void* ptr, PyObject* value) -> int
-            {
-                *((float*)ptr) = (float)PyFloat_AsDouble(value);
-                return sizeof(float);
-            };
-            return;
-        }
-    } else if (target_t == ctypes_c_longlong) {
-        if (source_t == &PyLong_Type)
-        {
-            m_feeders[{target_t,source_t}] = [](void* ptr, PyObject* value) -> int
-            {
-                *((long long*)ptr) = (long long)PyLong_AsLongLong(value);
-                return sizeof(long long);
-            };
-            return;
-        }
-    }
-}
-
-int feed(void* ptr, PyObject* value, PyObject* type)
-{
-    PyTypeObject* pto = (PyTypeObject*)type;
-    if (ctypes_c_int == nullptr)
-        fetch_ctypes();
-    auto found = m_feeders.find({pto,value->ob_type});
-    if (found == m_feeders.end())
-    {
-        populate_feeders(pto, value->ob_type);
-        found = m_feeders.find({pto,value->ob_type});
-    }
-    if (found != m_feeders.end())
-    {
-        return found->second(ptr, value);
-    }
-    return 0;
-}
diff --git a/cuda_bindings/cuda/bindings/_lib/param_packer.h b/cuda_bindings/cuda/bindings/_lib/param_packer.h
deleted file mode 100644
index 2dfa4deb..00000000
--- a/cuda_bindings/cuda/bindings/_lib/param_packer.h
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-//
-// Please refer to the NVIDIA end user license agreement (EULA) associated
-// with this source code for terms and conditions that govern your use of
-// this software. Any use, reproduction, disclosure, or distribution of
-// this software and related documentation outside the terms of the EULA
-// is strictly prohibited.
-#pragma once
-#include <Python.h>
-
-int feed(void* ptr,  PyObject* value, PyObject* type);
diff --git a/cuda_bindings/cuda/bindings/_lib/param_packer.pxd b/cuda_bindings/cuda/bindings/_lib/param_packer.pxd
deleted file mode 100644
index 440006f5..00000000
--- a/cuda_bindings/cuda/bindings/_lib/param_packer.pxd
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-cdef extern from "param_packer.h":
-    int feed(void* ptr, object o, object ct)
diff --git a/cuda_bindings/cuda/bindings/_lib/utils.pxd.in b/cuda_bindings/cuda/bindings/_lib/utils.pxd.in
deleted file mode 100644
index e2022a36..00000000
--- a/cuda_bindings/cuda/bindings/_lib/utils.pxd.in
+++ /dev/null
@@ -1,127 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-cimport cuda.bindings.driver as driver
-cimport cuda.bindings.cydriver as cydriver
-from libcpp.vector cimport vector
-
-cdef class HelperKernelParams:
-    cdef Py_buffer _pybuffer
-    cdef bint _pyobj_acquired
-    cdef void** _ckernelParams
-    cdef char* _ckernelParamsData
-    cdef int _length
-    cdef bint _malloc_list_created
-
-cdef class HelperInputVoidPtr:
-    cdef Py_buffer _pybuffer
-    cdef void* _cptr
-    cdef bint _pyobj_acquired
-{{if 'CUmemPool_attribute_enum' in found_types}}
-
-cdef class HelperCUmemPool_attribute:
-    cdef void* _cptr
-    cdef cydriver.CUmemPool_attribute_enum _attr
-    cdef bint _is_getter
-
-    # Return values
-    cdef int _int_val
-    cdef driver.cuuint64_t _cuuint64_t_val
-{{endif}}
-{{if 'CUmem_range_attribute_enum' in found_types}}
-
-cdef class HelperCUmem_range_attribute:
-    cdef void* _cptr
-    cdef cydriver.CUmem_range_attribute_enum _attr
-    cdef size_t _data_size
-
-    # Return values
-    cdef int _int_val # 32 bit integer
-    cdef int* _int_val_list # 32 bit integer array
-{{endif}}
-{{if 'CUpointer_attribute_enum' in found_types}}
-
-cdef class HelperCUpointer_attribute:
-    cdef void* _cptr
-    cdef cydriver.CUpointer_attribute_enum _attr
-    cdef bint _is_getter
-
-    # Return values
-    cdef driver.CUcontext _ctx
-    cdef unsigned int _uint
-    cdef driver.CUdeviceptr _devptr
-    cdef void** _void
-    cdef driver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS _token
-    cdef bint _bool
-    cdef unsigned long long _ull
-    cdef size_t _size
-    cdef driver.CUmemoryPool _mempool
-{{endif}}
-{{if 'CUgraphMem_attribute_enum' in found_types}}
-
-cdef class HelperCUgraphMem_attribute:
-    cdef void* _cptr
-    cdef cydriver.CUgraphMem_attribute_enum _attr
-    cdef bint _is_getter
-
-    # Return values
-    cdef driver.cuuint64_t _cuuint64_t_val
-{{endif}}
-{{if 'CUjit_option_enum' in found_types}}
-
-cdef class HelperCUjit_option:
-    cdef void* _cptr
-    cdef cydriver.CUjit_option_enum _attr
-
-    # Return values
-    cdef unsigned int _uint
-    cdef float _float
-    cdef char* _charstar
-    cdef cydriver.CUjit_target_enum _target
-    cdef cydriver.CUjit_fallback_enum _fallback
-    cdef int _int
-    cdef cydriver.CUjit_cacheMode_enum _cacheMode
-    cdef vector[char*] _charstarstar # list of names
-    cdef InputVoidPtrPtrHelper _voidstarstar # list of addresses
-{{endif}}
-{{if 'CUlibraryOption_enum' in found_types}}
-
-cdef class HelperCUlibraryOption:
-    cdef void* _cptr
-    cdef cydriver.CUlibraryOption_enum _attr
-
-    # Return values
-    cdef unsigned int _uint
-{{endif}}
-{{if 'CUmemAllocationHandleType_enum' in found_types}}
-
-cdef class HelperCUmemAllocationHandleType:
-    cdef void* _cptr
-    cdef cydriver.CUmemAllocationHandleType_enum _type
-
-    # Return values
-    cdef int _int
-    cdef void* _handle
-    cdef unsigned int _d3dkmt_handle
-    cdef driver.CUmemFabricHandle _mem_fabric_handle
-{{endif}}
-
-cdef class InputVoidPtrPtrHelper:
-    cdef void** _cptr
-
-{{if 'CUcoredumpSettings_enum' in found_types}}
-
-cdef class HelperCUcoredumpSettings:
-    cdef void* _cptr
-    cdef cydriver.CUcoredumpSettings_enum _attrib
-    cdef bint _is_getter
-    cdef size_t _size
-
-    # Return values
-    cdef bint _bool
-    cdef char* _charstar
-{{endif}}
diff --git a/cuda_bindings/cuda/bindings/_lib/utils.pyx.in b/cuda_bindings/cuda/bindings/_lib/utils.pyx.in
deleted file mode 100644
index 15bafa99..00000000
--- a/cuda_bindings/cuda/bindings/_lib/utils.pyx.in
+++ /dev/null
@@ -1,593 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from cpython.buffer cimport PyObject_CheckBuffer, PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS
-from libc.stdlib cimport calloc, free
-from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t
-from libc.stddef cimport wchar_t
-from libc.string cimport memcpy
-from enum import Enum
-from typing import List, Tuple
-import ctypes
-cimport cuda.bindings.cydriver as cydriver
-import cuda.bindings.driver as driver
-cimport cuda.bindings._lib.param_packer as param_packer
-
-ctypedef unsigned long long void_ptr
-
-cdef void* callocWrapper(length, size):
-    cdef void* out = calloc(length, size)
-    if out is NULL:
-        raise MemoryError('Failed to allocated length x size memory: {}x{}'.format(length, size))
-    return out
-
-cdef class HelperKernelParams:
-    supported_types = { # excluding void_p and None, which are handled specially
-        ctypes.c_bool,
-        ctypes.c_char,
-        ctypes.c_wchar,
-        ctypes.c_byte,
-        ctypes.c_ubyte,
-        ctypes.c_short,
-        ctypes.c_ushort,
-        ctypes.c_int,
-        ctypes.c_uint,
-        ctypes.c_long,
-        ctypes.c_ulong,
-        ctypes.c_longlong,
-        ctypes.c_ulonglong,
-        ctypes.c_size_t,
-        ctypes.c_float,
-        ctypes.c_double
-    }
-
-    max_param_size = max(ctypes.sizeof(max(HelperKernelParams.supported_types, key=lambda t:ctypes.sizeof(t))), sizeof(void_ptr))
-
-    def __cinit__(self, kernelParams):
-        self._pyobj_acquired = False
-        self._malloc_list_created = False
-        if kernelParams is None:
-            self._ckernelParams = NULL
-        elif isinstance(kernelParams, (int)):
-            # Easy run, user gave us an already configured void** address
-            self._ckernelParams = <void**><void_ptr>kernelParams
-        elif PyObject_CheckBuffer(kernelParams):
-            # Easy run, get address from Python Buffer Protocol
-            err_buffer = PyObject_GetBuffer(kernelParams, &self._pybuffer, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS)
-            if err_buffer == -1:
-                raise RuntimeError("Argument 'kernelParams' failed to retrieve buffer through Buffer Protocol")
-            self._pyobj_acquired = True
-            self._ckernelParams = <void**><void_ptr>self._pybuffer.buf
-        elif isinstance(kernelParams, (Tuple)) and len(kernelParams) == 2 and isinstance(kernelParams[0], (Tuple)) and isinstance(kernelParams[1], (Tuple)):
-            # Hard run, construct and fill out contigues memory using provided kernel values and types based
-            if len(kernelParams[0]) != len(kernelParams[1]):
-                raise TypeError("Argument 'kernelParams' has tuples with different length")
-            if len(kernelParams[0]) != 0:
-                self._length = len(kernelParams[0])
-                self._ckernelParams = <void**>callocWrapper(len(kernelParams[0]), sizeof(void*))
-                self._ckernelParamsData = <char*>callocWrapper(len(kernelParams[0]), HelperKernelParams.max_param_size)
-                self._malloc_list_created = True
-
-            idx = 0
-            data_idx = 0
-            for value, ctype in zip(kernelParams[0], kernelParams[1]):
-                if ctype is None:
-                    # special cases for None
-                    if callable(getattr(value, 'getPtr', None)):
-                        self._ckernelParams[idx] = <void*><void_ptr>value.getPtr()
-                    elif isinstance(value, (ctypes.Structure)):
-                        self._ckernelParams[idx] = <void*><void_ptr>ctypes.addressof(value)
-                    elif isinstance(value, (Enum)):
-                        self._ckernelParams[idx] = &(self._ckernelParamsData[data_idx])
-                        (<int*>self._ckernelParams[idx])[0] = value.value
-                        data_idx += sizeof(int)
-                    else:
-                        raise TypeError("Provided argument is of type {} but expected Type {}, {} or CUDA Binding structure with getPtr() attribute".format(type(value), type(ctypes.Structure), type(ctypes.c_void_p)))
-                elif ctype in HelperKernelParams.supported_types:
-                    self._ckernelParams[idx] = &(self._ckernelParamsData[data_idx])
-
-                    # handle case where a float is passed as a double
-                    if ctype == ctypes.c_double and isinstance(value, ctypes.c_float):
-                        value = ctype(value.value)
-                    if not isinstance(value, ctype): # make it a ctype
-                        size = param_packer.feed(self._ckernelParams[idx], value, ctype)
-                        if size == 0: # feed failed
-                            value = ctype(value)
-                            size = ctypes.sizeof(ctype)
-                            addr = <void*>(<void_ptr>ctypes.addressof(value))
-                            memcpy(self._ckernelParams[idx], addr, size)
-                    else:
-                        size = ctypes.sizeof(ctype)
-                        addr = <void*>(<void_ptr>ctypes.addressof(value))
-                        memcpy(self._ckernelParams[idx], addr, size)
-                    data_idx += size
-                elif ctype == ctypes.c_void_p:
-                    # special cases for void_p
-                    if isinstance(value, (int, ctypes.c_void_p)):
-                        self._ckernelParams[idx] = &(self._ckernelParamsData[data_idx])
-                        (<void_ptr*>self._ckernelParams[idx])[0] = value.value if isinstance(value, (ctypes.c_void_p)) else value
-                        data_idx += sizeof(void_ptr)
-                    elif callable(getattr(value, 'getPtr', None)):
-                        self._ckernelParams[idx] = &(self._ckernelParamsData[data_idx])
-                        (<void_ptr*>self._ckernelParams[idx])[0] = value.getPtr()
-                        data_idx += sizeof(void_ptr)
-                    else:
-                        raise TypeError("Provided argument is of type {} but expected Type {}, {} or CUDA Binding structure with getPtr() attribute".format(type(value), type(int), type(ctypes.c_void_p)))
-                else:
-                    raise TypeError("Unsupported type: " + str(type(ctype)))
-                idx += 1
-        else:
-            raise TypeError("Argument 'kernelParams' is not a valid type: Tuple[Tuple[Any, ...], Tuple[Any, ...]] or PyObject implimenting Buffer Protocol or Int")
-
-    def __dealloc__(self):
-        if self._pyobj_acquired is True:
-            PyBuffer_Release(&self._pybuffer)
-        if self._malloc_list_created is True:
-            free(self._ckernelParams)
-            free(self._ckernelParamsData)
-
-    @property
-    def ckernelParams(self):
-        return <void_ptr>self._ckernelParams
-
-cdef class HelperInputVoidPtr:
-    def __cinit__(self, ptr):
-        self._pyobj_acquired = False
-        if ptr is None:
-            self._cptr = NULL
-        elif isinstance(ptr, (int)):
-            # Easy run, user gave us an already configured void** address
-            self._cptr = <void*><void_ptr>ptr
-        elif isinstance(ptr, (driver.CUdeviceptr)):
-            self._cptr = <void*><void_ptr>int(ptr)
-        elif PyObject_CheckBuffer(ptr):
-            # Easy run, get address from Python Buffer Protocol
-            err_buffer = PyObject_GetBuffer(ptr, &self._pybuffer, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS)
-            if err_buffer == -1:
-                raise RuntimeError("Failed to retrieve buffer through Buffer Protocol")
-            self._pyobj_acquired = True
-            self._cptr = <void*><void_ptr>self._pybuffer.buf
-        else:
-            raise TypeError("Provided argument is of type {} but expected Type {}, {} or object with Buffer Protocol".format(type(ptr), type(None), type(int)))
-
-    def __dealloc__(self):
-        if self._pyobj_acquired is True:
-            PyBuffer_Release(&self._pybuffer)
-
-    @property
-    def cptr(self):
-        return <void_ptr>self._cptr
-
-{{if 'CUmemPool_attribute_enum' in found_types}}
-
-cdef class HelperCUmemPool_attribute:
-    def __cinit__(self, attr, init_value, is_getter=False):
-        self._is_getter = is_getter
-        self._attr = attr.value
-        if self._attr in ({{if 'CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES,{{endif}}
-                          {{if 'CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC,{{endif}}
-                          {{if 'CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES,{{endif}}):
-            self._int_val = init_value
-            self._cptr = <void*>&self._int_val
-        elif self._attr in ({{if 'CU_MEMPOOL_ATTR_RELEASE_THRESHOLD'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,{{endif}}
-                            {{if 'CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT,{{endif}}
-                            {{if 'CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH,{{endif}}
-                            {{if 'CU_MEMPOOL_ATTR_USED_MEM_CURRENT'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_USED_MEM_CURRENT,{{endif}}
-                            {{if 'CU_MEMPOOL_ATTR_USED_MEM_HIGH'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_USED_MEM_HIGH,{{endif}}):
-            if self._is_getter:
-                self._cuuint64_t_val = driver.cuuint64_t()
-                self._cptr = <void*><void_ptr>self._cuuint64_t_val.getPtr()
-            else:
-                self._cptr = <void*><void_ptr>init_value.getPtr()
-        else:
-            raise TypeError('Unsupported attribute: {}'.format(attr.name))
-
-    def __dealloc__(self):
-        pass
-
-    @property
-    def cptr(self):
-        return <void_ptr>self._cptr
-
-    def pyObj(self):
-        assert(self._is_getter == True)
-        if self._attr in ({{if 'CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES,{{endif}}
-                          {{if 'CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC,{{endif}}
-                          {{if 'CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES,{{endif}}):
-            return self._int_val
-        elif self._attr in ({{if 'CU_MEMPOOL_ATTR_RELEASE_THRESHOLD'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,{{endif}}
-                            {{if 'CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT,{{endif}}
-                            {{if 'CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH,{{endif}}
-                            {{if 'CU_MEMPOOL_ATTR_USED_MEM_CURRENT'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_USED_MEM_CURRENT,{{endif}}
-                            {{if 'CU_MEMPOOL_ATTR_USED_MEM_HIGH'}}cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_USED_MEM_HIGH,{{endif}}):
-            return self._cuuint64_t_val
-        else:
-            raise TypeError('Unsupported attribute value: {}'.format(self._attr))
-{{endif}}
-{{if 'CUmem_range_attribute_enum' in found_types}}
-
-cdef class HelperCUmem_range_attribute:
-    def __cinit__(self, attr, data_size):
-        self._data_size = data_size
-        self._attr = attr.value
-        if self._attr in ({{if 'CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY'}}cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,{{endif}}
-                          {{if 'CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION'}}cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION,{{endif}}
-                          {{if 'CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION'}}cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,{{endif}}):
-            self._cptr = <void*>&self._int_val
-        elif self._attr in ({{if 'CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY'}}cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY,{{endif}}):
-            self._cptr = callocWrapper(1, self._data_size)
-            self._int_val_list = <int*>self._cptr
-        else:
-            raise TypeError('Unsupported attribute: {}'.format(attr.name))
-
-    def __dealloc__(self):
-        if self._attr in ({{if 'CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY'}}cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY,{{endif}}):
-            free(self._cptr)
-
-    @property
-    def cptr(self):
-        return <void_ptr>self._cptr
-
-    def pyObj(self):
-        if self._attr in ({{if 'CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY'}}cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,{{endif}}
-                          {{if 'CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION'}}cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION,{{endif}}
-                          {{if 'CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION'}}cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION,{{endif}}):
-            return self._int_val
-        elif self._attr in ({{if 'CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY'}}cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY,{{endif}}):
-            return [self._int_val_list[idx] for idx in range(int(self._data_size/4))]
-        else:
-            raise TypeError('Unsupported attribute value: {}'.format(self._attr))
-{{endif}}
-{{if 'CUpointer_attribute_enum' in found_types}}
-
-cdef class HelperCUpointer_attribute:
-    def __cinit__(self, attr, init_value, is_getter=False):
-        self._is_getter = is_getter
-        self._attr = attr.value
-        if self._attr in ({{if 'CU_POINTER_ATTRIBUTE_CONTEXT'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_CONTEXT,{{endif}}):
-            if self._is_getter:
-                self._ctx = driver.CUcontext()
-                self._cptr = <void*><void_ptr>self._ctx.getPtr()
-            else:
-                self._cptr = <void*><void_ptr>init_value.getPtr()
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_MEMORY_TYPE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MEMORY_TYPE,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_ACCESS_FLAGS'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_ACCESS_FLAGS,{{endif}}):
-            self._uint = init_value
-            self._cptr = <void*>&self._uint
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_DEVICE_POINTER'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_POINTER,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_RANGE_START_ADDR'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_RANGE_START_ADDR,{{endif}}):
-            if self._is_getter:
-                self._devptr = driver.CUdeviceptr()
-                self._cptr = <void*><void_ptr>self._devptr.getPtr()
-            else:
-                self._cptr = <void*><void_ptr>init_value.getPtr()
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_HOST_POINTER'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_HOST_POINTER,{{endif}}):
-            self._void = <void**><void_ptr>init_value
-            self._cptr = <void*>&self._void
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_P2P_TOKENS'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_P2P_TOKENS,{{endif}}):
-            if self._is_getter:
-                self._token = driver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS()
-                self._cptr = <void*><void_ptr>self._token.getPtr()
-            else:
-                self._cptr = <void*><void_ptr>init_value.getPtr()
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_SYNC_MEMOPS'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_IS_MANAGED'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_MANAGED,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_MAPPED'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MAPPED,{{endif}}):
-            self._bool = init_value
-            self._cptr = <void*>&self._bool
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_BUFFER_ID'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_BUFFER_ID,{{endif}}):
-            self._ull = init_value
-            self._cptr = <void*>&self._ull
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_RANGE_SIZE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_RANGE_SIZE,{{endif}}):
-            self._size = init_value
-            self._cptr = <void*>&self._size
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE,{{endif}}):
-            if self._is_getter:
-                self._mempool = driver.CUmemoryPool()
-                self._cptr = <void*><void_ptr>self._mempool.getPtr()
-            else:
-                self._cptr = <void*><void_ptr>init_value.getPtr()
-        else:
-            raise TypeError('Unsupported attribute: {}'.format(attr.name))
-
-    def __dealloc__(self):
-        pass
-
-    @property
-    def cptr(self):
-        return <void_ptr>self._cptr
-
-    def pyObj(self):
-        assert(self._is_getter == True)
-        if self._attr in ({{if 'CU_POINTER_ATTRIBUTE_CONTEXT'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_CONTEXT,{{endif}}):
-            return self._ctx
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_MEMORY_TYPE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MEMORY_TYPE,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_ACCESS_FLAGS'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_ACCESS_FLAGS,{{endif}}):
-            return self._uint
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_DEVICE_POINTER'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_POINTER,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_RANGE_START_ADDR'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_RANGE_START_ADDR,{{endif}}):
-            return self._devptr
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_HOST_POINTER'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_HOST_POINTER,{{endif}}):
-            return <void_ptr>self._void
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_P2P_TOKENS'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_P2P_TOKENS,{{endif}}):
-            return self._token
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_SYNC_MEMOPS'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_IS_MANAGED'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_MANAGED,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE,{{endif}}
-                            {{if 'CU_POINTER_ATTRIBUTE_MAPPED'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MAPPED,{{endif}}):
-            return self._bool
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_BUFFER_ID'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_BUFFER_ID,{{endif}}):
-            return self._ull
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_RANGE_SIZE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_RANGE_SIZE,{{endif}}):
-            return self._size
-        elif self._attr in ({{if 'CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE'}}cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE,{{endif}}):
-            return self._mempool
-        else:
-            raise TypeError('Unsupported attribute value: {}'.format(self._attr))
-{{endif}}
-{{if 'CUgraphMem_attribute_enum' in found_types}}
-
-cdef class HelperCUgraphMem_attribute:
-    def __cinit__(self, attr, init_value, is_getter=False):
-        self._is_getter = is_getter
-        self._attr = attr.value
-        if self._attr in ({{if 'CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT' in found_values}}cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT,{{endif}}
-                          {{if 'CU_GRAPH_MEM_ATTR_USED_MEM_HIGH' in found_values}}cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_USED_MEM_HIGH,{{endif}}
-                          {{if 'CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT' in found_values}}cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT,{{endif}}
-                          {{if 'CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH' in found_values}}cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH,{{endif}}):
-            if self._is_getter:
-                self._cuuint64_t_val = driver.cuuint64_t()
-                self._cptr = <void*><void_ptr>self._cuuint64_t_val.getPtr()
-            else:
-                self._cptr = <void*><void_ptr>init_value.getPtr()
-        else:
-            raise TypeError('Unsupported attribute: {}'.format(attr.name))
-
-    def __dealloc__(self):
-        pass
-
-    @property
-    def cptr(self):
-        return <void_ptr>self._cptr
-
-    def pyObj(self):
-        assert(self._is_getter == True)
-        if self._attr in ({{if 'CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT' in found_values}}cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT,{{endif}}
-                          {{if 'CU_GRAPH_MEM_ATTR_USED_MEM_HIGH' in found_values}}cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_USED_MEM_HIGH,{{endif}}
-                          {{if 'CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT' in found_values}}cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT,{{endif}}
-                          {{if 'CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH' in found_values}}cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH,{{endif}}):
-            return self._cuuint64_t_val
-        else:
-            raise TypeError('Unsupported attribute value: {}'.format(self._attr))
-{{endif}}
-{{if 'CUjit_option_enum' in found_types}}
-
-cdef class HelperCUjit_option:
-    def __cinit__(self, attr, init_value):
-        self._attr = attr.value
-        if self._attr in ({{if 'CU_JIT_MAX_REGISTERS' in found_values}}cydriver.CUjit_option_enum.CU_JIT_MAX_REGISTERS,{{endif}}
-                          {{if 'CU_JIT_THREADS_PER_BLOCK' in found_values}}cydriver.CUjit_option_enum.CU_JIT_THREADS_PER_BLOCK,{{endif}}
-                          {{if 'CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES' in found_values}}cydriver.CUjit_option_enum.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,{{endif}}
-                          {{if 'CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES' in found_values}}cydriver.CUjit_option_enum.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,{{endif}}
-                          {{if 'CU_JIT_OPTIMIZATION_LEVEL' in found_values}}cydriver.CUjit_option_enum.CU_JIT_OPTIMIZATION_LEVEL,{{endif}}
-                          {{if 'CU_JIT_GLOBAL_SYMBOL_COUNT' in found_values}}cydriver.CUjit_option_enum.CU_JIT_GLOBAL_SYMBOL_COUNT,{{endif}}
-                          {{if 'CU_JIT_TARGET_FROM_CUCONTEXT' in found_values}}cydriver.CUjit_option_enum.CU_JIT_TARGET_FROM_CUCONTEXT,{{endif}}
-                          {{if 'CU_JIT_REFERENCED_KERNEL_COUNT' in found_values}}cydriver.CUjit_option_enum.CU_JIT_REFERENCED_KERNEL_COUNT,{{endif}}
-                          {{if 'CU_JIT_REFERENCED_VARIABLE_COUNT' in found_values}}cydriver.CUjit_option_enum.CU_JIT_REFERENCED_VARIABLE_COUNT,{{endif}}
-                          {{if 'CU_JIT_MIN_CTA_PER_SM' in found_values}}cydriver.CUjit_option_enum.CU_JIT_MIN_CTA_PER_SM,{{endif}}):
-            self._uint = init_value
-            self._cptr = <void*><void_ptr>self._uint
-        elif self._attr in ({{if 'CU_JIT_WALL_TIME' in found_values}}cydriver.CUjit_option_enum.CU_JIT_WALL_TIME,{{endif}}):
-            self._float = init_value
-            self._cptr = <void*><void_ptr>self._float
-        elif self._attr in ({{if 'CU_JIT_INFO_LOG_BUFFER' in found_values}}cydriver.CUjit_option_enum.CU_JIT_INFO_LOG_BUFFER,{{endif}}
-                            {{if 'CU_JIT_ERROR_LOG_BUFFER' in found_values}}cydriver.CUjit_option_enum.CU_JIT_ERROR_LOG_BUFFER{{endif}}):
-            self._charstar = init_value
-            self._cptr = <void*><void_ptr>self._charstar
-        elif self._attr in ({{if 'CU_JIT_TARGET' in found_values}}cydriver.CUjit_option_enum.CU_JIT_TARGET,{{endif}}):
-            self._target = init_value.value
-            self._cptr = <void*><void_ptr>self._target
-        elif self._attr in ({{if 'CU_JIT_FALLBACK_STRATEGY' in found_values}}cydriver.CUjit_option_enum.CU_JIT_FALLBACK_STRATEGY,{{endif}}):
-            self._fallback = init_value.value
-            self._cptr = <void*><void_ptr>self._fallback
-        elif self._attr in ({{if 'CU_JIT_GENERATE_DEBUG_INFO' in found_values}}cydriver.CUjit_option_enum.CU_JIT_GENERATE_DEBUG_INFO,{{endif}}
-                            {{if 'CU_JIT_LOG_VERBOSE' in found_values}}cydriver.CUjit_option_enum.CU_JIT_LOG_VERBOSE,{{endif}}
-                            {{if 'CU_JIT_GENERATE_LINE_INFO' in found_values}}cydriver.CUjit_option_enum.CU_JIT_GENERATE_LINE_INFO,{{endif}}
-                            {{if 'CU_JIT_LTO' in found_values}}cydriver.CUjit_option_enum.CU_JIT_LTO,{{endif}}
-                            {{if 'CU_JIT_FTZ' in found_values}}cydriver.CUjit_option_enum.CU_JIT_FTZ,{{endif}}
-                            {{if 'CU_JIT_PREC_DIV' in found_values}}cydriver.CUjit_option_enum.CU_JIT_PREC_DIV,{{endif}}
-                            {{if 'CU_JIT_PREC_SQRT' in found_values}}cydriver.CUjit_option_enum.CU_JIT_PREC_SQRT,{{endif}}
-                            {{if 'CU_JIT_FMA' in found_values}}cydriver.CUjit_option_enum.CU_JIT_FMA,{{endif}}
-                            {{if 'CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES' in found_values}}cydriver.CUjit_option_enum.CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES,{{endif}}):
-            self._int = init_value
-            self._cptr = <void*><void_ptr>self._int
-        elif self._attr in ({{if 'CU_JIT_CACHE_MODE' in found_values}}cydriver.CUjit_option_enum.CU_JIT_CACHE_MODE,{{endif}}):
-            self._cacheMode = init_value.value
-            self._cptr = <void*><void_ptr>self._cacheMode
-        elif self._attr in ({{if 'CU_JIT_GLOBAL_SYMBOL_NAMES' in found_values}}cydriver.CUjit_option_enum.CU_JIT_GLOBAL_SYMBOL_NAMES,{{endif}}
-                            {{if 'CU_JIT_REFERENCED_KERNEL_NAMES' in found_values}}cydriver.CUjit_option_enum.CU_JIT_REFERENCED_KERNEL_NAMES,{{endif}}
-                            {{if 'CU_JIT_REFERENCED_VARIABLE_NAMES' in found_values}}cydriver.CUjit_option_enum.CU_JIT_REFERENCED_VARIABLE_NAMES,{{endif}}):
-            self._charstarstar = init_value
-            self._cptr = <void*>&self._charstarstar[0]
-        elif self._attr in ({{if 'CU_JIT_GLOBAL_SYMBOL_ADDRESSES' in found_values}}cydriver.CUjit_option_enum.CU_JIT_GLOBAL_SYMBOL_ADDRESSES,{{endif}}):
-            pylist = [HelperInputVoidPtr(val) for val in init_value]
-            self._voidstarstar = InputVoidPtrPtrHelper(pylist)
-            self._cptr = <void*><void_ptr>self._voidstarstar.cptr
-        else:
-            raise TypeError('Unsupported attribute: {}'.format(attr.name))
-
-    def __dealloc__(self):
-        pass
-
-    @property
-    def cptr(self):
-        return <void_ptr>self._cptr
-{{endif}}
-
-{{if 'CUlibraryOption_enum' in found_types}}
-
-cdef class HelperCUlibraryOption:
-    def __cinit__(self, attr, init_value):
-        self._attr = attr.value
-        if False:
-            pass
-        {{if 'CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE' in found_values}}
-        elif self._attr in (cydriver.CUlibraryOption_enum.CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE,):
-            self._cptr = <void*><void_ptr>init_value.getPtr()
-        {{endif}}
-        {{if 'CU_LIBRARY_BINARY_IS_PRESERVED' in found_values}}
-        elif self._attr in (cydriver.CUlibraryOption_enum.CU_LIBRARY_BINARY_IS_PRESERVED,):
-            self._uint = init_value
-            self._cptr = <void*><void_ptr>self._uint
-        {{endif}}
-        else:
-            raise TypeError('Unsupported attribute: {}'.format(attr.name))
-
-    def __dealloc__(self):
-        pass
-
-    @property
-    def cptr(self):
-        return <void_ptr>self._cptr
-{{endif}}
-
-{{if 'CUmemAllocationHandleType_enum' in found_types}}
-
-cdef class HelperCUmemAllocationHandleType:
-    def __cinit__(self, attr):
-        self._type = attr.value
-        if False:
-            pass
-        {{if 'CU_MEM_HANDLE_TYPE_NONE' in found_values}}
-        elif self._type in (cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_NONE,):
-            self._cptr = <void*>&self._int
-        {{endif}}
-        {{if 'CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR' in found_values}}
-        elif self._type in (cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,):
-            self._cptr = <void*>&self._int
-        {{endif}}
-        {{if 'CU_MEM_HANDLE_TYPE_WIN32' in found_values}}
-        elif self._type in (cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_WIN32,):
-            self._cptr = <void*>&self._handle
-        {{endif}}
-        {{if 'CU_MEM_HANDLE_TYPE_WIN32_KMT' in found_values}}
-        elif self._type in (cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_WIN32_KMT,):
-            self._cptr = <void*>&self._d3dkmt_handle
-        {{endif}}
-        {{if 'CU_MEM_HANDLE_TYPE_FABRIC' in found_values}}
-        elif self._type in (cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_FABRIC,):
-            self._mem_fabric_handle = driver.CUmemFabricHandle()
-            self._cptr = <void*><void_ptr>self._mem_fabric_handle.getPtr()
-        {{endif}}
-        else:
-            raise TypeError('Unsupported attribute: {}'.format(attr.name))
-
-    def __dealloc__(self):
-        pass
-
-    @property
-    def cptr(self):
-        return <void_ptr>self._cptr
-
-    def pyObj(self):
-        if False:
-            pass
-        {{if 'CU_MEM_HANDLE_TYPE_NONE' in found_values}}
-        elif self._type in (cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_NONE,):
-            return self._int
-        {{endif}}
-        {{if 'CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR' in found_values}}
-        elif self._type in (cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR,):
-            return self._int
-        {{endif}}
-        {{if 'CU_MEM_HANDLE_TYPE_WIN32' in found_values}}
-        elif self._type in (cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_WIN32,):
-            return <void_ptr>self._handle
-        {{endif}}
-        {{if 'CU_MEM_HANDLE_TYPE_WIN32_KMT' in found_values}}
-        elif self._type in (cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_WIN32_KMT,):
-            return self._d3dkmt_handle
-        {{endif}}
-        {{if 'CU_MEM_HANDLE_TYPE_FABRIC' in found_values}}
-        elif self._type in (cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_FABRIC,):
-            return self._mem_fabric_handle
-        {{endif}}
-        else:
-            raise TypeError('Unsupported attribute: {}'.format(self._type))
-{{endif}}
-
-cdef class InputVoidPtrPtrHelper:
-    def __cinit__(self, lst):
-        self._cptr = <void**>callocWrapper(len(lst), sizeof(void*))
-        for idx in range(len(lst)):
-            self._cptr[idx] = <void*><void_ptr>lst[idx].cptr
-
-    def __dealloc__(self):
-        free(self._cptr)
-
-    @property
-    def cptr(self):
-        return <void_ptr>self._cptr
-
-{{if 'CUcoredumpSettings_enum' in found_types}}
-
-cdef class HelperCUcoredumpSettings:
-    def __cinit__(self, attr, init_value, is_getter=False):
-        self._is_getter = is_getter
-        self._attrib = attr.value
-        if self._attrib in ({{if 'CU_COREDUMP_FILE' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_FILE,{{endif}}
-                          {{if 'CU_COREDUMP_PIPE' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_PIPE,{{endif}}):
-            if self._is_getter:
-                self._charstar = <char*>callocWrapper(1024, 1)
-                self._cptr = <void*><void_ptr>self._charstar
-                self._size = 1024
-            else:
-                self._charstar = init_value
-                self._cptr = <void*><void_ptr>self._charstar
-                self._size = len(init_value)
-        elif self._attrib in ({{if 'CU_COREDUMP_ENABLE_ON_EXCEPTION' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_ENABLE_ON_EXCEPTION,{{endif}}
-                            {{if 'CU_COREDUMP_TRIGGER_HOST' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_TRIGGER_HOST,{{endif}}
-                            {{if 'CU_COREDUMP_LIGHTWEIGHT' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_LIGHTWEIGHT,{{endif}}
-                            {{if 'CU_COREDUMP_ENABLE_USER_TRIGGER' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_ENABLE_USER_TRIGGER,{{endif}}):
-            if self._is_getter == False:
-                self._bool = init_value
-            
-            self._cptr = <void*>&self._bool
-            self._size = 1
-        else:
-            raise TypeError('Unsupported attribute: {}'.format(attr.name))
-
-    def __dealloc__(self):
-        pass
-
-    @property
-    def cptr(self):
-        return <void_ptr>self._cptr
-
-    def size(self):
-        return self._size
-
-    def pyObj(self):
-        assert(self._is_getter == True)
-        if self._attrib in ({{if 'CU_COREDUMP_FILE' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_FILE,{{endif}}
-                          {{if 'CU_COREDUMP_PIPE' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_PIPE,{{endif}}):
-            return self._charstar
-        elif self._attrib in ({{if 'CU_COREDUMP_ENABLE_ON_EXCEPTION' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_ENABLE_ON_EXCEPTION,{{endif}}
-                            {{if 'CU_COREDUMP_TRIGGER_HOST' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_TRIGGER_HOST,{{endif}}
-                            {{if 'CU_COREDUMP_LIGHTWEIGHT' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_LIGHTWEIGHT,{{endif}}
-                            {{if 'CU_COREDUMP_ENABLE_USER_TRIGGER' in found_values}}cydriver.CUcoredumpSettings_enum.CU_COREDUMP_ENABLE_USER_TRIGGER,{{endif}}):
-            return self._bool
-        else:
-            raise TypeError('Unsupported attribute value: {}'.format(self._attrib))
-{{endif}}
diff --git a/cuda_bindings/cuda/bindings/_version.py b/cuda_bindings/cuda/bindings/_version.py
deleted file mode 100644
index 03e1a0b1..00000000
--- a/cuda_bindings/cuda/bindings/_version.py
+++ /dev/null
@@ -1,683 +0,0 @@
-
-# This file helps to compute a version number in source trees obtained from
-# git-archive tarball (such as those provided by githubs download-from-tag
-# feature). Distribution tarballs (built by setup.py sdist) and build
-# directories (produced by setup.py build) will contain a much shorter file
-# that just contains the computed version number.
-
-# This file is released into the public domain.
-# Generated by versioneer-0.29
-# https://github.com/python-versioneer/python-versioneer
-
-"""Git implementation of _version.py."""
-
-import errno
-import os
-import re
-import subprocess
-import sys
-from typing import Any, Callable, Dict, List, Optional, Tuple
-import functools
-
-
-def get_keywords() -> Dict[str, str]:
-    """Get the keywords needed to look up the version information."""
-    # these strings will be replaced by git during git-archive.
-    # setup.py/versioneer.py will grep for the variable names, so they must
-    # each be defined on a line of their own. _version.py will just call
-    # get_keywords().
-    git_refnames = "$Format:%d$"
-    git_full = "$Format:%H$"
-    git_date = "$Format:%ci$"
-    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
-    return keywords
-
-
-class VersioneerConfig:
-    """Container for Versioneer configuration parameters."""
-
-    VCS: str
-    style: str
-    tag_prefix: str
-    parentdir_prefix: str
-    versionfile_source: str
-    verbose: bool
-
-
-def get_config() -> VersioneerConfig:
-    """Create, populate and return the VersioneerConfig() object."""
-    # these strings are filled in when 'setup.py versioneer' creates
-    # _version.py
-    cfg = VersioneerConfig()
-    cfg.VCS = "git"
-    cfg.style = "pep440"
-    cfg.tag_prefix = "v"
-    cfg.parentdir_prefix = "cuda-python-"
-    cfg.versionfile_source = "cuda/_version.py"
-    cfg.verbose = False
-    return cfg
-
-
-class NotThisMethod(Exception):
-    """Exception raised if a method is not valid for the current scenario."""
-
-
-LONG_VERSION_PY: Dict[str, str] = {}
-HANDLERS: Dict[str, Dict[str, Callable]] = {}
-
-
-def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
-    """Create decorator to mark a method as the handler of a VCS."""
-    def decorate(f: Callable) -> Callable:
-        """Store f in HANDLERS[vcs][method]."""
-        if vcs not in HANDLERS:
-            HANDLERS[vcs] = {}
-        HANDLERS[vcs][method] = f
-        return f
-    return decorate
-
-
-def run_command(
-    commands: List[str],
-    args: List[str],
-    cwd: Optional[str] = None,
-    verbose: bool = False,
-    hide_stderr: bool = False,
-    env: Optional[Dict[str, str]] = None,
-) -> Tuple[Optional[str], Optional[int]]:
-    """Call the given command(s)."""
-    assert isinstance(commands, list)
-    process = None
-
-    popen_kwargs: Dict[str, Any] = {}
-    if sys.platform == "win32":
-        # This hides the console window if pythonw.exe is used
-        startupinfo = subprocess.STARTUPINFO()
-        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
-        popen_kwargs["startupinfo"] = startupinfo
-
-    for command in commands:
-        try:
-            dispcmd = str([command] + args)
-            # remember shell=False, so use git.cmd on windows, not just git
-            process = subprocess.Popen([command] + args, cwd=cwd, env=env,
-                                       stdout=subprocess.PIPE,
-                                       stderr=(subprocess.PIPE if hide_stderr
-                                               else None), **popen_kwargs)
-            break
-        except OSError as e:
-            if e.errno == errno.ENOENT:
-                continue
-            if verbose:
-                print("unable to run %s" % dispcmd)
-                print(e)
-            return None, None
-    else:
-        if verbose:
-            print("unable to find command, tried %s" % (commands,))
-        return None, None
-    stdout = process.communicate()[0].strip().decode()
-    if process.returncode != 0:
-        if verbose:
-            print("unable to run %s (error)" % dispcmd)
-            print("stdout was %s" % stdout)
-        return None, process.returncode
-    return stdout, process.returncode
-
-
-def versions_from_parentdir(
-    parentdir_prefix: str,
-    root: str,
-    verbose: bool,
-) -> Dict[str, Any]:
-    """Try to determine the version from the parent directory name.
-
-    Source tarballs conventionally unpack into a directory that includes both
-    the project name and a version string. We will also support searching up
-    two directory levels for an appropriately named parent directory
-    """
-    rootdirs = []
-
-    for _ in range(3):
-        dirname = os.path.basename(root)
-        if dirname.startswith(parentdir_prefix):
-            return {"version": dirname[len(parentdir_prefix):],
-                    "full-revisionid": None,
-                    "dirty": False, "error": None, "date": None}
-        rootdirs.append(root)
-        root = os.path.dirname(root)  # up a level
-
-    if verbose:
-        print("Tried directories %s but none started with prefix %s" %
-              (str(rootdirs), parentdir_prefix))
-    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
-
-
-@register_vcs_handler("git", "get_keywords")
-def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
-    """Extract version information from the given file."""
-    # the code embedded in _version.py can just fetch the value of these
-    # keywords. When used from setup.py, we don't want to import _version.py,
-    # so we do it with a regexp instead. This function is not used from
-    # _version.py.
-    keywords: Dict[str, str] = {}
-    try:
-        with open(versionfile_abs, "r") as fobj:
-            for line in fobj:
-                if line.strip().startswith("git_refnames ="):
-                    mo = re.search(r'=\s*"(.*)"', line)
-                    if mo:
-                        keywords["refnames"] = mo.group(1)
-                if line.strip().startswith("git_full ="):
-                    mo = re.search(r'=\s*"(.*)"', line)
-                    if mo:
-                        keywords["full"] = mo.group(1)
-                if line.strip().startswith("git_date ="):
-                    mo = re.search(r'=\s*"(.*)"', line)
-                    if mo:
-                        keywords["date"] = mo.group(1)
-    except OSError:
-        pass
-    return keywords
-
-
-@register_vcs_handler("git", "keywords")
-def git_versions_from_keywords(
-    keywords: Dict[str, str],
-    tag_prefix: str,
-    verbose: bool,
-) -> Dict[str, Any]:
-    """Get version information from git keywords."""
-    if "refnames" not in keywords:
-        raise NotThisMethod("Short version file found")
-    date = keywords.get("date")
-    if date is not None:
-        # Use only the last line.  Previous lines may contain GPG signature
-        # information.
-        date = date.splitlines()[-1]
-
-        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
-        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
-        # -like" string, which we must then edit to make compliant), because
-        # it's been around since git-1.5.3, and it's too difficult to
-        # discover which version we're using, or to work around using an
-        # older one.
-        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
-    refnames = keywords["refnames"].strip()
-    if refnames.startswith("$Format"):
-        if verbose:
-            print("keywords are unexpanded, not using")
-        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
-    refs = {r.strip() for r in refnames.strip("()").split(",")}
-    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
-    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
-    TAG = "tag: "
-    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
-    if not tags:
-        # Either we're using git < 1.8.3, or there really are no tags. We use
-        # a heuristic: assume all version tags have a digit. The old git %d
-        # expansion behaves like git log --decorate=short and strips out the
-        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
-        # between branches and tags. By ignoring refnames without digits, we
-        # filter out many common branch names like "release" and
-        # "stabilization", as well as "HEAD" and "master".
-        tags = {r for r in refs if re.search(r'\d', r)}
-        if verbose:
-            print("discarding '%s', no digits" % ",".join(refs - tags))
-    if verbose:
-        print("likely tags: %s" % ",".join(sorted(tags)))
-    for ref in sorted(tags):
-        # sorting will prefer e.g. "2.0" over "2.0rc1"
-        if ref.startswith(tag_prefix):
-            r = ref[len(tag_prefix):]
-            # Filter out refs that exactly match prefix or that don't start
-            # with a number once the prefix is stripped (mostly a concern
-            # when prefix is '')
-            if not re.match(r'\d', r):
-                continue
-            if verbose:
-                print("picking %s" % r)
-            return {"version": r,
-                    "full-revisionid": keywords["full"].strip(),
-                    "dirty": False, "error": None,
-                    "date": date}
-    # no suitable tags, so version is "0+unknown", but full hex is still there
-    if verbose:
-        print("no suitable tags, using unknown + full revision id")
-    return {"version": "0+unknown",
-            "full-revisionid": keywords["full"].strip(),
-            "dirty": False, "error": "no suitable tags", "date": None}
-
-
-@register_vcs_handler("git", "pieces_from_vcs")
-def git_pieces_from_vcs(
-    tag_prefix: str,
-    root: str,
-    verbose: bool,
-    runner: Callable = run_command
-) -> Dict[str, Any]:
-    """Get version from 'git describe' in the root of the source tree.
-
-    This only gets called if the git-archive 'subst' keywords were *not*
-    expanded, and _version.py hasn't already been rewritten with a short
-    version string, meaning we're inside a checked out source tree.
-    """
-    GITS = ["git"]
-    if sys.platform == "win32":
-        GITS = ["git.cmd", "git.exe"]
-
-    # GIT_DIR can interfere with correct operation of Versioneer.
-    # It may be intended to be passed to the Versioneer-versioned project,
-    # but that should not change where we get our version from.
-    env = os.environ.copy()
-    env.pop("GIT_DIR", None)
-    runner = functools.partial(runner, env=env)
-
-    _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
-                   hide_stderr=not verbose)
-    if rc != 0:
-        if verbose:
-            print("Directory %s not under git control" % root)
-        raise NotThisMethod("'git rev-parse --git-dir' returned error")
-
-    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
-    # if there isn't one, this yields HEX[-dirty] (no NUM)
-    describe_out, rc = runner(GITS, [
-        "describe", "--tags", "--dirty", "--always", "--long",
-        "--match", f"{tag_prefix}[[:digit:]]*"
-    ], cwd=root)
-    # --long was added in git-1.5.5
-    if describe_out is None:
-        raise NotThisMethod("'git describe' failed")
-    describe_out = describe_out.strip()
-    full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
-    if full_out is None:
-        raise NotThisMethod("'git rev-parse' failed")
-    full_out = full_out.strip()
-
-    pieces: Dict[str, Any] = {}
-    pieces["long"] = full_out
-    pieces["short"] = full_out[:7]  # maybe improved later
-    pieces["error"] = None
-
-    branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
-                             cwd=root)
-    # --abbrev-ref was added in git-1.6.3
-    if rc != 0 or branch_name is None:
-        raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
-    branch_name = branch_name.strip()
-
-    if branch_name == "HEAD":
-        # If we aren't exactly on a branch, pick a branch which represents
-        # the current commit. If all else fails, we are on a branchless
-        # commit.
-        branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
-        # --contains was added in git-1.5.4
-        if rc != 0 or branches is None:
-            raise NotThisMethod("'git branch --contains' returned error")
-        branches = branches.split("\n")
-
-        # Remove the first line if we're running detached
-        if "(" in branches[0]:
-            branches.pop(0)
-
-        # Strip off the leading "* " from the list of branches.
-        branches = [branch[2:] for branch in branches]
-        if "master" in branches:
-            branch_name = "master"
-        elif not branches:
-            branch_name = None
-        else:
-            # Pick the first branch that is returned. Good or bad.
-            branch_name = branches[0]
-
-    pieces["branch"] = branch_name
-
-    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
-    # TAG might have hyphens.
-    git_describe = describe_out
-
-    # look for -dirty suffix
-    dirty = git_describe.endswith("-dirty")
-    pieces["dirty"] = dirty
-    if dirty:
-        git_describe = git_describe[:git_describe.rindex("-dirty")]
-
-    # now we have TAG-NUM-gHEX or HEX
-
-    if "-" in git_describe:
-        # TAG-NUM-gHEX
-        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
-        if not mo:
-            # unparsable. Maybe git-describe is misbehaving?
-            pieces["error"] = ("unable to parse git-describe output: '%s'"
-                               % describe_out)
-            return pieces
-
-        # tag
-        full_tag = mo.group(1)
-        if not full_tag.startswith(tag_prefix):
-            if verbose:
-                fmt = "tag '%s' doesn't start with prefix '%s'"
-                print(fmt % (full_tag, tag_prefix))
-            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
-                               % (full_tag, tag_prefix))
-            return pieces
-        pieces["closest-tag"] = full_tag[len(tag_prefix):]
-
-        # distance: number of commits since tag
-        pieces["distance"] = int(mo.group(2))
-
-        # commit: short hex revision ID
-        pieces["short"] = mo.group(3)
-
-    else:
-        # HEX: no tags
-        pieces["closest-tag"] = None
-        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
-        pieces["distance"] = len(out.split())  # total number of commits
-
-    # commit date: see ISO-8601 comment in git_versions_from_keywords()
-    date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
-    # Use only the last line.  Previous lines may contain GPG signature
-    # information.
-    date = date.splitlines()[-1]
-    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
-
-    return pieces
-
-
-def plus_or_dot(pieces: Dict[str, Any]) -> str:
-    """Return a + if we don't already have one, else return a ."""
-    if "+" in pieces.get("closest-tag", ""):
-        return "."
-    return "+"
-
-
-def render_pep440(pieces: Dict[str, Any]) -> str:
-    """Build up version string, with post-release "local version identifier".
-
-    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
-    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
-
-    Exceptions:
-    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
-    """
-    if pieces["closest-tag"]:
-        rendered = pieces["closest-tag"]
-        if pieces["distance"] or pieces["dirty"]:
-            rendered += plus_or_dot(pieces)
-            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
-            if pieces["dirty"]:
-                rendered += ".dirty"
-    else:
-        # exception #1
-        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
-                                          pieces["short"])
-        if pieces["dirty"]:
-            rendered += ".dirty"
-    return rendered
-
-
-def render_pep440_branch(pieces: Dict[str, Any]) -> str:
-    """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
-
-    The ".dev0" means not master branch. Note that .dev0 sorts backwards
-    (a feature branch will appear "older" than the master branch).
-
-    Exceptions:
-    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
-    """
-    if pieces["closest-tag"]:
-        rendered = pieces["closest-tag"]
-        if pieces["distance"] or pieces["dirty"]:
-            if pieces["branch"] != "master":
-                rendered += ".dev0"
-            rendered += plus_or_dot(pieces)
-            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
-            if pieces["dirty"]:
-                rendered += ".dirty"
-    else:
-        # exception #1
-        rendered = "0"
-        if pieces["branch"] != "master":
-            rendered += ".dev0"
-        rendered += "+untagged.%d.g%s" % (pieces["distance"],
-                                          pieces["short"])
-        if pieces["dirty"]:
-            rendered += ".dirty"
-    return rendered
-
-
-def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
-    """Split pep440 version string at the post-release segment.
-
-    Returns the release segments before the post-release and the
-    post-release version number (or -1 if no post-release segment is present).
-    """
-    vc = str.split(ver, ".post")
-    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
-
-
-def render_pep440_pre(pieces: Dict[str, Any]) -> str:
-    """TAG[.postN.devDISTANCE] -- No -dirty.
-
-    Exceptions:
-    1: no tags. 0.post0.devDISTANCE
-    """
-    if pieces["closest-tag"]:
-        if pieces["distance"]:
-            # update the post release segment
-            tag_version, post_version = pep440_split_post(pieces["closest-tag"])
-            rendered = tag_version
-            if post_version is not None:
-                rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
-            else:
-                rendered += ".post0.dev%d" % (pieces["distance"])
-        else:
-            # no commits, use the tag as the version
-            rendered = pieces["closest-tag"]
-    else:
-        # exception #1
-        rendered = "0.post0.dev%d" % pieces["distance"]
-    return rendered
-
-
-def render_pep440_post(pieces: Dict[str, Any]) -> str:
-    """TAG[.postDISTANCE[.dev0]+gHEX] .
-
-    The ".dev0" means dirty. Note that .dev0 sorts backwards
-    (a dirty tree will appear "older" than the corresponding clean one),
-    but you shouldn't be releasing software with -dirty anyways.
-
-    Exceptions:
-    1: no tags. 0.postDISTANCE[.dev0]
-    """
-    if pieces["closest-tag"]:
-        rendered = pieces["closest-tag"]
-        if pieces["distance"] or pieces["dirty"]:
-            rendered += ".post%d" % pieces["distance"]
-            if pieces["dirty"]:
-                rendered += ".dev0"
-            rendered += plus_or_dot(pieces)
-            rendered += "g%s" % pieces["short"]
-    else:
-        # exception #1
-        rendered = "0.post%d" % pieces["distance"]
-        if pieces["dirty"]:
-            rendered += ".dev0"
-        rendered += "+g%s" % pieces["short"]
-    return rendered
-
-
-def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
-    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
-
-    The ".dev0" means not master branch.
-
-    Exceptions:
-    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
-    """
-    if pieces["closest-tag"]:
-        rendered = pieces["closest-tag"]
-        if pieces["distance"] or pieces["dirty"]:
-            rendered += ".post%d" % pieces["distance"]
-            if pieces["branch"] != "master":
-                rendered += ".dev0"
-            rendered += plus_or_dot(pieces)
-            rendered += "g%s" % pieces["short"]
-            if pieces["dirty"]:
-                rendered += ".dirty"
-    else:
-        # exception #1
-        rendered = "0.post%d" % pieces["distance"]
-        if pieces["branch"] != "master":
-            rendered += ".dev0"
-        rendered += "+g%s" % pieces["short"]
-        if pieces["dirty"]:
-            rendered += ".dirty"
-    return rendered
-
-
-def render_pep440_old(pieces: Dict[str, Any]) -> str:
-    """TAG[.postDISTANCE[.dev0]] .
-
-    The ".dev0" means dirty.
-
-    Exceptions:
-    1: no tags. 0.postDISTANCE[.dev0]
-    """
-    if pieces["closest-tag"]:
-        rendered = pieces["closest-tag"]
-        if pieces["distance"] or pieces["dirty"]:
-            rendered += ".post%d" % pieces["distance"]
-            if pieces["dirty"]:
-                rendered += ".dev0"
-    else:
-        # exception #1
-        rendered = "0.post%d" % pieces["distance"]
-        if pieces["dirty"]:
-            rendered += ".dev0"
-    return rendered
-
-
-def render_git_describe(pieces: Dict[str, Any]) -> str:
-    """TAG[-DISTANCE-gHEX][-dirty].
-
-    Like 'git describe --tags --dirty --always'.
-
-    Exceptions:
-    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
-    """
-    if pieces["closest-tag"]:
-        rendered = pieces["closest-tag"]
-        if pieces["distance"]:
-            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
-    else:
-        # exception #1
-        rendered = pieces["short"]
-    if pieces["dirty"]:
-        rendered += "-dirty"
-    return rendered
-
-
-def render_git_describe_long(pieces: Dict[str, Any]) -> str:
-    """TAG-DISTANCE-gHEX[-dirty].
-
-    Like 'git describe --tags --dirty --always -long'.
-    The distance/hash is unconditional.
-
-    Exceptions:
-    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
-    """
-    if pieces["closest-tag"]:
-        rendered = pieces["closest-tag"]
-        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
-    else:
-        # exception #1
-        rendered = pieces["short"]
-    if pieces["dirty"]:
-        rendered += "-dirty"
-    return rendered
-
-
-def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
-    """Render the given version pieces into the requested style."""
-    if pieces["error"]:
-        return {"version": "unknown",
-                "full-revisionid": pieces.get("long"),
-                "dirty": None,
-                "error": pieces["error"],
-                "date": None}
-
-    if not style or style == "default":
-        style = "pep440"  # the default
-
-    if style == "pep440":
-        rendered = render_pep440(pieces)
-    elif style == "pep440-branch":
-        rendered = render_pep440_branch(pieces)
-    elif style == "pep440-pre":
-        rendered = render_pep440_pre(pieces)
-    elif style == "pep440-post":
-        rendered = render_pep440_post(pieces)
-    elif style == "pep440-post-branch":
-        rendered = render_pep440_post_branch(pieces)
-    elif style == "pep440-old":
-        rendered = render_pep440_old(pieces)
-    elif style == "git-describe":
-        rendered = render_git_describe(pieces)
-    elif style == "git-describe-long":
-        rendered = render_git_describe_long(pieces)
-    else:
-        raise ValueError("unknown style '%s'" % style)
-
-    return {"version": rendered, "full-revisionid": pieces["long"],
-            "dirty": pieces["dirty"], "error": None,
-            "date": pieces.get("date")}
-
-
-def get_versions() -> Dict[str, Any]:
-    """Get version information or return default if unable to do so."""
-    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
-    # __file__, we can work backwards from there to the root. Some
-    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
-    # case we can only use expanded keywords.
-
-    cfg = get_config()
-    verbose = cfg.verbose
-
-    try:
-        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
-                                          verbose)
-    except NotThisMethod:
-        pass
-
-    try:
-        root = os.path.realpath(__file__)
-        # versionfile_source is the relative path from the top of the source
-        # tree (where the .git directory might live) to this file. Invert
-        # this to find the root from __file__.
-        for _ in cfg.versionfile_source.split('/'):
-            root = os.path.dirname(root)
-    except NameError:
-        return {"version": "0+unknown", "full-revisionid": None,
-                "dirty": None,
-                "error": "unable to find root of source tree",
-                "date": None}
-
-    try:
-        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
-        return render(pieces, cfg.style)
-    except NotThisMethod:
-        pass
-
-    try:
-        if cfg.parentdir_prefix:
-            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
-    except NotThisMethod:
-        pass
-
-    return {"version": "0+unknown", "full-revisionid": None,
-            "dirty": None,
-            "error": "unable to compute version", "date": None}
diff --git a/cuda_bindings/cuda/bindings/cydriver.pxd.in b/cuda_bindings/cuda/bindings/cydriver.pxd.in
deleted file mode 100644
index 95877a3f..00000000
--- a/cuda_bindings/cuda/bindings/cydriver.pxd.in
+++ /dev/null
@@ -1,4837 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-
-from libc.stdint cimport uint32_t, uint64_t
-
-cdef extern from "cuda.h":
-
-    ctypedef uint32_t cuuint32_t
-
-    ctypedef uint64_t cuuint64_t
-
-    ctypedef unsigned long long CUdeviceptr_v2
-
-    ctypedef CUdeviceptr_v2 CUdeviceptr
-
-    ctypedef int CUdevice_v1
-
-    ctypedef CUdevice_v1 CUdevice
-
-    cdef struct CUctx_st:
-        pass
-    ctypedef CUctx_st* CUcontext
-
-    cdef struct CUmod_st:
-        pass
-    ctypedef CUmod_st* CUmodule
-
-    cdef struct CUfunc_st:
-        pass
-    ctypedef CUfunc_st* CUfunction
-
-    cdef struct CUlib_st:
-        pass
-    ctypedef CUlib_st* CUlibrary
-
-    cdef struct CUkern_st:
-        pass
-    ctypedef CUkern_st* CUkernel
-
-    cdef struct CUarray_st:
-        pass
-    ctypedef CUarray_st* CUarray
-
-    cdef struct CUmipmappedArray_st:
-        pass
-    ctypedef CUmipmappedArray_st* CUmipmappedArray
-
-    cdef struct CUtexref_st:
-        pass
-    ctypedef CUtexref_st* CUtexref
-
-    cdef struct CUsurfref_st:
-        pass
-    ctypedef CUsurfref_st* CUsurfref
-
-    cdef struct CUevent_st:
-        pass
-    ctypedef CUevent_st* CUevent
-
-    cdef struct CUstream_st:
-        pass
-    ctypedef CUstream_st* CUstream
-
-    cdef struct CUgraphicsResource_st:
-        pass
-    ctypedef CUgraphicsResource_st* CUgraphicsResource
-
-    ctypedef unsigned long long CUtexObject_v1
-
-    ctypedef CUtexObject_v1 CUtexObject
-
-    ctypedef unsigned long long CUsurfObject_v1
-
-    ctypedef CUsurfObject_v1 CUsurfObject
-
-    cdef struct CUextMemory_st:
-        pass
-    ctypedef CUextMemory_st* CUexternalMemory
-
-    cdef struct CUextSemaphore_st:
-        pass
-    ctypedef CUextSemaphore_st* CUexternalSemaphore
-
-    cdef struct CUgraph_st:
-        pass
-    ctypedef CUgraph_st* CUgraph
-
-    cdef struct CUgraphNode_st:
-        pass
-    ctypedef CUgraphNode_st* CUgraphNode
-
-    cdef struct CUgraphExec_st:
-        pass
-    ctypedef CUgraphExec_st* CUgraphExec
-
-    cdef struct CUmemPoolHandle_st:
-        pass
-    ctypedef CUmemPoolHandle_st* CUmemoryPool
-
-    cdef struct CUuserObject_st:
-        pass
-    ctypedef CUuserObject_st* CUuserObject
-
-    ctypedef cuuint64_t CUgraphConditionalHandle
-
-    cdef struct CUgraphDeviceUpdatableNode_st:
-        pass
-    ctypedef CUgraphDeviceUpdatableNode_st* CUgraphDeviceNode
-
-    cdef struct CUasyncCallbackEntry_st:
-        pass
-    ctypedef CUasyncCallbackEntry_st* CUasyncCallbackHandle
-
-    cdef struct CUgreenCtx_st:
-        pass
-    ctypedef CUgreenCtx_st* CUgreenCtx
-
-    cdef struct CUuuid_st:
-        char bytes[16]
-
-    ctypedef CUuuid_st CUuuid
-
-    cdef struct CUmemFabricHandle_st:
-        unsigned char data[64]
-
-    ctypedef CUmemFabricHandle_st CUmemFabricHandle_v1
-
-    ctypedef CUmemFabricHandle_v1 CUmemFabricHandle
-
-    cdef struct CUipcEventHandle_st:
-        char reserved[64]
-
-    ctypedef CUipcEventHandle_st CUipcEventHandle_v1
-
-    ctypedef CUipcEventHandle_v1 CUipcEventHandle
-
-    cdef struct CUipcMemHandle_st:
-        char reserved[64]
-
-    ctypedef CUipcMemHandle_st CUipcMemHandle_v1
-
-    ctypedef CUipcMemHandle_v1 CUipcMemHandle
-
-    cdef enum CUipcMem_flags_enum:
-        CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 1
-
-    ctypedef CUipcMem_flags_enum CUipcMem_flags
-
-    cdef enum CUmemAttach_flags_enum:
-        CU_MEM_ATTACH_GLOBAL = 1
-        CU_MEM_ATTACH_HOST = 2
-        CU_MEM_ATTACH_SINGLE = 4
-
-    ctypedef CUmemAttach_flags_enum CUmemAttach_flags
-
-    cdef enum CUctx_flags_enum:
-        CU_CTX_SCHED_AUTO = 0
-        CU_CTX_SCHED_SPIN = 1
-        CU_CTX_SCHED_YIELD = 2
-        CU_CTX_SCHED_BLOCKING_SYNC = 4
-        CU_CTX_BLOCKING_SYNC = 4
-        CU_CTX_SCHED_MASK = 7
-        CU_CTX_MAP_HOST = 8
-        CU_CTX_LMEM_RESIZE_TO_MAX = 16
-        CU_CTX_COREDUMP_ENABLE = 32
-        CU_CTX_USER_COREDUMP_ENABLE = 64
-        CU_CTX_SYNC_MEMOPS = 128
-        CU_CTX_FLAGS_MASK = 255
-
-    ctypedef CUctx_flags_enum CUctx_flags
-
-    cdef enum CUevent_sched_flags_enum:
-        CU_EVENT_SCHED_AUTO = 0
-        CU_EVENT_SCHED_SPIN = 1
-        CU_EVENT_SCHED_YIELD = 2
-        CU_EVENT_SCHED_BLOCKING_SYNC = 4
-
-    ctypedef CUevent_sched_flags_enum CUevent_sched_flags
-
-    cdef enum cl_event_flags_enum:
-        NVCL_EVENT_SCHED_AUTO = 0
-        NVCL_EVENT_SCHED_SPIN = 1
-        NVCL_EVENT_SCHED_YIELD = 2
-        NVCL_EVENT_SCHED_BLOCKING_SYNC = 4
-
-    ctypedef cl_event_flags_enum cl_event_flags
-
-    cdef enum cl_context_flags_enum:
-        NVCL_CTX_SCHED_AUTO = 0
-        NVCL_CTX_SCHED_SPIN = 1
-        NVCL_CTX_SCHED_YIELD = 2
-        NVCL_CTX_SCHED_BLOCKING_SYNC = 4
-
-    ctypedef cl_context_flags_enum cl_context_flags
-
-    cdef enum CUstream_flags_enum:
-        CU_STREAM_DEFAULT = 0
-        CU_STREAM_NON_BLOCKING = 1
-
-    ctypedef CUstream_flags_enum CUstream_flags
-
-    cdef enum CUevent_flags_enum:
-        CU_EVENT_DEFAULT = 0
-        CU_EVENT_BLOCKING_SYNC = 1
-        CU_EVENT_DISABLE_TIMING = 2
-        CU_EVENT_INTERPROCESS = 4
-
-    ctypedef CUevent_flags_enum CUevent_flags
-
-    cdef enum CUevent_record_flags_enum:
-        CU_EVENT_RECORD_DEFAULT = 0
-        CU_EVENT_RECORD_EXTERNAL = 1
-
-    ctypedef CUevent_record_flags_enum CUevent_record_flags
-
-    cdef enum CUevent_wait_flags_enum:
-        CU_EVENT_WAIT_DEFAULT = 0
-        CU_EVENT_WAIT_EXTERNAL = 1
-
-    ctypedef CUevent_wait_flags_enum CUevent_wait_flags
-
-    cdef enum CUstreamWaitValue_flags_enum:
-        CU_STREAM_WAIT_VALUE_GEQ = 0
-        CU_STREAM_WAIT_VALUE_EQ = 1
-        CU_STREAM_WAIT_VALUE_AND = 2
-        CU_STREAM_WAIT_VALUE_NOR = 3
-        CU_STREAM_WAIT_VALUE_FLUSH = 1073741824
-
-    ctypedef CUstreamWaitValue_flags_enum CUstreamWaitValue_flags
-
-    cdef enum CUstreamWriteValue_flags_enum:
-        CU_STREAM_WRITE_VALUE_DEFAULT = 0
-        CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 1
-
-    ctypedef CUstreamWriteValue_flags_enum CUstreamWriteValue_flags
-
-    cdef enum CUstreamBatchMemOpType_enum:
-        CU_STREAM_MEM_OP_WAIT_VALUE_32 = 1
-        CU_STREAM_MEM_OP_WRITE_VALUE_32 = 2
-        CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3
-        CU_STREAM_MEM_OP_WAIT_VALUE_64 = 4
-        CU_STREAM_MEM_OP_WRITE_VALUE_64 = 5
-        CU_STREAM_MEM_OP_BARRIER = 6
-
-    ctypedef CUstreamBatchMemOpType_enum CUstreamBatchMemOpType
-
-    cdef enum CUstreamMemoryBarrier_flags_enum:
-        CU_STREAM_MEMORY_BARRIER_TYPE_SYS = 0
-        CU_STREAM_MEMORY_BARRIER_TYPE_GPU = 1
-
-    ctypedef CUstreamMemoryBarrier_flags_enum CUstreamMemoryBarrier_flags
-
-    cdef struct CUstreamMemOpWaitValueParams_st:
-        CUstreamBatchMemOpType operation
-        CUdeviceptr address
-        cuuint32_t value
-        cuuint64_t value64
-        unsigned int flags
-        CUdeviceptr alias
-
-    cdef struct CUstreamMemOpWriteValueParams_st:
-        CUstreamBatchMemOpType operation
-        CUdeviceptr address
-        cuuint32_t value
-        cuuint64_t value64
-        unsigned int flags
-        CUdeviceptr alias
-
-    cdef struct CUstreamMemOpFlushRemoteWritesParams_st:
-        CUstreamBatchMemOpType operation
-        unsigned int flags
-
-    cdef struct CUstreamMemOpMemoryBarrierParams_st:
-        CUstreamBatchMemOpType operation
-        unsigned int flags
-
-    cdef union CUstreamBatchMemOpParams_union:
-        CUstreamBatchMemOpType operation
-        CUstreamMemOpWaitValueParams_st waitValue
-        CUstreamMemOpWriteValueParams_st writeValue
-        CUstreamMemOpFlushRemoteWritesParams_st flushRemoteWrites
-        CUstreamMemOpMemoryBarrierParams_st memoryBarrier
-        cuuint64_t pad[6]
-
-    ctypedef CUstreamBatchMemOpParams_union CUstreamBatchMemOpParams_v1
-
-    ctypedef CUstreamBatchMemOpParams_v1 CUstreamBatchMemOpParams
-
-    cdef struct CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st:
-        CUcontext ctx
-        unsigned int count
-        CUstreamBatchMemOpParams* paramArray
-        unsigned int flags
-
-    ctypedef CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st CUDA_BATCH_MEM_OP_NODE_PARAMS_v1
-
-    ctypedef CUDA_BATCH_MEM_OP_NODE_PARAMS_v1 CUDA_BATCH_MEM_OP_NODE_PARAMS
-
-    cdef struct CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st:
-        CUcontext ctx
-        unsigned int count
-        CUstreamBatchMemOpParams* paramArray
-        unsigned int flags
-
-    ctypedef CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st CUDA_BATCH_MEM_OP_NODE_PARAMS_v2
-
-    cdef enum CUoccupancy_flags_enum:
-        CU_OCCUPANCY_DEFAULT = 0
-        CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 1
-
-    ctypedef CUoccupancy_flags_enum CUoccupancy_flags
-
-    cdef enum CUstreamUpdateCaptureDependencies_flags_enum:
-        CU_STREAM_ADD_CAPTURE_DEPENDENCIES = 0
-        CU_STREAM_SET_CAPTURE_DEPENDENCIES = 1
-
-    ctypedef CUstreamUpdateCaptureDependencies_flags_enum CUstreamUpdateCaptureDependencies_flags
-
-    cdef enum CUasyncNotificationType_enum:
-        CU_ASYNC_NOTIFICATION_TYPE_OVER_BUDGET = 1
-
-    ctypedef CUasyncNotificationType_enum CUasyncNotificationType
-
-    cdef struct anon_struct0:
-        unsigned long long bytesOverBudget
-
-    cdef union anon_union2:
-        anon_struct0 overBudget
-
-    cdef struct CUasyncNotificationInfo_st:
-        CUasyncNotificationType type
-        anon_union2 info
-
-    ctypedef CUasyncNotificationInfo_st CUasyncNotificationInfo
-
-    ctypedef void (*CUasyncCallback)(CUasyncNotificationInfo* info, void* userData, CUasyncCallbackHandle callback)
-
-    cdef enum CUarray_format_enum:
-        CU_AD_FORMAT_UNSIGNED_INT8 = 1
-        CU_AD_FORMAT_UNSIGNED_INT16 = 2
-        CU_AD_FORMAT_UNSIGNED_INT32 = 3
-        CU_AD_FORMAT_SIGNED_INT8 = 8
-        CU_AD_FORMAT_SIGNED_INT16 = 9
-        CU_AD_FORMAT_SIGNED_INT32 = 10
-        CU_AD_FORMAT_HALF = 16
-        CU_AD_FORMAT_FLOAT = 32
-        CU_AD_FORMAT_BC1_UNORM = 145
-        CU_AD_FORMAT_BC1_UNORM_SRGB = 146
-        CU_AD_FORMAT_BC2_UNORM = 147
-        CU_AD_FORMAT_BC2_UNORM_SRGB = 148
-        CU_AD_FORMAT_BC3_UNORM = 149
-        CU_AD_FORMAT_BC3_UNORM_SRGB = 150
-        CU_AD_FORMAT_BC4_UNORM = 151
-        CU_AD_FORMAT_BC4_SNORM = 152
-        CU_AD_FORMAT_BC5_UNORM = 153
-        CU_AD_FORMAT_BC5_SNORM = 154
-        CU_AD_FORMAT_BC6H_UF16 = 155
-        CU_AD_FORMAT_BC6H_SF16 = 156
-        CU_AD_FORMAT_BC7_UNORM = 157
-        CU_AD_FORMAT_BC7_UNORM_SRGB = 158
-        CU_AD_FORMAT_P010 = 159
-        CU_AD_FORMAT_P016 = 161
-        CU_AD_FORMAT_NV16 = 162
-        CU_AD_FORMAT_P210 = 163
-        CU_AD_FORMAT_P216 = 164
-        CU_AD_FORMAT_YUY2 = 165
-        CU_AD_FORMAT_Y210 = 166
-        CU_AD_FORMAT_Y216 = 167
-        CU_AD_FORMAT_AYUV = 168
-        CU_AD_FORMAT_Y410 = 169
-        CU_AD_FORMAT_NV12 = 176
-        CU_AD_FORMAT_Y416 = 177
-        CU_AD_FORMAT_Y444_PLANAR8 = 178
-        CU_AD_FORMAT_Y444_PLANAR10 = 179
-        CU_AD_FORMAT_UNORM_INT8X1 = 192
-        CU_AD_FORMAT_UNORM_INT8X2 = 193
-        CU_AD_FORMAT_UNORM_INT8X4 = 194
-        CU_AD_FORMAT_UNORM_INT16X1 = 195
-        CU_AD_FORMAT_UNORM_INT16X2 = 196
-        CU_AD_FORMAT_UNORM_INT16X4 = 197
-        CU_AD_FORMAT_SNORM_INT8X1 = 198
-        CU_AD_FORMAT_SNORM_INT8X2 = 199
-        CU_AD_FORMAT_SNORM_INT8X4 = 200
-        CU_AD_FORMAT_SNORM_INT16X1 = 201
-        CU_AD_FORMAT_SNORM_INT16X2 = 202
-        CU_AD_FORMAT_SNORM_INT16X4 = 203
-        CU_AD_FORMAT_MAX = 2147483647
-
-    ctypedef CUarray_format_enum CUarray_format
-
-    cdef enum CUaddress_mode_enum:
-        CU_TR_ADDRESS_MODE_WRAP = 0
-        CU_TR_ADDRESS_MODE_CLAMP = 1
-        CU_TR_ADDRESS_MODE_MIRROR = 2
-        CU_TR_ADDRESS_MODE_BORDER = 3
-
-    ctypedef CUaddress_mode_enum CUaddress_mode
-
-    cdef enum CUfilter_mode_enum:
-        CU_TR_FILTER_MODE_POINT = 0
-        CU_TR_FILTER_MODE_LINEAR = 1
-
-    ctypedef CUfilter_mode_enum CUfilter_mode
-
-    cdef enum CUdevice_attribute_enum:
-        CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1
-        CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2
-        CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3
-        CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4
-        CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5
-        CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6
-        CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7
-        CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8
-        CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8
-        CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9
-        CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10
-        CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11
-        CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12
-        CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12
-        CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
-        CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14
-        CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15
-        CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
-        CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17
-        CU_DEVICE_ATTRIBUTE_INTEGRATED = 18
-        CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19
-        CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29
-        CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30
-        CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31
-        CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32
-        CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33
-        CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34
-        CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35
-        CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
-        CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37
-        CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38
-        CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39
-        CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40
-        CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43
-        CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49
-        CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50
-        CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74
-        CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75
-        CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76
-        CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77
-        CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78
-        CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79
-        CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80
-        CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81
-        CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
-        CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83
-        CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84
-        CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85
-        CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86
-        CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87
-        CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88
-        CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89
-        CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90
-        CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91
-        CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS_V1 = 92
-        CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V1 = 93
-        CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1 = 94
-        CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95
-        CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96
-        CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97
-        CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98
-        CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99
-        CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100
-        CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101
-        CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102
-        CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102
-        CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103
-        CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104
-        CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105
-        CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106
-        CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107
-        CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108
-        CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109
-        CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110
-        CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111
-        CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112
-        CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113
-        CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114
-        CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115
-        CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116
-        CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117
-        CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118
-        CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119
-        CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH = 120
-        CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED = 121
-        CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 122
-        CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 123
-        CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED = 124
-        CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED = 125
-        CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT = 126
-        CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED = 127
-        CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED = 128
-        CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS = 129
-        CU_DEVICE_ATTRIBUTE_NUMA_CONFIG = 130
-        CU_DEVICE_ATTRIBUTE_NUMA_ID = 131
-        CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED = 132
-        CU_DEVICE_ATTRIBUTE_MPS_ENABLED = 133
-        CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID = 134
-        CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED = 135
-        CU_DEVICE_ATTRIBUTE_MAX = 136
-
-    ctypedef CUdevice_attribute_enum CUdevice_attribute
-
-    cdef struct CUdevprop_st:
-        int maxThreadsPerBlock
-        int maxThreadsDim[3]
-        int maxGridSize[3]
-        int sharedMemPerBlock
-        int totalConstantMemory
-        int SIMDWidth
-        int memPitch
-        int regsPerBlock
-        int clockRate
-        int textureAlign
-
-    ctypedef CUdevprop_st CUdevprop_v1
-
-    ctypedef CUdevprop_v1 CUdevprop
-
-    cdef enum CUpointer_attribute_enum:
-        CU_POINTER_ATTRIBUTE_CONTEXT = 1
-        CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2
-        CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3
-        CU_POINTER_ATTRIBUTE_HOST_POINTER = 4
-        CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5
-        CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6
-        CU_POINTER_ATTRIBUTE_BUFFER_ID = 7
-        CU_POINTER_ATTRIBUTE_IS_MANAGED = 8
-        CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9
-        CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10
-        CU_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11
-        CU_POINTER_ATTRIBUTE_RANGE_SIZE = 12
-        CU_POINTER_ATTRIBUTE_MAPPED = 13
-        CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14
-        CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15
-        CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16
-        CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = 17
-        CU_POINTER_ATTRIBUTE_MAPPING_SIZE = 18
-        CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR = 19
-        CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID = 20
-
-    ctypedef CUpointer_attribute_enum CUpointer_attribute
-
-    cdef enum CUfunction_attribute_enum:
-        CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0
-        CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1
-        CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2
-        CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3
-        CU_FUNC_ATTRIBUTE_NUM_REGS = 4
-        CU_FUNC_ATTRIBUTE_PTX_VERSION = 5
-        CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6
-        CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7
-        CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8
-        CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9
-        CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET = 10
-        CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH = 11
-        CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT = 12
-        CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH = 13
-        CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED = 14
-        CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = 15
-        CU_FUNC_ATTRIBUTE_MAX = 16
-
-    ctypedef CUfunction_attribute_enum CUfunction_attribute
-
-    cdef enum CUfunc_cache_enum:
-        CU_FUNC_CACHE_PREFER_NONE = 0
-        CU_FUNC_CACHE_PREFER_SHARED = 1
-        CU_FUNC_CACHE_PREFER_L1 = 2
-        CU_FUNC_CACHE_PREFER_EQUAL = 3
-
-    ctypedef CUfunc_cache_enum CUfunc_cache
-
-    cdef enum CUsharedconfig_enum:
-        CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0
-        CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 1
-        CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 2
-
-    ctypedef CUsharedconfig_enum CUsharedconfig
-
-    cdef enum CUshared_carveout_enum:
-        CU_SHAREDMEM_CARVEOUT_DEFAULT = -1
-        CU_SHAREDMEM_CARVEOUT_MAX_L1 = 0
-        CU_SHAREDMEM_CARVEOUT_MAX_SHARED = 100
-
-    ctypedef CUshared_carveout_enum CUshared_carveout
-
-    cdef enum CUmemorytype_enum:
-        CU_MEMORYTYPE_HOST = 1
-        CU_MEMORYTYPE_DEVICE = 2
-        CU_MEMORYTYPE_ARRAY = 3
-        CU_MEMORYTYPE_UNIFIED = 4
-
-    ctypedef CUmemorytype_enum CUmemorytype
-
-    cdef enum CUcomputemode_enum:
-        CU_COMPUTEMODE_DEFAULT = 0
-        CU_COMPUTEMODE_PROHIBITED = 2
-        CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3
-
-    ctypedef CUcomputemode_enum CUcomputemode
-
-    cdef enum CUmem_advise_enum:
-        CU_MEM_ADVISE_SET_READ_MOSTLY = 1
-        CU_MEM_ADVISE_UNSET_READ_MOSTLY = 2
-        CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 3
-        CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4
-        CU_MEM_ADVISE_SET_ACCESSED_BY = 5
-        CU_MEM_ADVISE_UNSET_ACCESSED_BY = 6
-
-    ctypedef CUmem_advise_enum CUmem_advise
-
-    cdef enum CUmem_range_attribute_enum:
-        CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 1
-        CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 2
-        CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 3
-        CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4
-        CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE = 5
-        CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID = 6
-        CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE = 7
-        CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID = 8
-
-    ctypedef CUmem_range_attribute_enum CUmem_range_attribute
-
-    cdef enum CUjit_option_enum:
-        CU_JIT_MAX_REGISTERS = 0
-        CU_JIT_THREADS_PER_BLOCK = 1
-        CU_JIT_WALL_TIME = 2
-        CU_JIT_INFO_LOG_BUFFER = 3
-        CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4
-        CU_JIT_ERROR_LOG_BUFFER = 5
-        CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6
-        CU_JIT_OPTIMIZATION_LEVEL = 7
-        CU_JIT_TARGET_FROM_CUCONTEXT = 8
-        CU_JIT_TARGET = 9
-        CU_JIT_FALLBACK_STRATEGY = 10
-        CU_JIT_GENERATE_DEBUG_INFO = 11
-        CU_JIT_LOG_VERBOSE = 12
-        CU_JIT_GENERATE_LINE_INFO = 13
-        CU_JIT_CACHE_MODE = 14
-        CU_JIT_NEW_SM3X_OPT = 15
-        CU_JIT_FAST_COMPILE = 16
-        CU_JIT_GLOBAL_SYMBOL_NAMES = 17
-        CU_JIT_GLOBAL_SYMBOL_ADDRESSES = 18
-        CU_JIT_GLOBAL_SYMBOL_COUNT = 19
-        CU_JIT_LTO = 20
-        CU_JIT_FTZ = 21
-        CU_JIT_PREC_DIV = 22
-        CU_JIT_PREC_SQRT = 23
-        CU_JIT_FMA = 24
-        CU_JIT_REFERENCED_KERNEL_NAMES = 25
-        CU_JIT_REFERENCED_KERNEL_COUNT = 26
-        CU_JIT_REFERENCED_VARIABLE_NAMES = 27
-        CU_JIT_REFERENCED_VARIABLE_COUNT = 28
-        CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES = 29
-        CU_JIT_POSITION_INDEPENDENT_CODE = 30
-        CU_JIT_MIN_CTA_PER_SM = 31
-        CU_JIT_MAX_THREADS_PER_BLOCK = 32
-        CU_JIT_OVERRIDE_DIRECTIVE_VALUES = 33
-        CU_JIT_NUM_OPTIONS = 34
-
-    ctypedef CUjit_option_enum CUjit_option
-
-    cdef enum CUjit_target_enum:
-        CU_TARGET_COMPUTE_30 = 30
-        CU_TARGET_COMPUTE_32 = 32
-        CU_TARGET_COMPUTE_35 = 35
-        CU_TARGET_COMPUTE_37 = 37
-        CU_TARGET_COMPUTE_50 = 50
-        CU_TARGET_COMPUTE_52 = 52
-        CU_TARGET_COMPUTE_53 = 53
-        CU_TARGET_COMPUTE_60 = 60
-        CU_TARGET_COMPUTE_61 = 61
-        CU_TARGET_COMPUTE_62 = 62
-        CU_TARGET_COMPUTE_70 = 70
-        CU_TARGET_COMPUTE_72 = 72
-        CU_TARGET_COMPUTE_75 = 75
-        CU_TARGET_COMPUTE_80 = 80
-        CU_TARGET_COMPUTE_86 = 86
-        CU_TARGET_COMPUTE_87 = 87
-        CU_TARGET_COMPUTE_89 = 89
-        CU_TARGET_COMPUTE_90 = 90
-        CU_TARGET_COMPUTE_90A = 65626
-
-    ctypedef CUjit_target_enum CUjit_target
-
-    cdef enum CUjit_fallback_enum:
-        CU_PREFER_PTX = 0
-        CU_PREFER_BINARY = 1
-
-    ctypedef CUjit_fallback_enum CUjit_fallback
-
-    cdef enum CUjit_cacheMode_enum:
-        CU_JIT_CACHE_OPTION_NONE = 0
-        CU_JIT_CACHE_OPTION_CG = 1
-        CU_JIT_CACHE_OPTION_CA = 2
-
-    ctypedef CUjit_cacheMode_enum CUjit_cacheMode
-
-    cdef enum CUjitInputType_enum:
-        CU_JIT_INPUT_CUBIN = 0
-        CU_JIT_INPUT_PTX = 1
-        CU_JIT_INPUT_FATBINARY = 2
-        CU_JIT_INPUT_OBJECT = 3
-        CU_JIT_INPUT_LIBRARY = 4
-        CU_JIT_INPUT_NVVM = 5
-        CU_JIT_NUM_INPUT_TYPES = 6
-
-    ctypedef CUjitInputType_enum CUjitInputType
-
-    cdef struct CUlinkState_st:
-        pass
-    ctypedef CUlinkState_st* CUlinkState
-
-    cdef enum CUgraphicsRegisterFlags_enum:
-        CU_GRAPHICS_REGISTER_FLAGS_NONE = 0
-        CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 1
-        CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 2
-        CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 4
-        CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 8
-
-    ctypedef CUgraphicsRegisterFlags_enum CUgraphicsRegisterFlags
-
-    cdef enum CUgraphicsMapResourceFlags_enum:
-        CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0
-        CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 1
-        CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 2
-
-    ctypedef CUgraphicsMapResourceFlags_enum CUgraphicsMapResourceFlags
-
-    cdef enum CUarray_cubemap_face_enum:
-        CU_CUBEMAP_FACE_POSITIVE_X = 0
-        CU_CUBEMAP_FACE_NEGATIVE_X = 1
-        CU_CUBEMAP_FACE_POSITIVE_Y = 2
-        CU_CUBEMAP_FACE_NEGATIVE_Y = 3
-        CU_CUBEMAP_FACE_POSITIVE_Z = 4
-        CU_CUBEMAP_FACE_NEGATIVE_Z = 5
-
-    ctypedef CUarray_cubemap_face_enum CUarray_cubemap_face
-
-    cdef enum CUlimit_enum:
-        CU_LIMIT_STACK_SIZE = 0
-        CU_LIMIT_PRINTF_FIFO_SIZE = 1
-        CU_LIMIT_MALLOC_HEAP_SIZE = 2
-        CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 3
-        CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 4
-        CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 5
-        CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 6
-        CU_LIMIT_SHMEM_SIZE = 7
-        CU_LIMIT_CIG_ENABLED = 8
-        CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED = 9
-        CU_LIMIT_MAX = 10
-
-    ctypedef CUlimit_enum CUlimit
-
-    cdef enum CUresourcetype_enum:
-        CU_RESOURCE_TYPE_ARRAY = 0
-        CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 1
-        CU_RESOURCE_TYPE_LINEAR = 2
-        CU_RESOURCE_TYPE_PITCH2D = 3
-
-    ctypedef CUresourcetype_enum CUresourcetype
-
-    ctypedef void (*CUhostFn)(void* userData)
-
-    cdef enum CUaccessProperty_enum:
-        CU_ACCESS_PROPERTY_NORMAL = 0
-        CU_ACCESS_PROPERTY_STREAMING = 1
-        CU_ACCESS_PROPERTY_PERSISTING = 2
-
-    ctypedef CUaccessProperty_enum CUaccessProperty
-
-    cdef struct CUaccessPolicyWindow_st:
-        void* base_ptr
-        size_t num_bytes
-        float hitRatio
-        CUaccessProperty hitProp
-        CUaccessProperty missProp
-
-    ctypedef CUaccessPolicyWindow_st CUaccessPolicyWindow_v1
-
-    ctypedef CUaccessPolicyWindow_v1 CUaccessPolicyWindow
-
-    cdef struct CUDA_KERNEL_NODE_PARAMS_st:
-        CUfunction func
-        unsigned int gridDimX
-        unsigned int gridDimY
-        unsigned int gridDimZ
-        unsigned int blockDimX
-        unsigned int blockDimY
-        unsigned int blockDimZ
-        unsigned int sharedMemBytes
-        void** kernelParams
-        void** extra
-
-    ctypedef CUDA_KERNEL_NODE_PARAMS_st CUDA_KERNEL_NODE_PARAMS_v1
-
-    cdef struct CUDA_KERNEL_NODE_PARAMS_v2_st:
-        CUfunction func
-        unsigned int gridDimX
-        unsigned int gridDimY
-        unsigned int gridDimZ
-        unsigned int blockDimX
-        unsigned int blockDimY
-        unsigned int blockDimZ
-        unsigned int sharedMemBytes
-        void** kernelParams
-        void** extra
-        CUkernel kern
-        CUcontext ctx
-
-    ctypedef CUDA_KERNEL_NODE_PARAMS_v2_st CUDA_KERNEL_NODE_PARAMS_v2
-
-    ctypedef CUDA_KERNEL_NODE_PARAMS_v2 CUDA_KERNEL_NODE_PARAMS
-
-    cdef struct CUDA_KERNEL_NODE_PARAMS_v3_st:
-        CUfunction func
-        unsigned int gridDimX
-        unsigned int gridDimY
-        unsigned int gridDimZ
-        unsigned int blockDimX
-        unsigned int blockDimY
-        unsigned int blockDimZ
-        unsigned int sharedMemBytes
-        void** kernelParams
-        void** extra
-        CUkernel kern
-        CUcontext ctx
-
-    ctypedef CUDA_KERNEL_NODE_PARAMS_v3_st CUDA_KERNEL_NODE_PARAMS_v3
-
-    cdef struct CUDA_MEMSET_NODE_PARAMS_st:
-        CUdeviceptr dst
-        size_t pitch
-        unsigned int value
-        unsigned int elementSize
-        size_t width
-        size_t height
-
-    ctypedef CUDA_MEMSET_NODE_PARAMS_st CUDA_MEMSET_NODE_PARAMS_v1
-
-    ctypedef CUDA_MEMSET_NODE_PARAMS_v1 CUDA_MEMSET_NODE_PARAMS
-
-    cdef struct CUDA_MEMSET_NODE_PARAMS_v2_st:
-        CUdeviceptr dst
-        size_t pitch
-        unsigned int value
-        unsigned int elementSize
-        size_t width
-        size_t height
-        CUcontext ctx
-
-    ctypedef CUDA_MEMSET_NODE_PARAMS_v2_st CUDA_MEMSET_NODE_PARAMS_v2
-
-    cdef struct CUDA_HOST_NODE_PARAMS_st:
-        CUhostFn fn
-        void* userData
-
-    ctypedef CUDA_HOST_NODE_PARAMS_st CUDA_HOST_NODE_PARAMS_v1
-
-    ctypedef CUDA_HOST_NODE_PARAMS_v1 CUDA_HOST_NODE_PARAMS
-
-    cdef struct CUDA_HOST_NODE_PARAMS_v2_st:
-        CUhostFn fn
-        void* userData
-
-    ctypedef CUDA_HOST_NODE_PARAMS_v2_st CUDA_HOST_NODE_PARAMS_v2
-
-    cdef enum CUgraphConditionalNodeType_enum:
-        CU_GRAPH_COND_TYPE_IF = 0
-        CU_GRAPH_COND_TYPE_WHILE = 1
-
-    ctypedef CUgraphConditionalNodeType_enum CUgraphConditionalNodeType
-
-    cdef struct CUDA_CONDITIONAL_NODE_PARAMS:
-        CUgraphConditionalHandle handle
-        CUgraphConditionalNodeType type
-        unsigned int size
-        CUgraph* phGraph_out
-        CUcontext ctx
-
-    cdef enum CUgraphNodeType_enum:
-        CU_GRAPH_NODE_TYPE_KERNEL = 0
-        CU_GRAPH_NODE_TYPE_MEMCPY = 1
-        CU_GRAPH_NODE_TYPE_MEMSET = 2
-        CU_GRAPH_NODE_TYPE_HOST = 3
-        CU_GRAPH_NODE_TYPE_GRAPH = 4
-        CU_GRAPH_NODE_TYPE_EMPTY = 5
-        CU_GRAPH_NODE_TYPE_WAIT_EVENT = 6
-        CU_GRAPH_NODE_TYPE_EVENT_RECORD = 7
-        CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8
-        CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9
-        CU_GRAPH_NODE_TYPE_MEM_ALLOC = 10
-        CU_GRAPH_NODE_TYPE_MEM_FREE = 11
-        CU_GRAPH_NODE_TYPE_BATCH_MEM_OP = 12
-        CU_GRAPH_NODE_TYPE_CONDITIONAL = 13
-
-    ctypedef CUgraphNodeType_enum CUgraphNodeType
-
-    cdef enum CUgraphDependencyType_enum:
-        CU_GRAPH_DEPENDENCY_TYPE_DEFAULT = 0
-        CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC = 1
-
-    ctypedef CUgraphDependencyType_enum CUgraphDependencyType
-
-    cdef struct CUgraphEdgeData_st:
-        unsigned char from_port
-        unsigned char to_port
-        unsigned char type
-        unsigned char reserved[5]
-
-    ctypedef CUgraphEdgeData_st CUgraphEdgeData
-
-    cdef enum CUgraphInstantiateResult_enum:
-        CUDA_GRAPH_INSTANTIATE_SUCCESS = 0
-        CUDA_GRAPH_INSTANTIATE_ERROR = 1
-        CUDA_GRAPH_INSTANTIATE_INVALID_STRUCTURE = 2
-        CUDA_GRAPH_INSTANTIATE_NODE_OPERATION_NOT_SUPPORTED = 3
-        CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED = 4
-
-    ctypedef CUgraphInstantiateResult_enum CUgraphInstantiateResult
-
-    cdef struct CUDA_GRAPH_INSTANTIATE_PARAMS_st:
-        cuuint64_t flags
-        CUstream hUploadStream
-        CUgraphNode hErrNode_out
-        CUgraphInstantiateResult result_out
-
-    ctypedef CUDA_GRAPH_INSTANTIATE_PARAMS_st CUDA_GRAPH_INSTANTIATE_PARAMS
-
-    cdef enum CUsynchronizationPolicy_enum:
-        CU_SYNC_POLICY_AUTO = 1
-        CU_SYNC_POLICY_SPIN = 2
-        CU_SYNC_POLICY_YIELD = 3
-        CU_SYNC_POLICY_BLOCKING_SYNC = 4
-
-    ctypedef CUsynchronizationPolicy_enum CUsynchronizationPolicy
-
-    cdef enum CUclusterSchedulingPolicy_enum:
-        CU_CLUSTER_SCHEDULING_POLICY_DEFAULT = 0
-        CU_CLUSTER_SCHEDULING_POLICY_SPREAD = 1
-        CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING = 2
-
-    ctypedef CUclusterSchedulingPolicy_enum CUclusterSchedulingPolicy
-
-    cdef enum CUlaunchMemSyncDomain_enum:
-        CU_LAUNCH_MEM_SYNC_DOMAIN_DEFAULT = 0
-        CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE = 1
-
-    ctypedef CUlaunchMemSyncDomain_enum CUlaunchMemSyncDomain
-
-    cdef struct CUlaunchMemSyncDomainMap_st:
-        unsigned char default_
-        unsigned char remote
-
-    ctypedef CUlaunchMemSyncDomainMap_st CUlaunchMemSyncDomainMap
-
-    cdef enum CUlaunchAttributeID_enum:
-        CU_LAUNCH_ATTRIBUTE_IGNORE = 0
-        CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1
-        CU_LAUNCH_ATTRIBUTE_COOPERATIVE = 2
-        CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3
-        CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION = 4
-        CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = 5
-        CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION = 6
-        CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT = 7
-        CU_LAUNCH_ATTRIBUTE_PRIORITY = 8
-        CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = 9
-        CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = 10
-        CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = 12
-        CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13
-        CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 14
-
-    ctypedef CUlaunchAttributeID_enum CUlaunchAttributeID
-
-    cdef struct anon_struct1:
-        unsigned int x
-        unsigned int y
-        unsigned int z
-
-    cdef struct anon_struct2:
-        CUevent event
-        int flags
-        int triggerAtBlockStart
-
-    cdef struct anon_struct3:
-        CUevent event
-        int flags
-
-    cdef struct anon_struct4:
-        int deviceUpdatable
-        CUgraphDeviceNode devNode
-
-    cdef union CUlaunchAttributeValue_union:
-        char pad[64]
-        CUaccessPolicyWindow accessPolicyWindow
-        int cooperative
-        CUsynchronizationPolicy syncPolicy
-        anon_struct1 clusterDim
-        CUclusterSchedulingPolicy clusterSchedulingPolicyPreference
-        int programmaticStreamSerializationAllowed
-        anon_struct2 programmaticEvent
-        anon_struct3 launchCompletionEvent
-        int priority
-        CUlaunchMemSyncDomainMap memSyncDomainMap
-        CUlaunchMemSyncDomain memSyncDomain
-        anon_struct4 deviceUpdatableKernelNode
-        unsigned int sharedMemCarveout
-
-    ctypedef CUlaunchAttributeValue_union CUlaunchAttributeValue
-
-    cdef struct CUlaunchAttribute_st:
-        CUlaunchAttributeID id
-        CUlaunchAttributeValue value
-
-    ctypedef CUlaunchAttribute_st CUlaunchAttribute
-
-    cdef struct CUlaunchConfig_st:
-        unsigned int gridDimX
-        unsigned int gridDimY
-        unsigned int gridDimZ
-        unsigned int blockDimX
-        unsigned int blockDimY
-        unsigned int blockDimZ
-        unsigned int sharedMemBytes
-        CUstream hStream
-        CUlaunchAttribute* attrs
-        unsigned int numAttrs
-
-    ctypedef CUlaunchConfig_st CUlaunchConfig
-
-    ctypedef CUlaunchAttributeID CUkernelNodeAttrID
-
-    ctypedef CUlaunchAttributeValue CUkernelNodeAttrValue_v1
-
-    ctypedef CUkernelNodeAttrValue_v1 CUkernelNodeAttrValue
-
-    cdef enum CUstreamCaptureStatus_enum:
-        CU_STREAM_CAPTURE_STATUS_NONE = 0
-        CU_STREAM_CAPTURE_STATUS_ACTIVE = 1
-        CU_STREAM_CAPTURE_STATUS_INVALIDATED = 2
-
-    ctypedef CUstreamCaptureStatus_enum CUstreamCaptureStatus
-
-    cdef enum CUstreamCaptureMode_enum:
-        CU_STREAM_CAPTURE_MODE_GLOBAL = 0
-        CU_STREAM_CAPTURE_MODE_THREAD_LOCAL = 1
-        CU_STREAM_CAPTURE_MODE_RELAXED = 2
-
-    ctypedef CUstreamCaptureMode_enum CUstreamCaptureMode
-
-    ctypedef CUlaunchAttributeID CUstreamAttrID
-
-    ctypedef CUlaunchAttributeValue CUstreamAttrValue_v1
-
-    ctypedef CUstreamAttrValue_v1 CUstreamAttrValue
-
-    cdef enum CUdriverProcAddress_flags_enum:
-        CU_GET_PROC_ADDRESS_DEFAULT = 0
-        CU_GET_PROC_ADDRESS_LEGACY_STREAM = 1
-        CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM = 2
-
-    ctypedef CUdriverProcAddress_flags_enum CUdriverProcAddress_flags
-
-    cdef enum CUdriverProcAddressQueryResult_enum:
-        CU_GET_PROC_ADDRESS_SUCCESS = 0
-        CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND = 1
-        CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT = 2
-
-    ctypedef CUdriverProcAddressQueryResult_enum CUdriverProcAddressQueryResult
-
-    cdef enum CUexecAffinityType_enum:
-        CU_EXEC_AFFINITY_TYPE_SM_COUNT = 0
-        CU_EXEC_AFFINITY_TYPE_MAX = 1
-
-    ctypedef CUexecAffinityType_enum CUexecAffinityType
-
-    cdef struct CUexecAffinitySmCount_st:
-        unsigned int val
-
-    ctypedef CUexecAffinitySmCount_st CUexecAffinitySmCount_v1
-
-    ctypedef CUexecAffinitySmCount_v1 CUexecAffinitySmCount
-
-    cdef union anon_union3:
-        CUexecAffinitySmCount smCount
-
-    cdef struct CUexecAffinityParam_st:
-        CUexecAffinityType type
-        anon_union3 param
-
-    ctypedef CUexecAffinityParam_st CUexecAffinityParam_v1
-
-    ctypedef CUexecAffinityParam_v1 CUexecAffinityParam
-
-    cdef enum CUcigDataType_enum:
-        CIG_DATA_TYPE_D3D12_COMMAND_QUEUE = 1
-
-    ctypedef CUcigDataType_enum CUcigDataType
-
-    cdef struct CUctxCigParam_st:
-        CUcigDataType sharedDataType
-        void* sharedData
-
-    ctypedef CUctxCigParam_st CUctxCigParam
-
-    cdef struct CUctxCreateParams_st:
-        CUexecAffinityParam* execAffinityParams
-        int numExecAffinityParams
-        CUctxCigParam* cigParams
-
-    ctypedef CUctxCreateParams_st CUctxCreateParams
-
-    cdef enum CUlibraryOption_enum:
-        CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE = 0
-        CU_LIBRARY_BINARY_IS_PRESERVED = 1
-        CU_LIBRARY_NUM_OPTIONS = 2
-
-    ctypedef CUlibraryOption_enum CUlibraryOption
-
-    cdef struct CUlibraryHostUniversalFunctionAndDataTable_st:
-        void* functionTable
-        size_t functionWindowSize
-        void* dataTable
-        size_t dataWindowSize
-
-    ctypedef CUlibraryHostUniversalFunctionAndDataTable_st CUlibraryHostUniversalFunctionAndDataTable
-
-    cdef enum cudaError_enum:
-        CUDA_SUCCESS = 0
-        CUDA_ERROR_INVALID_VALUE = 1
-        CUDA_ERROR_OUT_OF_MEMORY = 2
-        CUDA_ERROR_NOT_INITIALIZED = 3
-        CUDA_ERROR_DEINITIALIZED = 4
-        CUDA_ERROR_PROFILER_DISABLED = 5
-        CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6
-        CUDA_ERROR_PROFILER_ALREADY_STARTED = 7
-        CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8
-        CUDA_ERROR_STUB_LIBRARY = 34
-        CUDA_ERROR_DEVICE_UNAVAILABLE = 46
-        CUDA_ERROR_NO_DEVICE = 100
-        CUDA_ERROR_INVALID_DEVICE = 101
-        CUDA_ERROR_DEVICE_NOT_LICENSED = 102
-        CUDA_ERROR_INVALID_IMAGE = 200
-        CUDA_ERROR_INVALID_CONTEXT = 201
-        CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202
-        CUDA_ERROR_MAP_FAILED = 205
-        CUDA_ERROR_UNMAP_FAILED = 206
-        CUDA_ERROR_ARRAY_IS_MAPPED = 207
-        CUDA_ERROR_ALREADY_MAPPED = 208
-        CUDA_ERROR_NO_BINARY_FOR_GPU = 209
-        CUDA_ERROR_ALREADY_ACQUIRED = 210
-        CUDA_ERROR_NOT_MAPPED = 211
-        CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212
-        CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213
-        CUDA_ERROR_ECC_UNCORRECTABLE = 214
-        CUDA_ERROR_UNSUPPORTED_LIMIT = 215
-        CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216
-        CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217
-        CUDA_ERROR_INVALID_PTX = 218
-        CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219
-        CUDA_ERROR_NVLINK_UNCORRECTABLE = 220
-        CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221
-        CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222
-        CUDA_ERROR_JIT_COMPILATION_DISABLED = 223
-        CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224
-        CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC = 225
-        CUDA_ERROR_INVALID_SOURCE = 300
-        CUDA_ERROR_FILE_NOT_FOUND = 301
-        CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302
-        CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303
-        CUDA_ERROR_OPERATING_SYSTEM = 304
-        CUDA_ERROR_INVALID_HANDLE = 400
-        CUDA_ERROR_ILLEGAL_STATE = 401
-        CUDA_ERROR_LOSSY_QUERY = 402
-        CUDA_ERROR_NOT_FOUND = 500
-        CUDA_ERROR_NOT_READY = 600
-        CUDA_ERROR_ILLEGAL_ADDRESS = 700
-        CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701
-        CUDA_ERROR_LAUNCH_TIMEOUT = 702
-        CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703
-        CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704
-        CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705
-        CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708
-        CUDA_ERROR_CONTEXT_IS_DESTROYED = 709
-        CUDA_ERROR_ASSERT = 710
-        CUDA_ERROR_TOO_MANY_PEERS = 711
-        CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712
-        CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713
-        CUDA_ERROR_HARDWARE_STACK_ERROR = 714
-        CUDA_ERROR_ILLEGAL_INSTRUCTION = 715
-        CUDA_ERROR_MISALIGNED_ADDRESS = 716
-        CUDA_ERROR_INVALID_ADDRESS_SPACE = 717
-        CUDA_ERROR_INVALID_PC = 718
-        CUDA_ERROR_LAUNCH_FAILED = 719
-        CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720
-        CUDA_ERROR_NOT_PERMITTED = 800
-        CUDA_ERROR_NOT_SUPPORTED = 801
-        CUDA_ERROR_SYSTEM_NOT_READY = 802
-        CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803
-        CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804
-        CUDA_ERROR_MPS_CONNECTION_FAILED = 805
-        CUDA_ERROR_MPS_RPC_FAILURE = 806
-        CUDA_ERROR_MPS_SERVER_NOT_READY = 807
-        CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808
-        CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809
-        CUDA_ERROR_MPS_CLIENT_TERMINATED = 810
-        CUDA_ERROR_CDP_NOT_SUPPORTED = 811
-        CUDA_ERROR_CDP_VERSION_MISMATCH = 812
-        CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900
-        CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901
-        CUDA_ERROR_STREAM_CAPTURE_MERGE = 902
-        CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903
-        CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904
-        CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905
-        CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906
-        CUDA_ERROR_CAPTURED_EVENT = 907
-        CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908
-        CUDA_ERROR_TIMEOUT = 909
-        CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910
-        CUDA_ERROR_EXTERNAL_DEVICE = 911
-        CUDA_ERROR_INVALID_CLUSTER_SIZE = 912
-        CUDA_ERROR_FUNCTION_NOT_LOADED = 913
-        CUDA_ERROR_INVALID_RESOURCE_TYPE = 914
-        CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION = 915
-        CUDA_ERROR_UNKNOWN = 999
-
-    ctypedef cudaError_enum CUresult
-
-    cdef enum CUdevice_P2PAttribute_enum:
-        CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 1
-        CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 2
-        CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 3
-        CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED = 4
-        CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 4
-
-    ctypedef CUdevice_P2PAttribute_enum CUdevice_P2PAttribute
-
-    ctypedef void (*CUstreamCallback)(CUstream hStream, CUresult status, void* userData)
-
-    ctypedef size_t (*CUoccupancyB2DSize)(int blockSize)
-
-    cdef struct CUDA_MEMCPY2D_st:
-        size_t srcXInBytes
-        size_t srcY
-        CUmemorytype srcMemoryType
-        const void* srcHost
-        CUdeviceptr srcDevice
-        CUarray srcArray
-        size_t srcPitch
-        size_t dstXInBytes
-        size_t dstY
-        CUmemorytype dstMemoryType
-        void* dstHost
-        CUdeviceptr dstDevice
-        CUarray dstArray
-        size_t dstPitch
-        size_t WidthInBytes
-        size_t Height
-
-    ctypedef CUDA_MEMCPY2D_st CUDA_MEMCPY2D_v2
-
-    ctypedef CUDA_MEMCPY2D_v2 CUDA_MEMCPY2D
-
-    cdef struct CUDA_MEMCPY3D_st:
-        size_t srcXInBytes
-        size_t srcY
-        size_t srcZ
-        size_t srcLOD
-        CUmemorytype srcMemoryType
-        const void* srcHost
-        CUdeviceptr srcDevice
-        CUarray srcArray
-        void* reserved0
-        size_t srcPitch
-        size_t srcHeight
-        size_t dstXInBytes
-        size_t dstY
-        size_t dstZ
-        size_t dstLOD
-        CUmemorytype dstMemoryType
-        void* dstHost
-        CUdeviceptr dstDevice
-        CUarray dstArray
-        void* reserved1
-        size_t dstPitch
-        size_t dstHeight
-        size_t WidthInBytes
-        size_t Height
-        size_t Depth
-
-    ctypedef CUDA_MEMCPY3D_st CUDA_MEMCPY3D_v2
-
-    ctypedef CUDA_MEMCPY3D_v2 CUDA_MEMCPY3D
-
-    cdef struct CUDA_MEMCPY3D_PEER_st:
-        size_t srcXInBytes
-        size_t srcY
-        size_t srcZ
-        size_t srcLOD
-        CUmemorytype srcMemoryType
-        const void* srcHost
-        CUdeviceptr srcDevice
-        CUarray srcArray
-        CUcontext srcContext
-        size_t srcPitch
-        size_t srcHeight
-        size_t dstXInBytes
-        size_t dstY
-        size_t dstZ
-        size_t dstLOD
-        CUmemorytype dstMemoryType
-        void* dstHost
-        CUdeviceptr dstDevice
-        CUarray dstArray
-        CUcontext dstContext
-        size_t dstPitch
-        size_t dstHeight
-        size_t WidthInBytes
-        size_t Height
-        size_t Depth
-
-    ctypedef CUDA_MEMCPY3D_PEER_st CUDA_MEMCPY3D_PEER_v1
-
-    ctypedef CUDA_MEMCPY3D_PEER_v1 CUDA_MEMCPY3D_PEER
-
-    cdef struct CUDA_MEMCPY_NODE_PARAMS_st:
-        int flags
-        int reserved
-        CUcontext copyCtx
-        CUDA_MEMCPY3D copyParams
-
-    ctypedef CUDA_MEMCPY_NODE_PARAMS_st CUDA_MEMCPY_NODE_PARAMS
-
-    cdef struct CUDA_ARRAY_DESCRIPTOR_st:
-        size_t Width
-        size_t Height
-        CUarray_format Format
-        unsigned int NumChannels
-
-    ctypedef CUDA_ARRAY_DESCRIPTOR_st CUDA_ARRAY_DESCRIPTOR_v2
-
-    ctypedef CUDA_ARRAY_DESCRIPTOR_v2 CUDA_ARRAY_DESCRIPTOR
-
-    cdef struct CUDA_ARRAY3D_DESCRIPTOR_st:
-        size_t Width
-        size_t Height
-        size_t Depth
-        CUarray_format Format
-        unsigned int NumChannels
-        unsigned int Flags
-
-    ctypedef CUDA_ARRAY3D_DESCRIPTOR_st CUDA_ARRAY3D_DESCRIPTOR_v2
-
-    ctypedef CUDA_ARRAY3D_DESCRIPTOR_v2 CUDA_ARRAY3D_DESCRIPTOR
-
-    cdef struct anon_struct5:
-        unsigned int width
-        unsigned int height
-        unsigned int depth
-
-    cdef struct CUDA_ARRAY_SPARSE_PROPERTIES_st:
-        anon_struct5 tileExtent
-        unsigned int miptailFirstLevel
-        unsigned long long miptailSize
-        unsigned int flags
-        unsigned int reserved[4]
-
-    ctypedef CUDA_ARRAY_SPARSE_PROPERTIES_st CUDA_ARRAY_SPARSE_PROPERTIES_v1
-
-    ctypedef CUDA_ARRAY_SPARSE_PROPERTIES_v1 CUDA_ARRAY_SPARSE_PROPERTIES
-
-    cdef struct CUDA_ARRAY_MEMORY_REQUIREMENTS_st:
-        size_t size
-        size_t alignment
-        unsigned int reserved[4]
-
-    ctypedef CUDA_ARRAY_MEMORY_REQUIREMENTS_st CUDA_ARRAY_MEMORY_REQUIREMENTS_v1
-
-    ctypedef CUDA_ARRAY_MEMORY_REQUIREMENTS_v1 CUDA_ARRAY_MEMORY_REQUIREMENTS
-
-    cdef struct anon_struct6:
-        CUarray hArray
-
-    cdef struct anon_struct7:
-        CUmipmappedArray hMipmappedArray
-
-    cdef struct anon_struct8:
-        CUdeviceptr devPtr
-        CUarray_format format
-        unsigned int numChannels
-        size_t sizeInBytes
-
-    cdef struct anon_struct9:
-        CUdeviceptr devPtr
-        CUarray_format format
-        unsigned int numChannels
-        size_t width
-        size_t height
-        size_t pitchInBytes
-
-    cdef struct anon_struct10:
-        int reserved[32]
-
-    cdef union anon_union4:
-        anon_struct6 array
-        anon_struct7 mipmap
-        anon_struct8 linear
-        anon_struct9 pitch2D
-        anon_struct10 reserved
-
-    cdef struct CUDA_RESOURCE_DESC_st:
-        CUresourcetype resType
-        anon_union4 res
-        unsigned int flags
-
-    ctypedef CUDA_RESOURCE_DESC_st CUDA_RESOURCE_DESC_v1
-
-    ctypedef CUDA_RESOURCE_DESC_v1 CUDA_RESOURCE_DESC
-
-    cdef struct CUDA_TEXTURE_DESC_st:
-        CUaddress_mode addressMode[3]
-        CUfilter_mode filterMode
-        unsigned int flags
-        unsigned int maxAnisotropy
-        CUfilter_mode mipmapFilterMode
-        float mipmapLevelBias
-        float minMipmapLevelClamp
-        float maxMipmapLevelClamp
-        float borderColor[4]
-        int reserved[12]
-
-    ctypedef CUDA_TEXTURE_DESC_st CUDA_TEXTURE_DESC_v1
-
-    ctypedef CUDA_TEXTURE_DESC_v1 CUDA_TEXTURE_DESC
-
-    cdef enum CUresourceViewFormat_enum:
-        CU_RES_VIEW_FORMAT_NONE = 0
-        CU_RES_VIEW_FORMAT_UINT_1X8 = 1
-        CU_RES_VIEW_FORMAT_UINT_2X8 = 2
-        CU_RES_VIEW_FORMAT_UINT_4X8 = 3
-        CU_RES_VIEW_FORMAT_SINT_1X8 = 4
-        CU_RES_VIEW_FORMAT_SINT_2X8 = 5
-        CU_RES_VIEW_FORMAT_SINT_4X8 = 6
-        CU_RES_VIEW_FORMAT_UINT_1X16 = 7
-        CU_RES_VIEW_FORMAT_UINT_2X16 = 8
-        CU_RES_VIEW_FORMAT_UINT_4X16 = 9
-        CU_RES_VIEW_FORMAT_SINT_1X16 = 10
-        CU_RES_VIEW_FORMAT_SINT_2X16 = 11
-        CU_RES_VIEW_FORMAT_SINT_4X16 = 12
-        CU_RES_VIEW_FORMAT_UINT_1X32 = 13
-        CU_RES_VIEW_FORMAT_UINT_2X32 = 14
-        CU_RES_VIEW_FORMAT_UINT_4X32 = 15
-        CU_RES_VIEW_FORMAT_SINT_1X32 = 16
-        CU_RES_VIEW_FORMAT_SINT_2X32 = 17
-        CU_RES_VIEW_FORMAT_SINT_4X32 = 18
-        CU_RES_VIEW_FORMAT_FLOAT_1X16 = 19
-        CU_RES_VIEW_FORMAT_FLOAT_2X16 = 20
-        CU_RES_VIEW_FORMAT_FLOAT_4X16 = 21
-        CU_RES_VIEW_FORMAT_FLOAT_1X32 = 22
-        CU_RES_VIEW_FORMAT_FLOAT_2X32 = 23
-        CU_RES_VIEW_FORMAT_FLOAT_4X32 = 24
-        CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 25
-        CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 26
-        CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 27
-        CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 28
-        CU_RES_VIEW_FORMAT_SIGNED_BC4 = 29
-        CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 30
-        CU_RES_VIEW_FORMAT_SIGNED_BC5 = 31
-        CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 32
-        CU_RES_VIEW_FORMAT_SIGNED_BC6H = 33
-        CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 34
-
-    ctypedef CUresourceViewFormat_enum CUresourceViewFormat
-
-    cdef struct CUDA_RESOURCE_VIEW_DESC_st:
-        CUresourceViewFormat format
-        size_t width
-        size_t height
-        size_t depth
-        unsigned int firstMipmapLevel
-        unsigned int lastMipmapLevel
-        unsigned int firstLayer
-        unsigned int lastLayer
-        unsigned int reserved[16]
-
-    ctypedef CUDA_RESOURCE_VIEW_DESC_st CUDA_RESOURCE_VIEW_DESC_v1
-
-    ctypedef CUDA_RESOURCE_VIEW_DESC_v1 CUDA_RESOURCE_VIEW_DESC
-
-    cdef struct CUtensorMap_st:
-        cuuint64_t opaque[16]
-
-    ctypedef CUtensorMap_st CUtensorMap
-
-    cdef enum CUtensorMapDataType_enum:
-        CU_TENSOR_MAP_DATA_TYPE_UINT8 = 0
-        CU_TENSOR_MAP_DATA_TYPE_UINT16 = 1
-        CU_TENSOR_MAP_DATA_TYPE_UINT32 = 2
-        CU_TENSOR_MAP_DATA_TYPE_INT32 = 3
-        CU_TENSOR_MAP_DATA_TYPE_UINT64 = 4
-        CU_TENSOR_MAP_DATA_TYPE_INT64 = 5
-        CU_TENSOR_MAP_DATA_TYPE_FLOAT16 = 6
-        CU_TENSOR_MAP_DATA_TYPE_FLOAT32 = 7
-        CU_TENSOR_MAP_DATA_TYPE_FLOAT64 = 8
-        CU_TENSOR_MAP_DATA_TYPE_BFLOAT16 = 9
-        CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ = 10
-        CU_TENSOR_MAP_DATA_TYPE_TFLOAT32 = 11
-        CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ = 12
-
-    ctypedef CUtensorMapDataType_enum CUtensorMapDataType
-
-    cdef enum CUtensorMapInterleave_enum:
-        CU_TENSOR_MAP_INTERLEAVE_NONE = 0
-        CU_TENSOR_MAP_INTERLEAVE_16B = 1
-        CU_TENSOR_MAP_INTERLEAVE_32B = 2
-
-    ctypedef CUtensorMapInterleave_enum CUtensorMapInterleave
-
-    cdef enum CUtensorMapSwizzle_enum:
-        CU_TENSOR_MAP_SWIZZLE_NONE = 0
-        CU_TENSOR_MAP_SWIZZLE_32B = 1
-        CU_TENSOR_MAP_SWIZZLE_64B = 2
-        CU_TENSOR_MAP_SWIZZLE_128B = 3
-
-    ctypedef CUtensorMapSwizzle_enum CUtensorMapSwizzle
-
-    cdef enum CUtensorMapL2promotion_enum:
-        CU_TENSOR_MAP_L2_PROMOTION_NONE = 0
-        CU_TENSOR_MAP_L2_PROMOTION_L2_64B = 1
-        CU_TENSOR_MAP_L2_PROMOTION_L2_128B = 2
-        CU_TENSOR_MAP_L2_PROMOTION_L2_256B = 3
-
-    ctypedef CUtensorMapL2promotion_enum CUtensorMapL2promotion
-
-    cdef enum CUtensorMapFloatOOBfill_enum:
-        CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = 0
-        CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA = 1
-
-    ctypedef CUtensorMapFloatOOBfill_enum CUtensorMapFloatOOBfill
-
-    cdef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st:
-        unsigned long long p2pToken
-        unsigned int vaSpaceToken
-
-    ctypedef CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1
-
-    ctypedef CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1 CUDA_POINTER_ATTRIBUTE_P2P_TOKENS
-
-    cdef enum CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum:
-        CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE = 0
-        CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ = 1
-        CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE = 3
-
-    ctypedef CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS
-
-    cdef struct CUDA_LAUNCH_PARAMS_st:
-        CUfunction function
-        unsigned int gridDimX
-        unsigned int gridDimY
-        unsigned int gridDimZ
-        unsigned int blockDimX
-        unsigned int blockDimY
-        unsigned int blockDimZ
-        unsigned int sharedMemBytes
-        CUstream hStream
-        void** kernelParams
-
-    ctypedef CUDA_LAUNCH_PARAMS_st CUDA_LAUNCH_PARAMS_v1
-
-    ctypedef CUDA_LAUNCH_PARAMS_v1 CUDA_LAUNCH_PARAMS
-
-    cdef enum CUexternalMemoryHandleType_enum:
-        CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1
-        CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2
-        CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3
-        CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4
-        CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5
-        CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6
-        CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7
-        CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8
-
-    ctypedef CUexternalMemoryHandleType_enum CUexternalMemoryHandleType
-
-    cdef struct anon_struct11:
-        void* handle
-        const void* name
-
-    cdef union anon_union5:
-        int fd
-        anon_struct11 win32
-        const void* nvSciBufObject
-
-    cdef struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st:
-        CUexternalMemoryHandleType type
-        anon_union5 handle
-        unsigned long long size
-        unsigned int flags
-        unsigned int reserved[16]
-
-    ctypedef CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1
-
-    ctypedef CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1 CUDA_EXTERNAL_MEMORY_HANDLE_DESC
-
-    cdef struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st:
-        unsigned long long offset
-        unsigned long long size
-        unsigned int flags
-        unsigned int reserved[16]
-
-    ctypedef CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1
-
-    ctypedef CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1 CUDA_EXTERNAL_MEMORY_BUFFER_DESC
-
-    cdef struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st:
-        unsigned long long offset
-        CUDA_ARRAY3D_DESCRIPTOR arrayDesc
-        unsigned int numLevels
-        unsigned int reserved[16]
-
-    ctypedef CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1
-
-    ctypedef CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1 CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC
-
-    cdef enum CUexternalSemaphoreHandleType_enum:
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = 1
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = 2
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = 4
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = 5
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = 6
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = 7
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = 8
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = 9
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10
-
-    ctypedef CUexternalSemaphoreHandleType_enum CUexternalSemaphoreHandleType
-
-    cdef struct anon_struct12:
-        void* handle
-        const void* name
-
-    cdef union anon_union6:
-        int fd
-        anon_struct12 win32
-        const void* nvSciSyncObj
-
-    cdef struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st:
-        CUexternalSemaphoreHandleType type
-        anon_union6 handle
-        unsigned int flags
-        unsigned int reserved[16]
-
-    ctypedef CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1
-
-    ctypedef CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC
-
-    cdef struct anon_struct13:
-        unsigned long long value
-
-    cdef union anon_union7:
-        void* fence
-        unsigned long long reserved
-
-    cdef struct anon_struct14:
-        unsigned long long key
-
-    cdef struct anon_struct15:
-        anon_struct13 fence
-        anon_union7 nvSciSync
-        anon_struct14 keyedMutex
-        unsigned int reserved[12]
-
-    cdef struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st:
-        anon_struct15 params
-        unsigned int flags
-        unsigned int reserved[16]
-
-    ctypedef CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1
-
-    ctypedef CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1 CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS
-
-    cdef struct anon_struct16:
-        unsigned long long value
-
-    cdef union anon_union8:
-        void* fence
-        unsigned long long reserved
-
-    cdef struct anon_struct17:
-        unsigned long long key
-        unsigned int timeoutMs
-
-    cdef struct anon_struct18:
-        anon_struct16 fence
-        anon_union8 nvSciSync
-        anon_struct17 keyedMutex
-        unsigned int reserved[10]
-
-    cdef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st:
-        anon_struct18 params
-        unsigned int flags
-        unsigned int reserved[16]
-
-    ctypedef CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1
-
-    ctypedef CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1 CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS
-
-    cdef struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st:
-        CUexternalSemaphore* extSemArray
-        const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray
-        unsigned int numExtSems
-
-    ctypedef CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1
-
-    ctypedef CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1 CUDA_EXT_SEM_SIGNAL_NODE_PARAMS
-
-    cdef struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st:
-        CUexternalSemaphore* extSemArray
-        const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray
-        unsigned int numExtSems
-
-    ctypedef CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2
-
-    cdef struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_st:
-        CUexternalSemaphore* extSemArray
-        const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray
-        unsigned int numExtSems
-
-    ctypedef CUDA_EXT_SEM_WAIT_NODE_PARAMS_st CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1
-
-    ctypedef CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1 CUDA_EXT_SEM_WAIT_NODE_PARAMS
-
-    cdef struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st:
-        CUexternalSemaphore* extSemArray
-        const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray
-        unsigned int numExtSems
-
-    ctypedef CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2
-
-    ctypedef unsigned long long CUmemGenericAllocationHandle_v1
-
-    ctypedef CUmemGenericAllocationHandle_v1 CUmemGenericAllocationHandle
-
-    cdef enum CUmemAllocationHandleType_enum:
-        CU_MEM_HANDLE_TYPE_NONE = 0
-        CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 1
-        CU_MEM_HANDLE_TYPE_WIN32 = 2
-        CU_MEM_HANDLE_TYPE_WIN32_KMT = 4
-        CU_MEM_HANDLE_TYPE_FABRIC = 8
-        CU_MEM_HANDLE_TYPE_MAX = 2147483647
-
-    ctypedef CUmemAllocationHandleType_enum CUmemAllocationHandleType
-
-    cdef enum CUmemAccess_flags_enum:
-        CU_MEM_ACCESS_FLAGS_PROT_NONE = 0
-        CU_MEM_ACCESS_FLAGS_PROT_READ = 1
-        CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 3
-        CU_MEM_ACCESS_FLAGS_PROT_MAX = 2147483647
-
-    ctypedef CUmemAccess_flags_enum CUmemAccess_flags
-
-    cdef enum CUmemLocationType_enum:
-        CU_MEM_LOCATION_TYPE_INVALID = 0
-        CU_MEM_LOCATION_TYPE_DEVICE = 1
-        CU_MEM_LOCATION_TYPE_HOST = 2
-        CU_MEM_LOCATION_TYPE_HOST_NUMA = 3
-        CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT = 4
-        CU_MEM_LOCATION_TYPE_MAX = 2147483647
-
-    ctypedef CUmemLocationType_enum CUmemLocationType
-
-    cdef enum CUmemAllocationType_enum:
-        CU_MEM_ALLOCATION_TYPE_INVALID = 0
-        CU_MEM_ALLOCATION_TYPE_PINNED = 1
-        CU_MEM_ALLOCATION_TYPE_MAX = 2147483647
-
-    ctypedef CUmemAllocationType_enum CUmemAllocationType
-
-    cdef enum CUmemAllocationGranularity_flags_enum:
-        CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0
-        CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 1
-
-    ctypedef CUmemAllocationGranularity_flags_enum CUmemAllocationGranularity_flags
-
-    cdef enum CUmemRangeHandleType_enum:
-        CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD = 1
-        CU_MEM_RANGE_HANDLE_TYPE_MAX = 2147483647
-
-    ctypedef CUmemRangeHandleType_enum CUmemRangeHandleType
-
-    cdef enum CUarraySparseSubresourceType_enum:
-        CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0
-        CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL = 1
-
-    ctypedef CUarraySparseSubresourceType_enum CUarraySparseSubresourceType
-
-    cdef enum CUmemOperationType_enum:
-        CU_MEM_OPERATION_TYPE_MAP = 1
-        CU_MEM_OPERATION_TYPE_UNMAP = 2
-
-    ctypedef CUmemOperationType_enum CUmemOperationType
-
-    cdef enum CUmemHandleType_enum:
-        CU_MEM_HANDLE_TYPE_GENERIC = 0
-
-    ctypedef CUmemHandleType_enum CUmemHandleType
-
-    cdef union anon_union9:
-        CUmipmappedArray mipmap
-        CUarray array
-
-    cdef struct anon_struct19:
-        unsigned int level
-        unsigned int layer
-        unsigned int offsetX
-        unsigned int offsetY
-        unsigned int offsetZ
-        unsigned int extentWidth
-        unsigned int extentHeight
-        unsigned int extentDepth
-
-    cdef struct anon_struct20:
-        unsigned int layer
-        unsigned long long offset
-        unsigned long long size
-
-    cdef union anon_union10:
-        anon_struct19 sparseLevel
-        anon_struct20 miptail
-
-    cdef union anon_union11:
-        CUmemGenericAllocationHandle memHandle
-
-    cdef struct CUarrayMapInfo_st:
-        CUresourcetype resourceType
-        anon_union9 resource
-        CUarraySparseSubresourceType subresourceType
-        anon_union10 subresource
-        CUmemOperationType memOperationType
-        CUmemHandleType memHandleType
-        anon_union11 memHandle
-        unsigned long long offset
-        unsigned int deviceBitMask
-        unsigned int flags
-        unsigned int reserved[2]
-
-    ctypedef CUarrayMapInfo_st CUarrayMapInfo_v1
-
-    ctypedef CUarrayMapInfo_v1 CUarrayMapInfo
-
-    cdef struct CUmemLocation_st:
-        CUmemLocationType type
-        int id
-
-    ctypedef CUmemLocation_st CUmemLocation_v1
-
-    ctypedef CUmemLocation_v1 CUmemLocation
-
-    cdef enum CUmemAllocationCompType_enum:
-        CU_MEM_ALLOCATION_COMP_NONE = 0
-        CU_MEM_ALLOCATION_COMP_GENERIC = 1
-
-    ctypedef CUmemAllocationCompType_enum CUmemAllocationCompType
-
-    cdef struct anon_struct21:
-        unsigned char compressionType
-        unsigned char gpuDirectRDMACapable
-        unsigned short usage
-        unsigned char reserved[4]
-
-    cdef struct CUmemAllocationProp_st:
-        CUmemAllocationType type
-        CUmemAllocationHandleType requestedHandleTypes
-        CUmemLocation location
-        void* win32HandleMetaData
-        anon_struct21 allocFlags
-
-    ctypedef CUmemAllocationProp_st CUmemAllocationProp_v1
-
-    ctypedef CUmemAllocationProp_v1 CUmemAllocationProp
-
-    cdef enum CUmulticastGranularity_flags_enum:
-        CU_MULTICAST_GRANULARITY_MINIMUM = 0
-        CU_MULTICAST_GRANULARITY_RECOMMENDED = 1
-
-    ctypedef CUmulticastGranularity_flags_enum CUmulticastGranularity_flags
-
-    cdef struct CUmulticastObjectProp_st:
-        unsigned int numDevices
-        size_t size
-        unsigned long long handleTypes
-        unsigned long long flags
-
-    ctypedef CUmulticastObjectProp_st CUmulticastObjectProp_v1
-
-    ctypedef CUmulticastObjectProp_v1 CUmulticastObjectProp
-
-    cdef struct CUmemAccessDesc_st:
-        CUmemLocation location
-        CUmemAccess_flags flags
-
-    ctypedef CUmemAccessDesc_st CUmemAccessDesc_v1
-
-    ctypedef CUmemAccessDesc_v1 CUmemAccessDesc
-
-    cdef enum CUgraphExecUpdateResult_enum:
-        CU_GRAPH_EXEC_UPDATE_SUCCESS = 0
-        CU_GRAPH_EXEC_UPDATE_ERROR = 1
-        CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED = 2
-        CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED = 3
-        CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED = 4
-        CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED = 5
-        CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED = 6
-        CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE = 7
-        CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED = 8
-
-    ctypedef CUgraphExecUpdateResult_enum CUgraphExecUpdateResult
-
-    cdef struct CUgraphExecUpdateResultInfo_st:
-        CUgraphExecUpdateResult result
-        CUgraphNode errorNode
-        CUgraphNode errorFromNode
-
-    ctypedef CUgraphExecUpdateResultInfo_st CUgraphExecUpdateResultInfo_v1
-
-    ctypedef CUgraphExecUpdateResultInfo_v1 CUgraphExecUpdateResultInfo
-
-    cdef enum CUmemPool_attribute_enum:
-        CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = 1
-        CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC = 2
-        CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES = 3
-        CU_MEMPOOL_ATTR_RELEASE_THRESHOLD = 4
-        CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT = 5
-        CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH = 6
-        CU_MEMPOOL_ATTR_USED_MEM_CURRENT = 7
-        CU_MEMPOOL_ATTR_USED_MEM_HIGH = 8
-
-    ctypedef CUmemPool_attribute_enum CUmemPool_attribute
-
-    cdef struct CUmemPoolProps_st:
-        CUmemAllocationType allocType
-        CUmemAllocationHandleType handleTypes
-        CUmemLocation location
-        void* win32SecurityAttributes
-        size_t maxSize
-        unsigned short usage
-        unsigned char reserved[54]
-
-    ctypedef CUmemPoolProps_st CUmemPoolProps_v1
-
-    ctypedef CUmemPoolProps_v1 CUmemPoolProps
-
-    cdef struct CUmemPoolPtrExportData_st:
-        unsigned char reserved[64]
-
-    ctypedef CUmemPoolPtrExportData_st CUmemPoolPtrExportData_v1
-
-    ctypedef CUmemPoolPtrExportData_v1 CUmemPoolPtrExportData
-
-    cdef struct CUDA_MEM_ALLOC_NODE_PARAMS_v1_st:
-        CUmemPoolProps poolProps
-        const CUmemAccessDesc* accessDescs
-        size_t accessDescCount
-        size_t bytesize
-        CUdeviceptr dptr
-
-    ctypedef CUDA_MEM_ALLOC_NODE_PARAMS_v1_st CUDA_MEM_ALLOC_NODE_PARAMS_v1
-
-    ctypedef CUDA_MEM_ALLOC_NODE_PARAMS_v1 CUDA_MEM_ALLOC_NODE_PARAMS
-
-    cdef struct CUDA_MEM_ALLOC_NODE_PARAMS_v2_st:
-        CUmemPoolProps poolProps
-        const CUmemAccessDesc* accessDescs
-        size_t accessDescCount
-        size_t bytesize
-        CUdeviceptr dptr
-
-    ctypedef CUDA_MEM_ALLOC_NODE_PARAMS_v2_st CUDA_MEM_ALLOC_NODE_PARAMS_v2
-
-    cdef struct CUDA_MEM_FREE_NODE_PARAMS_st:
-        CUdeviceptr dptr
-
-    ctypedef CUDA_MEM_FREE_NODE_PARAMS_st CUDA_MEM_FREE_NODE_PARAMS
-
-    cdef enum CUgraphMem_attribute_enum:
-        CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT = 0
-        CU_GRAPH_MEM_ATTR_USED_MEM_HIGH = 1
-        CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT = 2
-        CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH = 3
-
-    ctypedef CUgraphMem_attribute_enum CUgraphMem_attribute
-
-    cdef struct CUDA_CHILD_GRAPH_NODE_PARAMS_st:
-        CUgraph graph
-
-    ctypedef CUDA_CHILD_GRAPH_NODE_PARAMS_st CUDA_CHILD_GRAPH_NODE_PARAMS
-
-    cdef struct CUDA_EVENT_RECORD_NODE_PARAMS_st:
-        CUevent event
-
-    ctypedef CUDA_EVENT_RECORD_NODE_PARAMS_st CUDA_EVENT_RECORD_NODE_PARAMS
-
-    cdef struct CUDA_EVENT_WAIT_NODE_PARAMS_st:
-        CUevent event
-
-    ctypedef CUDA_EVENT_WAIT_NODE_PARAMS_st CUDA_EVENT_WAIT_NODE_PARAMS
-
-    cdef struct CUgraphNodeParams_st:
-        CUgraphNodeType type
-        int reserved0[3]
-        long long reserved1[29]
-        CUDA_KERNEL_NODE_PARAMS_v3 kernel
-        CUDA_MEMCPY_NODE_PARAMS memcpy
-        CUDA_MEMSET_NODE_PARAMS_v2 memset
-        CUDA_HOST_NODE_PARAMS_v2 host
-        CUDA_CHILD_GRAPH_NODE_PARAMS graph
-        CUDA_EVENT_WAIT_NODE_PARAMS eventWait
-        CUDA_EVENT_RECORD_NODE_PARAMS eventRecord
-        CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2 extSemSignal
-        CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2 extSemWait
-        CUDA_MEM_ALLOC_NODE_PARAMS_v2 alloc
-        CUDA_MEM_FREE_NODE_PARAMS free
-        CUDA_BATCH_MEM_OP_NODE_PARAMS_v2 memOp
-        CUDA_CONDITIONAL_NODE_PARAMS conditional
-        long long reserved2
-
-    ctypedef CUgraphNodeParams_st CUgraphNodeParams
-
-    cdef enum CUflushGPUDirectRDMAWritesOptions_enum:
-        CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST = 1
-        CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS = 2
-
-    ctypedef CUflushGPUDirectRDMAWritesOptions_enum CUflushGPUDirectRDMAWritesOptions
-
-    cdef enum CUGPUDirectRDMAWritesOrdering_enum:
-        CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE = 0
-        CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER = 100
-        CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES = 200
-
-    ctypedef CUGPUDirectRDMAWritesOrdering_enum CUGPUDirectRDMAWritesOrdering
-
-    cdef enum CUflushGPUDirectRDMAWritesScope_enum:
-        CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER = 100
-        CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES = 200
-
-    ctypedef CUflushGPUDirectRDMAWritesScope_enum CUflushGPUDirectRDMAWritesScope
-
-    cdef enum CUflushGPUDirectRDMAWritesTarget_enum:
-        CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX = 0
-
-    ctypedef CUflushGPUDirectRDMAWritesTarget_enum CUflushGPUDirectRDMAWritesTarget
-
-    cdef enum CUgraphDebugDot_flags_enum:
-        CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE = 1
-        CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES = 2
-        CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS = 4
-        CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS = 8
-        CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS = 16
-        CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS = 32
-        CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS = 64
-        CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS = 128
-        CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS = 256
-        CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES = 512
-        CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES = 1024
-        CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS = 2048
-        CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS = 4096
-        CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS = 8192
-        CU_GRAPH_DEBUG_DOT_FLAGS_EXTRA_TOPO_INFO = 16384
-        CU_GRAPH_DEBUG_DOT_FLAGS_CONDITIONAL_NODE_PARAMS = 32768
-
-    ctypedef CUgraphDebugDot_flags_enum CUgraphDebugDot_flags
-
-    cdef enum CUuserObject_flags_enum:
-        CU_USER_OBJECT_NO_DESTRUCTOR_SYNC = 1
-
-    ctypedef CUuserObject_flags_enum CUuserObject_flags
-
-    cdef enum CUuserObjectRetain_flags_enum:
-        CU_GRAPH_USER_OBJECT_MOVE = 1
-
-    ctypedef CUuserObjectRetain_flags_enum CUuserObjectRetain_flags
-
-    cdef enum CUgraphInstantiate_flags_enum:
-        CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH = 1
-        CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD = 2
-        CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH = 4
-        CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY = 8
-
-    ctypedef CUgraphInstantiate_flags_enum CUgraphInstantiate_flags
-
-    cdef enum CUdeviceNumaConfig_enum:
-        CU_DEVICE_NUMA_CONFIG_NONE = 0
-        CU_DEVICE_NUMA_CONFIG_NUMA_NODE = 1
-
-    ctypedef CUdeviceNumaConfig_enum CUdeviceNumaConfig
-
-    cdef enum CUmoduleLoadingMode_enum:
-        CU_MODULE_EAGER_LOADING = 1
-        CU_MODULE_LAZY_LOADING = 2
-
-    ctypedef CUmoduleLoadingMode_enum CUmoduleLoadingMode
-
-    cdef enum CUfunctionLoadingState_enum:
-        CU_FUNCTION_LOADING_STATE_UNLOADED = 0
-        CU_FUNCTION_LOADING_STATE_LOADED = 1
-        CU_FUNCTION_LOADING_STATE_MAX = 2
-
-    ctypedef CUfunctionLoadingState_enum CUfunctionLoadingState
-
-    cdef enum CUcoredumpSettings_enum:
-        CU_COREDUMP_ENABLE_ON_EXCEPTION = 1
-        CU_COREDUMP_TRIGGER_HOST = 2
-        CU_COREDUMP_LIGHTWEIGHT = 3
-        CU_COREDUMP_ENABLE_USER_TRIGGER = 4
-        CU_COREDUMP_FILE = 5
-        CU_COREDUMP_PIPE = 6
-        CU_COREDUMP_GENERATION_FLAGS = 7
-        CU_COREDUMP_MAX = 8
-
-    ctypedef CUcoredumpSettings_enum CUcoredumpSettings
-
-    cdef enum CUCoredumpGenerationFlags:
-        CU_COREDUMP_DEFAULT_FLAGS = 0
-        CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES = 1
-        CU_COREDUMP_SKIP_GLOBAL_MEMORY = 2
-        CU_COREDUMP_SKIP_SHARED_MEMORY = 4
-        CU_COREDUMP_SKIP_LOCAL_MEMORY = 8
-        CU_COREDUMP_SKIP_ABORT = 16
-        CU_COREDUMP_SKIP_CONSTBANK_MEMORY = 32
-        CU_COREDUMP_LIGHTWEIGHT_FLAGS = 47
-
-    cdef struct CUdevResourceDesc_st:
-        pass
-    ctypedef CUdevResourceDesc_st* CUdevResourceDesc
-
-    ctypedef enum CUgreenCtxCreate_flags:
-        CU_GREEN_CTX_DEFAULT_STREAM = 1
-
-    ctypedef enum CUdevSmResourceSplit_flags:
-        CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING = 1
-        CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE = 2
-
-    ctypedef enum CUdevResourceType:
-        CU_DEV_RESOURCE_TYPE_INVALID = 0
-        CU_DEV_RESOURCE_TYPE_SM = 1
-
-    cdef struct CUdevSmResource_st:
-        unsigned int smCount
-
-    ctypedef CUdevSmResource_st CUdevSmResource
-
-    cdef struct CUdevResource_st:
-        CUdevResourceType type
-        unsigned char _internal_padding[92]
-        CUdevSmResource sm
-        unsigned char _oversize[48]
-
-    ctypedef CUdevResource_st CUdevResource_v1
-
-    ctypedef CUdevResource_v1 CUdevResource
-
-cdef extern from "cudaProfiler.h":
-
-    cdef enum CUoutput_mode_enum:
-        CU_OUT_KEY_VALUE_PAIR = 0
-        CU_OUT_CSV = 1
-
-    ctypedef CUoutput_mode_enum CUoutput_mode
-
-cdef enum CUeglFrameType_enum:
-    CU_EGL_FRAME_TYPE_ARRAY = 0
-    CU_EGL_FRAME_TYPE_PITCH = 1
-
-ctypedef CUeglFrameType_enum CUeglFrameType
-
-cdef enum CUeglResourceLocationFlags_enum:
-    CU_EGL_RESOURCE_LOCATION_SYSMEM = 0
-    CU_EGL_RESOURCE_LOCATION_VIDMEM = 1
-
-ctypedef CUeglResourceLocationFlags_enum CUeglResourceLocationFlags
-
-cdef enum CUeglColorFormat_enum:
-    CU_EGL_COLOR_FORMAT_YUV420_PLANAR = 0
-    CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR = 1
-    CU_EGL_COLOR_FORMAT_YUV422_PLANAR = 2
-    CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR = 3
-    CU_EGL_COLOR_FORMAT_RGB = 4
-    CU_EGL_COLOR_FORMAT_BGR = 5
-    CU_EGL_COLOR_FORMAT_ARGB = 6
-    CU_EGL_COLOR_FORMAT_RGBA = 7
-    CU_EGL_COLOR_FORMAT_L = 8
-    CU_EGL_COLOR_FORMAT_R = 9
-    CU_EGL_COLOR_FORMAT_YUV444_PLANAR = 10
-    CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR = 11
-    CU_EGL_COLOR_FORMAT_YUYV_422 = 12
-    CU_EGL_COLOR_FORMAT_UYVY_422 = 13
-    CU_EGL_COLOR_FORMAT_ABGR = 14
-    CU_EGL_COLOR_FORMAT_BGRA = 15
-    CU_EGL_COLOR_FORMAT_A = 16
-    CU_EGL_COLOR_FORMAT_RG = 17
-    CU_EGL_COLOR_FORMAT_AYUV = 18
-    CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR = 19
-    CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR = 20
-    CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR = 21
-    CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR = 22
-    CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR = 23
-    CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR = 24
-    CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR = 25
-    CU_EGL_COLOR_FORMAT_VYUY_ER = 26
-    CU_EGL_COLOR_FORMAT_UYVY_ER = 27
-    CU_EGL_COLOR_FORMAT_YUYV_ER = 28
-    CU_EGL_COLOR_FORMAT_YVYU_ER = 29
-    CU_EGL_COLOR_FORMAT_YUV_ER = 30
-    CU_EGL_COLOR_FORMAT_YUVA_ER = 31
-    CU_EGL_COLOR_FORMAT_AYUV_ER = 32
-    CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER = 33
-    CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER = 34
-    CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER = 35
-    CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER = 36
-    CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER = 37
-    CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER = 38
-    CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER = 39
-    CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER = 40
-    CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER = 41
-    CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER = 42
-    CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER = 43
-    CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER = 44
-    CU_EGL_COLOR_FORMAT_BAYER_RGGB = 45
-    CU_EGL_COLOR_FORMAT_BAYER_BGGR = 46
-    CU_EGL_COLOR_FORMAT_BAYER_GRBG = 47
-    CU_EGL_COLOR_FORMAT_BAYER_GBRG = 48
-    CU_EGL_COLOR_FORMAT_BAYER10_RGGB = 49
-    CU_EGL_COLOR_FORMAT_BAYER10_BGGR = 50
-    CU_EGL_COLOR_FORMAT_BAYER10_GRBG = 51
-    CU_EGL_COLOR_FORMAT_BAYER10_GBRG = 52
-    CU_EGL_COLOR_FORMAT_BAYER12_RGGB = 53
-    CU_EGL_COLOR_FORMAT_BAYER12_BGGR = 54
-    CU_EGL_COLOR_FORMAT_BAYER12_GRBG = 55
-    CU_EGL_COLOR_FORMAT_BAYER12_GBRG = 56
-    CU_EGL_COLOR_FORMAT_BAYER14_RGGB = 57
-    CU_EGL_COLOR_FORMAT_BAYER14_BGGR = 58
-    CU_EGL_COLOR_FORMAT_BAYER14_GRBG = 59
-    CU_EGL_COLOR_FORMAT_BAYER14_GBRG = 60
-    CU_EGL_COLOR_FORMAT_BAYER20_RGGB = 61
-    CU_EGL_COLOR_FORMAT_BAYER20_BGGR = 62
-    CU_EGL_COLOR_FORMAT_BAYER20_GRBG = 63
-    CU_EGL_COLOR_FORMAT_BAYER20_GBRG = 64
-    CU_EGL_COLOR_FORMAT_YVU444_PLANAR = 65
-    CU_EGL_COLOR_FORMAT_YVU422_PLANAR = 66
-    CU_EGL_COLOR_FORMAT_YVU420_PLANAR = 67
-    CU_EGL_COLOR_FORMAT_BAYER_ISP_RGGB = 68
-    CU_EGL_COLOR_FORMAT_BAYER_ISP_BGGR = 69
-    CU_EGL_COLOR_FORMAT_BAYER_ISP_GRBG = 70
-    CU_EGL_COLOR_FORMAT_BAYER_ISP_GBRG = 71
-    CU_EGL_COLOR_FORMAT_BAYER_BCCR = 72
-    CU_EGL_COLOR_FORMAT_BAYER_RCCB = 73
-    CU_EGL_COLOR_FORMAT_BAYER_CRBC = 74
-    CU_EGL_COLOR_FORMAT_BAYER_CBRC = 75
-    CU_EGL_COLOR_FORMAT_BAYER10_CCCC = 76
-    CU_EGL_COLOR_FORMAT_BAYER12_BCCR = 77
-    CU_EGL_COLOR_FORMAT_BAYER12_RCCB = 78
-    CU_EGL_COLOR_FORMAT_BAYER12_CRBC = 79
-    CU_EGL_COLOR_FORMAT_BAYER12_CBRC = 80
-    CU_EGL_COLOR_FORMAT_BAYER12_CCCC = 81
-    CU_EGL_COLOR_FORMAT_Y = 82
-    CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_2020 = 83
-    CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_2020 = 84
-    CU_EGL_COLOR_FORMAT_YUV420_PLANAR_2020 = 85
-    CU_EGL_COLOR_FORMAT_YVU420_PLANAR_2020 = 86
-    CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_709 = 87
-    CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_709 = 88
-    CU_EGL_COLOR_FORMAT_YUV420_PLANAR_709 = 89
-    CU_EGL_COLOR_FORMAT_YVU420_PLANAR_709 = 90
-    CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709 = 91
-    CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_2020 = 92
-    CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_2020 = 93
-    CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR = 94
-    CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_709 = 95
-    CU_EGL_COLOR_FORMAT_Y_ER = 96
-    CU_EGL_COLOR_FORMAT_Y_709_ER = 97
-    CU_EGL_COLOR_FORMAT_Y10_ER = 98
-    CU_EGL_COLOR_FORMAT_Y10_709_ER = 99
-    CU_EGL_COLOR_FORMAT_Y12_ER = 100
-    CU_EGL_COLOR_FORMAT_Y12_709_ER = 101
-    CU_EGL_COLOR_FORMAT_YUVA = 102
-    CU_EGL_COLOR_FORMAT_YUV = 103
-    CU_EGL_COLOR_FORMAT_YVYU = 104
-    CU_EGL_COLOR_FORMAT_VYUY = 105
-    CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_ER = 106
-    CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709_ER = 107
-    CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_ER = 108
-    CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_709_ER = 109
-    CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_ER = 110
-    CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_709_ER = 111
-    CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_ER = 112
-    CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_709_ER = 113
-    CU_EGL_COLOR_FORMAT_MAX = 114
-
-ctypedef CUeglColorFormat_enum CUeglColorFormat
-
-cdef union anon_union14:
-    CUarray pArray[3]
-    void* pPitch[3]
-
-cdef struct CUeglFrame_st:
-    anon_union14 frame
-    unsigned int width
-    unsigned int height
-    unsigned int depth
-    unsigned int pitch
-    unsigned int planeCount
-    unsigned int numChannels
-    CUeglFrameType frameType
-    CUeglColorFormat eglColorFormat
-    CUarray_format cuFormat
-
-ctypedef CUeglFrame_st CUeglFrame_v1
-
-ctypedef CUeglFrame_v1 CUeglFrame
-
-cdef extern from "":
-    cdef struct CUeglStreamConnection_st:
-        pass
-ctypedef CUeglStreamConnection_st* CUeglStreamConnection
-
-cdef enum CUGLDeviceList_enum:
-    CU_GL_DEVICE_LIST_ALL = 1
-    CU_GL_DEVICE_LIST_CURRENT_FRAME = 2
-    CU_GL_DEVICE_LIST_NEXT_FRAME = 3
-
-ctypedef CUGLDeviceList_enum CUGLDeviceList
-
-cdef enum CUGLmap_flags_enum:
-    CU_GL_MAP_RESOURCE_FLAGS_NONE = 0
-    CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 1
-    CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 2
-
-ctypedef CUGLmap_flags_enum CUGLmap_flags
-
-ctypedef unsigned int GLenum
-
-ctypedef unsigned int GLuint
-
-cdef extern from "":
-    cdef struct void:
-        pass
-ctypedef void* EGLImageKHR
-
-cdef extern from "":
-    cdef struct void:
-        pass
-ctypedef void* EGLStreamKHR
-
-ctypedef unsigned int EGLint
-
-cdef extern from "":
-    cdef struct void:
-        pass
-ctypedef void* EGLSyncKHR
-
-ctypedef uint32_t VdpDevice
-
-ctypedef unsigned long long VdpGetProcAddress
-
-ctypedef uint32_t VdpVideoSurface
-
-ctypedef uint32_t VdpOutputSurface
-
-{{if 'cuGetErrorString' in found_functions}}
-
-cdef CUresult cuGetErrorString(CUresult error, const char** pStr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGetErrorName' in found_functions}}
-
-cdef CUresult cuGetErrorName(CUresult error, const char** pStr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuInit' in found_functions}}
-
-cdef CUresult cuInit(unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDriverGetVersion' in found_functions}}
-
-cdef CUresult cuDriverGetVersion(int* driverVersion) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGet' in found_functions}}
-
-cdef CUresult cuDeviceGet(CUdevice* device, int ordinal) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetCount' in found_functions}}
-
-cdef CUresult cuDeviceGetCount(int* count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetName' in found_functions}}
-
-cdef CUresult cuDeviceGetName(char* name, int length, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetUuid' in found_functions}}
-
-cdef CUresult cuDeviceGetUuid(CUuuid* uuid, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetUuid_v2' in found_functions}}
-
-cdef CUresult cuDeviceGetUuid_v2(CUuuid* uuid, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetLuid' in found_functions}}
-
-cdef CUresult cuDeviceGetLuid(char* luid, unsigned int* deviceNodeMask, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceTotalMem_v2' in found_functions}}
-
-cdef CUresult cuDeviceTotalMem(size_t* numbytes, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-
-cdef CUresult cuDeviceGetTexture1DLinearMaxWidth(size_t* maxWidthInElements, CUarray_format pformat, unsigned numChannels, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetAttribute' in found_functions}}
-
-cdef CUresult cuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetNvSciSyncAttributes' in found_functions}}
-
-cdef CUresult cuDeviceGetNvSciSyncAttributes(void* nvSciSyncAttrList, CUdevice dev, int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceSetMemPool' in found_functions}}
-
-cdef CUresult cuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetMemPool' in found_functions}}
-
-cdef CUresult cuDeviceGetMemPool(CUmemoryPool* pool, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetDefaultMemPool' in found_functions}}
-
-cdef CUresult cuDeviceGetDefaultMemPool(CUmemoryPool* pool_out, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetExecAffinitySupport' in found_functions}}
-
-cdef CUresult cuDeviceGetExecAffinitySupport(int* pi, CUexecAffinityType typename, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFlushGPUDirectRDMAWrites' in found_functions}}
-
-cdef CUresult cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetProperties' in found_functions}}
-
-cdef CUresult cuDeviceGetProperties(CUdevprop* prop, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceComputeCapability' in found_functions}}
-
-cdef CUresult cuDeviceComputeCapability(int* major, int* minor, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxRetain' in found_functions}}
-
-cdef CUresult cuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxRelease_v2' in found_functions}}
-
-cdef CUresult cuDevicePrimaryCtxRelease(CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxSetFlags_v2' in found_functions}}
-
-cdef CUresult cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxGetState' in found_functions}}
-
-cdef CUresult cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int* flags, int* active) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxReset_v2' in found_functions}}
-
-cdef CUresult cuDevicePrimaryCtxReset(CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxCreate_v2' in found_functions}}
-
-cdef CUresult cuCtxCreate(CUcontext* pctx, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxCreate_v3' in found_functions}}
-
-cdef CUresult cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxCreate_v4' in found_functions}}
-
-cdef CUresult cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxDestroy_v2' in found_functions}}
-
-cdef CUresult cuCtxDestroy(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxPushCurrent_v2' in found_functions}}
-
-cdef CUresult cuCtxPushCurrent(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxPopCurrent_v2' in found_functions}}
-
-cdef CUresult cuCtxPopCurrent(CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSetCurrent' in found_functions}}
-
-cdef CUresult cuCtxSetCurrent(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetCurrent' in found_functions}}
-
-cdef CUresult cuCtxGetCurrent(CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetDevice' in found_functions}}
-
-cdef CUresult cuCtxGetDevice(CUdevice* device) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetFlags' in found_functions}}
-
-cdef CUresult cuCtxGetFlags(unsigned int* flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSetFlags' in found_functions}}
-
-cdef CUresult cuCtxSetFlags(unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetId' in found_functions}}
-
-cdef CUresult cuCtxGetId(CUcontext ctx, unsigned long long* ctxId) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSynchronize' in found_functions}}
-
-cdef CUresult cuCtxSynchronize() except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSetLimit' in found_functions}}
-
-cdef CUresult cuCtxSetLimit(CUlimit limit, size_t value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetLimit' in found_functions}}
-
-cdef CUresult cuCtxGetLimit(size_t* pvalue, CUlimit limit) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetCacheConfig' in found_functions}}
-
-cdef CUresult cuCtxGetCacheConfig(CUfunc_cache* pconfig) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSetCacheConfig' in found_functions}}
-
-cdef CUresult cuCtxSetCacheConfig(CUfunc_cache config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetApiVersion' in found_functions}}
-
-cdef CUresult cuCtxGetApiVersion(CUcontext ctx, unsigned int* version) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetStreamPriorityRange' in found_functions}}
-
-cdef CUresult cuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxResetPersistingL2Cache' in found_functions}}
-
-cdef CUresult cuCtxResetPersistingL2Cache() except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetExecAffinity' in found_functions}}
-
-cdef CUresult cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAffinityType typename) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxRecordEvent' in found_functions}}
-
-cdef CUresult cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxWaitEvent' in found_functions}}
-
-cdef CUresult cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxAttach' in found_functions}}
-
-cdef CUresult cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxDetach' in found_functions}}
-
-cdef CUresult cuCtxDetach(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetSharedMemConfig' in found_functions}}
-
-cdef CUresult cuCtxGetSharedMemConfig(CUsharedconfig* pConfig) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxSetSharedMemConfig' in found_functions}}
-
-cdef CUresult cuCtxSetSharedMemConfig(CUsharedconfig config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleLoad' in found_functions}}
-
-cdef CUresult cuModuleLoad(CUmodule* module, const char* fname) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleLoadData' in found_functions}}
-
-cdef CUresult cuModuleLoadData(CUmodule* module, const void* image) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleLoadDataEx' in found_functions}}
-
-cdef CUresult cuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleLoadFatBinary' in found_functions}}
-
-cdef CUresult cuModuleLoadFatBinary(CUmodule* module, const void* fatCubin) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleUnload' in found_functions}}
-
-cdef CUresult cuModuleUnload(CUmodule hmod) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetLoadingMode' in found_functions}}
-
-cdef CUresult cuModuleGetLoadingMode(CUmoduleLoadingMode* mode) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetFunction' in found_functions}}
-
-cdef CUresult cuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetFunctionCount' in found_functions}}
-
-cdef CUresult cuModuleGetFunctionCount(unsigned int* count, CUmodule mod) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleEnumerateFunctions' in found_functions}}
-
-cdef CUresult cuModuleEnumerateFunctions(CUfunction* functions, unsigned int numFunctions, CUmodule mod) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetGlobal_v2' in found_functions}}
-
-cdef CUresult cuModuleGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLinkCreate_v2' in found_functions}}
-
-cdef CUresult cuLinkCreate(unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLinkAddData_v2' in found_functions}}
-
-cdef CUresult cuLinkAddData(CUlinkState state, CUjitInputType typename, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLinkAddFile_v2' in found_functions}}
-
-cdef CUresult cuLinkAddFile(CUlinkState state, CUjitInputType typename, const char* path, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLinkComplete' in found_functions}}
-
-cdef CUresult cuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLinkDestroy' in found_functions}}
-
-cdef CUresult cuLinkDestroy(CUlinkState state) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetTexRef' in found_functions}}
-
-cdef CUresult cuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuModuleGetSurfRef' in found_functions}}
-
-cdef CUresult cuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryLoadData' in found_functions}}
-
-cdef CUresult cuLibraryLoadData(CUlibrary* library, const void* code, CUjit_option* jitOptions, void** jitOptionsValues, unsigned int numJitOptions, CUlibraryOption* libraryOptions, void** libraryOptionValues, unsigned int numLibraryOptions) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryLoadFromFile' in found_functions}}
-
-cdef CUresult cuLibraryLoadFromFile(CUlibrary* library, const char* fileName, CUjit_option* jitOptions, void** jitOptionsValues, unsigned int numJitOptions, CUlibraryOption* libraryOptions, void** libraryOptionValues, unsigned int numLibraryOptions) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryUnload' in found_functions}}
-
-cdef CUresult cuLibraryUnload(CUlibrary library) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetKernel' in found_functions}}
-
-cdef CUresult cuLibraryGetKernel(CUkernel* pKernel, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetKernelCount' in found_functions}}
-
-cdef CUresult cuLibraryGetKernelCount(unsigned int* count, CUlibrary lib) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryEnumerateKernels' in found_functions}}
-
-cdef CUresult cuLibraryEnumerateKernels(CUkernel* kernels, unsigned int numKernels, CUlibrary lib) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetModule' in found_functions}}
-
-cdef CUresult cuLibraryGetModule(CUmodule* pMod, CUlibrary library) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelGetFunction' in found_functions}}
-
-cdef CUresult cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelGetLibrary' in found_functions}}
-
-cdef CUresult cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetGlobal' in found_functions}}
-
-cdef CUresult cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetManaged' in found_functions}}
-
-cdef CUresult cuLibraryGetManaged(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLibraryGetUnifiedFunction' in found_functions}}
-
-cdef CUresult cuLibraryGetUnifiedFunction(void** fptr, CUlibrary library, const char* symbol) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelGetAttribute' in found_functions}}
-
-cdef CUresult cuKernelGetAttribute(int* pi, CUfunction_attribute attrib, CUkernel kernel, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelSetAttribute' in found_functions}}
-
-cdef CUresult cuKernelSetAttribute(CUfunction_attribute attrib, int val, CUkernel kernel, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelSetCacheConfig' in found_functions}}
-
-cdef CUresult cuKernelSetCacheConfig(CUkernel kernel, CUfunc_cache config, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelGetName' in found_functions}}
-
-cdef CUresult cuKernelGetName(const char** name, CUkernel hfunc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuKernelGetParamInfo' in found_functions}}
-
-cdef CUresult cuKernelGetParamInfo(CUkernel kernel, size_t paramIndex, size_t* paramOffset, size_t* paramSize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetInfo_v2' in found_functions}}
-
-cdef CUresult cuMemGetInfo(size_t* free, size_t* total) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAlloc_v2' in found_functions}}
-
-cdef CUresult cuMemAlloc(CUdeviceptr* dptr, size_t bytesize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAllocPitch_v2' in found_functions}}
-
-cdef CUresult cuMemAllocPitch(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemFree_v2' in found_functions}}
-
-cdef CUresult cuMemFree(CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetAddressRange_v2' in found_functions}}
-
-cdef CUresult cuMemGetAddressRange(CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAllocHost_v2' in found_functions}}
-
-cdef CUresult cuMemAllocHost(void** pp, size_t bytesize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemFreeHost' in found_functions}}
-
-cdef CUresult cuMemFreeHost(void* p) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemHostAlloc' in found_functions}}
-
-cdef CUresult cuMemHostAlloc(void** pp, size_t bytesize, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemHostGetDevicePointer_v2' in found_functions}}
-
-cdef CUresult cuMemHostGetDevicePointer(CUdeviceptr* pdptr, void* p, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemHostGetFlags' in found_functions}}
-
-cdef CUresult cuMemHostGetFlags(unsigned int* pFlags, void* p) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAllocManaged' in found_functions}}
-
-cdef CUresult cuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceRegisterAsyncNotification' in found_functions}}
-
-cdef CUresult cuDeviceRegisterAsyncNotification(CUdevice device, CUasyncCallback callbackFunc, void* userData, CUasyncCallbackHandle* callback) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceUnregisterAsyncNotification' in found_functions}}
-
-cdef CUresult cuDeviceUnregisterAsyncNotification(CUdevice device, CUasyncCallbackHandle callback) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetByPCIBusId' in found_functions}}
-
-cdef CUresult cuDeviceGetByPCIBusId(CUdevice* dev, const char* pciBusId) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetPCIBusId' in found_functions}}
-
-cdef CUresult cuDeviceGetPCIBusId(char* pciBusId, int length, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuIpcGetEventHandle' in found_functions}}
-
-cdef CUresult cuIpcGetEventHandle(CUipcEventHandle* pHandle, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuIpcOpenEventHandle' in found_functions}}
-
-cdef CUresult cuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuIpcGetMemHandle' in found_functions}}
-
-cdef CUresult cuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuIpcOpenMemHandle_v2' in found_functions}}
-
-cdef CUresult cuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuIpcCloseMemHandle' in found_functions}}
-
-cdef CUresult cuIpcCloseMemHandle(CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemHostRegister_v2' in found_functions}}
-
-cdef CUresult cuMemHostRegister(void* p, size_t bytesize, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemHostUnregister' in found_functions}}
-
-cdef CUresult cuMemHostUnregister(void* p) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy' in found_functions}}
-
-cdef CUresult cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyPeer' in found_functions}}
-
-cdef CUresult cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyHtoD_v2' in found_functions}}
-
-cdef CUresult cuMemcpyHtoD(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyDtoH_v2' in found_functions}}
-
-cdef CUresult cuMemcpyDtoH(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyDtoD_v2' in found_functions}}
-
-cdef CUresult cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyDtoA_v2' in found_functions}}
-
-cdef CUresult cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyAtoD_v2' in found_functions}}
-
-cdef CUresult cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyHtoA_v2' in found_functions}}
-
-cdef CUresult cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyAtoH_v2' in found_functions}}
-
-cdef CUresult cuMemcpyAtoH(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyAtoA_v2' in found_functions}}
-
-cdef CUresult cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy2D_v2' in found_functions}}
-
-cdef CUresult cuMemcpy2D(const CUDA_MEMCPY2D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy2DUnaligned_v2' in found_functions}}
-
-cdef CUresult cuMemcpy2DUnaligned(const CUDA_MEMCPY2D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy3D_v2' in found_functions}}
-
-cdef CUresult cuMemcpy3D(const CUDA_MEMCPY3D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy3DPeer' in found_functions}}
-
-cdef CUresult cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyAsync' in found_functions}}
-
-cdef CUresult cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyPeerAsync' in found_functions}}
-
-cdef CUresult cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyHtoDAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyDtoHAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpyDtoHAsync(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyDtoDAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyHtoAAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpyAtoHAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpyAtoHAsync(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy2DAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpy2DAsync(const CUDA_MEMCPY2D* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy3DAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpy3DAsync(const CUDA_MEMCPY3D* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemcpy3DPeerAsync' in found_functions}}
-
-cdef CUresult cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD8_v2' in found_functions}}
-
-cdef CUresult cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD16_v2' in found_functions}}
-
-cdef CUresult cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD32_v2' in found_functions}}
-
-cdef CUresult cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D8_v2' in found_functions}}
-
-cdef CUresult cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D16_v2' in found_functions}}
-
-cdef CUresult cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D32_v2' in found_functions}}
-
-cdef CUresult cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD8Async' in found_functions}}
-
-cdef CUresult cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD16Async' in found_functions}}
-
-cdef CUresult cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD32Async' in found_functions}}
-
-cdef CUresult cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D8Async' in found_functions}}
-
-cdef CUresult cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D16Async' in found_functions}}
-
-cdef CUresult cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemsetD2D32Async' in found_functions}}
-
-cdef CUresult cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayCreate_v2' in found_functions}}
-
-cdef CUresult cuArrayCreate(CUarray* pHandle, const CUDA_ARRAY_DESCRIPTOR* pAllocateArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayGetDescriptor_v2' in found_functions}}
-
-cdef CUresult cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor, CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayGetSparseProperties' in found_functions}}
-
-cdef CUresult cuArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUarray array) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMipmappedArrayGetSparseProperties' in found_functions}}
-
-cdef CUresult cuMipmappedArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUmipmappedArray mipmap) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayGetMemoryRequirements' in found_functions}}
-
-cdef CUresult cuArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS* memoryRequirements, CUarray array, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMipmappedArrayGetMemoryRequirements' in found_functions}}
-
-cdef CUresult cuMipmappedArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS* memoryRequirements, CUmipmappedArray mipmap, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayGetPlane' in found_functions}}
-
-cdef CUresult cuArrayGetPlane(CUarray* pPlaneArray, CUarray hArray, unsigned int planeIdx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArrayDestroy' in found_functions}}
-
-cdef CUresult cuArrayDestroy(CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArray3DCreate_v2' in found_functions}}
-
-cdef CUresult cuArray3DCreate(CUarray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuArray3DGetDescriptor_v2' in found_functions}}
-
-cdef CUresult cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor, CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMipmappedArrayCreate' in found_functions}}
-
-cdef CUresult cuMipmappedArrayCreate(CUmipmappedArray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, unsigned int numMipmapLevels) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMipmappedArrayGetLevel' in found_functions}}
-
-cdef CUresult cuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned int level) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMipmappedArrayDestroy' in found_functions}}
-
-cdef CUresult cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetHandleForAddressRange' in found_functions}}
-
-cdef CUresult cuMemGetHandleForAddressRange(void* handle, CUdeviceptr dptr, size_t size, CUmemRangeHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAddressReserve' in found_functions}}
-
-cdef CUresult cuMemAddressReserve(CUdeviceptr* ptr, size_t size, size_t alignment, CUdeviceptr addr, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAddressFree' in found_functions}}
-
-cdef CUresult cuMemAddressFree(CUdeviceptr ptr, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemCreate' in found_functions}}
-
-cdef CUresult cuMemCreate(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemRelease' in found_functions}}
-
-cdef CUresult cuMemRelease(CUmemGenericAllocationHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemMap' in found_functions}}
-
-cdef CUresult cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemMapArrayAsync' in found_functions}}
-
-cdef CUresult cuMemMapArrayAsync(CUarrayMapInfo* mapInfoList, unsigned int count, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemUnmap' in found_functions}}
-
-cdef CUresult cuMemUnmap(CUdeviceptr ptr, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemSetAccess' in found_functions}}
-
-cdef CUresult cuMemSetAccess(CUdeviceptr ptr, size_t size, const CUmemAccessDesc* desc, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetAccess' in found_functions}}
-
-cdef CUresult cuMemGetAccess(unsigned long long* flags, const CUmemLocation* location, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemExportToShareableHandle' in found_functions}}
-
-cdef CUresult cuMemExportToShareableHandle(void* shareableHandle, CUmemGenericAllocationHandle handle, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemImportFromShareableHandle' in found_functions}}
-
-cdef CUresult cuMemImportFromShareableHandle(CUmemGenericAllocationHandle* handle, void* osHandle, CUmemAllocationHandleType shHandleType) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetAllocationGranularity' in found_functions}}
-
-cdef CUresult cuMemGetAllocationGranularity(size_t* granularity, const CUmemAllocationProp* prop, CUmemAllocationGranularity_flags option) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemGetAllocationPropertiesFromHandle' in found_functions}}
-
-cdef CUresult cuMemGetAllocationPropertiesFromHandle(CUmemAllocationProp* prop, CUmemGenericAllocationHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemRetainAllocationHandle' in found_functions}}
-
-cdef CUresult cuMemRetainAllocationHandle(CUmemGenericAllocationHandle* handle, void* addr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemFreeAsync' in found_functions}}
-
-cdef CUresult cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAllocAsync' in found_functions}}
-
-cdef CUresult cuMemAllocAsync(CUdeviceptr* dptr, size_t bytesize, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolTrimTo' in found_functions}}
-
-cdef CUresult cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolSetAttribute' in found_functions}}
-
-cdef CUresult cuMemPoolSetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolGetAttribute' in found_functions}}
-
-cdef CUresult cuMemPoolGetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolSetAccess' in found_functions}}
-
-cdef CUresult cuMemPoolSetAccess(CUmemoryPool pool, const CUmemAccessDesc* map, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolGetAccess' in found_functions}}
-
-cdef CUresult cuMemPoolGetAccess(CUmemAccess_flags* flags, CUmemoryPool memPool, CUmemLocation* location) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolCreate' in found_functions}}
-
-cdef CUresult cuMemPoolCreate(CUmemoryPool* pool, const CUmemPoolProps* poolProps) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolDestroy' in found_functions}}
-
-cdef CUresult cuMemPoolDestroy(CUmemoryPool pool) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAllocFromPoolAsync' in found_functions}}
-
-cdef CUresult cuMemAllocFromPoolAsync(CUdeviceptr* dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolExportToShareableHandle' in found_functions}}
-
-cdef CUresult cuMemPoolExportToShareableHandle(void* handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolImportFromShareableHandle' in found_functions}}
-
-cdef CUresult cuMemPoolImportFromShareableHandle(CUmemoryPool* pool_out, void* handle, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolExportPointer' in found_functions}}
-
-cdef CUresult cuMemPoolExportPointer(CUmemPoolPtrExportData* shareData_out, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPoolImportPointer' in found_functions}}
-
-cdef CUresult cuMemPoolImportPointer(CUdeviceptr* ptr_out, CUmemoryPool pool, CUmemPoolPtrExportData* shareData) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastCreate' in found_functions}}
-
-cdef CUresult cuMulticastCreate(CUmemGenericAllocationHandle* mcHandle, const CUmulticastObjectProp* prop) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastAddDevice' in found_functions}}
-
-cdef CUresult cuMulticastAddDevice(CUmemGenericAllocationHandle mcHandle, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastBindMem' in found_functions}}
-
-cdef CUresult cuMulticastBindMem(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUmemGenericAllocationHandle memHandle, size_t memOffset, size_t size, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastBindAddr' in found_functions}}
-
-cdef CUresult cuMulticastBindAddr(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUdeviceptr memptr, size_t size, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastUnbind' in found_functions}}
-
-cdef CUresult cuMulticastUnbind(CUmemGenericAllocationHandle mcHandle, CUdevice dev, size_t mcOffset, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMulticastGetGranularity' in found_functions}}
-
-cdef CUresult cuMulticastGetGranularity(size_t* granularity, const CUmulticastObjectProp* prop, CUmulticastGranularity_flags option) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuPointerGetAttribute' in found_functions}}
-
-cdef CUresult cuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPrefetchAsync' in found_functions}}
-
-cdef CUresult cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemPrefetchAsync_v2' in found_functions}}
-
-cdef CUresult cuMemPrefetchAsync_v2(CUdeviceptr devPtr, size_t count, CUmemLocation location, unsigned int flags, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAdvise' in found_functions}}
-
-cdef CUresult cuMemAdvise(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemAdvise_v2' in found_functions}}
-
-cdef CUresult cuMemAdvise_v2(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUmemLocation location) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemRangeGetAttribute' in found_functions}}
-
-cdef CUresult cuMemRangeGetAttribute(void* data, size_t dataSize, CUmem_range_attribute attribute, CUdeviceptr devPtr, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuMemRangeGetAttributes' in found_functions}}
-
-cdef CUresult cuMemRangeGetAttributes(void** data, size_t* dataSizes, CUmem_range_attribute* attributes, size_t numAttributes, CUdeviceptr devPtr, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuPointerSetAttribute' in found_functions}}
-
-cdef CUresult cuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuPointerGetAttributes' in found_functions}}
-
-cdef CUresult cuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute* attributes, void** data, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamCreate' in found_functions}}
-
-cdef CUresult cuStreamCreate(CUstream* phStream, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamCreateWithPriority' in found_functions}}
-
-cdef CUresult cuStreamCreateWithPriority(CUstream* phStream, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetPriority' in found_functions}}
-
-cdef CUresult cuStreamGetPriority(CUstream hStream, int* priority) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetFlags' in found_functions}}
-
-cdef CUresult cuStreamGetFlags(CUstream hStream, unsigned int* flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetId' in found_functions}}
-
-cdef CUresult cuStreamGetId(CUstream hStream, unsigned long long* streamId) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetCtx' in found_functions}}
-
-cdef CUresult cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetCtx_v2' in found_functions}}
-
-cdef CUresult cuStreamGetCtx_v2(CUstream hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamWaitEvent' in found_functions}}
-
-cdef CUresult cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamAddCallback' in found_functions}}
-
-cdef CUresult cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void* userData, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamBeginCapture_v2' in found_functions}}
-
-cdef CUresult cuStreamBeginCapture(CUstream hStream, CUstreamCaptureMode mode) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamBeginCaptureToGraph' in found_functions}}
-
-cdef CUresult cuStreamBeginCaptureToGraph(CUstream hStream, CUgraph hGraph, const CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, CUstreamCaptureMode mode) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuThreadExchangeStreamCaptureMode' in found_functions}}
-
-cdef CUresult cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode* mode) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamEndCapture' in found_functions}}
-
-cdef CUresult cuStreamEndCapture(CUstream hStream, CUgraph* phGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamIsCapturing' in found_functions}}
-
-cdef CUresult cuStreamIsCapturing(CUstream hStream, CUstreamCaptureStatus* captureStatus) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetCaptureInfo_v2' in found_functions}}
-
-cdef CUresult cuStreamGetCaptureInfo(CUstream hStream, CUstreamCaptureStatus* captureStatus_out, cuuint64_t* id_out, CUgraph* graph_out, const CUgraphNode** dependencies_out, size_t* numDependencies_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetCaptureInfo_v3' in found_functions}}
-
-cdef CUresult cuStreamGetCaptureInfo_v3(CUstream hStream, CUstreamCaptureStatus* captureStatus_out, cuuint64_t* id_out, CUgraph* graph_out, const CUgraphNode** dependencies_out, const CUgraphEdgeData** edgeData_out, size_t* numDependencies_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamUpdateCaptureDependencies' in found_functions}}
-
-cdef CUresult cuStreamUpdateCaptureDependencies(CUstream hStream, CUgraphNode* dependencies, size_t numDependencies, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}
-
-cdef CUresult cuStreamUpdateCaptureDependencies_v2(CUstream hStream, CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamAttachMemAsync' in found_functions}}
-
-cdef CUresult cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamQuery' in found_functions}}
-
-cdef CUresult cuStreamQuery(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamSynchronize' in found_functions}}
-
-cdef CUresult cuStreamSynchronize(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamDestroy_v2' in found_functions}}
-
-cdef CUresult cuStreamDestroy(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamCopyAttributes' in found_functions}}
-
-cdef CUresult cuStreamCopyAttributes(CUstream dst, CUstream src) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetAttribute' in found_functions}}
-
-cdef CUresult cuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, CUstreamAttrValue* value_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamSetAttribute' in found_functions}}
-
-cdef CUresult cuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr, const CUstreamAttrValue* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventCreate' in found_functions}}
-
-cdef CUresult cuEventCreate(CUevent* phEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventRecord' in found_functions}}
-
-cdef CUresult cuEventRecord(CUevent hEvent, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventRecordWithFlags' in found_functions}}
-
-cdef CUresult cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventQuery' in found_functions}}
-
-cdef CUresult cuEventQuery(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventSynchronize' in found_functions}}
-
-cdef CUresult cuEventSynchronize(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventDestroy_v2' in found_functions}}
-
-cdef CUresult cuEventDestroy(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuEventElapsedTime' in found_functions}}
-
-cdef CUresult cuEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuImportExternalMemory' in found_functions}}
-
-cdef CUresult cuImportExternalMemory(CUexternalMemory* extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC* memHandleDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuExternalMemoryGetMappedBuffer' in found_functions}}
-
-cdef CUresult cuExternalMemoryGetMappedBuffer(CUdeviceptr* devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC* bufferDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuExternalMemoryGetMappedMipmappedArray' in found_functions}}
-
-cdef CUresult cuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray* mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC* mipmapDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDestroyExternalMemory' in found_functions}}
-
-cdef CUresult cuDestroyExternalMemory(CUexternalMemory extMem) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuImportExternalSemaphore' in found_functions}}
-
-cdef CUresult cuImportExternalSemaphore(CUexternalSemaphore* extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC* semHandleDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSignalExternalSemaphoresAsync' in found_functions}}
-
-cdef CUresult cuSignalExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuWaitExternalSemaphoresAsync' in found_functions}}
-
-cdef CUresult cuWaitExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDestroyExternalSemaphore' in found_functions}}
-
-cdef CUresult cuDestroyExternalSemaphore(CUexternalSemaphore extSem) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamWaitValue32_v2' in found_functions}}
-
-cdef CUresult cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamWaitValue64_v2' in found_functions}}
-
-cdef CUresult cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamWriteValue32_v2' in found_functions}}
-
-cdef CUresult cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamWriteValue64_v2' in found_functions}}
-
-cdef CUresult cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamBatchMemOp_v2' in found_functions}}
-
-cdef CUresult cuStreamBatchMemOp(CUstream stream, unsigned int count, CUstreamBatchMemOpParams* paramArray, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncGetAttribute' in found_functions}}
-
-cdef CUresult cuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncSetAttribute' in found_functions}}
-
-cdef CUresult cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncSetCacheConfig' in found_functions}}
-
-cdef CUresult cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncGetModule' in found_functions}}
-
-cdef CUresult cuFuncGetModule(CUmodule* hmod, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncGetName' in found_functions}}
-
-cdef CUresult cuFuncGetName(const char** name, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncGetParamInfo' in found_functions}}
-
-cdef CUresult cuFuncGetParamInfo(CUfunction func, size_t paramIndex, size_t* paramOffset, size_t* paramSize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncIsLoaded' in found_functions}}
-
-cdef CUresult cuFuncIsLoaded(CUfunctionLoadingState* state, CUfunction function) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncLoad' in found_functions}}
-
-cdef CUresult cuFuncLoad(CUfunction function) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchKernel' in found_functions}}
-
-cdef CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchKernelEx' in found_functions}}
-
-cdef CUresult cuLaunchKernelEx(const CUlaunchConfig* config, CUfunction f, void** kernelParams, void** extra) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchCooperativeKernel' in found_functions}}
-
-cdef CUresult cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchCooperativeKernelMultiDevice' in found_functions}}
-
-cdef CUresult cuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS* launchParamsList, unsigned int numDevices, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchHostFunc' in found_functions}}
-
-cdef CUresult cuLaunchHostFunc(CUstream hStream, CUhostFn fn, void* userData) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncSetBlockShape' in found_functions}}
-
-cdef CUresult cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncSetSharedSize' in found_functions}}
-
-cdef CUresult cuFuncSetSharedSize(CUfunction hfunc, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuParamSetSize' in found_functions}}
-
-cdef CUresult cuParamSetSize(CUfunction hfunc, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuParamSeti' in found_functions}}
-
-cdef CUresult cuParamSeti(CUfunction hfunc, int offset, unsigned int value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuParamSetf' in found_functions}}
-
-cdef CUresult cuParamSetf(CUfunction hfunc, int offset, float value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuParamSetv' in found_functions}}
-
-cdef CUresult cuParamSetv(CUfunction hfunc, int offset, void* ptr, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunch' in found_functions}}
-
-cdef CUresult cuLaunch(CUfunction f) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchGrid' in found_functions}}
-
-cdef CUresult cuLaunchGrid(CUfunction f, int grid_width, int grid_height) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuLaunchGridAsync' in found_functions}}
-
-cdef CUresult cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuParamSetTexRef' in found_functions}}
-
-cdef CUresult cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuFuncSetSharedMemConfig' in found_functions}}
-
-cdef CUresult cuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphCreate' in found_functions}}
-
-cdef CUresult cuGraphCreate(CUgraph* phGraph, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddKernelNode_v2' in found_functions}}
-
-cdef CUresult cuGraphAddKernelNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphKernelNodeGetParams_v2' in found_functions}}
-
-cdef CUresult cuGraphKernelNodeGetParams(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphKernelNodeSetParams_v2' in found_functions}}
-
-cdef CUresult cuGraphKernelNodeSetParams(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddMemcpyNode' in found_functions}}
-
-cdef CUresult cuGraphAddMemcpyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMCPY3D* copyParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemcpyNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemcpyNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddMemsetNode' in found_functions}}
-
-cdef CUresult cuGraphAddMemsetNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemsetNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemsetNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphMemsetNodeSetParams(CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddHostNode' in found_functions}}
-
-cdef CUresult cuGraphAddHostNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphHostNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphHostNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphHostNodeSetParams(CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddChildGraphNode' in found_functions}}
-
-cdef CUresult cuGraphAddChildGraphNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUgraph childGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphChildGraphNodeGetGraph' in found_functions}}
-
-cdef CUresult cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph* phGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddEmptyNode' in found_functions}}
-
-cdef CUresult cuGraphAddEmptyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddEventRecordNode' in found_functions}}
-
-cdef CUresult cuGraphAddEventRecordNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphEventRecordNodeGetEvent' in found_functions}}
-
-cdef CUresult cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent* event_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphEventRecordNodeSetEvent' in found_functions}}
-
-cdef CUresult cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddEventWaitNode' in found_functions}}
-
-cdef CUresult cuGraphAddEventWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphEventWaitNodeGetEvent' in found_functions}}
-
-cdef CUresult cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent* event_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphEventWaitNodeSetEvent' in found_functions}}
-
-cdef CUresult cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddExternalSemaphoresSignalNode' in found_functions}}
-
-cdef CUresult cuGraphAddExternalSemaphoresSignalNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExternalSemaphoresSignalNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddExternalSemaphoresWaitNode' in found_functions}}
-
-cdef CUresult cuGraphAddExternalSemaphoresWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddBatchMemOpNode' in found_functions}}
-
-cdef CUresult cuGraphAddBatchMemOpNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphBatchMemOpNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphBatchMemOpNodeGetParams(CUgraphNode hNode, CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphBatchMemOpNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphBatchMemOpNodeSetParams(CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecBatchMemOpNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecBatchMemOpNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddMemAllocNode' in found_functions}}
-
-cdef CUresult cuGraphAddMemAllocNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUDA_MEM_ALLOC_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemAllocNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphMemAllocNodeGetParams(CUgraphNode hNode, CUDA_MEM_ALLOC_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddMemFreeNode' in found_functions}}
-
-cdef CUresult cuGraphAddMemFreeNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphMemFreeNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphMemFreeNodeGetParams(CUgraphNode hNode, CUdeviceptr* dptr_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGraphMemTrim' in found_functions}}
-
-cdef CUresult cuDeviceGraphMemTrim(CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetGraphMemAttribute' in found_functions}}
-
-cdef CUresult cuDeviceGetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceSetGraphMemAttribute' in found_functions}}
-
-cdef CUresult cuDeviceSetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphClone' in found_functions}}
-
-cdef CUresult cuGraphClone(CUgraph* phGraphClone, CUgraph originalGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeFindInClone' in found_functions}}
-
-cdef CUresult cuGraphNodeFindInClone(CUgraphNode* phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetType' in found_functions}}
-
-cdef CUresult cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType* typename) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphGetNodes' in found_functions}}
-
-cdef CUresult cuGraphGetNodes(CUgraph hGraph, CUgraphNode* nodes, size_t* numNodes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphGetRootNodes' in found_functions}}
-
-cdef CUresult cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode* rootNodes, size_t* numRootNodes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphGetEdges' in found_functions}}
-
-cdef CUresult cuGraphGetEdges(CUgraph hGraph, CUgraphNode* from_, CUgraphNode* to, size_t* numEdges) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphGetEdges_v2' in found_functions}}
-
-cdef CUresult cuGraphGetEdges_v2(CUgraph hGraph, CUgraphNode* from_, CUgraphNode* to, CUgraphEdgeData* edgeData, size_t* numEdges) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetDependencies' in found_functions}}
-
-cdef CUresult cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode* dependencies, size_t* numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetDependencies_v2' in found_functions}}
-
-cdef CUresult cuGraphNodeGetDependencies_v2(CUgraphNode hNode, CUgraphNode* dependencies, CUgraphEdgeData* edgeData, size_t* numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetDependentNodes' in found_functions}}
-
-cdef CUresult cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode* dependentNodes, size_t* numDependentNodes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetDependentNodes_v2' in found_functions}}
-
-cdef CUresult cuGraphNodeGetDependentNodes_v2(CUgraphNode hNode, CUgraphNode* dependentNodes, CUgraphEdgeData* edgeData, size_t* numDependentNodes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddDependencies' in found_functions}}
-
-cdef CUresult cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddDependencies_v2' in found_functions}}
-
-cdef CUresult cuGraphAddDependencies_v2(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, const CUgraphEdgeData* edgeData, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphRemoveDependencies' in found_functions}}
-
-cdef CUresult cuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphRemoveDependencies_v2' in found_functions}}
-
-cdef CUresult cuGraphRemoveDependencies_v2(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, const CUgraphEdgeData* edgeData, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphDestroyNode' in found_functions}}
-
-cdef CUresult cuGraphDestroyNode(CUgraphNode hNode) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphInstantiateWithFlags' in found_functions}}
-
-cdef CUresult cuGraphInstantiate(CUgraphExec* phGraphExec, CUgraph hGraph, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphInstantiateWithParams' in found_functions}}
-
-cdef CUresult cuGraphInstantiateWithParams(CUgraphExec* phGraphExec, CUgraph hGraph, CUDA_GRAPH_INSTANTIATE_PARAMS* instantiateParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecGetFlags' in found_functions}}
-
-cdef CUresult cuGraphExecGetFlags(CUgraphExec hGraphExec, cuuint64_t* flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecKernelNodeSetParams_v2' in found_functions}}
-
-cdef CUresult cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecMemcpyNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMCPY3D* copyParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecMemsetNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecMemsetNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecHostNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecChildGraphNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecEventRecordNodeSetEvent' in found_functions}}
-
-cdef CUresult cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecEventWaitNodeSetEvent' in found_functions}}
-
-cdef CUresult cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeSetEnabled' in found_functions}}
-
-cdef CUresult cuGraphNodeSetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int isEnabled) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeGetEnabled' in found_functions}}
-
-cdef CUresult cuGraphNodeGetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int* isEnabled) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphUpload' in found_functions}}
-
-cdef CUresult cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphLaunch' in found_functions}}
-
-cdef CUresult cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecDestroy' in found_functions}}
-
-cdef CUresult cuGraphExecDestroy(CUgraphExec hGraphExec) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphDestroy' in found_functions}}
-
-cdef CUresult cuGraphDestroy(CUgraph hGraph) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecUpdate_v2' in found_functions}}
-
-cdef CUresult cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphExecUpdateResultInfo* resultInfo) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphKernelNodeCopyAttributes' in found_functions}}
-
-cdef CUresult cuGraphKernelNodeCopyAttributes(CUgraphNode dst, CUgraphNode src) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphKernelNodeGetAttribute' in found_functions}}
-
-cdef CUresult cuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, CUkernelNodeAttrValue* value_out) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphKernelNodeSetAttribute' in found_functions}}
-
-cdef CUresult cuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, const CUkernelNodeAttrValue* value) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphDebugDotPrint' in found_functions}}
-
-cdef CUresult cuGraphDebugDotPrint(CUgraph hGraph, const char* path, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuUserObjectCreate' in found_functions}}
-
-cdef CUresult cuUserObjectCreate(CUuserObject* object_out, void* ptr, CUhostFn destroy, unsigned int initialRefcount, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuUserObjectRetain' in found_functions}}
-
-cdef CUresult cuUserObjectRetain(CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuUserObjectRelease' in found_functions}}
-
-cdef CUresult cuUserObjectRelease(CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphRetainUserObject' in found_functions}}
-
-cdef CUresult cuGraphRetainUserObject(CUgraph graph, CUuserObject object, unsigned int count, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphReleaseUserObject' in found_functions}}
-
-cdef CUresult cuGraphReleaseUserObject(CUgraph graph, CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddNode' in found_functions}}
-
-cdef CUresult cuGraphAddNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphAddNode_v2' in found_functions}}
-
-cdef CUresult cuGraphAddNode_v2(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphNodeSetParams(CUgraphNode hNode, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphExecNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphConditionalHandleCreate' in found_functions}}
-
-cdef CUresult cuGraphConditionalHandleCreate(CUgraphConditionalHandle* pHandle_out, CUgraph hGraph, CUcontext ctx, unsigned int defaultLaunchValue, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-
-cdef CUresult cuOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-
-cdef CUresult cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialBlockSize' in found_functions}}
-
-cdef CUresult cuOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialBlockSizeWithFlags' in found_functions}}
-
-cdef CUresult cuOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-
-cdef CUresult cuOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, CUfunction func, int numBlocks, int blockSize) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialClusterSize' in found_functions}}
-
-cdef CUresult cuOccupancyMaxPotentialClusterSize(int* clusterSize, CUfunction func, const CUlaunchConfig* config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveClusters' in found_functions}}
-
-cdef CUresult cuOccupancyMaxActiveClusters(int* numClusters, CUfunction func, const CUlaunchConfig* config) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetArray' in found_functions}}
-
-cdef CUresult cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetMipmappedArray' in found_functions}}
-
-cdef CUresult cuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetAddress_v2' in found_functions}}
-
-cdef CUresult cuTexRefSetAddress(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t numbytes) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetAddress2D_v3' in found_functions}}
-
-cdef CUresult cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR* desc, CUdeviceptr dptr, size_t Pitch) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetFormat' in found_functions}}
-
-cdef CUresult cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetAddressMode' in found_functions}}
-
-cdef CUresult cuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetFilterMode' in found_functions}}
-
-cdef CUresult cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetMipmapFilterMode' in found_functions}}
-
-cdef CUresult cuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetMipmapLevelBias' in found_functions}}
-
-cdef CUresult cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetMipmapLevelClamp' in found_functions}}
-
-cdef CUresult cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetMaxAnisotropy' in found_functions}}
-
-cdef CUresult cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetBorderColor' in found_functions}}
-
-cdef CUresult cuTexRefSetBorderColor(CUtexref hTexRef, float* pBorderColor) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefSetFlags' in found_functions}}
-
-cdef CUresult cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetAddress_v2' in found_functions}}
-
-cdef CUresult cuTexRefGetAddress(CUdeviceptr* pdptr, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetArray' in found_functions}}
-
-cdef CUresult cuTexRefGetArray(CUarray* phArray, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetMipmappedArray' in found_functions}}
-
-cdef CUresult cuTexRefGetMipmappedArray(CUmipmappedArray* phMipmappedArray, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetAddressMode' in found_functions}}
-
-cdef CUresult cuTexRefGetAddressMode(CUaddress_mode* pam, CUtexref hTexRef, int dim) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetFilterMode' in found_functions}}
-
-cdef CUresult cuTexRefGetFilterMode(CUfilter_mode* pfm, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetFormat' in found_functions}}
-
-cdef CUresult cuTexRefGetFormat(CUarray_format* pFormat, int* pNumChannels, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetMipmapFilterMode' in found_functions}}
-
-cdef CUresult cuTexRefGetMipmapFilterMode(CUfilter_mode* pfm, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetMipmapLevelBias' in found_functions}}
-
-cdef CUresult cuTexRefGetMipmapLevelBias(float* pbias, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetMipmapLevelClamp' in found_functions}}
-
-cdef CUresult cuTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetMaxAnisotropy' in found_functions}}
-
-cdef CUresult cuTexRefGetMaxAnisotropy(int* pmaxAniso, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetBorderColor' in found_functions}}
-
-cdef CUresult cuTexRefGetBorderColor(float* pBorderColor, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefGetFlags' in found_functions}}
-
-cdef CUresult cuTexRefGetFlags(unsigned int* pFlags, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefCreate' in found_functions}}
-
-cdef CUresult cuTexRefCreate(CUtexref* pTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexRefDestroy' in found_functions}}
-
-cdef CUresult cuTexRefDestroy(CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSurfRefSetArray' in found_functions}}
-
-cdef CUresult cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSurfRefGetArray' in found_functions}}
-
-cdef CUresult cuSurfRefGetArray(CUarray* phArray, CUsurfref hSurfRef) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexObjectCreate' in found_functions}}
-
-cdef CUresult cuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const CUDA_RESOURCE_VIEW_DESC* pResViewDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexObjectDestroy' in found_functions}}
-
-cdef CUresult cuTexObjectDestroy(CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexObjectGetResourceDesc' in found_functions}}
-
-cdef CUresult cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexObjectGetTextureDesc' in found_functions}}
-
-cdef CUresult cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC* pTexDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTexObjectGetResourceViewDesc' in found_functions}}
-
-cdef CUresult cuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC* pResViewDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSurfObjectCreate' in found_functions}}
-
-cdef CUresult cuSurfObjectCreate(CUsurfObject* pSurfObject, const CUDA_RESOURCE_DESC* pResDesc) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSurfObjectDestroy' in found_functions}}
-
-cdef CUresult cuSurfObjectDestroy(CUsurfObject surfObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuSurfObjectGetResourceDesc' in found_functions}}
-
-cdef CUresult cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUsurfObject surfObject) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTensorMapEncodeTiled' in found_functions}}
-
-cdef CUresult cuTensorMapEncodeTiled(CUtensorMap* tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void* globalAddress, const cuuint64_t* globalDim, const cuuint64_t* globalStrides, const cuuint32_t* boxDim, const cuuint32_t* elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTensorMapEncodeIm2col' in found_functions}}
-
-cdef CUresult cuTensorMapEncodeIm2col(CUtensorMap* tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void* globalAddress, const cuuint64_t* globalDim, const cuuint64_t* globalStrides, const int* pixelBoxLowerCorner, const int* pixelBoxUpperCorner, cuuint32_t channelsPerPixel, cuuint32_t pixelsPerColumn, const cuuint32_t* elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuTensorMapReplaceAddress' in found_functions}}
-
-cdef CUresult cuTensorMapReplaceAddress(CUtensorMap* tensorMap, void* globalAddress) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceCanAccessPeer' in found_functions}}
-
-cdef CUresult cuDeviceCanAccessPeer(int* canAccessPeer, CUdevice dev, CUdevice peerDev) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxEnablePeerAccess' in found_functions}}
-
-cdef CUresult cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxDisablePeerAccess' in found_functions}}
-
-cdef CUresult cuCtxDisablePeerAccess(CUcontext peerContext) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetP2PAttribute' in found_functions}}
-
-cdef CUresult cuDeviceGetP2PAttribute(int* value, CUdevice_P2PAttribute attrib, CUdevice srcDevice, CUdevice dstDevice) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsUnregisterResource' in found_functions}}
-
-cdef CUresult cuGraphicsUnregisterResource(CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsSubResourceGetMappedArray' in found_functions}}
-
-cdef CUresult cuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-
-cdef CUresult cuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsResourceGetMappedPointer_v2' in found_functions}}
-
-cdef CUresult cuGraphicsResourceGetMappedPointer(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsResourceSetMapFlags_v2' in found_functions}}
-
-cdef CUresult cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsMapResources' in found_functions}}
-
-cdef CUresult cuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGraphicsUnmapResources' in found_functions}}
-
-cdef CUresult cuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGetProcAddress_v2' in found_functions}}
-
-cdef CUresult cuGetProcAddress(const char* symbol, void** pfn, int cudaVersion, cuuint64_t flags, CUdriverProcAddressQueryResult* symbolStatus) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCoredumpGetAttribute' in found_functions}}
-
-cdef CUresult cuCoredumpGetAttribute(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCoredumpGetAttributeGlobal' in found_functions}}
-
-cdef CUresult cuCoredumpGetAttributeGlobal(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCoredumpSetAttribute' in found_functions}}
-
-cdef CUresult cuCoredumpSetAttribute(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCoredumpSetAttributeGlobal' in found_functions}}
-
-cdef CUresult cuCoredumpSetAttributeGlobal(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGetExportTable' in found_functions}}
-
-cdef CUresult cuGetExportTable(const void** ppExportTable, const CUuuid* pExportTableId) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxCreate' in found_functions}}
-
-cdef CUresult cuGreenCtxCreate(CUgreenCtx* phCtx, CUdevResourceDesc desc, CUdevice dev, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxDestroy' in found_functions}}
-
-cdef CUresult cuGreenCtxDestroy(CUgreenCtx hCtx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxFromGreenCtx' in found_functions}}
-
-cdef CUresult cuCtxFromGreenCtx(CUcontext* pContext, CUgreenCtx hCtx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDeviceGetDevResource' in found_functions}}
-
-cdef CUresult cuDeviceGetDevResource(CUdevice device, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuCtxGetDevResource' in found_functions}}
-
-cdef CUresult cuCtxGetDevResource(CUcontext hCtx, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxGetDevResource' in found_functions}}
-
-cdef CUresult cuGreenCtxGetDevResource(CUgreenCtx hCtx, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevSmResourceSplitByCount' in found_functions}}
-
-cdef CUresult cuDevSmResourceSplitByCount(CUdevResource* result, unsigned int* nbGroups, const CUdevResource* input, CUdevResource* remaining, unsigned int useFlags, unsigned int minCount) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuDevResourceGenerateDesc' in found_functions}}
-
-cdef CUresult cuDevResourceGenerateDesc(CUdevResourceDesc* phDesc, CUdevResource* resources, unsigned int nbResources) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxRecordEvent' in found_functions}}
-
-cdef CUresult cuGreenCtxRecordEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxWaitEvent' in found_functions}}
-
-cdef CUresult cuGreenCtxWaitEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuStreamGetGreenCtx' in found_functions}}
-
-cdef CUresult cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuGreenCtxStreamCreate' in found_functions}}
-
-cdef CUresult cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuProfilerStart' in found_functions}}
-
-cdef CUresult cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if 'cuProfilerStop' in found_functions}}
-
-cdef CUresult cuProfilerStop() except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsEGLRegisterImage(CUgraphicsResource* pCudaResource, EGLImageKHR image, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamConsumerConnect(CUeglStreamConnection* conn, EGLStreamKHR stream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamConsumerConnectWithFlags(CUeglStreamConnection* conn, EGLStreamKHR stream, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamConsumerDisconnect(CUeglStreamConnection* conn) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamConsumerAcquireFrame(CUeglStreamConnection* conn, CUgraphicsResource* pCudaResource, CUstream* pStream, unsigned int timeout) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamConsumerReleaseFrame(CUeglStreamConnection* conn, CUgraphicsResource pCudaResource, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamProducerConnect(CUeglStreamConnection* conn, EGLStreamKHR stream, EGLint width, EGLint height) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamProducerDisconnect(CUeglStreamConnection* conn) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamProducerPresentFrame(CUeglStreamConnection* conn, CUeglFrame eglframe, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamProducerReturnFrame(CUeglStreamConnection* conn, CUeglFrame* eglframe, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsResourceGetMappedEglFrame(CUeglFrame* eglFrame, CUgraphicsResource resource, unsigned int index, unsigned int mipLevel) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEventCreateFromEGLSync(CUevent* phEvent, EGLSyncKHR eglSync, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGLGetDevices(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuVDPAUGetDevice(CUdevice* pDevice, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuVDPAUCtxCreate(CUcontext* pCtx, unsigned int flags, CUdevice device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsVDPAURegisterVideoSurface(CUgraphicsResource* pCudaResource, VdpVideoSurface vdpSurface, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsVDPAURegisterOutputSurface(CUgraphicsResource* pCudaResource, VdpOutputSurface vdpSurface, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
-{{endif}}
-
-cdef enum: CUDA_VERSION = 12060
-
-cdef enum: CU_IPC_HANDLE_SIZE = 64
-
-cdef enum: CU_STREAM_LEGACY = 1
-
-cdef enum: CU_STREAM_PER_THREAD = 2
-
-cdef enum: CU_COMPUTE_ACCELERATED_TARGET_BASE = 65536
-
-cdef enum: CU_GRAPH_COND_ASSIGN_DEFAULT = 1
-
-cdef enum: CU_GRAPH_KERNEL_NODE_PORT_DEFAULT = 0
-
-cdef enum: CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC = 1
-
-cdef enum: CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER = 2
-
-cdef enum: CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1
-
-cdef enum: CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE = 2
-
-cdef enum: CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION = 4
-
-cdef enum: CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = 5
-
-cdef enum: CU_KERNEL_NODE_ATTRIBUTE_PRIORITY = 8
-
-cdef enum: CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = 9
-
-cdef enum: CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN = 10
-
-cdef enum: CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13
-
-cdef enum: CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 14
-
-cdef enum: CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1
-
-cdef enum: CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3
-
-cdef enum: CU_STREAM_ATTRIBUTE_PRIORITY = 8
-
-cdef enum: CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = 9
-
-cdef enum: CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN = 10
-
-cdef enum: CU_MEMHOSTALLOC_PORTABLE = 1
-
-cdef enum: CU_MEMHOSTALLOC_DEVICEMAP = 2
-
-cdef enum: CU_MEMHOSTALLOC_WRITECOMBINED = 4
-
-cdef enum: CU_MEMHOSTREGISTER_PORTABLE = 1
-
-cdef enum: CU_MEMHOSTREGISTER_DEVICEMAP = 2
-
-cdef enum: CU_MEMHOSTREGISTER_IOMEMORY = 4
-
-cdef enum: CU_MEMHOSTREGISTER_READ_ONLY = 8
-
-cdef enum: CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL = 1
-
-cdef enum: CU_TENSOR_MAP_NUM_QWORDS = 16
-
-cdef enum: CUDA_EXTERNAL_MEMORY_DEDICATED = 1
-
-cdef enum: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC = 1
-
-cdef enum: CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC = 2
-
-cdef enum: CUDA_NVSCISYNC_ATTR_SIGNAL = 1
-
-cdef enum: CUDA_NVSCISYNC_ATTR_WAIT = 2
-
-cdef enum: CU_MEM_CREATE_USAGE_TILE_POOL = 1
-
-cdef enum: CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC = 1
-
-cdef enum: CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC = 2
-
-cdef enum: CUDA_ARRAY3D_LAYERED = 1
-
-cdef enum: CUDA_ARRAY3D_2DARRAY = 1
-
-cdef enum: CUDA_ARRAY3D_SURFACE_LDST = 2
-
-cdef enum: CUDA_ARRAY3D_CUBEMAP = 4
-
-cdef enum: CUDA_ARRAY3D_TEXTURE_GATHER = 8
-
-cdef enum: CUDA_ARRAY3D_DEPTH_TEXTURE = 16
-
-cdef enum: CUDA_ARRAY3D_COLOR_ATTACHMENT = 32
-
-cdef enum: CUDA_ARRAY3D_SPARSE = 64
-
-cdef enum: CUDA_ARRAY3D_DEFERRED_MAPPING = 128
-
-cdef enum: CUDA_ARRAY3D_VIDEO_ENCODE_DECODE = 256
-
-cdef enum: CU_TRSA_OVERRIDE_FORMAT = 1
-
-cdef enum: CU_TRSF_READ_AS_INTEGER = 1
-
-cdef enum: CU_TRSF_NORMALIZED_COORDINATES = 2
-
-cdef enum: CU_TRSF_SRGB = 16
-
-cdef enum: CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION = 32
-
-cdef enum: CU_TRSF_SEAMLESS_CUBEMAP = 64
-
-cdef enum: CU_LAUNCH_PARAM_END_AS_INT = 0
-
-cdef enum: CU_LAUNCH_PARAM_END = 0
-
-cdef enum: CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT = 1
-
-cdef enum: CU_LAUNCH_PARAM_BUFFER_POINTER = 1
-
-cdef enum: CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT = 2
-
-cdef enum: CU_LAUNCH_PARAM_BUFFER_SIZE = 2
-
-cdef enum: CU_PARAM_TR_DEFAULT = -1
-
-cdef enum: CU_DEVICE_CPU = -1
-
-cdef enum: CU_DEVICE_INVALID = -2
-
-cdef enum: RESOURCE_ABI_VERSION = 1
-
-cdef enum: RESOURCE_ABI_EXTERNAL_BYTES = 48
-
-cdef enum: MAX_PLANES = 3
-
-cdef enum: CUDA_EGL_INFINITE_TIMEOUT = 4294967295
\ No newline at end of file
diff --git a/cuda_bindings/cuda/bindings/cydriver.pyx.in b/cuda_bindings/cuda/bindings/cydriver.pyx.in
deleted file mode 100644
index 3809e358..00000000
--- a/cuda_bindings/cuda/bindings/cydriver.pyx.in
+++ /dev/null
@@ -1,2744 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-cimport cuda.bindings._bindings.cydriver as cydriver
-
-{{if 'cuGetErrorString' in found_functions}}
-
-cdef CUresult cuGetErrorString(CUresult error, const char** pStr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGetErrorString(error, pStr)
-{{endif}}
-
-{{if 'cuGetErrorName' in found_functions}}
-
-cdef CUresult cuGetErrorName(CUresult error, const char** pStr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGetErrorName(error, pStr)
-{{endif}}
-
-{{if 'cuInit' in found_functions}}
-
-cdef CUresult cuInit(unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuInit(Flags)
-{{endif}}
-
-{{if 'cuDriverGetVersion' in found_functions}}
-
-cdef CUresult cuDriverGetVersion(int* driverVersion) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDriverGetVersion(driverVersion)
-{{endif}}
-
-{{if 'cuDeviceGet' in found_functions}}
-
-cdef CUresult cuDeviceGet(CUdevice* device, int ordinal) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGet(device, ordinal)
-{{endif}}
-
-{{if 'cuDeviceGetCount' in found_functions}}
-
-cdef CUresult cuDeviceGetCount(int* count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetCount(count)
-{{endif}}
-
-{{if 'cuDeviceGetName' in found_functions}}
-
-cdef CUresult cuDeviceGetName(char* name, int length, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetName(name, length, dev)
-{{endif}}
-
-{{if 'cuDeviceGetUuid' in found_functions}}
-
-cdef CUresult cuDeviceGetUuid(CUuuid* uuid, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetUuid(uuid, dev)
-{{endif}}
-
-{{if 'cuDeviceGetUuid_v2' in found_functions}}
-
-cdef CUresult cuDeviceGetUuid_v2(CUuuid* uuid, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetUuid_v2(uuid, dev)
-{{endif}}
-
-{{if 'cuDeviceGetLuid' in found_functions}}
-
-cdef CUresult cuDeviceGetLuid(char* luid, unsigned int* deviceNodeMask, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetLuid(luid, deviceNodeMask, dev)
-{{endif}}
-
-{{if 'cuDeviceTotalMem_v2' in found_functions}}
-
-cdef CUresult cuDeviceTotalMem(size_t* numbytes, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceTotalMem_v2(numbytes, dev)
-{{endif}}
-
-{{if 'cuDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-
-cdef CUresult cuDeviceGetTexture1DLinearMaxWidth(size_t* maxWidthInElements, CUarray_format pformat, unsigned numChannels, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetTexture1DLinearMaxWidth(maxWidthInElements, pformat, numChannels, dev)
-{{endif}}
-
-{{if 'cuDeviceGetAttribute' in found_functions}}
-
-cdef CUresult cuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetAttribute(pi, attrib, dev)
-{{endif}}
-
-{{if 'cuDeviceGetNvSciSyncAttributes' in found_functions}}
-
-cdef CUresult cuDeviceGetNvSciSyncAttributes(void* nvSciSyncAttrList, CUdevice dev, int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, dev, flags)
-{{endif}}
-
-{{if 'cuDeviceSetMemPool' in found_functions}}
-
-cdef CUresult cuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceSetMemPool(dev, pool)
-{{endif}}
-
-{{if 'cuDeviceGetMemPool' in found_functions}}
-
-cdef CUresult cuDeviceGetMemPool(CUmemoryPool* pool, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetMemPool(pool, dev)
-{{endif}}
-
-{{if 'cuDeviceGetDefaultMemPool' in found_functions}}
-
-cdef CUresult cuDeviceGetDefaultMemPool(CUmemoryPool* pool_out, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetDefaultMemPool(pool_out, dev)
-{{endif}}
-
-{{if 'cuDeviceGetExecAffinitySupport' in found_functions}}
-
-cdef CUresult cuDeviceGetExecAffinitySupport(int* pi, CUexecAffinityType typename, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetExecAffinitySupport(pi, typename, dev)
-{{endif}}
-
-{{if 'cuFlushGPUDirectRDMAWrites' in found_functions}}
-
-cdef CUresult cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFlushGPUDirectRDMAWrites(target, scope)
-{{endif}}
-
-{{if 'cuDeviceGetProperties' in found_functions}}
-
-cdef CUresult cuDeviceGetProperties(CUdevprop* prop, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetProperties(prop, dev)
-{{endif}}
-
-{{if 'cuDeviceComputeCapability' in found_functions}}
-
-cdef CUresult cuDeviceComputeCapability(int* major, int* minor, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceComputeCapability(major, minor, dev)
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxRetain' in found_functions}}
-
-cdef CUresult cuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDevicePrimaryCtxRetain(pctx, dev)
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxRelease_v2' in found_functions}}
-
-cdef CUresult cuDevicePrimaryCtxRelease(CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDevicePrimaryCtxRelease_v2(dev)
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxSetFlags_v2' in found_functions}}
-
-cdef CUresult cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDevicePrimaryCtxSetFlags_v2(dev, flags)
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxGetState' in found_functions}}
-
-cdef CUresult cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int* flags, int* active) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDevicePrimaryCtxGetState(dev, flags, active)
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxReset_v2' in found_functions}}
-
-cdef CUresult cuDevicePrimaryCtxReset(CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDevicePrimaryCtxReset_v2(dev)
-{{endif}}
-
-{{if 'cuCtxCreate_v2' in found_functions}}
-
-cdef CUresult cuCtxCreate(CUcontext* pctx, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxCreate_v2(pctx, flags, dev)
-{{endif}}
-
-{{if 'cuCtxCreate_v3' in found_functions}}
-
-cdef CUresult cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxCreate_v3(pctx, paramsArray, numParams, flags, dev)
-{{endif}}
-
-{{if 'cuCtxCreate_v4' in found_functions}}
-
-cdef CUresult cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxCreate_v4(pctx, ctxCreateParams, flags, dev)
-{{endif}}
-
-{{if 'cuCtxDestroy_v2' in found_functions}}
-
-cdef CUresult cuCtxDestroy(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxDestroy_v2(ctx)
-{{endif}}
-
-{{if 'cuCtxPushCurrent_v2' in found_functions}}
-
-cdef CUresult cuCtxPushCurrent(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxPushCurrent_v2(ctx)
-{{endif}}
-
-{{if 'cuCtxPopCurrent_v2' in found_functions}}
-
-cdef CUresult cuCtxPopCurrent(CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxPopCurrent_v2(pctx)
-{{endif}}
-
-{{if 'cuCtxSetCurrent' in found_functions}}
-
-cdef CUresult cuCtxSetCurrent(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxSetCurrent(ctx)
-{{endif}}
-
-{{if 'cuCtxGetCurrent' in found_functions}}
-
-cdef CUresult cuCtxGetCurrent(CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxGetCurrent(pctx)
-{{endif}}
-
-{{if 'cuCtxGetDevice' in found_functions}}
-
-cdef CUresult cuCtxGetDevice(CUdevice* device) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxGetDevice(device)
-{{endif}}
-
-{{if 'cuCtxGetFlags' in found_functions}}
-
-cdef CUresult cuCtxGetFlags(unsigned int* flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxGetFlags(flags)
-{{endif}}
-
-{{if 'cuCtxSetFlags' in found_functions}}
-
-cdef CUresult cuCtxSetFlags(unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxSetFlags(flags)
-{{endif}}
-
-{{if 'cuCtxGetId' in found_functions}}
-
-cdef CUresult cuCtxGetId(CUcontext ctx, unsigned long long* ctxId) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxGetId(ctx, ctxId)
-{{endif}}
-
-{{if 'cuCtxSynchronize' in found_functions}}
-
-cdef CUresult cuCtxSynchronize() except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxSynchronize()
-{{endif}}
-
-{{if 'cuCtxSetLimit' in found_functions}}
-
-cdef CUresult cuCtxSetLimit(CUlimit limit, size_t value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxSetLimit(limit, value)
-{{endif}}
-
-{{if 'cuCtxGetLimit' in found_functions}}
-
-cdef CUresult cuCtxGetLimit(size_t* pvalue, CUlimit limit) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxGetLimit(pvalue, limit)
-{{endif}}
-
-{{if 'cuCtxGetCacheConfig' in found_functions}}
-
-cdef CUresult cuCtxGetCacheConfig(CUfunc_cache* pconfig) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxGetCacheConfig(pconfig)
-{{endif}}
-
-{{if 'cuCtxSetCacheConfig' in found_functions}}
-
-cdef CUresult cuCtxSetCacheConfig(CUfunc_cache config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxSetCacheConfig(config)
-{{endif}}
-
-{{if 'cuCtxGetApiVersion' in found_functions}}
-
-cdef CUresult cuCtxGetApiVersion(CUcontext ctx, unsigned int* version) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxGetApiVersion(ctx, version)
-{{endif}}
-
-{{if 'cuCtxGetStreamPriorityRange' in found_functions}}
-
-cdef CUresult cuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxGetStreamPriorityRange(leastPriority, greatestPriority)
-{{endif}}
-
-{{if 'cuCtxResetPersistingL2Cache' in found_functions}}
-
-cdef CUresult cuCtxResetPersistingL2Cache() except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxResetPersistingL2Cache()
-{{endif}}
-
-{{if 'cuCtxGetExecAffinity' in found_functions}}
-
-cdef CUresult cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAffinityType typename) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxGetExecAffinity(pExecAffinity, typename)
-{{endif}}
-
-{{if 'cuCtxRecordEvent' in found_functions}}
-
-cdef CUresult cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxRecordEvent(hCtx, hEvent)
-{{endif}}
-
-{{if 'cuCtxWaitEvent' in found_functions}}
-
-cdef CUresult cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxWaitEvent(hCtx, hEvent)
-{{endif}}
-
-{{if 'cuCtxAttach' in found_functions}}
-
-cdef CUresult cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxAttach(pctx, flags)
-{{endif}}
-
-{{if 'cuCtxDetach' in found_functions}}
-
-cdef CUresult cuCtxDetach(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxDetach(ctx)
-{{endif}}
-
-{{if 'cuCtxGetSharedMemConfig' in found_functions}}
-
-cdef CUresult cuCtxGetSharedMemConfig(CUsharedconfig* pConfig) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxGetSharedMemConfig(pConfig)
-{{endif}}
-
-{{if 'cuCtxSetSharedMemConfig' in found_functions}}
-
-cdef CUresult cuCtxSetSharedMemConfig(CUsharedconfig config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxSetSharedMemConfig(config)
-{{endif}}
-
-{{if 'cuModuleLoad' in found_functions}}
-
-cdef CUresult cuModuleLoad(CUmodule* module, const char* fname) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleLoad(module, fname)
-{{endif}}
-
-{{if 'cuModuleLoadData' in found_functions}}
-
-cdef CUresult cuModuleLoadData(CUmodule* module, const void* image) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleLoadData(module, image)
-{{endif}}
-
-{{if 'cuModuleLoadDataEx' in found_functions}}
-
-cdef CUresult cuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleLoadDataEx(module, image, numOptions, options, optionValues)
-{{endif}}
-
-{{if 'cuModuleLoadFatBinary' in found_functions}}
-
-cdef CUresult cuModuleLoadFatBinary(CUmodule* module, const void* fatCubin) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleLoadFatBinary(module, fatCubin)
-{{endif}}
-
-{{if 'cuModuleUnload' in found_functions}}
-
-cdef CUresult cuModuleUnload(CUmodule hmod) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleUnload(hmod)
-{{endif}}
-
-{{if 'cuModuleGetLoadingMode' in found_functions}}
-
-cdef CUresult cuModuleGetLoadingMode(CUmoduleLoadingMode* mode) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleGetLoadingMode(mode)
-{{endif}}
-
-{{if 'cuModuleGetFunction' in found_functions}}
-
-cdef CUresult cuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleGetFunction(hfunc, hmod, name)
-{{endif}}
-
-{{if 'cuModuleGetFunctionCount' in found_functions}}
-
-cdef CUresult cuModuleGetFunctionCount(unsigned int* count, CUmodule mod) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleGetFunctionCount(count, mod)
-{{endif}}
-
-{{if 'cuModuleEnumerateFunctions' in found_functions}}
-
-cdef CUresult cuModuleEnumerateFunctions(CUfunction* functions, unsigned int numFunctions, CUmodule mod) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleEnumerateFunctions(functions, numFunctions, mod)
-{{endif}}
-
-{{if 'cuModuleGetGlobal_v2' in found_functions}}
-
-cdef CUresult cuModuleGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleGetGlobal_v2(dptr, numbytes, hmod, name)
-{{endif}}
-
-{{if 'cuLinkCreate_v2' in found_functions}}
-
-cdef CUresult cuLinkCreate(unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLinkCreate_v2(numOptions, options, optionValues, stateOut)
-{{endif}}
-
-{{if 'cuLinkAddData_v2' in found_functions}}
-
-cdef CUresult cuLinkAddData(CUlinkState state, CUjitInputType typename, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLinkAddData_v2(state, typename, data, size, name, numOptions, options, optionValues)
-{{endif}}
-
-{{if 'cuLinkAddFile_v2' in found_functions}}
-
-cdef CUresult cuLinkAddFile(CUlinkState state, CUjitInputType typename, const char* path, unsigned int numOptions, CUjit_option* options, void** optionValues) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLinkAddFile_v2(state, typename, path, numOptions, options, optionValues)
-{{endif}}
-
-{{if 'cuLinkComplete' in found_functions}}
-
-cdef CUresult cuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLinkComplete(state, cubinOut, sizeOut)
-{{endif}}
-
-{{if 'cuLinkDestroy' in found_functions}}
-
-cdef CUresult cuLinkDestroy(CUlinkState state) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLinkDestroy(state)
-{{endif}}
-
-{{if 'cuModuleGetTexRef' in found_functions}}
-
-cdef CUresult cuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleGetTexRef(pTexRef, hmod, name)
-{{endif}}
-
-{{if 'cuModuleGetSurfRef' in found_functions}}
-
-cdef CUresult cuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuModuleGetSurfRef(pSurfRef, hmod, name)
-{{endif}}
-
-{{if 'cuLibraryLoadData' in found_functions}}
-
-cdef CUresult cuLibraryLoadData(CUlibrary* library, const void* code, CUjit_option* jitOptions, void** jitOptionsValues, unsigned int numJitOptions, CUlibraryOption* libraryOptions, void** libraryOptionValues, unsigned int numLibraryOptions) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLibraryLoadData(library, code, jitOptions, jitOptionsValues, numJitOptions, libraryOptions, libraryOptionValues, numLibraryOptions)
-{{endif}}
-
-{{if 'cuLibraryLoadFromFile' in found_functions}}
-
-cdef CUresult cuLibraryLoadFromFile(CUlibrary* library, const char* fileName, CUjit_option* jitOptions, void** jitOptionsValues, unsigned int numJitOptions, CUlibraryOption* libraryOptions, void** libraryOptionValues, unsigned int numLibraryOptions) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLibraryLoadFromFile(library, fileName, jitOptions, jitOptionsValues, numJitOptions, libraryOptions, libraryOptionValues, numLibraryOptions)
-{{endif}}
-
-{{if 'cuLibraryUnload' in found_functions}}
-
-cdef CUresult cuLibraryUnload(CUlibrary library) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLibraryUnload(library)
-{{endif}}
-
-{{if 'cuLibraryGetKernel' in found_functions}}
-
-cdef CUresult cuLibraryGetKernel(CUkernel* pKernel, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLibraryGetKernel(pKernel, library, name)
-{{endif}}
-
-{{if 'cuLibraryGetKernelCount' in found_functions}}
-
-cdef CUresult cuLibraryGetKernelCount(unsigned int* count, CUlibrary lib) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLibraryGetKernelCount(count, lib)
-{{endif}}
-
-{{if 'cuLibraryEnumerateKernels' in found_functions}}
-
-cdef CUresult cuLibraryEnumerateKernels(CUkernel* kernels, unsigned int numKernels, CUlibrary lib) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLibraryEnumerateKernels(kernels, numKernels, lib)
-{{endif}}
-
-{{if 'cuLibraryGetModule' in found_functions}}
-
-cdef CUresult cuLibraryGetModule(CUmodule* pMod, CUlibrary library) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLibraryGetModule(pMod, library)
-{{endif}}
-
-{{if 'cuKernelGetFunction' in found_functions}}
-
-cdef CUresult cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuKernelGetFunction(pFunc, kernel)
-{{endif}}
-
-{{if 'cuKernelGetLibrary' in found_functions}}
-
-cdef CUresult cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuKernelGetLibrary(pLib, kernel)
-{{endif}}
-
-{{if 'cuLibraryGetGlobal' in found_functions}}
-
-cdef CUresult cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLibraryGetGlobal(dptr, numbytes, library, name)
-{{endif}}
-
-{{if 'cuLibraryGetManaged' in found_functions}}
-
-cdef CUresult cuLibraryGetManaged(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLibraryGetManaged(dptr, numbytes, library, name)
-{{endif}}
-
-{{if 'cuLibraryGetUnifiedFunction' in found_functions}}
-
-cdef CUresult cuLibraryGetUnifiedFunction(void** fptr, CUlibrary library, const char* symbol) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLibraryGetUnifiedFunction(fptr, library, symbol)
-{{endif}}
-
-{{if 'cuKernelGetAttribute' in found_functions}}
-
-cdef CUresult cuKernelGetAttribute(int* pi, CUfunction_attribute attrib, CUkernel kernel, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuKernelGetAttribute(pi, attrib, kernel, dev)
-{{endif}}
-
-{{if 'cuKernelSetAttribute' in found_functions}}
-
-cdef CUresult cuKernelSetAttribute(CUfunction_attribute attrib, int val, CUkernel kernel, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuKernelSetAttribute(attrib, val, kernel, dev)
-{{endif}}
-
-{{if 'cuKernelSetCacheConfig' in found_functions}}
-
-cdef CUresult cuKernelSetCacheConfig(CUkernel kernel, CUfunc_cache config, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuKernelSetCacheConfig(kernel, config, dev)
-{{endif}}
-
-{{if 'cuKernelGetName' in found_functions}}
-
-cdef CUresult cuKernelGetName(const char** name, CUkernel hfunc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuKernelGetName(name, hfunc)
-{{endif}}
-
-{{if 'cuKernelGetParamInfo' in found_functions}}
-
-cdef CUresult cuKernelGetParamInfo(CUkernel kernel, size_t paramIndex, size_t* paramOffset, size_t* paramSize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuKernelGetParamInfo(kernel, paramIndex, paramOffset, paramSize)
-{{endif}}
-
-{{if 'cuMemGetInfo_v2' in found_functions}}
-
-cdef CUresult cuMemGetInfo(size_t* free, size_t* total) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemGetInfo_v2(free, total)
-{{endif}}
-
-{{if 'cuMemAlloc_v2' in found_functions}}
-
-cdef CUresult cuMemAlloc(CUdeviceptr* dptr, size_t bytesize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemAlloc_v2(dptr, bytesize)
-{{endif}}
-
-{{if 'cuMemAllocPitch_v2' in found_functions}}
-
-cdef CUresult cuMemAllocPitch(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemAllocPitch_v2(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes)
-{{endif}}
-
-{{if 'cuMemFree_v2' in found_functions}}
-
-cdef CUresult cuMemFree(CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemFree_v2(dptr)
-{{endif}}
-
-{{if 'cuMemGetAddressRange_v2' in found_functions}}
-
-cdef CUresult cuMemGetAddressRange(CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemGetAddressRange_v2(pbase, psize, dptr)
-{{endif}}
-
-{{if 'cuMemAllocHost_v2' in found_functions}}
-
-cdef CUresult cuMemAllocHost(void** pp, size_t bytesize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemAllocHost_v2(pp, bytesize)
-{{endif}}
-
-{{if 'cuMemFreeHost' in found_functions}}
-
-cdef CUresult cuMemFreeHost(void* p) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemFreeHost(p)
-{{endif}}
-
-{{if 'cuMemHostAlloc' in found_functions}}
-
-cdef CUresult cuMemHostAlloc(void** pp, size_t bytesize, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemHostAlloc(pp, bytesize, Flags)
-{{endif}}
-
-{{if 'cuMemHostGetDevicePointer_v2' in found_functions}}
-
-cdef CUresult cuMemHostGetDevicePointer(CUdeviceptr* pdptr, void* p, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemHostGetDevicePointer_v2(pdptr, p, Flags)
-{{endif}}
-
-{{if 'cuMemHostGetFlags' in found_functions}}
-
-cdef CUresult cuMemHostGetFlags(unsigned int* pFlags, void* p) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemHostGetFlags(pFlags, p)
-{{endif}}
-
-{{if 'cuMemAllocManaged' in found_functions}}
-
-cdef CUresult cuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemAllocManaged(dptr, bytesize, flags)
-{{endif}}
-
-{{if 'cuDeviceRegisterAsyncNotification' in found_functions}}
-
-cdef CUresult cuDeviceRegisterAsyncNotification(CUdevice device, CUasyncCallback callbackFunc, void* userData, CUasyncCallbackHandle* callback) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceRegisterAsyncNotification(device, callbackFunc, userData, callback)
-{{endif}}
-
-{{if 'cuDeviceUnregisterAsyncNotification' in found_functions}}
-
-cdef CUresult cuDeviceUnregisterAsyncNotification(CUdevice device, CUasyncCallbackHandle callback) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceUnregisterAsyncNotification(device, callback)
-{{endif}}
-
-{{if 'cuDeviceGetByPCIBusId' in found_functions}}
-
-cdef CUresult cuDeviceGetByPCIBusId(CUdevice* dev, const char* pciBusId) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetByPCIBusId(dev, pciBusId)
-{{endif}}
-
-{{if 'cuDeviceGetPCIBusId' in found_functions}}
-
-cdef CUresult cuDeviceGetPCIBusId(char* pciBusId, int length, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetPCIBusId(pciBusId, length, dev)
-{{endif}}
-
-{{if 'cuIpcGetEventHandle' in found_functions}}
-
-cdef CUresult cuIpcGetEventHandle(CUipcEventHandle* pHandle, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuIpcGetEventHandle(pHandle, event)
-{{endif}}
-
-{{if 'cuIpcOpenEventHandle' in found_functions}}
-
-cdef CUresult cuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuIpcOpenEventHandle(phEvent, handle)
-{{endif}}
-
-{{if 'cuIpcGetMemHandle' in found_functions}}
-
-cdef CUresult cuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuIpcGetMemHandle(pHandle, dptr)
-{{endif}}
-
-{{if 'cuIpcOpenMemHandle_v2' in found_functions}}
-
-cdef CUresult cuIpcOpenMemHandle(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuIpcOpenMemHandle_v2(pdptr, handle, Flags)
-{{endif}}
-
-{{if 'cuIpcCloseMemHandle' in found_functions}}
-
-cdef CUresult cuIpcCloseMemHandle(CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuIpcCloseMemHandle(dptr)
-{{endif}}
-
-{{if 'cuMemHostRegister_v2' in found_functions}}
-
-cdef CUresult cuMemHostRegister(void* p, size_t bytesize, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemHostRegister_v2(p, bytesize, Flags)
-{{endif}}
-
-{{if 'cuMemHostUnregister' in found_functions}}
-
-cdef CUresult cuMemHostUnregister(void* p) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemHostUnregister(p)
-{{endif}}
-
-{{if 'cuMemcpy' in found_functions}}
-
-cdef CUresult cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpy(dst, src, ByteCount)
-{{endif}}
-
-{{if 'cuMemcpyPeer' in found_functions}}
-
-cdef CUresult cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyPeer(dstDevice, dstContext, srcDevice, srcContext, ByteCount)
-{{endif}}
-
-{{if 'cuMemcpyHtoD_v2' in found_functions}}
-
-cdef CUresult cuMemcpyHtoD(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyHtoD_v2(dstDevice, srcHost, ByteCount)
-{{endif}}
-
-{{if 'cuMemcpyDtoH_v2' in found_functions}}
-
-cdef CUresult cuMemcpyDtoH(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyDtoH_v2(dstHost, srcDevice, ByteCount)
-{{endif}}
-
-{{if 'cuMemcpyDtoD_v2' in found_functions}}
-
-cdef CUresult cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyDtoD_v2(dstDevice, srcDevice, ByteCount)
-{{endif}}
-
-{{if 'cuMemcpyDtoA_v2' in found_functions}}
-
-cdef CUresult cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyDtoA_v2(dstArray, dstOffset, srcDevice, ByteCount)
-{{endif}}
-
-{{if 'cuMemcpyAtoD_v2' in found_functions}}
-
-cdef CUresult cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyAtoD_v2(dstDevice, srcArray, srcOffset, ByteCount)
-{{endif}}
-
-{{if 'cuMemcpyHtoA_v2' in found_functions}}
-
-cdef CUresult cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyHtoA_v2(dstArray, dstOffset, srcHost, ByteCount)
-{{endif}}
-
-{{if 'cuMemcpyAtoH_v2' in found_functions}}
-
-cdef CUresult cuMemcpyAtoH(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyAtoH_v2(dstHost, srcArray, srcOffset, ByteCount)
-{{endif}}
-
-{{if 'cuMemcpyAtoA_v2' in found_functions}}
-
-cdef CUresult cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyAtoA_v2(dstArray, dstOffset, srcArray, srcOffset, ByteCount)
-{{endif}}
-
-{{if 'cuMemcpy2D_v2' in found_functions}}
-
-cdef CUresult cuMemcpy2D(const CUDA_MEMCPY2D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpy2D_v2(pCopy)
-{{endif}}
-
-{{if 'cuMemcpy2DUnaligned_v2' in found_functions}}
-
-cdef CUresult cuMemcpy2DUnaligned(const CUDA_MEMCPY2D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpy2DUnaligned_v2(pCopy)
-{{endif}}
-
-{{if 'cuMemcpy3D_v2' in found_functions}}
-
-cdef CUresult cuMemcpy3D(const CUDA_MEMCPY3D* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpy3D_v2(pCopy)
-{{endif}}
-
-{{if 'cuMemcpy3DPeer' in found_functions}}
-
-cdef CUresult cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER* pCopy) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpy3DPeer(pCopy)
-{{endif}}
-
-{{if 'cuMemcpyAsync' in found_functions}}
-
-cdef CUresult cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyAsync(dst, src, ByteCount, hStream)
-{{endif}}
-
-{{if 'cuMemcpyPeerAsync' in found_functions}}
-
-cdef CUresult cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyPeerAsync(dstDevice, dstContext, srcDevice, srcContext, ByteCount, hStream)
-{{endif}}
-
-{{if 'cuMemcpyHtoDAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyHtoDAsync_v2(dstDevice, srcHost, ByteCount, hStream)
-{{endif}}
-
-{{if 'cuMemcpyDtoHAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpyDtoHAsync(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyDtoHAsync_v2(dstHost, srcDevice, ByteCount, hStream)
-{{endif}}
-
-{{if 'cuMemcpyDtoDAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyDtoDAsync_v2(dstDevice, srcDevice, ByteCount, hStream)
-{{endif}}
-
-{{if 'cuMemcpyHtoAAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyHtoAAsync_v2(dstArray, dstOffset, srcHost, ByteCount, hStream)
-{{endif}}
-
-{{if 'cuMemcpyAtoHAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpyAtoHAsync(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpyAtoHAsync_v2(dstHost, srcArray, srcOffset, ByteCount, hStream)
-{{endif}}
-
-{{if 'cuMemcpy2DAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpy2DAsync(const CUDA_MEMCPY2D* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpy2DAsync_v2(pCopy, hStream)
-{{endif}}
-
-{{if 'cuMemcpy3DAsync_v2' in found_functions}}
-
-cdef CUresult cuMemcpy3DAsync(const CUDA_MEMCPY3D* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpy3DAsync_v2(pCopy, hStream)
-{{endif}}
-
-{{if 'cuMemcpy3DPeerAsync' in found_functions}}
-
-cdef CUresult cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER* pCopy, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemcpy3DPeerAsync(pCopy, hStream)
-{{endif}}
-
-{{if 'cuMemsetD8_v2' in found_functions}}
-
-cdef CUresult cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD8_v2(dstDevice, uc, N)
-{{endif}}
-
-{{if 'cuMemsetD16_v2' in found_functions}}
-
-cdef CUresult cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD16_v2(dstDevice, us, N)
-{{endif}}
-
-{{if 'cuMemsetD32_v2' in found_functions}}
-
-cdef CUresult cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD32_v2(dstDevice, ui, N)
-{{endif}}
-
-{{if 'cuMemsetD2D8_v2' in found_functions}}
-
-cdef CUresult cuMemsetD2D8(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD2D8_v2(dstDevice, dstPitch, uc, Width, Height)
-{{endif}}
-
-{{if 'cuMemsetD2D16_v2' in found_functions}}
-
-cdef CUresult cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD2D16_v2(dstDevice, dstPitch, us, Width, Height)
-{{endif}}
-
-{{if 'cuMemsetD2D32_v2' in found_functions}}
-
-cdef CUresult cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD2D32_v2(dstDevice, dstPitch, ui, Width, Height)
-{{endif}}
-
-{{if 'cuMemsetD8Async' in found_functions}}
-
-cdef CUresult cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD8Async(dstDevice, uc, N, hStream)
-{{endif}}
-
-{{if 'cuMemsetD16Async' in found_functions}}
-
-cdef CUresult cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD16Async(dstDevice, us, N, hStream)
-{{endif}}
-
-{{if 'cuMemsetD32Async' in found_functions}}
-
-cdef CUresult cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD32Async(dstDevice, ui, N, hStream)
-{{endif}}
-
-{{if 'cuMemsetD2D8Async' in found_functions}}
-
-cdef CUresult cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD2D8Async(dstDevice, dstPitch, uc, Width, Height, hStream)
-{{endif}}
-
-{{if 'cuMemsetD2D16Async' in found_functions}}
-
-cdef CUresult cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD2D16Async(dstDevice, dstPitch, us, Width, Height, hStream)
-{{endif}}
-
-{{if 'cuMemsetD2D32Async' in found_functions}}
-
-cdef CUresult cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemsetD2D32Async(dstDevice, dstPitch, ui, Width, Height, hStream)
-{{endif}}
-
-{{if 'cuArrayCreate_v2' in found_functions}}
-
-cdef CUresult cuArrayCreate(CUarray* pHandle, const CUDA_ARRAY_DESCRIPTOR* pAllocateArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuArrayCreate_v2(pHandle, pAllocateArray)
-{{endif}}
-
-{{if 'cuArrayGetDescriptor_v2' in found_functions}}
-
-cdef CUresult cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor, CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuArrayGetDescriptor_v2(pArrayDescriptor, hArray)
-{{endif}}
-
-{{if 'cuArrayGetSparseProperties' in found_functions}}
-
-cdef CUresult cuArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUarray array) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuArrayGetSparseProperties(sparseProperties, array)
-{{endif}}
-
-{{if 'cuMipmappedArrayGetSparseProperties' in found_functions}}
-
-cdef CUresult cuMipmappedArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUmipmappedArray mipmap) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMipmappedArrayGetSparseProperties(sparseProperties, mipmap)
-{{endif}}
-
-{{if 'cuArrayGetMemoryRequirements' in found_functions}}
-
-cdef CUresult cuArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS* memoryRequirements, CUarray array, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuArrayGetMemoryRequirements(memoryRequirements, array, device)
-{{endif}}
-
-{{if 'cuMipmappedArrayGetMemoryRequirements' in found_functions}}
-
-cdef CUresult cuMipmappedArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS* memoryRequirements, CUmipmappedArray mipmap, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMipmappedArrayGetMemoryRequirements(memoryRequirements, mipmap, device)
-{{endif}}
-
-{{if 'cuArrayGetPlane' in found_functions}}
-
-cdef CUresult cuArrayGetPlane(CUarray* pPlaneArray, CUarray hArray, unsigned int planeIdx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuArrayGetPlane(pPlaneArray, hArray, planeIdx)
-{{endif}}
-
-{{if 'cuArrayDestroy' in found_functions}}
-
-cdef CUresult cuArrayDestroy(CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuArrayDestroy(hArray)
-{{endif}}
-
-{{if 'cuArray3DCreate_v2' in found_functions}}
-
-cdef CUresult cuArray3DCreate(CUarray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuArray3DCreate_v2(pHandle, pAllocateArray)
-{{endif}}
-
-{{if 'cuArray3DGetDescriptor_v2' in found_functions}}
-
-cdef CUresult cuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor, CUarray hArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuArray3DGetDescriptor_v2(pArrayDescriptor, hArray)
-{{endif}}
-
-{{if 'cuMipmappedArrayCreate' in found_functions}}
-
-cdef CUresult cuMipmappedArrayCreate(CUmipmappedArray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, unsigned int numMipmapLevels) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMipmappedArrayCreate(pHandle, pMipmappedArrayDesc, numMipmapLevels)
-{{endif}}
-
-{{if 'cuMipmappedArrayGetLevel' in found_functions}}
-
-cdef CUresult cuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned int level) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMipmappedArrayGetLevel(pLevelArray, hMipmappedArray, level)
-{{endif}}
-
-{{if 'cuMipmappedArrayDestroy' in found_functions}}
-
-cdef CUresult cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMipmappedArrayDestroy(hMipmappedArray)
-{{endif}}
-
-{{if 'cuMemGetHandleForAddressRange' in found_functions}}
-
-cdef CUresult cuMemGetHandleForAddressRange(void* handle, CUdeviceptr dptr, size_t size, CUmemRangeHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemGetHandleForAddressRange(handle, dptr, size, handleType, flags)
-{{endif}}
-
-{{if 'cuMemAddressReserve' in found_functions}}
-
-cdef CUresult cuMemAddressReserve(CUdeviceptr* ptr, size_t size, size_t alignment, CUdeviceptr addr, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemAddressReserve(ptr, size, alignment, addr, flags)
-{{endif}}
-
-{{if 'cuMemAddressFree' in found_functions}}
-
-cdef CUresult cuMemAddressFree(CUdeviceptr ptr, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemAddressFree(ptr, size)
-{{endif}}
-
-{{if 'cuMemCreate' in found_functions}}
-
-cdef CUresult cuMemCreate(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemCreate(handle, size, prop, flags)
-{{endif}}
-
-{{if 'cuMemRelease' in found_functions}}
-
-cdef CUresult cuMemRelease(CUmemGenericAllocationHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemRelease(handle)
-{{endif}}
-
-{{if 'cuMemMap' in found_functions}}
-
-cdef CUresult cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemMap(ptr, size, offset, handle, flags)
-{{endif}}
-
-{{if 'cuMemMapArrayAsync' in found_functions}}
-
-cdef CUresult cuMemMapArrayAsync(CUarrayMapInfo* mapInfoList, unsigned int count, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemMapArrayAsync(mapInfoList, count, hStream)
-{{endif}}
-
-{{if 'cuMemUnmap' in found_functions}}
-
-cdef CUresult cuMemUnmap(CUdeviceptr ptr, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemUnmap(ptr, size)
-{{endif}}
-
-{{if 'cuMemSetAccess' in found_functions}}
-
-cdef CUresult cuMemSetAccess(CUdeviceptr ptr, size_t size, const CUmemAccessDesc* desc, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemSetAccess(ptr, size, desc, count)
-{{endif}}
-
-{{if 'cuMemGetAccess' in found_functions}}
-
-cdef CUresult cuMemGetAccess(unsigned long long* flags, const CUmemLocation* location, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemGetAccess(flags, location, ptr)
-{{endif}}
-
-{{if 'cuMemExportToShareableHandle' in found_functions}}
-
-cdef CUresult cuMemExportToShareableHandle(void* shareableHandle, CUmemGenericAllocationHandle handle, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemExportToShareableHandle(shareableHandle, handle, handleType, flags)
-{{endif}}
-
-{{if 'cuMemImportFromShareableHandle' in found_functions}}
-
-cdef CUresult cuMemImportFromShareableHandle(CUmemGenericAllocationHandle* handle, void* osHandle, CUmemAllocationHandleType shHandleType) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemImportFromShareableHandle(handle, osHandle, shHandleType)
-{{endif}}
-
-{{if 'cuMemGetAllocationGranularity' in found_functions}}
-
-cdef CUresult cuMemGetAllocationGranularity(size_t* granularity, const CUmemAllocationProp* prop, CUmemAllocationGranularity_flags option) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemGetAllocationGranularity(granularity, prop, option)
-{{endif}}
-
-{{if 'cuMemGetAllocationPropertiesFromHandle' in found_functions}}
-
-cdef CUresult cuMemGetAllocationPropertiesFromHandle(CUmemAllocationProp* prop, CUmemGenericAllocationHandle handle) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemGetAllocationPropertiesFromHandle(prop, handle)
-{{endif}}
-
-{{if 'cuMemRetainAllocationHandle' in found_functions}}
-
-cdef CUresult cuMemRetainAllocationHandle(CUmemGenericAllocationHandle* handle, void* addr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemRetainAllocationHandle(handle, addr)
-{{endif}}
-
-{{if 'cuMemFreeAsync' in found_functions}}
-
-cdef CUresult cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemFreeAsync(dptr, hStream)
-{{endif}}
-
-{{if 'cuMemAllocAsync' in found_functions}}
-
-cdef CUresult cuMemAllocAsync(CUdeviceptr* dptr, size_t bytesize, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemAllocAsync(dptr, bytesize, hStream)
-{{endif}}
-
-{{if 'cuMemPoolTrimTo' in found_functions}}
-
-cdef CUresult cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPoolTrimTo(pool, minBytesToKeep)
-{{endif}}
-
-{{if 'cuMemPoolSetAttribute' in found_functions}}
-
-cdef CUresult cuMemPoolSetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPoolSetAttribute(pool, attr, value)
-{{endif}}
-
-{{if 'cuMemPoolGetAttribute' in found_functions}}
-
-cdef CUresult cuMemPoolGetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPoolGetAttribute(pool, attr, value)
-{{endif}}
-
-{{if 'cuMemPoolSetAccess' in found_functions}}
-
-cdef CUresult cuMemPoolSetAccess(CUmemoryPool pool, const CUmemAccessDesc* map, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPoolSetAccess(pool, map, count)
-{{endif}}
-
-{{if 'cuMemPoolGetAccess' in found_functions}}
-
-cdef CUresult cuMemPoolGetAccess(CUmemAccess_flags* flags, CUmemoryPool memPool, CUmemLocation* location) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPoolGetAccess(flags, memPool, location)
-{{endif}}
-
-{{if 'cuMemPoolCreate' in found_functions}}
-
-cdef CUresult cuMemPoolCreate(CUmemoryPool* pool, const CUmemPoolProps* poolProps) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPoolCreate(pool, poolProps)
-{{endif}}
-
-{{if 'cuMemPoolDestroy' in found_functions}}
-
-cdef CUresult cuMemPoolDestroy(CUmemoryPool pool) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPoolDestroy(pool)
-{{endif}}
-
-{{if 'cuMemAllocFromPoolAsync' in found_functions}}
-
-cdef CUresult cuMemAllocFromPoolAsync(CUdeviceptr* dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemAllocFromPoolAsync(dptr, bytesize, pool, hStream)
-{{endif}}
-
-{{if 'cuMemPoolExportToShareableHandle' in found_functions}}
-
-cdef CUresult cuMemPoolExportToShareableHandle(void* handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPoolExportToShareableHandle(handle_out, pool, handleType, flags)
-{{endif}}
-
-{{if 'cuMemPoolImportFromShareableHandle' in found_functions}}
-
-cdef CUresult cuMemPoolImportFromShareableHandle(CUmemoryPool* pool_out, void* handle, CUmemAllocationHandleType handleType, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPoolImportFromShareableHandle(pool_out, handle, handleType, flags)
-{{endif}}
-
-{{if 'cuMemPoolExportPointer' in found_functions}}
-
-cdef CUresult cuMemPoolExportPointer(CUmemPoolPtrExportData* shareData_out, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPoolExportPointer(shareData_out, ptr)
-{{endif}}
-
-{{if 'cuMemPoolImportPointer' in found_functions}}
-
-cdef CUresult cuMemPoolImportPointer(CUdeviceptr* ptr_out, CUmemoryPool pool, CUmemPoolPtrExportData* shareData) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPoolImportPointer(ptr_out, pool, shareData)
-{{endif}}
-
-{{if 'cuMulticastCreate' in found_functions}}
-
-cdef CUresult cuMulticastCreate(CUmemGenericAllocationHandle* mcHandle, const CUmulticastObjectProp* prop) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMulticastCreate(mcHandle, prop)
-{{endif}}
-
-{{if 'cuMulticastAddDevice' in found_functions}}
-
-cdef CUresult cuMulticastAddDevice(CUmemGenericAllocationHandle mcHandle, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMulticastAddDevice(mcHandle, dev)
-{{endif}}
-
-{{if 'cuMulticastBindMem' in found_functions}}
-
-cdef CUresult cuMulticastBindMem(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUmemGenericAllocationHandle memHandle, size_t memOffset, size_t size, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMulticastBindMem(mcHandle, mcOffset, memHandle, memOffset, size, flags)
-{{endif}}
-
-{{if 'cuMulticastBindAddr' in found_functions}}
-
-cdef CUresult cuMulticastBindAddr(CUmemGenericAllocationHandle mcHandle, size_t mcOffset, CUdeviceptr memptr, size_t size, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMulticastBindAddr(mcHandle, mcOffset, memptr, size, flags)
-{{endif}}
-
-{{if 'cuMulticastUnbind' in found_functions}}
-
-cdef CUresult cuMulticastUnbind(CUmemGenericAllocationHandle mcHandle, CUdevice dev, size_t mcOffset, size_t size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMulticastUnbind(mcHandle, dev, mcOffset, size)
-{{endif}}
-
-{{if 'cuMulticastGetGranularity' in found_functions}}
-
-cdef CUresult cuMulticastGetGranularity(size_t* granularity, const CUmulticastObjectProp* prop, CUmulticastGranularity_flags option) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMulticastGetGranularity(granularity, prop, option)
-{{endif}}
-
-{{if 'cuPointerGetAttribute' in found_functions}}
-
-cdef CUresult cuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuPointerGetAttribute(data, attribute, ptr)
-{{endif}}
-
-{{if 'cuMemPrefetchAsync' in found_functions}}
-
-cdef CUresult cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPrefetchAsync(devPtr, count, dstDevice, hStream)
-{{endif}}
-
-{{if 'cuMemPrefetchAsync_v2' in found_functions}}
-
-cdef CUresult cuMemPrefetchAsync_v2(CUdeviceptr devPtr, size_t count, CUmemLocation location, unsigned int flags, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemPrefetchAsync_v2(devPtr, count, location, flags, hStream)
-{{endif}}
-
-{{if 'cuMemAdvise' in found_functions}}
-
-cdef CUresult cuMemAdvise(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemAdvise(devPtr, count, advice, device)
-{{endif}}
-
-{{if 'cuMemAdvise_v2' in found_functions}}
-
-cdef CUresult cuMemAdvise_v2(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUmemLocation location) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemAdvise_v2(devPtr, count, advice, location)
-{{endif}}
-
-{{if 'cuMemRangeGetAttribute' in found_functions}}
-
-cdef CUresult cuMemRangeGetAttribute(void* data, size_t dataSize, CUmem_range_attribute attribute, CUdeviceptr devPtr, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemRangeGetAttribute(data, dataSize, attribute, devPtr, count)
-{{endif}}
-
-{{if 'cuMemRangeGetAttributes' in found_functions}}
-
-cdef CUresult cuMemRangeGetAttributes(void** data, size_t* dataSizes, CUmem_range_attribute* attributes, size_t numAttributes, CUdeviceptr devPtr, size_t count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuMemRangeGetAttributes(data, dataSizes, attributes, numAttributes, devPtr, count)
-{{endif}}
-
-{{if 'cuPointerSetAttribute' in found_functions}}
-
-cdef CUresult cuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuPointerSetAttribute(value, attribute, ptr)
-{{endif}}
-
-{{if 'cuPointerGetAttributes' in found_functions}}
-
-cdef CUresult cuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute* attributes, void** data, CUdeviceptr ptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuPointerGetAttributes(numAttributes, attributes, data, ptr)
-{{endif}}
-
-{{if 'cuStreamCreate' in found_functions}}
-
-cdef CUresult cuStreamCreate(CUstream* phStream, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamCreate(phStream, Flags)
-{{endif}}
-
-{{if 'cuStreamCreateWithPriority' in found_functions}}
-
-cdef CUresult cuStreamCreateWithPriority(CUstream* phStream, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamCreateWithPriority(phStream, flags, priority)
-{{endif}}
-
-{{if 'cuStreamGetPriority' in found_functions}}
-
-cdef CUresult cuStreamGetPriority(CUstream hStream, int* priority) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamGetPriority(hStream, priority)
-{{endif}}
-
-{{if 'cuStreamGetFlags' in found_functions}}
-
-cdef CUresult cuStreamGetFlags(CUstream hStream, unsigned int* flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamGetFlags(hStream, flags)
-{{endif}}
-
-{{if 'cuStreamGetId' in found_functions}}
-
-cdef CUresult cuStreamGetId(CUstream hStream, unsigned long long* streamId) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamGetId(hStream, streamId)
-{{endif}}
-
-{{if 'cuStreamGetCtx' in found_functions}}
-
-cdef CUresult cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamGetCtx(hStream, pctx)
-{{endif}}
-
-{{if 'cuStreamGetCtx_v2' in found_functions}}
-
-cdef CUresult cuStreamGetCtx_v2(CUstream hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamGetCtx_v2(hStream, pCtx, pGreenCtx)
-{{endif}}
-
-{{if 'cuStreamWaitEvent' in found_functions}}
-
-cdef CUresult cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamWaitEvent(hStream, hEvent, Flags)
-{{endif}}
-
-{{if 'cuStreamAddCallback' in found_functions}}
-
-cdef CUresult cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void* userData, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamAddCallback(hStream, callback, userData, flags)
-{{endif}}
-
-{{if 'cuStreamBeginCapture_v2' in found_functions}}
-
-cdef CUresult cuStreamBeginCapture(CUstream hStream, CUstreamCaptureMode mode) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamBeginCapture_v2(hStream, mode)
-{{endif}}
-
-{{if 'cuStreamBeginCaptureToGraph' in found_functions}}
-
-cdef CUresult cuStreamBeginCaptureToGraph(CUstream hStream, CUgraph hGraph, const CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, CUstreamCaptureMode mode) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamBeginCaptureToGraph(hStream, hGraph, dependencies, dependencyData, numDependencies, mode)
-{{endif}}
-
-{{if 'cuThreadExchangeStreamCaptureMode' in found_functions}}
-
-cdef CUresult cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode* mode) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuThreadExchangeStreamCaptureMode(mode)
-{{endif}}
-
-{{if 'cuStreamEndCapture' in found_functions}}
-
-cdef CUresult cuStreamEndCapture(CUstream hStream, CUgraph* phGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamEndCapture(hStream, phGraph)
-{{endif}}
-
-{{if 'cuStreamIsCapturing' in found_functions}}
-
-cdef CUresult cuStreamIsCapturing(CUstream hStream, CUstreamCaptureStatus* captureStatus) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamIsCapturing(hStream, captureStatus)
-{{endif}}
-
-{{if 'cuStreamGetCaptureInfo_v2' in found_functions}}
-
-cdef CUresult cuStreamGetCaptureInfo(CUstream hStream, CUstreamCaptureStatus* captureStatus_out, cuuint64_t* id_out, CUgraph* graph_out, const CUgraphNode** dependencies_out, size_t* numDependencies_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamGetCaptureInfo_v2(hStream, captureStatus_out, id_out, graph_out, dependencies_out, numDependencies_out)
-{{endif}}
-
-{{if 'cuStreamGetCaptureInfo_v3' in found_functions}}
-
-cdef CUresult cuStreamGetCaptureInfo_v3(CUstream hStream, CUstreamCaptureStatus* captureStatus_out, cuuint64_t* id_out, CUgraph* graph_out, const CUgraphNode** dependencies_out, const CUgraphEdgeData** edgeData_out, size_t* numDependencies_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamGetCaptureInfo_v3(hStream, captureStatus_out, id_out, graph_out, dependencies_out, edgeData_out, numDependencies_out)
-{{endif}}
-
-{{if 'cuStreamUpdateCaptureDependencies' in found_functions}}
-
-cdef CUresult cuStreamUpdateCaptureDependencies(CUstream hStream, CUgraphNode* dependencies, size_t numDependencies, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamUpdateCaptureDependencies(hStream, dependencies, numDependencies, flags)
-{{endif}}
-
-{{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}
-
-cdef CUresult cuStreamUpdateCaptureDependencies_v2(CUstream hStream, CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamUpdateCaptureDependencies_v2(hStream, dependencies, dependencyData, numDependencies, flags)
-{{endif}}
-
-{{if 'cuStreamAttachMemAsync' in found_functions}}
-
-cdef CUresult cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamAttachMemAsync(hStream, dptr, length, flags)
-{{endif}}
-
-{{if 'cuStreamQuery' in found_functions}}
-
-cdef CUresult cuStreamQuery(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamQuery(hStream)
-{{endif}}
-
-{{if 'cuStreamSynchronize' in found_functions}}
-
-cdef CUresult cuStreamSynchronize(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamSynchronize(hStream)
-{{endif}}
-
-{{if 'cuStreamDestroy_v2' in found_functions}}
-
-cdef CUresult cuStreamDestroy(CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamDestroy_v2(hStream)
-{{endif}}
-
-{{if 'cuStreamCopyAttributes' in found_functions}}
-
-cdef CUresult cuStreamCopyAttributes(CUstream dst, CUstream src) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamCopyAttributes(dst, src)
-{{endif}}
-
-{{if 'cuStreamGetAttribute' in found_functions}}
-
-cdef CUresult cuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, CUstreamAttrValue* value_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamGetAttribute(hStream, attr, value_out)
-{{endif}}
-
-{{if 'cuStreamSetAttribute' in found_functions}}
-
-cdef CUresult cuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr, const CUstreamAttrValue* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamSetAttribute(hStream, attr, value)
-{{endif}}
-
-{{if 'cuEventCreate' in found_functions}}
-
-cdef CUresult cuEventCreate(CUevent* phEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEventCreate(phEvent, Flags)
-{{endif}}
-
-{{if 'cuEventRecord' in found_functions}}
-
-cdef CUresult cuEventRecord(CUevent hEvent, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEventRecord(hEvent, hStream)
-{{endif}}
-
-{{if 'cuEventRecordWithFlags' in found_functions}}
-
-cdef CUresult cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEventRecordWithFlags(hEvent, hStream, flags)
-{{endif}}
-
-{{if 'cuEventQuery' in found_functions}}
-
-cdef CUresult cuEventQuery(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEventQuery(hEvent)
-{{endif}}
-
-{{if 'cuEventSynchronize' in found_functions}}
-
-cdef CUresult cuEventSynchronize(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEventSynchronize(hEvent)
-{{endif}}
-
-{{if 'cuEventDestroy_v2' in found_functions}}
-
-cdef CUresult cuEventDestroy(CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEventDestroy_v2(hEvent)
-{{endif}}
-
-{{if 'cuEventElapsedTime' in found_functions}}
-
-cdef CUresult cuEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEventElapsedTime(pMilliseconds, hStart, hEnd)
-{{endif}}
-
-{{if 'cuImportExternalMemory' in found_functions}}
-
-cdef CUresult cuImportExternalMemory(CUexternalMemory* extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC* memHandleDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuImportExternalMemory(extMem_out, memHandleDesc)
-{{endif}}
-
-{{if 'cuExternalMemoryGetMappedBuffer' in found_functions}}
-
-cdef CUresult cuExternalMemoryGetMappedBuffer(CUdeviceptr* devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC* bufferDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuExternalMemoryGetMappedBuffer(devPtr, extMem, bufferDesc)
-{{endif}}
-
-{{if 'cuExternalMemoryGetMappedMipmappedArray' in found_functions}}
-
-cdef CUresult cuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray* mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC* mipmapDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuExternalMemoryGetMappedMipmappedArray(mipmap, extMem, mipmapDesc)
-{{endif}}
-
-{{if 'cuDestroyExternalMemory' in found_functions}}
-
-cdef CUresult cuDestroyExternalMemory(CUexternalMemory extMem) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDestroyExternalMemory(extMem)
-{{endif}}
-
-{{if 'cuImportExternalSemaphore' in found_functions}}
-
-cdef CUresult cuImportExternalSemaphore(CUexternalSemaphore* extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC* semHandleDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuImportExternalSemaphore(extSem_out, semHandleDesc)
-{{endif}}
-
-{{if 'cuSignalExternalSemaphoresAsync' in found_functions}}
-
-cdef CUresult cuSignalExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuSignalExternalSemaphoresAsync(extSemArray, paramsArray, numExtSems, stream)
-{{endif}}
-
-{{if 'cuWaitExternalSemaphoresAsync' in found_functions}}
-
-cdef CUresult cuWaitExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuWaitExternalSemaphoresAsync(extSemArray, paramsArray, numExtSems, stream)
-{{endif}}
-
-{{if 'cuDestroyExternalSemaphore' in found_functions}}
-
-cdef CUresult cuDestroyExternalSemaphore(CUexternalSemaphore extSem) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDestroyExternalSemaphore(extSem)
-{{endif}}
-
-{{if 'cuStreamWaitValue32_v2' in found_functions}}
-
-cdef CUresult cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamWaitValue32_v2(stream, addr, value, flags)
-{{endif}}
-
-{{if 'cuStreamWaitValue64_v2' in found_functions}}
-
-cdef CUresult cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamWaitValue64_v2(stream, addr, value, flags)
-{{endif}}
-
-{{if 'cuStreamWriteValue32_v2' in found_functions}}
-
-cdef CUresult cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamWriteValue32_v2(stream, addr, value, flags)
-{{endif}}
-
-{{if 'cuStreamWriteValue64_v2' in found_functions}}
-
-cdef CUresult cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamWriteValue64_v2(stream, addr, value, flags)
-{{endif}}
-
-{{if 'cuStreamBatchMemOp_v2' in found_functions}}
-
-cdef CUresult cuStreamBatchMemOp(CUstream stream, unsigned int count, CUstreamBatchMemOpParams* paramArray, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamBatchMemOp_v2(stream, count, paramArray, flags)
-{{endif}}
-
-{{if 'cuFuncGetAttribute' in found_functions}}
-
-cdef CUresult cuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFuncGetAttribute(pi, attrib, hfunc)
-{{endif}}
-
-{{if 'cuFuncSetAttribute' in found_functions}}
-
-cdef CUresult cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFuncSetAttribute(hfunc, attrib, value)
-{{endif}}
-
-{{if 'cuFuncSetCacheConfig' in found_functions}}
-
-cdef CUresult cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFuncSetCacheConfig(hfunc, config)
-{{endif}}
-
-{{if 'cuFuncGetModule' in found_functions}}
-
-cdef CUresult cuFuncGetModule(CUmodule* hmod, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFuncGetModule(hmod, hfunc)
-{{endif}}
-
-{{if 'cuFuncGetName' in found_functions}}
-
-cdef CUresult cuFuncGetName(const char** name, CUfunction hfunc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFuncGetName(name, hfunc)
-{{endif}}
-
-{{if 'cuFuncGetParamInfo' in found_functions}}
-
-cdef CUresult cuFuncGetParamInfo(CUfunction func, size_t paramIndex, size_t* paramOffset, size_t* paramSize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFuncGetParamInfo(func, paramIndex, paramOffset, paramSize)
-{{endif}}
-
-{{if 'cuFuncIsLoaded' in found_functions}}
-
-cdef CUresult cuFuncIsLoaded(CUfunctionLoadingState* state, CUfunction function) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFuncIsLoaded(state, function)
-{{endif}}
-
-{{if 'cuFuncLoad' in found_functions}}
-
-cdef CUresult cuFuncLoad(CUfunction function) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFuncLoad(function)
-{{endif}}
-
-{{if 'cuLaunchKernel' in found_functions}}
-
-cdef CUresult cuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra)
-{{endif}}
-
-{{if 'cuLaunchKernelEx' in found_functions}}
-
-cdef CUresult cuLaunchKernelEx(const CUlaunchConfig* config, CUfunction f, void** kernelParams, void** extra) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLaunchKernelEx(config, f, kernelParams, extra)
-{{endif}}
-
-{{if 'cuLaunchCooperativeKernel' in found_functions}}
-
-cdef CUresult cuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLaunchCooperativeKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams)
-{{endif}}
-
-{{if 'cuLaunchCooperativeKernelMultiDevice' in found_functions}}
-
-cdef CUresult cuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS* launchParamsList, unsigned int numDevices, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags)
-{{endif}}
-
-{{if 'cuLaunchHostFunc' in found_functions}}
-
-cdef CUresult cuLaunchHostFunc(CUstream hStream, CUhostFn fn, void* userData) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLaunchHostFunc(hStream, fn, userData)
-{{endif}}
-
-{{if 'cuFuncSetBlockShape' in found_functions}}
-
-cdef CUresult cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFuncSetBlockShape(hfunc, x, y, z)
-{{endif}}
-
-{{if 'cuFuncSetSharedSize' in found_functions}}
-
-cdef CUresult cuFuncSetSharedSize(CUfunction hfunc, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFuncSetSharedSize(hfunc, numbytes)
-{{endif}}
-
-{{if 'cuParamSetSize' in found_functions}}
-
-cdef CUresult cuParamSetSize(CUfunction hfunc, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuParamSetSize(hfunc, numbytes)
-{{endif}}
-
-{{if 'cuParamSeti' in found_functions}}
-
-cdef CUresult cuParamSeti(CUfunction hfunc, int offset, unsigned int value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuParamSeti(hfunc, offset, value)
-{{endif}}
-
-{{if 'cuParamSetf' in found_functions}}
-
-cdef CUresult cuParamSetf(CUfunction hfunc, int offset, float value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuParamSetf(hfunc, offset, value)
-{{endif}}
-
-{{if 'cuParamSetv' in found_functions}}
-
-cdef CUresult cuParamSetv(CUfunction hfunc, int offset, void* ptr, unsigned int numbytes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuParamSetv(hfunc, offset, ptr, numbytes)
-{{endif}}
-
-{{if 'cuLaunch' in found_functions}}
-
-cdef CUresult cuLaunch(CUfunction f) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLaunch(f)
-{{endif}}
-
-{{if 'cuLaunchGrid' in found_functions}}
-
-cdef CUresult cuLaunchGrid(CUfunction f, int grid_width, int grid_height) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLaunchGrid(f, grid_width, grid_height)
-{{endif}}
-
-{{if 'cuLaunchGridAsync' in found_functions}}
-
-cdef CUresult cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuLaunchGridAsync(f, grid_width, grid_height, hStream)
-{{endif}}
-
-{{if 'cuParamSetTexRef' in found_functions}}
-
-cdef CUresult cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuParamSetTexRef(hfunc, texunit, hTexRef)
-{{endif}}
-
-{{if 'cuFuncSetSharedMemConfig' in found_functions}}
-
-cdef CUresult cuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuFuncSetSharedMemConfig(hfunc, config)
-{{endif}}
-
-{{if 'cuGraphCreate' in found_functions}}
-
-cdef CUresult cuGraphCreate(CUgraph* phGraph, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphCreate(phGraph, flags)
-{{endif}}
-
-{{if 'cuGraphAddKernelNode_v2' in found_functions}}
-
-cdef CUresult cuGraphAddKernelNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddKernelNode_v2(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-{{endif}}
-
-{{if 'cuGraphKernelNodeGetParams_v2' in found_functions}}
-
-cdef CUresult cuGraphKernelNodeGetParams(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphKernelNodeGetParams_v2(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphKernelNodeSetParams_v2' in found_functions}}
-
-cdef CUresult cuGraphKernelNodeSetParams(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphKernelNodeSetParams_v2(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphAddMemcpyNode' in found_functions}}
-
-cdef CUresult cuGraphAddMemcpyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMCPY3D* copyParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddMemcpyNode(phGraphNode, hGraph, dependencies, numDependencies, copyParams, ctx)
-{{endif}}
-
-{{if 'cuGraphMemcpyNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphMemcpyNodeGetParams(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphMemcpyNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphMemcpyNodeSetParams(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphAddMemsetNode' in found_functions}}
-
-cdef CUresult cuGraphAddMemsetNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddMemsetNode(phGraphNode, hGraph, dependencies, numDependencies, memsetParams, ctx)
-{{endif}}
-
-{{if 'cuGraphMemsetNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphMemsetNodeGetParams(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphMemsetNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphMemsetNodeSetParams(CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphMemsetNodeSetParams(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphAddHostNode' in found_functions}}
-
-cdef CUresult cuGraphAddHostNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddHostNode(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-{{endif}}
-
-{{if 'cuGraphHostNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphHostNodeGetParams(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphHostNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphHostNodeSetParams(CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphHostNodeSetParams(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphAddChildGraphNode' in found_functions}}
-
-cdef CUresult cuGraphAddChildGraphNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUgraph childGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddChildGraphNode(phGraphNode, hGraph, dependencies, numDependencies, childGraph)
-{{endif}}
-
-{{if 'cuGraphChildGraphNodeGetGraph' in found_functions}}
-
-cdef CUresult cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph* phGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphChildGraphNodeGetGraph(hNode, phGraph)
-{{endif}}
-
-{{if 'cuGraphAddEmptyNode' in found_functions}}
-
-cdef CUresult cuGraphAddEmptyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddEmptyNode(phGraphNode, hGraph, dependencies, numDependencies)
-{{endif}}
-
-{{if 'cuGraphAddEventRecordNode' in found_functions}}
-
-cdef CUresult cuGraphAddEventRecordNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddEventRecordNode(phGraphNode, hGraph, dependencies, numDependencies, event)
-{{endif}}
-
-{{if 'cuGraphEventRecordNodeGetEvent' in found_functions}}
-
-cdef CUresult cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent* event_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphEventRecordNodeGetEvent(hNode, event_out)
-{{endif}}
-
-{{if 'cuGraphEventRecordNodeSetEvent' in found_functions}}
-
-cdef CUresult cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphEventRecordNodeSetEvent(hNode, event)
-{{endif}}
-
-{{if 'cuGraphAddEventWaitNode' in found_functions}}
-
-cdef CUresult cuGraphAddEventWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddEventWaitNode(phGraphNode, hGraph, dependencies, numDependencies, event)
-{{endif}}
-
-{{if 'cuGraphEventWaitNodeGetEvent' in found_functions}}
-
-cdef CUresult cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent* event_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphEventWaitNodeGetEvent(hNode, event_out)
-{{endif}}
-
-{{if 'cuGraphEventWaitNodeSetEvent' in found_functions}}
-
-cdef CUresult cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphEventWaitNodeSetEvent(hNode, event)
-{{endif}}
-
-{{if 'cuGraphAddExternalSemaphoresSignalNode' in found_functions}}
-
-cdef CUresult cuGraphAddExternalSemaphoresSignalNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddExternalSemaphoresSignalNode(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExternalSemaphoresSignalNodeGetParams(hNode, params_out)
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExternalSemaphoresSignalNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphAddExternalSemaphoresWaitNode' in found_functions}}
-
-cdef CUresult cuGraphAddExternalSemaphoresWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddExternalSemaphoresWaitNode(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExternalSemaphoresWaitNodeGetParams(hNode, params_out)
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphAddBatchMemOpNode' in found_functions}}
-
-cdef CUresult cuGraphAddBatchMemOpNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddBatchMemOpNode(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-{{endif}}
-
-{{if 'cuGraphBatchMemOpNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphBatchMemOpNodeGetParams(CUgraphNode hNode, CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphBatchMemOpNodeGetParams(hNode, nodeParams_out)
-{{endif}}
-
-{{if 'cuGraphBatchMemOpNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphBatchMemOpNodeSetParams(CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphBatchMemOpNodeSetParams(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphExecBatchMemOpNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecBatchMemOpNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecBatchMemOpNodeSetParams(hGraphExec, hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphAddMemAllocNode' in found_functions}}
-
-cdef CUresult cuGraphAddMemAllocNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUDA_MEM_ALLOC_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddMemAllocNode(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-{{endif}}
-
-{{if 'cuGraphMemAllocNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphMemAllocNodeGetParams(CUgraphNode hNode, CUDA_MEM_ALLOC_NODE_PARAMS* params_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphMemAllocNodeGetParams(hNode, params_out)
-{{endif}}
-
-{{if 'cuGraphAddMemFreeNode' in found_functions}}
-
-cdef CUresult cuGraphAddMemFreeNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUdeviceptr dptr) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddMemFreeNode(phGraphNode, hGraph, dependencies, numDependencies, dptr)
-{{endif}}
-
-{{if 'cuGraphMemFreeNodeGetParams' in found_functions}}
-
-cdef CUresult cuGraphMemFreeNodeGetParams(CUgraphNode hNode, CUdeviceptr* dptr_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphMemFreeNodeGetParams(hNode, dptr_out)
-{{endif}}
-
-{{if 'cuDeviceGraphMemTrim' in found_functions}}
-
-cdef CUresult cuDeviceGraphMemTrim(CUdevice device) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGraphMemTrim(device)
-{{endif}}
-
-{{if 'cuDeviceGetGraphMemAttribute' in found_functions}}
-
-cdef CUresult cuDeviceGetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetGraphMemAttribute(device, attr, value)
-{{endif}}
-
-{{if 'cuDeviceSetGraphMemAttribute' in found_functions}}
-
-cdef CUresult cuDeviceSetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceSetGraphMemAttribute(device, attr, value)
-{{endif}}
-
-{{if 'cuGraphClone' in found_functions}}
-
-cdef CUresult cuGraphClone(CUgraph* phGraphClone, CUgraph originalGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphClone(phGraphClone, originalGraph)
-{{endif}}
-
-{{if 'cuGraphNodeFindInClone' in found_functions}}
-
-cdef CUresult cuGraphNodeFindInClone(CUgraphNode* phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphNodeFindInClone(phNode, hOriginalNode, hClonedGraph)
-{{endif}}
-
-{{if 'cuGraphNodeGetType' in found_functions}}
-
-cdef CUresult cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType* typename) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphNodeGetType(hNode, typename)
-{{endif}}
-
-{{if 'cuGraphGetNodes' in found_functions}}
-
-cdef CUresult cuGraphGetNodes(CUgraph hGraph, CUgraphNode* nodes, size_t* numNodes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphGetNodes(hGraph, nodes, numNodes)
-{{endif}}
-
-{{if 'cuGraphGetRootNodes' in found_functions}}
-
-cdef CUresult cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode* rootNodes, size_t* numRootNodes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphGetRootNodes(hGraph, rootNodes, numRootNodes)
-{{endif}}
-
-{{if 'cuGraphGetEdges' in found_functions}}
-
-cdef CUresult cuGraphGetEdges(CUgraph hGraph, CUgraphNode* from_, CUgraphNode* to, size_t* numEdges) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphGetEdges(hGraph, from_, to, numEdges)
-{{endif}}
-
-{{if 'cuGraphGetEdges_v2' in found_functions}}
-
-cdef CUresult cuGraphGetEdges_v2(CUgraph hGraph, CUgraphNode* from_, CUgraphNode* to, CUgraphEdgeData* edgeData, size_t* numEdges) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphGetEdges_v2(hGraph, from_, to, edgeData, numEdges)
-{{endif}}
-
-{{if 'cuGraphNodeGetDependencies' in found_functions}}
-
-cdef CUresult cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode* dependencies, size_t* numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphNodeGetDependencies(hNode, dependencies, numDependencies)
-{{endif}}
-
-{{if 'cuGraphNodeGetDependencies_v2' in found_functions}}
-
-cdef CUresult cuGraphNodeGetDependencies_v2(CUgraphNode hNode, CUgraphNode* dependencies, CUgraphEdgeData* edgeData, size_t* numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphNodeGetDependencies_v2(hNode, dependencies, edgeData, numDependencies)
-{{endif}}
-
-{{if 'cuGraphNodeGetDependentNodes' in found_functions}}
-
-cdef CUresult cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode* dependentNodes, size_t* numDependentNodes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphNodeGetDependentNodes(hNode, dependentNodes, numDependentNodes)
-{{endif}}
-
-{{if 'cuGraphNodeGetDependentNodes_v2' in found_functions}}
-
-cdef CUresult cuGraphNodeGetDependentNodes_v2(CUgraphNode hNode, CUgraphNode* dependentNodes, CUgraphEdgeData* edgeData, size_t* numDependentNodes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphNodeGetDependentNodes_v2(hNode, dependentNodes, edgeData, numDependentNodes)
-{{endif}}
-
-{{if 'cuGraphAddDependencies' in found_functions}}
-
-cdef CUresult cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddDependencies(hGraph, from_, to, numDependencies)
-{{endif}}
-
-{{if 'cuGraphAddDependencies_v2' in found_functions}}
-
-cdef CUresult cuGraphAddDependencies_v2(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, const CUgraphEdgeData* edgeData, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddDependencies_v2(hGraph, from_, to, edgeData, numDependencies)
-{{endif}}
-
-{{if 'cuGraphRemoveDependencies' in found_functions}}
-
-cdef CUresult cuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphRemoveDependencies(hGraph, from_, to, numDependencies)
-{{endif}}
-
-{{if 'cuGraphRemoveDependencies_v2' in found_functions}}
-
-cdef CUresult cuGraphRemoveDependencies_v2(CUgraph hGraph, const CUgraphNode* from_, const CUgraphNode* to, const CUgraphEdgeData* edgeData, size_t numDependencies) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphRemoveDependencies_v2(hGraph, from_, to, edgeData, numDependencies)
-{{endif}}
-
-{{if 'cuGraphDestroyNode' in found_functions}}
-
-cdef CUresult cuGraphDestroyNode(CUgraphNode hNode) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphDestroyNode(hNode)
-{{endif}}
-
-{{if 'cuGraphInstantiateWithFlags' in found_functions}}
-
-cdef CUresult cuGraphInstantiate(CUgraphExec* phGraphExec, CUgraph hGraph, unsigned long long flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphInstantiateWithFlags(phGraphExec, hGraph, flags)
-{{endif}}
-
-{{if 'cuGraphInstantiateWithParams' in found_functions}}
-
-cdef CUresult cuGraphInstantiateWithParams(CUgraphExec* phGraphExec, CUgraph hGraph, CUDA_GRAPH_INSTANTIATE_PARAMS* instantiateParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphInstantiateWithParams(phGraphExec, hGraph, instantiateParams)
-{{endif}}
-
-{{if 'cuGraphExecGetFlags' in found_functions}}
-
-cdef CUresult cuGraphExecGetFlags(CUgraphExec hGraphExec, cuuint64_t* flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecGetFlags(hGraphExec, flags)
-{{endif}}
-
-{{if 'cuGraphExecKernelNodeSetParams_v2' in found_functions}}
-
-cdef CUresult cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecKernelNodeSetParams_v2(hGraphExec, hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphExecMemcpyNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMCPY3D* copyParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecMemcpyNodeSetParams(hGraphExec, hNode, copyParams, ctx)
-{{endif}}
-
-{{if 'cuGraphExecMemsetNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecMemsetNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecMemsetNodeSetParams(hGraphExec, hNode, memsetParams, ctx)
-{{endif}}
-
-{{if 'cuGraphExecHostNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecHostNodeSetParams(hGraphExec, hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphExecChildGraphNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecChildGraphNodeSetParams(hGraphExec, hNode, childGraph)
-{{endif}}
-
-{{if 'cuGraphExecEventRecordNodeSetEvent' in found_functions}}
-
-cdef CUresult cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event)
-{{endif}}
-
-{{if 'cuGraphExecEventWaitNodeSetEvent' in found_functions}}
-
-cdef CUresult cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event)
-{{endif}}
-
-{{if 'cuGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphNodeSetEnabled' in found_functions}}
-
-cdef CUresult cuGraphNodeSetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int isEnabled) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphNodeSetEnabled(hGraphExec, hNode, isEnabled)
-{{endif}}
-
-{{if 'cuGraphNodeGetEnabled' in found_functions}}
-
-cdef CUresult cuGraphNodeGetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int* isEnabled) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphNodeGetEnabled(hGraphExec, hNode, isEnabled)
-{{endif}}
-
-{{if 'cuGraphUpload' in found_functions}}
-
-cdef CUresult cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphUpload(hGraphExec, hStream)
-{{endif}}
-
-{{if 'cuGraphLaunch' in found_functions}}
-
-cdef CUresult cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphLaunch(hGraphExec, hStream)
-{{endif}}
-
-{{if 'cuGraphExecDestroy' in found_functions}}
-
-cdef CUresult cuGraphExecDestroy(CUgraphExec hGraphExec) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecDestroy(hGraphExec)
-{{endif}}
-
-{{if 'cuGraphDestroy' in found_functions}}
-
-cdef CUresult cuGraphDestroy(CUgraph hGraph) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphDestroy(hGraph)
-{{endif}}
-
-{{if 'cuGraphExecUpdate_v2' in found_functions}}
-
-cdef CUresult cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphExecUpdateResultInfo* resultInfo) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecUpdate_v2(hGraphExec, hGraph, resultInfo)
-{{endif}}
-
-{{if 'cuGraphKernelNodeCopyAttributes' in found_functions}}
-
-cdef CUresult cuGraphKernelNodeCopyAttributes(CUgraphNode dst, CUgraphNode src) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphKernelNodeCopyAttributes(dst, src)
-{{endif}}
-
-{{if 'cuGraphKernelNodeGetAttribute' in found_functions}}
-
-cdef CUresult cuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, CUkernelNodeAttrValue* value_out) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphKernelNodeGetAttribute(hNode, attr, value_out)
-{{endif}}
-
-{{if 'cuGraphKernelNodeSetAttribute' in found_functions}}
-
-cdef CUresult cuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, const CUkernelNodeAttrValue* value) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphKernelNodeSetAttribute(hNode, attr, value)
-{{endif}}
-
-{{if 'cuGraphDebugDotPrint' in found_functions}}
-
-cdef CUresult cuGraphDebugDotPrint(CUgraph hGraph, const char* path, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphDebugDotPrint(hGraph, path, flags)
-{{endif}}
-
-{{if 'cuUserObjectCreate' in found_functions}}
-
-cdef CUresult cuUserObjectCreate(CUuserObject* object_out, void* ptr, CUhostFn destroy, unsigned int initialRefcount, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuUserObjectCreate(object_out, ptr, destroy, initialRefcount, flags)
-{{endif}}
-
-{{if 'cuUserObjectRetain' in found_functions}}
-
-cdef CUresult cuUserObjectRetain(CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuUserObjectRetain(object, count)
-{{endif}}
-
-{{if 'cuUserObjectRelease' in found_functions}}
-
-cdef CUresult cuUserObjectRelease(CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuUserObjectRelease(object, count)
-{{endif}}
-
-{{if 'cuGraphRetainUserObject' in found_functions}}
-
-cdef CUresult cuGraphRetainUserObject(CUgraph graph, CUuserObject object, unsigned int count, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphRetainUserObject(graph, object, count, flags)
-{{endif}}
-
-{{if 'cuGraphReleaseUserObject' in found_functions}}
-
-cdef CUresult cuGraphReleaseUserObject(CUgraph graph, CUuserObject object, unsigned int count) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphReleaseUserObject(graph, object, count)
-{{endif}}
-
-{{if 'cuGraphAddNode' in found_functions}}
-
-cdef CUresult cuGraphAddNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddNode(phGraphNode, hGraph, dependencies, numDependencies, nodeParams)
-{{endif}}
-
-{{if 'cuGraphAddNode_v2' in found_functions}}
-
-cdef CUresult cuGraphAddNode_v2(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, const CUgraphEdgeData* dependencyData, size_t numDependencies, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphAddNode_v2(phGraphNode, hGraph, dependencies, dependencyData, numDependencies, nodeParams)
-{{endif}}
-
-{{if 'cuGraphNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphNodeSetParams(CUgraphNode hNode, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphNodeSetParams(hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphExecNodeSetParams' in found_functions}}
-
-cdef CUresult cuGraphExecNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraphNodeParams* nodeParams) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphExecNodeSetParams(hGraphExec, hNode, nodeParams)
-{{endif}}
-
-{{if 'cuGraphConditionalHandleCreate' in found_functions}}
-
-cdef CUresult cuGraphConditionalHandleCreate(CUgraphConditionalHandle* pHandle_out, CUgraph hGraph, CUcontext ctx, unsigned int defaultLaunchValue, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphConditionalHandleCreate(pHandle_out, hGraph, ctx, defaultLaunchValue, flags)
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-
-cdef CUresult cuOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, blockSize, dynamicSMemSize)
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-
-cdef CUresult cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func, blockSize, dynamicSMemSize, flags)
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialBlockSize' in found_functions}}
-
-cdef CUresult cuOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit)
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialBlockSizeWithFlags' in found_functions}}
-
-cdef CUresult cuOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuOccupancyMaxPotentialBlockSizeWithFlags(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, flags)
-{{endif}}
-
-{{if 'cuOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-
-cdef CUresult cuOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, CUfunction func, int numBlocks, int blockSize) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuOccupancyAvailableDynamicSMemPerBlock(dynamicSmemSize, func, numBlocks, blockSize)
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialClusterSize' in found_functions}}
-
-cdef CUresult cuOccupancyMaxPotentialClusterSize(int* clusterSize, CUfunction func, const CUlaunchConfig* config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuOccupancyMaxPotentialClusterSize(clusterSize, func, config)
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveClusters' in found_functions}}
-
-cdef CUresult cuOccupancyMaxActiveClusters(int* numClusters, CUfunction func, const CUlaunchConfig* config) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuOccupancyMaxActiveClusters(numClusters, func, config)
-{{endif}}
-
-{{if 'cuTexRefSetArray' in found_functions}}
-
-cdef CUresult cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetArray(hTexRef, hArray, Flags)
-{{endif}}
-
-{{if 'cuTexRefSetMipmappedArray' in found_functions}}
-
-cdef CUresult cuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetMipmappedArray(hTexRef, hMipmappedArray, Flags)
-{{endif}}
-
-{{if 'cuTexRefSetAddress_v2' in found_functions}}
-
-cdef CUresult cuTexRefSetAddress(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t numbytes) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetAddress_v2(ByteOffset, hTexRef, dptr, numbytes)
-{{endif}}
-
-{{if 'cuTexRefSetAddress2D_v3' in found_functions}}
-
-cdef CUresult cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR* desc, CUdeviceptr dptr, size_t Pitch) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetAddress2D_v3(hTexRef, desc, dptr, Pitch)
-{{endif}}
-
-{{if 'cuTexRefSetFormat' in found_functions}}
-
-cdef CUresult cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetFormat(hTexRef, fmt, NumPackedComponents)
-{{endif}}
-
-{{if 'cuTexRefSetAddressMode' in found_functions}}
-
-cdef CUresult cuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetAddressMode(hTexRef, dim, am)
-{{endif}}
-
-{{if 'cuTexRefSetFilterMode' in found_functions}}
-
-cdef CUresult cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetFilterMode(hTexRef, fm)
-{{endif}}
-
-{{if 'cuTexRefSetMipmapFilterMode' in found_functions}}
-
-cdef CUresult cuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetMipmapFilterMode(hTexRef, fm)
-{{endif}}
-
-{{if 'cuTexRefSetMipmapLevelBias' in found_functions}}
-
-cdef CUresult cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetMipmapLevelBias(hTexRef, bias)
-{{endif}}
-
-{{if 'cuTexRefSetMipmapLevelClamp' in found_functions}}
-
-cdef CUresult cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetMipmapLevelClamp(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp)
-{{endif}}
-
-{{if 'cuTexRefSetMaxAnisotropy' in found_functions}}
-
-cdef CUresult cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetMaxAnisotropy(hTexRef, maxAniso)
-{{endif}}
-
-{{if 'cuTexRefSetBorderColor' in found_functions}}
-
-cdef CUresult cuTexRefSetBorderColor(CUtexref hTexRef, float* pBorderColor) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetBorderColor(hTexRef, pBorderColor)
-{{endif}}
-
-{{if 'cuTexRefSetFlags' in found_functions}}
-
-cdef CUresult cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefSetFlags(hTexRef, Flags)
-{{endif}}
-
-{{if 'cuTexRefGetAddress_v2' in found_functions}}
-
-cdef CUresult cuTexRefGetAddress(CUdeviceptr* pdptr, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetAddress_v2(pdptr, hTexRef)
-{{endif}}
-
-{{if 'cuTexRefGetArray' in found_functions}}
-
-cdef CUresult cuTexRefGetArray(CUarray* phArray, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetArray(phArray, hTexRef)
-{{endif}}
-
-{{if 'cuTexRefGetMipmappedArray' in found_functions}}
-
-cdef CUresult cuTexRefGetMipmappedArray(CUmipmappedArray* phMipmappedArray, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetMipmappedArray(phMipmappedArray, hTexRef)
-{{endif}}
-
-{{if 'cuTexRefGetAddressMode' in found_functions}}
-
-cdef CUresult cuTexRefGetAddressMode(CUaddress_mode* pam, CUtexref hTexRef, int dim) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetAddressMode(pam, hTexRef, dim)
-{{endif}}
-
-{{if 'cuTexRefGetFilterMode' in found_functions}}
-
-cdef CUresult cuTexRefGetFilterMode(CUfilter_mode* pfm, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetFilterMode(pfm, hTexRef)
-{{endif}}
-
-{{if 'cuTexRefGetFormat' in found_functions}}
-
-cdef CUresult cuTexRefGetFormat(CUarray_format* pFormat, int* pNumChannels, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetFormat(pFormat, pNumChannels, hTexRef)
-{{endif}}
-
-{{if 'cuTexRefGetMipmapFilterMode' in found_functions}}
-
-cdef CUresult cuTexRefGetMipmapFilterMode(CUfilter_mode* pfm, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetMipmapFilterMode(pfm, hTexRef)
-{{endif}}
-
-{{if 'cuTexRefGetMipmapLevelBias' in found_functions}}
-
-cdef CUresult cuTexRefGetMipmapLevelBias(float* pbias, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetMipmapLevelBias(pbias, hTexRef)
-{{endif}}
-
-{{if 'cuTexRefGetMipmapLevelClamp' in found_functions}}
-
-cdef CUresult cuTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetMipmapLevelClamp(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef)
-{{endif}}
-
-{{if 'cuTexRefGetMaxAnisotropy' in found_functions}}
-
-cdef CUresult cuTexRefGetMaxAnisotropy(int* pmaxAniso, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetMaxAnisotropy(pmaxAniso, hTexRef)
-{{endif}}
-
-{{if 'cuTexRefGetBorderColor' in found_functions}}
-
-cdef CUresult cuTexRefGetBorderColor(float* pBorderColor, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetBorderColor(pBorderColor, hTexRef)
-{{endif}}
-
-{{if 'cuTexRefGetFlags' in found_functions}}
-
-cdef CUresult cuTexRefGetFlags(unsigned int* pFlags, CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefGetFlags(pFlags, hTexRef)
-{{endif}}
-
-{{if 'cuTexRefCreate' in found_functions}}
-
-cdef CUresult cuTexRefCreate(CUtexref* pTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefCreate(pTexRef)
-{{endif}}
-
-{{if 'cuTexRefDestroy' in found_functions}}
-
-cdef CUresult cuTexRefDestroy(CUtexref hTexRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexRefDestroy(hTexRef)
-{{endif}}
-
-{{if 'cuSurfRefSetArray' in found_functions}}
-
-cdef CUresult cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuSurfRefSetArray(hSurfRef, hArray, Flags)
-{{endif}}
-
-{{if 'cuSurfRefGetArray' in found_functions}}
-
-cdef CUresult cuSurfRefGetArray(CUarray* phArray, CUsurfref hSurfRef) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuSurfRefGetArray(phArray, hSurfRef)
-{{endif}}
-
-{{if 'cuTexObjectCreate' in found_functions}}
-
-cdef CUresult cuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const CUDA_RESOURCE_VIEW_DESC* pResViewDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexObjectCreate(pTexObject, pResDesc, pTexDesc, pResViewDesc)
-{{endif}}
-
-{{if 'cuTexObjectDestroy' in found_functions}}
-
-cdef CUresult cuTexObjectDestroy(CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexObjectDestroy(texObject)
-{{endif}}
-
-{{if 'cuTexObjectGetResourceDesc' in found_functions}}
-
-cdef CUresult cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexObjectGetResourceDesc(pResDesc, texObject)
-{{endif}}
-
-{{if 'cuTexObjectGetTextureDesc' in found_functions}}
-
-cdef CUresult cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC* pTexDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexObjectGetTextureDesc(pTexDesc, texObject)
-{{endif}}
-
-{{if 'cuTexObjectGetResourceViewDesc' in found_functions}}
-
-cdef CUresult cuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC* pResViewDesc, CUtexObject texObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTexObjectGetResourceViewDesc(pResViewDesc, texObject)
-{{endif}}
-
-{{if 'cuSurfObjectCreate' in found_functions}}
-
-cdef CUresult cuSurfObjectCreate(CUsurfObject* pSurfObject, const CUDA_RESOURCE_DESC* pResDesc) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuSurfObjectCreate(pSurfObject, pResDesc)
-{{endif}}
-
-{{if 'cuSurfObjectDestroy' in found_functions}}
-
-cdef CUresult cuSurfObjectDestroy(CUsurfObject surfObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuSurfObjectDestroy(surfObject)
-{{endif}}
-
-{{if 'cuSurfObjectGetResourceDesc' in found_functions}}
-
-cdef CUresult cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUsurfObject surfObject) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuSurfObjectGetResourceDesc(pResDesc, surfObject)
-{{endif}}
-
-{{if 'cuTensorMapEncodeTiled' in found_functions}}
-
-cdef CUresult cuTensorMapEncodeTiled(CUtensorMap* tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void* globalAddress, const cuuint64_t* globalDim, const cuuint64_t* globalStrides, const cuuint32_t* boxDim, const cuuint32_t* elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTensorMapEncodeTiled(tensorMap, tensorDataType, tensorRank, globalAddress, globalDim, globalStrides, boxDim, elementStrides, interleave, swizzle, l2Promotion, oobFill)
-{{endif}}
-
-{{if 'cuTensorMapEncodeIm2col' in found_functions}}
-
-cdef CUresult cuTensorMapEncodeIm2col(CUtensorMap* tensorMap, CUtensorMapDataType tensorDataType, cuuint32_t tensorRank, void* globalAddress, const cuuint64_t* globalDim, const cuuint64_t* globalStrides, const int* pixelBoxLowerCorner, const int* pixelBoxUpperCorner, cuuint32_t channelsPerPixel, cuuint32_t pixelsPerColumn, const cuuint32_t* elementStrides, CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTensorMapEncodeIm2col(tensorMap, tensorDataType, tensorRank, globalAddress, globalDim, globalStrides, pixelBoxLowerCorner, pixelBoxUpperCorner, channelsPerPixel, pixelsPerColumn, elementStrides, interleave, swizzle, l2Promotion, oobFill)
-{{endif}}
-
-{{if 'cuTensorMapReplaceAddress' in found_functions}}
-
-cdef CUresult cuTensorMapReplaceAddress(CUtensorMap* tensorMap, void* globalAddress) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuTensorMapReplaceAddress(tensorMap, globalAddress)
-{{endif}}
-
-{{if 'cuDeviceCanAccessPeer' in found_functions}}
-
-cdef CUresult cuDeviceCanAccessPeer(int* canAccessPeer, CUdevice dev, CUdevice peerDev) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceCanAccessPeer(canAccessPeer, dev, peerDev)
-{{endif}}
-
-{{if 'cuCtxEnablePeerAccess' in found_functions}}
-
-cdef CUresult cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxEnablePeerAccess(peerContext, Flags)
-{{endif}}
-
-{{if 'cuCtxDisablePeerAccess' in found_functions}}
-
-cdef CUresult cuCtxDisablePeerAccess(CUcontext peerContext) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxDisablePeerAccess(peerContext)
-{{endif}}
-
-{{if 'cuDeviceGetP2PAttribute' in found_functions}}
-
-cdef CUresult cuDeviceGetP2PAttribute(int* value, CUdevice_P2PAttribute attrib, CUdevice srcDevice, CUdevice dstDevice) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetP2PAttribute(value, attrib, srcDevice, dstDevice)
-{{endif}}
-
-{{if 'cuGraphicsUnregisterResource' in found_functions}}
-
-cdef CUresult cuGraphicsUnregisterResource(CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsUnregisterResource(resource)
-{{endif}}
-
-{{if 'cuGraphicsSubResourceGetMappedArray' in found_functions}}
-
-cdef CUresult cuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsSubResourceGetMappedArray(pArray, resource, arrayIndex, mipLevel)
-{{endif}}
-
-{{if 'cuGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-
-cdef CUresult cuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsResourceGetMappedMipmappedArray(pMipmappedArray, resource)
-{{endif}}
-
-{{if 'cuGraphicsResourceGetMappedPointer_v2' in found_functions}}
-
-cdef CUresult cuGraphicsResourceGetMappedPointer(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsResourceGetMappedPointer_v2(pDevPtr, pSize, resource)
-{{endif}}
-
-{{if 'cuGraphicsResourceSetMapFlags_v2' in found_functions}}
-
-cdef CUresult cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsResourceSetMapFlags_v2(resource, flags)
-{{endif}}
-
-{{if 'cuGraphicsMapResources' in found_functions}}
-
-cdef CUresult cuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsMapResources(count, resources, hStream)
-{{endif}}
-
-{{if 'cuGraphicsUnmapResources' in found_functions}}
-
-cdef CUresult cuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsUnmapResources(count, resources, hStream)
-{{endif}}
-
-{{if 'cuGetProcAddress_v2' in found_functions}}
-
-cdef CUresult cuGetProcAddress(const char* symbol, void** pfn, int cudaVersion, cuuint64_t flags, CUdriverProcAddressQueryResult* symbolStatus) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGetProcAddress_v2(symbol, pfn, cudaVersion, flags, symbolStatus)
-{{endif}}
-
-{{if 'cuCoredumpGetAttribute' in found_functions}}
-
-cdef CUresult cuCoredumpGetAttribute(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCoredumpGetAttribute(attrib, value, size)
-{{endif}}
-
-{{if 'cuCoredumpGetAttributeGlobal' in found_functions}}
-
-cdef CUresult cuCoredumpGetAttributeGlobal(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCoredumpGetAttributeGlobal(attrib, value, size)
-{{endif}}
-
-{{if 'cuCoredumpSetAttribute' in found_functions}}
-
-cdef CUresult cuCoredumpSetAttribute(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCoredumpSetAttribute(attrib, value, size)
-{{endif}}
-
-{{if 'cuCoredumpSetAttributeGlobal' in found_functions}}
-
-cdef CUresult cuCoredumpSetAttributeGlobal(CUcoredumpSettings attrib, void* value, size_t* size) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCoredumpSetAttributeGlobal(attrib, value, size)
-{{endif}}
-
-{{if 'cuGetExportTable' in found_functions}}
-
-cdef CUresult cuGetExportTable(const void** ppExportTable, const CUuuid* pExportTableId) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGetExportTable(ppExportTable, pExportTableId)
-{{endif}}
-
-{{if 'cuGreenCtxCreate' in found_functions}}
-
-cdef CUresult cuGreenCtxCreate(CUgreenCtx* phCtx, CUdevResourceDesc desc, CUdevice dev, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGreenCtxCreate(phCtx, desc, dev, flags)
-{{endif}}
-
-{{if 'cuGreenCtxDestroy' in found_functions}}
-
-cdef CUresult cuGreenCtxDestroy(CUgreenCtx hCtx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGreenCtxDestroy(hCtx)
-{{endif}}
-
-{{if 'cuCtxFromGreenCtx' in found_functions}}
-
-cdef CUresult cuCtxFromGreenCtx(CUcontext* pContext, CUgreenCtx hCtx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxFromGreenCtx(pContext, hCtx)
-{{endif}}
-
-{{if 'cuDeviceGetDevResource' in found_functions}}
-
-cdef CUresult cuDeviceGetDevResource(CUdevice device, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDeviceGetDevResource(device, resource, typename)
-{{endif}}
-
-{{if 'cuCtxGetDevResource' in found_functions}}
-
-cdef CUresult cuCtxGetDevResource(CUcontext hCtx, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuCtxGetDevResource(hCtx, resource, typename)
-{{endif}}
-
-{{if 'cuGreenCtxGetDevResource' in found_functions}}
-
-cdef CUresult cuGreenCtxGetDevResource(CUgreenCtx hCtx, CUdevResource* resource, CUdevResourceType typename) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGreenCtxGetDevResource(hCtx, resource, typename)
-{{endif}}
-
-{{if 'cuDevSmResourceSplitByCount' in found_functions}}
-
-cdef CUresult cuDevSmResourceSplitByCount(CUdevResource* result, unsigned int* nbGroups, const CUdevResource* input, CUdevResource* remaining, unsigned int useFlags, unsigned int minCount) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDevSmResourceSplitByCount(result, nbGroups, input, remaining, useFlags, minCount)
-{{endif}}
-
-{{if 'cuDevResourceGenerateDesc' in found_functions}}
-
-cdef CUresult cuDevResourceGenerateDesc(CUdevResourceDesc* phDesc, CUdevResource* resources, unsigned int nbResources) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuDevResourceGenerateDesc(phDesc, resources, nbResources)
-{{endif}}
-
-{{if 'cuGreenCtxRecordEvent' in found_functions}}
-
-cdef CUresult cuGreenCtxRecordEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGreenCtxRecordEvent(hCtx, hEvent)
-{{endif}}
-
-{{if 'cuGreenCtxWaitEvent' in found_functions}}
-
-cdef CUresult cuGreenCtxWaitEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGreenCtxWaitEvent(hCtx, hEvent)
-{{endif}}
-
-{{if 'cuStreamGetGreenCtx' in found_functions}}
-
-cdef CUresult cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuStreamGetGreenCtx(hStream, phCtx)
-{{endif}}
-
-{{if 'cuGreenCtxStreamCreate' in found_functions}}
-
-cdef CUresult cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGreenCtxStreamCreate(phStream, greenCtx, flags, priority)
-{{endif}}
-
-{{if 'cuProfilerStart' in found_functions}}
-
-cdef CUresult cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuProfilerStart()
-{{endif}}
-
-{{if 'cuProfilerStop' in found_functions}}
-
-cdef CUresult cuProfilerStop() except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuProfilerStop()
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsEGLRegisterImage(CUgraphicsResource* pCudaResource, EGLImageKHR image, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsEGLRegisterImage(pCudaResource, image, flags)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamConsumerConnect(CUeglStreamConnection* conn, EGLStreamKHR stream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEGLStreamConsumerConnect(conn, stream)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamConsumerConnectWithFlags(CUeglStreamConnection* conn, EGLStreamKHR stream, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEGLStreamConsumerConnectWithFlags(conn, stream, flags)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamConsumerDisconnect(CUeglStreamConnection* conn) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEGLStreamConsumerDisconnect(conn)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamConsumerAcquireFrame(CUeglStreamConnection* conn, CUgraphicsResource* pCudaResource, CUstream* pStream, unsigned int timeout) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, timeout)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamConsumerReleaseFrame(CUeglStreamConnection* conn, CUgraphicsResource pCudaResource, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamProducerConnect(CUeglStreamConnection* conn, EGLStreamKHR stream, EGLint width, EGLint height) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEGLStreamProducerConnect(conn, stream, width, height)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamProducerDisconnect(CUeglStreamConnection* conn) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEGLStreamProducerDisconnect(conn)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamProducerPresentFrame(CUeglStreamConnection* conn, CUeglFrame eglframe, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEGLStreamProducerPresentFrame(conn, eglframe, pStream)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEGLStreamProducerReturnFrame(CUeglStreamConnection* conn, CUeglFrame* eglframe, CUstream* pStream) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEGLStreamProducerReturnFrame(conn, eglframe, pStream)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsResourceGetMappedEglFrame(CUeglFrame* eglFrame, CUgraphicsResource resource, unsigned int index, unsigned int mipLevel) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsResourceGetMappedEglFrame(eglFrame, resource, index, mipLevel)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuEventCreateFromEGLSync(CUevent* phEvent, EGLSyncKHR eglSync, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuEventCreateFromEGLSync(phEvent, eglSync, flags)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsGLRegisterBuffer(pCudaResource, buffer, Flags)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsGLRegisterImage(pCudaResource, image, target, Flags)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGLGetDevices(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGLGetDevices_v2(pCudaDeviceCount, pCudaDevices, cudaDeviceCount, deviceList)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuVDPAUGetDevice(CUdevice* pDevice, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuVDPAUGetDevice(pDevice, vdpDevice, vdpGetProcAddress)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuVDPAUCtxCreate(CUcontext* pCtx, unsigned int flags, CUdevice device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuVDPAUCtxCreate_v2(pCtx, flags, device, vdpDevice, vdpGetProcAddress)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsVDPAURegisterVideoSurface(CUgraphicsResource* pCudaResource, VdpVideoSurface vdpSurface, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsVDPAURegisterVideoSurface(pCudaResource, vdpSurface, flags)
-{{endif}}
-
-{{if True}}
-
-cdef CUresult cuGraphicsVDPAURegisterOutputSurface(CUgraphicsResource* pCudaResource, VdpOutputSurface vdpSurface, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil:
-    return cydriver._cuGraphicsVDPAURegisterOutputSurface(pCudaResource, vdpSurface, flags)
-{{endif}}
diff --git a/cuda_bindings/cuda/bindings/cynvrtc.pxd.in b/cuda_bindings/cuda/bindings/cynvrtc.pxd.in
deleted file mode 100644
index 20e3fef3..00000000
--- a/cuda_bindings/cuda/bindings/cynvrtc.pxd.in
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-
-from libc.stdint cimport uint32_t, uint64_t
-
-cdef extern from "nvrtc.h":
-
-    ctypedef enum nvrtcResult:
-        NVRTC_SUCCESS = 0
-        NVRTC_ERROR_OUT_OF_MEMORY = 1
-        NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2
-        NVRTC_ERROR_INVALID_INPUT = 3
-        NVRTC_ERROR_INVALID_PROGRAM = 4
-        NVRTC_ERROR_INVALID_OPTION = 5
-        NVRTC_ERROR_COMPILATION = 6
-        NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7
-        NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8
-        NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9
-        NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10
-        NVRTC_ERROR_INTERNAL_ERROR = 11
-        NVRTC_ERROR_TIME_FILE_WRITE_FAILED = 12
-
-    cdef struct _nvrtcProgram:
-        pass
-    ctypedef _nvrtcProgram* nvrtcProgram
-
-{{if 'nvrtcGetErrorString' in found_functions}}
-
-cdef const char* nvrtcGetErrorString(nvrtcResult result) except ?NULL nogil
-{{endif}}
-
-{{if 'nvrtcVersion' in found_functions}}
-
-cdef nvrtcResult nvrtcVersion(int* major, int* minor) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetNumSupportedArchs' in found_functions}}
-
-cdef nvrtcResult nvrtcGetNumSupportedArchs(int* numArchs) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetSupportedArchs' in found_functions}}
-
-cdef nvrtcResult nvrtcGetSupportedArchs(int* supportedArchs) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcCreateProgram' in found_functions}}
-
-cdef nvrtcResult nvrtcCreateProgram(nvrtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcDestroyProgram' in found_functions}}
-
-cdef nvrtcResult nvrtcDestroyProgram(nvrtcProgram* prog) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcCompileProgram' in found_functions}}
-
-cdef nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char** options) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetPTXSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t* ptxSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetPTX' in found_functions}}
-
-cdef nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char* ptx) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetCUBINSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetCUBINSize(nvrtcProgram prog, size_t* cubinSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetCUBIN' in found_functions}}
-
-cdef nvrtcResult nvrtcGetCUBIN(nvrtcProgram prog, char* cubin) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetNVVMSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetNVVMSize(nvrtcProgram prog, size_t* nvvmSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetNVVM' in found_functions}}
-
-cdef nvrtcResult nvrtcGetNVVM(nvrtcProgram prog, char* nvvm) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetLTOIRSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t* LTOIRSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetLTOIR' in found_functions}}
-
-cdef nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char* LTOIR) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetOptiXIRSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetOptiXIRSize(nvrtcProgram prog, size_t* optixirSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetOptiXIR' in found_functions}}
-
-cdef nvrtcResult nvrtcGetOptiXIR(nvrtcProgram prog, char* optixir) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetProgramLogSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t* logSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetProgramLog' in found_functions}}
-
-cdef nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char* log) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcAddNameExpression' in found_functions}}
-
-cdef nvrtcResult nvrtcAddNameExpression(nvrtcProgram prog, const char* name_expression) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
-{{if 'nvrtcGetLoweredName' in found_functions}}
-
-cdef nvrtcResult nvrtcGetLoweredName(nvrtcProgram prog, const char* name_expression, const char** lowered_name) except ?NVRTC_ERROR_INVALID_INPUT nogil
-{{endif}}
-
diff --git a/cuda_bindings/cuda/bindings/cynvrtc.pyx.in b/cuda_bindings/cuda/bindings/cynvrtc.pyx.in
deleted file mode 100644
index cf02cce9..00000000
--- a/cuda_bindings/cuda/bindings/cynvrtc.pyx.in
+++ /dev/null
@@ -1,134 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-cimport cuda.bindings._bindings.cynvrtc as cynvrtc
-
-{{if 'nvrtcGetErrorString' in found_functions}}
-
-cdef const char* nvrtcGetErrorString(nvrtcResult result) except ?NULL nogil:
-    return cynvrtc._nvrtcGetErrorString(result)
-{{endif}}
-
-{{if 'nvrtcVersion' in found_functions}}
-
-cdef nvrtcResult nvrtcVersion(int* major, int* minor) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcVersion(major, minor)
-{{endif}}
-
-{{if 'nvrtcGetNumSupportedArchs' in found_functions}}
-
-cdef nvrtcResult nvrtcGetNumSupportedArchs(int* numArchs) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetNumSupportedArchs(numArchs)
-{{endif}}
-
-{{if 'nvrtcGetSupportedArchs' in found_functions}}
-
-cdef nvrtcResult nvrtcGetSupportedArchs(int* supportedArchs) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetSupportedArchs(supportedArchs)
-{{endif}}
-
-{{if 'nvrtcCreateProgram' in found_functions}}
-
-cdef nvrtcResult nvrtcCreateProgram(nvrtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcCreateProgram(prog, src, name, numHeaders, headers, includeNames)
-{{endif}}
-
-{{if 'nvrtcDestroyProgram' in found_functions}}
-
-cdef nvrtcResult nvrtcDestroyProgram(nvrtcProgram* prog) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcDestroyProgram(prog)
-{{endif}}
-
-{{if 'nvrtcCompileProgram' in found_functions}}
-
-cdef nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char** options) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcCompileProgram(prog, numOptions, options)
-{{endif}}
-
-{{if 'nvrtcGetPTXSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t* ptxSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetPTXSize(prog, ptxSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetPTX' in found_functions}}
-
-cdef nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char* ptx) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetPTX(prog, ptx)
-{{endif}}
-
-{{if 'nvrtcGetCUBINSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetCUBINSize(nvrtcProgram prog, size_t* cubinSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetCUBINSize(prog, cubinSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetCUBIN' in found_functions}}
-
-cdef nvrtcResult nvrtcGetCUBIN(nvrtcProgram prog, char* cubin) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetCUBIN(prog, cubin)
-{{endif}}
-
-{{if 'nvrtcGetNVVMSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetNVVMSize(nvrtcProgram prog, size_t* nvvmSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetNVVMSize(prog, nvvmSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetNVVM' in found_functions}}
-
-cdef nvrtcResult nvrtcGetNVVM(nvrtcProgram prog, char* nvvm) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetNVVM(prog, nvvm)
-{{endif}}
-
-{{if 'nvrtcGetLTOIRSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t* LTOIRSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetLTOIRSize(prog, LTOIRSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetLTOIR' in found_functions}}
-
-cdef nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char* LTOIR) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetLTOIR(prog, LTOIR)
-{{endif}}
-
-{{if 'nvrtcGetOptiXIRSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetOptiXIRSize(nvrtcProgram prog, size_t* optixirSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetOptiXIRSize(prog, optixirSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetOptiXIR' in found_functions}}
-
-cdef nvrtcResult nvrtcGetOptiXIR(nvrtcProgram prog, char* optixir) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetOptiXIR(prog, optixir)
-{{endif}}
-
-{{if 'nvrtcGetProgramLogSize' in found_functions}}
-
-cdef nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t* logSizeRet) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetProgramLogSize(prog, logSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetProgramLog' in found_functions}}
-
-cdef nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char* log) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetProgramLog(prog, log)
-{{endif}}
-
-{{if 'nvrtcAddNameExpression' in found_functions}}
-
-cdef nvrtcResult nvrtcAddNameExpression(nvrtcProgram prog, const char* name_expression) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcAddNameExpression(prog, name_expression)
-{{endif}}
-
-{{if 'nvrtcGetLoweredName' in found_functions}}
-
-cdef nvrtcResult nvrtcGetLoweredName(nvrtcProgram prog, const char* name_expression, const char** lowered_name) except ?NVRTC_ERROR_INVALID_INPUT nogil:
-    return cynvrtc._nvrtcGetLoweredName(prog, name_expression, lowered_name)
-{{endif}}
diff --git a/cuda_bindings/cuda/bindings/cyruntime.pxd.in b/cuda_bindings/cuda/bindings/cyruntime.pxd.in
deleted file mode 100644
index 4372558c..00000000
--- a/cuda_bindings/cuda/bindings/cyruntime.pxd.in
+++ /dev/null
@@ -1,3312 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-
-from libc.stdint cimport uint32_t, uint64_t
-
-cdef extern from "vector_types.h":
-
-    cdef struct dim3:
-        unsigned int x
-        unsigned int y
-        unsigned int z
-
-cdef extern from "driver_types.h":
-
-    cdef enum cudaError:
-        cudaSuccess = 0
-        cudaErrorInvalidValue = 1
-        cudaErrorMemoryAllocation = 2
-        cudaErrorInitializationError = 3
-        cudaErrorCudartUnloading = 4
-        cudaErrorProfilerDisabled = 5
-        cudaErrorProfilerNotInitialized = 6
-        cudaErrorProfilerAlreadyStarted = 7
-        cudaErrorProfilerAlreadyStopped = 8
-        cudaErrorInvalidConfiguration = 9
-        cudaErrorInvalidPitchValue = 12
-        cudaErrorInvalidSymbol = 13
-        cudaErrorInvalidHostPointer = 16
-        cudaErrorInvalidDevicePointer = 17
-        cudaErrorInvalidTexture = 18
-        cudaErrorInvalidTextureBinding = 19
-        cudaErrorInvalidChannelDescriptor = 20
-        cudaErrorInvalidMemcpyDirection = 21
-        cudaErrorAddressOfConstant = 22
-        cudaErrorTextureFetchFailed = 23
-        cudaErrorTextureNotBound = 24
-        cudaErrorSynchronizationError = 25
-        cudaErrorInvalidFilterSetting = 26
-        cudaErrorInvalidNormSetting = 27
-        cudaErrorMixedDeviceExecution = 28
-        cudaErrorNotYetImplemented = 31
-        cudaErrorMemoryValueTooLarge = 32
-        cudaErrorStubLibrary = 34
-        cudaErrorInsufficientDriver = 35
-        cudaErrorCallRequiresNewerDriver = 36
-        cudaErrorInvalidSurface = 37
-        cudaErrorDuplicateVariableName = 43
-        cudaErrorDuplicateTextureName = 44
-        cudaErrorDuplicateSurfaceName = 45
-        cudaErrorDevicesUnavailable = 46
-        cudaErrorIncompatibleDriverContext = 49
-        cudaErrorMissingConfiguration = 52
-        cudaErrorPriorLaunchFailure = 53
-        cudaErrorLaunchMaxDepthExceeded = 65
-        cudaErrorLaunchFileScopedTex = 66
-        cudaErrorLaunchFileScopedSurf = 67
-        cudaErrorSyncDepthExceeded = 68
-        cudaErrorLaunchPendingCountExceeded = 69
-        cudaErrorInvalidDeviceFunction = 98
-        cudaErrorNoDevice = 100
-        cudaErrorInvalidDevice = 101
-        cudaErrorDeviceNotLicensed = 102
-        cudaErrorSoftwareValidityNotEstablished = 103
-        cudaErrorStartupFailure = 127
-        cudaErrorInvalidKernelImage = 200
-        cudaErrorDeviceUninitialized = 201
-        cudaErrorMapBufferObjectFailed = 205
-        cudaErrorUnmapBufferObjectFailed = 206
-        cudaErrorArrayIsMapped = 207
-        cudaErrorAlreadyMapped = 208
-        cudaErrorNoKernelImageForDevice = 209
-        cudaErrorAlreadyAcquired = 210
-        cudaErrorNotMapped = 211
-        cudaErrorNotMappedAsArray = 212
-        cudaErrorNotMappedAsPointer = 213
-        cudaErrorECCUncorrectable = 214
-        cudaErrorUnsupportedLimit = 215
-        cudaErrorDeviceAlreadyInUse = 216
-        cudaErrorPeerAccessUnsupported = 217
-        cudaErrorInvalidPtx = 218
-        cudaErrorInvalidGraphicsContext = 219
-        cudaErrorNvlinkUncorrectable = 220
-        cudaErrorJitCompilerNotFound = 221
-        cudaErrorUnsupportedPtxVersion = 222
-        cudaErrorJitCompilationDisabled = 223
-        cudaErrorUnsupportedExecAffinity = 224
-        cudaErrorUnsupportedDevSideSync = 225
-        cudaErrorInvalidSource = 300
-        cudaErrorFileNotFound = 301
-        cudaErrorSharedObjectSymbolNotFound = 302
-        cudaErrorSharedObjectInitFailed = 303
-        cudaErrorOperatingSystem = 304
-        cudaErrorInvalidResourceHandle = 400
-        cudaErrorIllegalState = 401
-        cudaErrorLossyQuery = 402
-        cudaErrorSymbolNotFound = 500
-        cudaErrorNotReady = 600
-        cudaErrorIllegalAddress = 700
-        cudaErrorLaunchOutOfResources = 701
-        cudaErrorLaunchTimeout = 702
-        cudaErrorLaunchIncompatibleTexturing = 703
-        cudaErrorPeerAccessAlreadyEnabled = 704
-        cudaErrorPeerAccessNotEnabled = 705
-        cudaErrorSetOnActiveProcess = 708
-        cudaErrorContextIsDestroyed = 709
-        cudaErrorAssert = 710
-        cudaErrorTooManyPeers = 711
-        cudaErrorHostMemoryAlreadyRegistered = 712
-        cudaErrorHostMemoryNotRegistered = 713
-        cudaErrorHardwareStackError = 714
-        cudaErrorIllegalInstruction = 715
-        cudaErrorMisalignedAddress = 716
-        cudaErrorInvalidAddressSpace = 717
-        cudaErrorInvalidPc = 718
-        cudaErrorLaunchFailure = 719
-        cudaErrorCooperativeLaunchTooLarge = 720
-        cudaErrorNotPermitted = 800
-        cudaErrorNotSupported = 801
-        cudaErrorSystemNotReady = 802
-        cudaErrorSystemDriverMismatch = 803
-        cudaErrorCompatNotSupportedOnDevice = 804
-        cudaErrorMpsConnectionFailed = 805
-        cudaErrorMpsRpcFailure = 806
-        cudaErrorMpsServerNotReady = 807
-        cudaErrorMpsMaxClientsReached = 808
-        cudaErrorMpsMaxConnectionsReached = 809
-        cudaErrorMpsClientTerminated = 810
-        cudaErrorCdpNotSupported = 811
-        cudaErrorCdpVersionMismatch = 812
-        cudaErrorStreamCaptureUnsupported = 900
-        cudaErrorStreamCaptureInvalidated = 901
-        cudaErrorStreamCaptureMerge = 902
-        cudaErrorStreamCaptureUnmatched = 903
-        cudaErrorStreamCaptureUnjoined = 904
-        cudaErrorStreamCaptureIsolation = 905
-        cudaErrorStreamCaptureImplicit = 906
-        cudaErrorCapturedEvent = 907
-        cudaErrorStreamCaptureWrongThread = 908
-        cudaErrorTimeout = 909
-        cudaErrorGraphExecUpdateFailure = 910
-        cudaErrorExternalDevice = 911
-        cudaErrorInvalidClusterSize = 912
-        cudaErrorFunctionNotLoaded = 913
-        cudaErrorInvalidResourceType = 914
-        cudaErrorInvalidResourceConfiguration = 915
-        cudaErrorUnknown = 999
-        cudaErrorApiFailureBase = 10000
-
-    ctypedef cudaError cudaError_t
-
-    cdef struct cudaChannelFormatDesc:
-        int x
-        int y
-        int z
-        int w
-        cudaChannelFormatKind f
-
-    cdef struct cudaArray:
-        pass
-    ctypedef cudaArray* cudaArray_t
-
-    cdef struct cudaArray:
-        pass
-    ctypedef cudaArray* cudaArray_const_t
-
-    cdef struct cudaMipmappedArray:
-        pass
-    ctypedef cudaMipmappedArray* cudaMipmappedArray_t
-
-    cdef struct cudaMipmappedArray:
-        pass
-    ctypedef cudaMipmappedArray* cudaMipmappedArray_const_t
-
-    cdef struct anon_struct0:
-        unsigned int width
-        unsigned int height
-        unsigned int depth
-
-    cdef struct cudaArraySparseProperties:
-        anon_struct0 tileExtent
-        unsigned int miptailFirstLevel
-        unsigned long long miptailSize
-        unsigned int flags
-        unsigned int reserved[4]
-
-    cdef struct cudaArrayMemoryRequirements:
-        size_t size
-        size_t alignment
-        unsigned int reserved[4]
-
-    cdef struct cudaPitchedPtr:
-        void* ptr
-        size_t pitch
-        size_t xsize
-        size_t ysize
-
-    cdef struct cudaExtent:
-        size_t width
-        size_t height
-        size_t depth
-
-    cdef struct cudaPos:
-        size_t x
-        size_t y
-        size_t z
-
-    cdef struct cudaMemcpy3DParms:
-        cudaArray_t srcArray
-        cudaPos srcPos
-        cudaPitchedPtr srcPtr
-        cudaArray_t dstArray
-        cudaPos dstPos
-        cudaPitchedPtr dstPtr
-        cudaExtent extent
-        cudaMemcpyKind kind
-
-    cdef struct cudaMemcpyNodeParams:
-        int flags
-        int reserved[3]
-        cudaMemcpy3DParms copyParams
-
-    cdef struct cudaMemcpy3DPeerParms:
-        cudaArray_t srcArray
-        cudaPos srcPos
-        cudaPitchedPtr srcPtr
-        int srcDevice
-        cudaArray_t dstArray
-        cudaPos dstPos
-        cudaPitchedPtr dstPtr
-        int dstDevice
-        cudaExtent extent
-
-    cdef struct cudaMemsetParams:
-        void* dst
-        size_t pitch
-        unsigned int value
-        unsigned int elementSize
-        size_t width
-        size_t height
-
-    cdef struct cudaMemsetParamsV2:
-        void* dst
-        size_t pitch
-        unsigned int value
-        unsigned int elementSize
-        size_t width
-        size_t height
-
-    cdef struct cudaAccessPolicyWindow:
-        void* base_ptr
-        size_t num_bytes
-        float hitRatio
-        cudaAccessProperty hitProp
-        cudaAccessProperty missProp
-
-    ctypedef void (*cudaHostFn_t)(void* userData)
-
-    cdef struct cudaHostNodeParams:
-        cudaHostFn_t fn
-        void* userData
-
-    cdef struct cudaHostNodeParamsV2:
-        cudaHostFn_t fn
-        void* userData
-
-    cdef struct anon_struct1:
-        cudaArray_t array
-
-    cdef struct anon_struct2:
-        cudaMipmappedArray_t mipmap
-
-    cdef struct anon_struct3:
-        void* devPtr
-        cudaChannelFormatDesc desc
-        size_t sizeInBytes
-
-    cdef struct anon_struct4:
-        void* devPtr
-        cudaChannelFormatDesc desc
-        size_t width
-        size_t height
-        size_t pitchInBytes
-
-    cdef union anon_union0:
-        anon_struct1 array
-        anon_struct2 mipmap
-        anon_struct3 linear
-        anon_struct4 pitch2D
-
-    cdef struct cudaResourceDesc:
-        cudaResourceType resType
-        anon_union0 res
-
-    cdef struct cudaResourceViewDesc:
-        cudaResourceViewFormat format
-        size_t width
-        size_t height
-        size_t depth
-        unsigned int firstMipmapLevel
-        unsigned int lastMipmapLevel
-        unsigned int firstLayer
-        unsigned int lastLayer
-
-    cdef struct cudaPointerAttributes:
-        cudaMemoryType type
-        int device
-        void* devicePointer
-        void* hostPointer
-
-    cdef struct cudaFuncAttributes:
-        size_t sharedSizeBytes
-        size_t constSizeBytes
-        size_t localSizeBytes
-        int maxThreadsPerBlock
-        int numRegs
-        int ptxVersion
-        int binaryVersion
-        int cacheModeCA
-        int maxDynamicSharedSizeBytes
-        int preferredShmemCarveout
-        int clusterDimMustBeSet
-        int requiredClusterWidth
-        int requiredClusterHeight
-        int requiredClusterDepth
-        int clusterSchedulingPolicyPreference
-        int nonPortableClusterSizeAllowed
-        int reserved[16]
-
-    cdef struct cudaMemLocation:
-        cudaMemLocationType type
-        int id
-
-    cdef struct cudaMemAccessDesc:
-        cudaMemLocation location
-        cudaMemAccessFlags flags
-
-    cdef struct cudaMemPoolProps:
-        cudaMemAllocationType allocType
-        cudaMemAllocationHandleType handleTypes
-        cudaMemLocation location
-        void* win32SecurityAttributes
-        size_t maxSize
-        unsigned short usage
-        unsigned char reserved[54]
-
-    cdef struct cudaMemPoolPtrExportData:
-        unsigned char reserved[64]
-
-    cdef struct cudaMemAllocNodeParams:
-        cudaMemPoolProps poolProps
-        const cudaMemAccessDesc* accessDescs
-        size_t accessDescCount
-        size_t bytesize
-        void* dptr
-
-    cdef struct cudaMemAllocNodeParamsV2:
-        cudaMemPoolProps poolProps
-        const cudaMemAccessDesc* accessDescs
-        size_t accessDescCount
-        size_t bytesize
-        void* dptr
-
-    cdef struct cudaMemFreeNodeParams:
-        void* dptr
-
-    cdef struct CUuuid_st:
-        char bytes[16]
-
-    ctypedef CUuuid_st CUuuid
-
-    ctypedef CUuuid_st cudaUUID_t
-
-    cdef struct cudaDeviceProp:
-        char name[256]
-        cudaUUID_t uuid
-        char luid[8]
-        unsigned int luidDeviceNodeMask
-        size_t totalGlobalMem
-        size_t sharedMemPerBlock
-        int regsPerBlock
-        int warpSize
-        size_t memPitch
-        int maxThreadsPerBlock
-        int maxThreadsDim[3]
-        int maxGridSize[3]
-        int clockRate
-        size_t totalConstMem
-        int major
-        int minor
-        size_t textureAlignment
-        size_t texturePitchAlignment
-        int deviceOverlap
-        int multiProcessorCount
-        int kernelExecTimeoutEnabled
-        int integrated
-        int canMapHostMemory
-        int computeMode
-        int maxTexture1D
-        int maxTexture1DMipmap
-        int maxTexture1DLinear
-        int maxTexture2D[2]
-        int maxTexture2DMipmap[2]
-        int maxTexture2DLinear[3]
-        int maxTexture2DGather[2]
-        int maxTexture3D[3]
-        int maxTexture3DAlt[3]
-        int maxTextureCubemap
-        int maxTexture1DLayered[2]
-        int maxTexture2DLayered[3]
-        int maxTextureCubemapLayered[2]
-        int maxSurface1D
-        int maxSurface2D[2]
-        int maxSurface3D[3]
-        int maxSurface1DLayered[2]
-        int maxSurface2DLayered[3]
-        int maxSurfaceCubemap
-        int maxSurfaceCubemapLayered[2]
-        size_t surfaceAlignment
-        int concurrentKernels
-        int ECCEnabled
-        int pciBusID
-        int pciDeviceID
-        int pciDomainID
-        int tccDriver
-        int asyncEngineCount
-        int unifiedAddressing
-        int memoryClockRate
-        int memoryBusWidth
-        int l2CacheSize
-        int persistingL2CacheMaxSize
-        int maxThreadsPerMultiProcessor
-        int streamPrioritiesSupported
-        int globalL1CacheSupported
-        int localL1CacheSupported
-        size_t sharedMemPerMultiprocessor
-        int regsPerMultiprocessor
-        int managedMemory
-        int isMultiGpuBoard
-        int multiGpuBoardGroupID
-        int hostNativeAtomicSupported
-        int singleToDoublePrecisionPerfRatio
-        int pageableMemoryAccess
-        int concurrentManagedAccess
-        int computePreemptionSupported
-        int canUseHostPointerForRegisteredMem
-        int cooperativeLaunch
-        int cooperativeMultiDeviceLaunch
-        size_t sharedMemPerBlockOptin
-        int pageableMemoryAccessUsesHostPageTables
-        int directManagedMemAccessFromHost
-        int maxBlocksPerMultiProcessor
-        int accessPolicyMaxWindowSize
-        size_t reservedSharedMemPerBlock
-        int hostRegisterSupported
-        int sparseCudaArraySupported
-        int hostRegisterReadOnlySupported
-        int timelineSemaphoreInteropSupported
-        int memoryPoolsSupported
-        int gpuDirectRDMASupported
-        unsigned int gpuDirectRDMAFlushWritesOptions
-        int gpuDirectRDMAWritesOrdering
-        unsigned int memoryPoolSupportedHandleTypes
-        int deferredMappingCudaArraySupported
-        int ipcEventSupported
-        int clusterLaunch
-        int unifiedFunctionPointers
-        int reserved2[2]
-        int reserved1[1]
-        int reserved[60]
-
-    cdef struct cudaIpcEventHandle_st:
-        char reserved[64]
-
-    ctypedef cudaIpcEventHandle_st cudaIpcEventHandle_t
-
-    cdef struct cudaIpcMemHandle_st:
-        char reserved[64]
-
-    ctypedef cudaIpcMemHandle_st cudaIpcMemHandle_t
-
-    cdef struct cudaMemFabricHandle_st:
-        char reserved[64]
-
-    ctypedef cudaMemFabricHandle_st cudaMemFabricHandle_t
-
-    cdef struct anon_struct5:
-        void* handle
-        const void* name
-
-    cdef union anon_union1:
-        int fd
-        anon_struct5 win32
-        const void* nvSciBufObject
-
-    cdef struct cudaExternalMemoryHandleDesc:
-        cudaExternalMemoryHandleType type
-        anon_union1 handle
-        unsigned long long size
-        unsigned int flags
-
-    cdef struct cudaExternalMemoryBufferDesc:
-        unsigned long long offset
-        unsigned long long size
-        unsigned int flags
-
-    cdef struct cudaExternalMemoryMipmappedArrayDesc:
-        unsigned long long offset
-        cudaChannelFormatDesc formatDesc
-        cudaExtent extent
-        unsigned int flags
-        unsigned int numLevels
-
-    cdef struct anon_struct6:
-        void* handle
-        const void* name
-
-    cdef union anon_union2:
-        int fd
-        anon_struct6 win32
-        const void* nvSciSyncObj
-
-    cdef struct cudaExternalSemaphoreHandleDesc:
-        cudaExternalSemaphoreHandleType type
-        anon_union2 handle
-        unsigned int flags
-
-    cdef struct anon_struct13:
-        unsigned long long value
-
-    cdef union anon_union5:
-        void* fence
-        unsigned long long reserved
-
-    cdef struct anon_struct14:
-        unsigned long long key
-
-    cdef struct anon_struct15:
-        anon_struct13 fence
-        anon_union5 nvSciSync
-        anon_struct14 keyedMutex
-        unsigned int reserved[12]
-
-    cdef struct cudaExternalSemaphoreSignalParams:
-        anon_struct15 params
-        unsigned int flags
-        unsigned int reserved[16]
-
-    cdef struct anon_struct16:
-        unsigned long long value
-
-    cdef union anon_union6:
-        void* fence
-        unsigned long long reserved
-
-    cdef struct anon_struct17:
-        unsigned long long key
-        unsigned int timeoutMs
-
-    cdef struct anon_struct18:
-        anon_struct16 fence
-        anon_union6 nvSciSync
-        anon_struct17 keyedMutex
-        unsigned int reserved[10]
-
-    cdef struct cudaExternalSemaphoreWaitParams:
-        anon_struct18 params
-        unsigned int flags
-        unsigned int reserved[16]
-
-    cdef struct CUstream_st:
-        pass
-    ctypedef CUstream_st* cudaStream_t
-
-    cdef struct CUevent_st:
-        pass
-    ctypedef CUevent_st* cudaEvent_t
-
-    cdef struct cudaGraphicsResource:
-        pass
-    ctypedef cudaGraphicsResource* cudaGraphicsResource_t
-
-    cdef struct CUexternalMemory_st:
-        pass
-    ctypedef CUexternalMemory_st* cudaExternalMemory_t
-
-    cdef struct CUexternalSemaphore_st:
-        pass
-    ctypedef CUexternalSemaphore_st* cudaExternalSemaphore_t
-
-    cdef struct CUgraph_st:
-        pass
-    ctypedef CUgraph_st* cudaGraph_t
-
-    cdef struct CUgraphNode_st:
-        pass
-    ctypedef CUgraphNode_st* cudaGraphNode_t
-
-    cdef struct CUuserObject_st:
-        pass
-    ctypedef CUuserObject_st* cudaUserObject_t
-
-    ctypedef unsigned long long cudaGraphConditionalHandle
-
-    cdef struct CUfunc_st:
-        pass
-    ctypedef CUfunc_st* cudaFunction_t
-
-    cdef struct CUkern_st:
-        pass
-    ctypedef CUkern_st* cudaKernel_t
-
-    cdef struct CUmemPoolHandle_st:
-        pass
-    ctypedef CUmemPoolHandle_st* cudaMemPool_t
-
-    cdef struct cudaKernelNodeParams:
-        void* func
-        dim3 gridDim
-        dim3 blockDim
-        unsigned int sharedMemBytes
-        void** kernelParams
-        void** extra
-
-    cdef struct cudaKernelNodeParamsV2:
-        void* func
-        dim3 gridDim
-        dim3 blockDim
-        unsigned int sharedMemBytes
-        void** kernelParams
-        void** extra
-
-    cdef struct cudaExternalSemaphoreSignalNodeParams:
-        cudaExternalSemaphore_t* extSemArray
-        const cudaExternalSemaphoreSignalParams* paramsArray
-        unsigned int numExtSems
-
-    cdef struct cudaExternalSemaphoreSignalNodeParamsV2:
-        cudaExternalSemaphore_t* extSemArray
-        const cudaExternalSemaphoreSignalParams* paramsArray
-        unsigned int numExtSems
-
-    cdef struct cudaExternalSemaphoreWaitNodeParams:
-        cudaExternalSemaphore_t* extSemArray
-        const cudaExternalSemaphoreWaitParams* paramsArray
-        unsigned int numExtSems
-
-    cdef struct cudaExternalSemaphoreWaitNodeParamsV2:
-        cudaExternalSemaphore_t* extSemArray
-        const cudaExternalSemaphoreWaitParams* paramsArray
-        unsigned int numExtSems
-
-    cdef struct cudaConditionalNodeParams:
-        cudaGraphConditionalHandle handle
-        cudaGraphConditionalNodeType type
-        unsigned int size
-        cudaGraph_t* phGraph_out
-
-    cdef struct cudaChildGraphNodeParams:
-        cudaGraph_t graph
-
-    cdef struct cudaEventRecordNodeParams:
-        cudaEvent_t event
-
-    cdef struct cudaEventWaitNodeParams:
-        cudaEvent_t event
-
-    cdef struct cudaGraphNodeParams:
-        cudaGraphNodeType type
-        int reserved0[3]
-        long long reserved1[29]
-        cudaKernelNodeParamsV2 kernel
-        cudaMemcpyNodeParams memcpy
-        cudaMemsetParamsV2 memset
-        cudaHostNodeParamsV2 host
-        cudaChildGraphNodeParams graph
-        cudaEventWaitNodeParams eventWait
-        cudaEventRecordNodeParams eventRecord
-        cudaExternalSemaphoreSignalNodeParamsV2 extSemSignal
-        cudaExternalSemaphoreWaitNodeParamsV2 extSemWait
-        cudaMemAllocNodeParamsV2 alloc
-        cudaMemFreeNodeParams free
-        cudaConditionalNodeParams conditional
-        long long reserved2
-
-    cdef enum cudaGraphDependencyType_enum:
-        cudaGraphDependencyTypeDefault = 0
-        cudaGraphDependencyTypeProgrammatic = 1
-
-    ctypedef cudaGraphDependencyType_enum cudaGraphDependencyType
-
-    cdef struct cudaGraphEdgeData_st:
-        unsigned char from_port
-        unsigned char to_port
-        unsigned char type
-        unsigned char reserved[5]
-
-    ctypedef cudaGraphEdgeData_st cudaGraphEdgeData
-
-    cdef struct CUgraphExec_st:
-        pass
-    ctypedef CUgraphExec_st* cudaGraphExec_t
-
-    cdef enum cudaGraphInstantiateResult:
-        cudaGraphInstantiateSuccess = 0
-        cudaGraphInstantiateError = 1
-        cudaGraphInstantiateInvalidStructure = 2
-        cudaGraphInstantiateNodeOperationNotSupported = 3
-        cudaGraphInstantiateMultipleDevicesNotSupported = 4
-
-    cdef struct cudaGraphInstantiateParams_st:
-        unsigned long long flags
-        cudaStream_t uploadStream
-        cudaGraphNode_t errNode_out
-        cudaGraphInstantiateResult result_out
-
-    ctypedef cudaGraphInstantiateParams_st cudaGraphInstantiateParams
-
-    cdef struct cudaGraphExecUpdateResultInfo_st:
-        cudaGraphExecUpdateResult result
-        cudaGraphNode_t errorNode
-        cudaGraphNode_t errorFromNode
-
-    ctypedef cudaGraphExecUpdateResultInfo_st cudaGraphExecUpdateResultInfo
-
-    cdef struct CUgraphDeviceUpdatableNode_st:
-        pass
-    ctypedef CUgraphDeviceUpdatableNode_st* cudaGraphDeviceNode_t
-
-    cdef struct anon_struct19:
-        const void* pValue
-        size_t offset
-        size_t size
-
-    cdef union anon_union8:
-        dim3 gridDim
-        anon_struct19 param
-        unsigned int isEnabled
-
-    cdef struct cudaGraphKernelNodeUpdate:
-        cudaGraphDeviceNode_t node
-        cudaGraphKernelNodeField field
-        anon_union8 updateData
-
-    cdef enum cudaLaunchMemSyncDomain:
-        cudaLaunchMemSyncDomainDefault = 0
-        cudaLaunchMemSyncDomainRemote = 1
-
-    cdef struct cudaLaunchMemSyncDomainMap_st:
-        unsigned char default_
-        unsigned char remote
-
-    ctypedef cudaLaunchMemSyncDomainMap_st cudaLaunchMemSyncDomainMap
-
-    cdef enum cudaLaunchAttributeID:
-        cudaLaunchAttributeIgnore = 0
-        cudaLaunchAttributeAccessPolicyWindow = 1
-        cudaLaunchAttributeCooperative = 2
-        cudaLaunchAttributeSynchronizationPolicy = 3
-        cudaLaunchAttributeClusterDimension = 4
-        cudaLaunchAttributeClusterSchedulingPolicyPreference = 5
-        cudaLaunchAttributeProgrammaticStreamSerialization = 6
-        cudaLaunchAttributeProgrammaticEvent = 7
-        cudaLaunchAttributePriority = 8
-        cudaLaunchAttributeMemSyncDomainMap = 9
-        cudaLaunchAttributeMemSyncDomain = 10
-        cudaLaunchAttributeLaunchCompletionEvent = 12
-        cudaLaunchAttributeDeviceUpdatableKernelNode = 13
-        cudaLaunchAttributePreferredSharedMemoryCarveout = 14
-
-    cdef struct anon_struct20:
-        unsigned int x
-        unsigned int y
-        unsigned int z
-
-    cdef struct anon_struct21:
-        cudaEvent_t event
-        int flags
-        int triggerAtBlockStart
-
-    cdef struct anon_struct22:
-        cudaEvent_t event
-        int flags
-
-    cdef struct anon_struct23:
-        int deviceUpdatable
-        cudaGraphDeviceNode_t devNode
-
-    cdef union cudaLaunchAttributeValue:
-        char pad[64]
-        cudaAccessPolicyWindow accessPolicyWindow
-        int cooperative
-        cudaSynchronizationPolicy syncPolicy
-        anon_struct20 clusterDim
-        cudaClusterSchedulingPolicy clusterSchedulingPolicyPreference
-        int programmaticStreamSerializationAllowed
-        anon_struct21 programmaticEvent
-        int priority
-        cudaLaunchMemSyncDomainMap memSyncDomainMap
-        cudaLaunchMemSyncDomain memSyncDomain
-        anon_struct22 launchCompletionEvent
-        anon_struct23 deviceUpdatableKernelNode
-        unsigned int sharedMemCarveout
-
-    cdef struct cudaLaunchAttribute_st:
-        cudaLaunchAttributeID id
-        cudaLaunchAttributeValue val
-
-    ctypedef cudaLaunchAttribute_st cudaLaunchAttribute
-
-    cdef struct cudaAsyncCallbackEntry:
-        pass
-    ctypedef cudaAsyncCallbackEntry* cudaAsyncCallbackHandle_t
-
-    cdef enum cudaAsyncNotificationType_enum:
-        cudaAsyncNotificationTypeOverBudget = 1
-
-    ctypedef cudaAsyncNotificationType_enum cudaAsyncNotificationType
-
-    cdef struct anon_struct24:
-        unsigned long long bytesOverBudget
-
-    cdef union anon_union9:
-        anon_struct24 overBudget
-
-    cdef struct cudaAsyncNotificationInfo:
-        cudaAsyncNotificationType type
-        anon_union9 info
-
-    ctypedef cudaAsyncNotificationInfo cudaAsyncNotificationInfo_t
-
-    ctypedef void (*cudaAsyncCallback)(cudaAsyncNotificationInfo_t* , void* , cudaAsyncCallbackHandle_t )
-
-    cdef enum cudaChannelFormatKind:
-        cudaChannelFormatKindSigned = 0
-        cudaChannelFormatKindUnsigned = 1
-        cudaChannelFormatKindFloat = 2
-        cudaChannelFormatKindNone = 3
-        cudaChannelFormatKindNV12 = 4
-        cudaChannelFormatKindUnsignedNormalized8X1 = 5
-        cudaChannelFormatKindUnsignedNormalized8X2 = 6
-        cudaChannelFormatKindUnsignedNormalized8X4 = 7
-        cudaChannelFormatKindUnsignedNormalized16X1 = 8
-        cudaChannelFormatKindUnsignedNormalized16X2 = 9
-        cudaChannelFormatKindUnsignedNormalized16X4 = 10
-        cudaChannelFormatKindSignedNormalized8X1 = 11
-        cudaChannelFormatKindSignedNormalized8X2 = 12
-        cudaChannelFormatKindSignedNormalized8X4 = 13
-        cudaChannelFormatKindSignedNormalized16X1 = 14
-        cudaChannelFormatKindSignedNormalized16X2 = 15
-        cudaChannelFormatKindSignedNormalized16X4 = 16
-        cudaChannelFormatKindUnsignedBlockCompressed1 = 17
-        cudaChannelFormatKindUnsignedBlockCompressed1SRGB = 18
-        cudaChannelFormatKindUnsignedBlockCompressed2 = 19
-        cudaChannelFormatKindUnsignedBlockCompressed2SRGB = 20
-        cudaChannelFormatKindUnsignedBlockCompressed3 = 21
-        cudaChannelFormatKindUnsignedBlockCompressed3SRGB = 22
-        cudaChannelFormatKindUnsignedBlockCompressed4 = 23
-        cudaChannelFormatKindSignedBlockCompressed4 = 24
-        cudaChannelFormatKindUnsignedBlockCompressed5 = 25
-        cudaChannelFormatKindSignedBlockCompressed5 = 26
-        cudaChannelFormatKindUnsignedBlockCompressed6H = 27
-        cudaChannelFormatKindSignedBlockCompressed6H = 28
-        cudaChannelFormatKindUnsignedBlockCompressed7 = 29
-        cudaChannelFormatKindUnsignedBlockCompressed7SRGB = 30
-
-    cdef enum cudaMemoryType:
-        cudaMemoryTypeUnregistered = 0
-        cudaMemoryTypeHost = 1
-        cudaMemoryTypeDevice = 2
-        cudaMemoryTypeManaged = 3
-
-    cdef enum cudaMemcpyKind:
-        cudaMemcpyHostToHost = 0
-        cudaMemcpyHostToDevice = 1
-        cudaMemcpyDeviceToHost = 2
-        cudaMemcpyDeviceToDevice = 3
-        cudaMemcpyDefault = 4
-
-    cdef enum cudaAccessProperty:
-        cudaAccessPropertyNormal = 0
-        cudaAccessPropertyStreaming = 1
-        cudaAccessPropertyPersisting = 2
-
-    cdef enum cudaStreamCaptureStatus:
-        cudaStreamCaptureStatusNone = 0
-        cudaStreamCaptureStatusActive = 1
-        cudaStreamCaptureStatusInvalidated = 2
-
-    cdef enum cudaStreamCaptureMode:
-        cudaStreamCaptureModeGlobal = 0
-        cudaStreamCaptureModeThreadLocal = 1
-        cudaStreamCaptureModeRelaxed = 2
-
-    cdef enum cudaSynchronizationPolicy:
-        cudaSyncPolicyAuto = 1
-        cudaSyncPolicySpin = 2
-        cudaSyncPolicyYield = 3
-        cudaSyncPolicyBlockingSync = 4
-
-    cdef enum cudaClusterSchedulingPolicy:
-        cudaClusterSchedulingPolicyDefault = 0
-        cudaClusterSchedulingPolicySpread = 1
-        cudaClusterSchedulingPolicyLoadBalancing = 2
-
-    cdef enum cudaStreamUpdateCaptureDependenciesFlags:
-        cudaStreamAddCaptureDependencies = 0
-        cudaStreamSetCaptureDependencies = 1
-
-    cdef enum cudaUserObjectFlags:
-        cudaUserObjectNoDestructorSync = 1
-
-    cdef enum cudaUserObjectRetainFlags:
-        cudaGraphUserObjectMove = 1
-
-    cdef enum cudaGraphicsRegisterFlags:
-        cudaGraphicsRegisterFlagsNone = 0
-        cudaGraphicsRegisterFlagsReadOnly = 1
-        cudaGraphicsRegisterFlagsWriteDiscard = 2
-        cudaGraphicsRegisterFlagsSurfaceLoadStore = 4
-        cudaGraphicsRegisterFlagsTextureGather = 8
-
-    cdef enum cudaGraphicsMapFlags:
-        cudaGraphicsMapFlagsNone = 0
-        cudaGraphicsMapFlagsReadOnly = 1
-        cudaGraphicsMapFlagsWriteDiscard = 2
-
-    cdef enum cudaGraphicsCubeFace:
-        cudaGraphicsCubeFacePositiveX = 0
-        cudaGraphicsCubeFaceNegativeX = 1
-        cudaGraphicsCubeFacePositiveY = 2
-        cudaGraphicsCubeFaceNegativeY = 3
-        cudaGraphicsCubeFacePositiveZ = 4
-        cudaGraphicsCubeFaceNegativeZ = 5
-
-    cdef enum cudaResourceType:
-        cudaResourceTypeArray = 0
-        cudaResourceTypeMipmappedArray = 1
-        cudaResourceTypeLinear = 2
-        cudaResourceTypePitch2D = 3
-
-    cdef enum cudaResourceViewFormat:
-        cudaResViewFormatNone = 0
-        cudaResViewFormatUnsignedChar1 = 1
-        cudaResViewFormatUnsignedChar2 = 2
-        cudaResViewFormatUnsignedChar4 = 3
-        cudaResViewFormatSignedChar1 = 4
-        cudaResViewFormatSignedChar2 = 5
-        cudaResViewFormatSignedChar4 = 6
-        cudaResViewFormatUnsignedShort1 = 7
-        cudaResViewFormatUnsignedShort2 = 8
-        cudaResViewFormatUnsignedShort4 = 9
-        cudaResViewFormatSignedShort1 = 10
-        cudaResViewFormatSignedShort2 = 11
-        cudaResViewFormatSignedShort4 = 12
-        cudaResViewFormatUnsignedInt1 = 13
-        cudaResViewFormatUnsignedInt2 = 14
-        cudaResViewFormatUnsignedInt4 = 15
-        cudaResViewFormatSignedInt1 = 16
-        cudaResViewFormatSignedInt2 = 17
-        cudaResViewFormatSignedInt4 = 18
-        cudaResViewFormatHalf1 = 19
-        cudaResViewFormatHalf2 = 20
-        cudaResViewFormatHalf4 = 21
-        cudaResViewFormatFloat1 = 22
-        cudaResViewFormatFloat2 = 23
-        cudaResViewFormatFloat4 = 24
-        cudaResViewFormatUnsignedBlockCompressed1 = 25
-        cudaResViewFormatUnsignedBlockCompressed2 = 26
-        cudaResViewFormatUnsignedBlockCompressed3 = 27
-        cudaResViewFormatUnsignedBlockCompressed4 = 28
-        cudaResViewFormatSignedBlockCompressed4 = 29
-        cudaResViewFormatUnsignedBlockCompressed5 = 30
-        cudaResViewFormatSignedBlockCompressed5 = 31
-        cudaResViewFormatUnsignedBlockCompressed6H = 32
-        cudaResViewFormatSignedBlockCompressed6H = 33
-        cudaResViewFormatUnsignedBlockCompressed7 = 34
-
-    cdef enum cudaFuncAttribute:
-        cudaFuncAttributeMaxDynamicSharedMemorySize = 8
-        cudaFuncAttributePreferredSharedMemoryCarveout = 9
-        cudaFuncAttributeClusterDimMustBeSet = 10
-        cudaFuncAttributeRequiredClusterWidth = 11
-        cudaFuncAttributeRequiredClusterHeight = 12
-        cudaFuncAttributeRequiredClusterDepth = 13
-        cudaFuncAttributeNonPortableClusterSizeAllowed = 14
-        cudaFuncAttributeClusterSchedulingPolicyPreference = 15
-        cudaFuncAttributeMax = 16
-
-    cdef enum cudaFuncCache:
-        cudaFuncCachePreferNone = 0
-        cudaFuncCachePreferShared = 1
-        cudaFuncCachePreferL1 = 2
-        cudaFuncCachePreferEqual = 3
-
-    cdef enum cudaSharedMemConfig:
-        cudaSharedMemBankSizeDefault = 0
-        cudaSharedMemBankSizeFourByte = 1
-        cudaSharedMemBankSizeEightByte = 2
-
-    cdef enum cudaSharedCarveout:
-        cudaSharedmemCarveoutDefault = -1
-        cudaSharedmemCarveoutMaxL1 = 0
-        cudaSharedmemCarveoutMaxShared = 100
-
-    cdef enum cudaComputeMode:
-        cudaComputeModeDefault = 0
-        cudaComputeModeExclusive = 1
-        cudaComputeModeProhibited = 2
-        cudaComputeModeExclusiveProcess = 3
-
-    cdef enum cudaLimit:
-        cudaLimitStackSize = 0
-        cudaLimitPrintfFifoSize = 1
-        cudaLimitMallocHeapSize = 2
-        cudaLimitDevRuntimeSyncDepth = 3
-        cudaLimitDevRuntimePendingLaunchCount = 4
-        cudaLimitMaxL2FetchGranularity = 5
-        cudaLimitPersistingL2CacheSize = 6
-
-    cdef enum cudaMemoryAdvise:
-        cudaMemAdviseSetReadMostly = 1
-        cudaMemAdviseUnsetReadMostly = 2
-        cudaMemAdviseSetPreferredLocation = 3
-        cudaMemAdviseUnsetPreferredLocation = 4
-        cudaMemAdviseSetAccessedBy = 5
-        cudaMemAdviseUnsetAccessedBy = 6
-
-    cdef enum cudaMemRangeAttribute:
-        cudaMemRangeAttributeReadMostly = 1
-        cudaMemRangeAttributePreferredLocation = 2
-        cudaMemRangeAttributeAccessedBy = 3
-        cudaMemRangeAttributeLastPrefetchLocation = 4
-        cudaMemRangeAttributePreferredLocationType = 5
-        cudaMemRangeAttributePreferredLocationId = 6
-        cudaMemRangeAttributeLastPrefetchLocationType = 7
-        cudaMemRangeAttributeLastPrefetchLocationId = 8
-
-    cdef enum cudaFlushGPUDirectRDMAWritesOptions:
-        cudaFlushGPUDirectRDMAWritesOptionHost = 1
-        cudaFlushGPUDirectRDMAWritesOptionMemOps = 2
-
-    cdef enum cudaGPUDirectRDMAWritesOrdering:
-        cudaGPUDirectRDMAWritesOrderingNone = 0
-        cudaGPUDirectRDMAWritesOrderingOwner = 100
-        cudaGPUDirectRDMAWritesOrderingAllDevices = 200
-
-    cdef enum cudaFlushGPUDirectRDMAWritesScope:
-        cudaFlushGPUDirectRDMAWritesToOwner = 100
-        cudaFlushGPUDirectRDMAWritesToAllDevices = 200
-
-    cdef enum cudaFlushGPUDirectRDMAWritesTarget:
-        cudaFlushGPUDirectRDMAWritesTargetCurrentDevice = 0
-
-    cdef enum cudaDeviceAttr:
-        cudaDevAttrMaxThreadsPerBlock = 1
-        cudaDevAttrMaxBlockDimX = 2
-        cudaDevAttrMaxBlockDimY = 3
-        cudaDevAttrMaxBlockDimZ = 4
-        cudaDevAttrMaxGridDimX = 5
-        cudaDevAttrMaxGridDimY = 6
-        cudaDevAttrMaxGridDimZ = 7
-        cudaDevAttrMaxSharedMemoryPerBlock = 8
-        cudaDevAttrTotalConstantMemory = 9
-        cudaDevAttrWarpSize = 10
-        cudaDevAttrMaxPitch = 11
-        cudaDevAttrMaxRegistersPerBlock = 12
-        cudaDevAttrClockRate = 13
-        cudaDevAttrTextureAlignment = 14
-        cudaDevAttrGpuOverlap = 15
-        cudaDevAttrMultiProcessorCount = 16
-        cudaDevAttrKernelExecTimeout = 17
-        cudaDevAttrIntegrated = 18
-        cudaDevAttrCanMapHostMemory = 19
-        cudaDevAttrComputeMode = 20
-        cudaDevAttrMaxTexture1DWidth = 21
-        cudaDevAttrMaxTexture2DWidth = 22
-        cudaDevAttrMaxTexture2DHeight = 23
-        cudaDevAttrMaxTexture3DWidth = 24
-        cudaDevAttrMaxTexture3DHeight = 25
-        cudaDevAttrMaxTexture3DDepth = 26
-        cudaDevAttrMaxTexture2DLayeredWidth = 27
-        cudaDevAttrMaxTexture2DLayeredHeight = 28
-        cudaDevAttrMaxTexture2DLayeredLayers = 29
-        cudaDevAttrSurfaceAlignment = 30
-        cudaDevAttrConcurrentKernels = 31
-        cudaDevAttrEccEnabled = 32
-        cudaDevAttrPciBusId = 33
-        cudaDevAttrPciDeviceId = 34
-        cudaDevAttrTccDriver = 35
-        cudaDevAttrMemoryClockRate = 36
-        cudaDevAttrGlobalMemoryBusWidth = 37
-        cudaDevAttrL2CacheSize = 38
-        cudaDevAttrMaxThreadsPerMultiProcessor = 39
-        cudaDevAttrAsyncEngineCount = 40
-        cudaDevAttrUnifiedAddressing = 41
-        cudaDevAttrMaxTexture1DLayeredWidth = 42
-        cudaDevAttrMaxTexture1DLayeredLayers = 43
-        cudaDevAttrMaxTexture2DGatherWidth = 45
-        cudaDevAttrMaxTexture2DGatherHeight = 46
-        cudaDevAttrMaxTexture3DWidthAlt = 47
-        cudaDevAttrMaxTexture3DHeightAlt = 48
-        cudaDevAttrMaxTexture3DDepthAlt = 49
-        cudaDevAttrPciDomainId = 50
-        cudaDevAttrTexturePitchAlignment = 51
-        cudaDevAttrMaxTextureCubemapWidth = 52
-        cudaDevAttrMaxTextureCubemapLayeredWidth = 53
-        cudaDevAttrMaxTextureCubemapLayeredLayers = 54
-        cudaDevAttrMaxSurface1DWidth = 55
-        cudaDevAttrMaxSurface2DWidth = 56
-        cudaDevAttrMaxSurface2DHeight = 57
-        cudaDevAttrMaxSurface3DWidth = 58
-        cudaDevAttrMaxSurface3DHeight = 59
-        cudaDevAttrMaxSurface3DDepth = 60
-        cudaDevAttrMaxSurface1DLayeredWidth = 61
-        cudaDevAttrMaxSurface1DLayeredLayers = 62
-        cudaDevAttrMaxSurface2DLayeredWidth = 63
-        cudaDevAttrMaxSurface2DLayeredHeight = 64
-        cudaDevAttrMaxSurface2DLayeredLayers = 65
-        cudaDevAttrMaxSurfaceCubemapWidth = 66
-        cudaDevAttrMaxSurfaceCubemapLayeredWidth = 67
-        cudaDevAttrMaxSurfaceCubemapLayeredLayers = 68
-        cudaDevAttrMaxTexture1DLinearWidth = 69
-        cudaDevAttrMaxTexture2DLinearWidth = 70
-        cudaDevAttrMaxTexture2DLinearHeight = 71
-        cudaDevAttrMaxTexture2DLinearPitch = 72
-        cudaDevAttrMaxTexture2DMipmappedWidth = 73
-        cudaDevAttrMaxTexture2DMipmappedHeight = 74
-        cudaDevAttrComputeCapabilityMajor = 75
-        cudaDevAttrComputeCapabilityMinor = 76
-        cudaDevAttrMaxTexture1DMipmappedWidth = 77
-        cudaDevAttrStreamPrioritiesSupported = 78
-        cudaDevAttrGlobalL1CacheSupported = 79
-        cudaDevAttrLocalL1CacheSupported = 80
-        cudaDevAttrMaxSharedMemoryPerMultiprocessor = 81
-        cudaDevAttrMaxRegistersPerMultiprocessor = 82
-        cudaDevAttrManagedMemory = 83
-        cudaDevAttrIsMultiGpuBoard = 84
-        cudaDevAttrMultiGpuBoardGroupID = 85
-        cudaDevAttrHostNativeAtomicSupported = 86
-        cudaDevAttrSingleToDoublePrecisionPerfRatio = 87
-        cudaDevAttrPageableMemoryAccess = 88
-        cudaDevAttrConcurrentManagedAccess = 89
-        cudaDevAttrComputePreemptionSupported = 90
-        cudaDevAttrCanUseHostPointerForRegisteredMem = 91
-        cudaDevAttrReserved92 = 92
-        cudaDevAttrReserved93 = 93
-        cudaDevAttrReserved94 = 94
-        cudaDevAttrCooperativeLaunch = 95
-        cudaDevAttrCooperativeMultiDeviceLaunch = 96
-        cudaDevAttrMaxSharedMemoryPerBlockOptin = 97
-        cudaDevAttrCanFlushRemoteWrites = 98
-        cudaDevAttrHostRegisterSupported = 99
-        cudaDevAttrPageableMemoryAccessUsesHostPageTables = 100
-        cudaDevAttrDirectManagedMemAccessFromHost = 101
-        cudaDevAttrMaxBlocksPerMultiprocessor = 106
-        cudaDevAttrMaxPersistingL2CacheSize = 108
-        cudaDevAttrMaxAccessPolicyWindowSize = 109
-        cudaDevAttrReservedSharedMemoryPerBlock = 111
-        cudaDevAttrSparseCudaArraySupported = 112
-        cudaDevAttrHostRegisterReadOnlySupported = 113
-        cudaDevAttrTimelineSemaphoreInteropSupported = 114
-        cudaDevAttrMaxTimelineSemaphoreInteropSupported = 114
-        cudaDevAttrMemoryPoolsSupported = 115
-        cudaDevAttrGPUDirectRDMASupported = 116
-        cudaDevAttrGPUDirectRDMAFlushWritesOptions = 117
-        cudaDevAttrGPUDirectRDMAWritesOrdering = 118
-        cudaDevAttrMemoryPoolSupportedHandleTypes = 119
-        cudaDevAttrClusterLaunch = 120
-        cudaDevAttrDeferredMappingCudaArraySupported = 121
-        cudaDevAttrReserved122 = 122
-        cudaDevAttrReserved123 = 123
-        cudaDevAttrReserved124 = 124
-        cudaDevAttrIpcEventSupport = 125
-        cudaDevAttrMemSyncDomainCount = 126
-        cudaDevAttrReserved127 = 127
-        cudaDevAttrReserved128 = 128
-        cudaDevAttrReserved129 = 129
-        cudaDevAttrNumaConfig = 130
-        cudaDevAttrNumaId = 131
-        cudaDevAttrReserved132 = 132
-        cudaDevAttrMpsEnabled = 133
-        cudaDevAttrHostNumaId = 134
-        cudaDevAttrD3D12CigSupported = 135
-        cudaDevAttrMax = 136
-
-    cdef enum cudaMemPoolAttr:
-        cudaMemPoolReuseFollowEventDependencies = 1
-        cudaMemPoolReuseAllowOpportunistic = 2
-        cudaMemPoolReuseAllowInternalDependencies = 3
-        cudaMemPoolAttrReleaseThreshold = 4
-        cudaMemPoolAttrReservedMemCurrent = 5
-        cudaMemPoolAttrReservedMemHigh = 6
-        cudaMemPoolAttrUsedMemCurrent = 7
-        cudaMemPoolAttrUsedMemHigh = 8
-
-    cdef enum cudaMemLocationType:
-        cudaMemLocationTypeInvalid = 0
-        cudaMemLocationTypeDevice = 1
-        cudaMemLocationTypeHost = 2
-        cudaMemLocationTypeHostNuma = 3
-        cudaMemLocationTypeHostNumaCurrent = 4
-
-    cdef enum cudaMemAccessFlags:
-        cudaMemAccessFlagsProtNone = 0
-        cudaMemAccessFlagsProtRead = 1
-        cudaMemAccessFlagsProtReadWrite = 3
-
-    cdef enum cudaMemAllocationType:
-        cudaMemAllocationTypeInvalid = 0
-        cudaMemAllocationTypePinned = 1
-        cudaMemAllocationTypeMax = 2147483647
-
-    cdef enum cudaMemAllocationHandleType:
-        cudaMemHandleTypeNone = 0
-        cudaMemHandleTypePosixFileDescriptor = 1
-        cudaMemHandleTypeWin32 = 2
-        cudaMemHandleTypeWin32Kmt = 4
-        cudaMemHandleTypeFabric = 8
-
-    cdef enum cudaGraphMemAttributeType:
-        cudaGraphMemAttrUsedMemCurrent = 0
-        cudaGraphMemAttrUsedMemHigh = 1
-        cudaGraphMemAttrReservedMemCurrent = 2
-        cudaGraphMemAttrReservedMemHigh = 3
-
-    cdef enum cudaDeviceP2PAttr:
-        cudaDevP2PAttrPerformanceRank = 1
-        cudaDevP2PAttrAccessSupported = 2
-        cudaDevP2PAttrNativeAtomicSupported = 3
-        cudaDevP2PAttrCudaArrayAccessSupported = 4
-
-    cdef enum cudaExternalMemoryHandleType:
-        cudaExternalMemoryHandleTypeOpaqueFd = 1
-        cudaExternalMemoryHandleTypeOpaqueWin32 = 2
-        cudaExternalMemoryHandleTypeOpaqueWin32Kmt = 3
-        cudaExternalMemoryHandleTypeD3D12Heap = 4
-        cudaExternalMemoryHandleTypeD3D12Resource = 5
-        cudaExternalMemoryHandleTypeD3D11Resource = 6
-        cudaExternalMemoryHandleTypeD3D11ResourceKmt = 7
-        cudaExternalMemoryHandleTypeNvSciBuf = 8
-
-    cdef enum cudaExternalSemaphoreHandleType:
-        cudaExternalSemaphoreHandleTypeOpaqueFd = 1
-        cudaExternalSemaphoreHandleTypeOpaqueWin32 = 2
-        cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt = 3
-        cudaExternalSemaphoreHandleTypeD3D12Fence = 4
-        cudaExternalSemaphoreHandleTypeD3D11Fence = 5
-        cudaExternalSemaphoreHandleTypeNvSciSync = 6
-        cudaExternalSemaphoreHandleTypeKeyedMutex = 7
-        cudaExternalSemaphoreHandleTypeKeyedMutexKmt = 8
-        cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd = 9
-        cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32 = 10
-
-    cdef enum cudaCGScope:
-        cudaCGScopeInvalid = 0
-        cudaCGScopeGrid = 1
-        cudaCGScopeMultiGrid = 2
-
-    cdef enum cudaGraphConditionalHandleFlags:
-        cudaGraphCondAssignDefault = 1
-
-    cdef enum cudaGraphConditionalNodeType:
-        cudaGraphCondTypeIf = 0
-        cudaGraphCondTypeWhile = 1
-
-    cdef enum cudaGraphNodeType:
-        cudaGraphNodeTypeKernel = 0
-        cudaGraphNodeTypeMemcpy = 1
-        cudaGraphNodeTypeMemset = 2
-        cudaGraphNodeTypeHost = 3
-        cudaGraphNodeTypeGraph = 4
-        cudaGraphNodeTypeEmpty = 5
-        cudaGraphNodeTypeWaitEvent = 6
-        cudaGraphNodeTypeEventRecord = 7
-        cudaGraphNodeTypeExtSemaphoreSignal = 8
-        cudaGraphNodeTypeExtSemaphoreWait = 9
-        cudaGraphNodeTypeMemAlloc = 10
-        cudaGraphNodeTypeMemFree = 11
-        cudaGraphNodeTypeConditional = 13
-        cudaGraphNodeTypeCount = 14
-
-    cdef enum cudaGraphExecUpdateResult:
-        cudaGraphExecUpdateSuccess = 0
-        cudaGraphExecUpdateError = 1
-        cudaGraphExecUpdateErrorTopologyChanged = 2
-        cudaGraphExecUpdateErrorNodeTypeChanged = 3
-        cudaGraphExecUpdateErrorFunctionChanged = 4
-        cudaGraphExecUpdateErrorParametersChanged = 5
-        cudaGraphExecUpdateErrorNotSupported = 6
-        cudaGraphExecUpdateErrorUnsupportedFunctionChange = 7
-        cudaGraphExecUpdateErrorAttributesChanged = 8
-
-    cdef enum cudaGraphKernelNodeField:
-        cudaGraphKernelNodeFieldInvalid = 0
-        cudaGraphKernelNodeFieldGridDim = 1
-        cudaGraphKernelNodeFieldParam = 2
-        cudaGraphKernelNodeFieldEnabled = 3
-
-    cdef enum cudaGetDriverEntryPointFlags:
-        cudaEnableDefault = 0
-        cudaEnableLegacyStream = 1
-        cudaEnablePerThreadDefaultStream = 2
-
-    cdef enum cudaDriverEntryPointQueryResult:
-        cudaDriverEntryPointSuccess = 0
-        cudaDriverEntryPointSymbolNotFound = 1
-        cudaDriverEntryPointVersionNotSufficent = 2
-
-    cdef enum cudaGraphDebugDotFlags:
-        cudaGraphDebugDotFlagsVerbose = 1
-        cudaGraphDebugDotFlagsKernelNodeParams = 4
-        cudaGraphDebugDotFlagsMemcpyNodeParams = 8
-        cudaGraphDebugDotFlagsMemsetNodeParams = 16
-        cudaGraphDebugDotFlagsHostNodeParams = 32
-        cudaGraphDebugDotFlagsEventNodeParams = 64
-        cudaGraphDebugDotFlagsExtSemasSignalNodeParams = 128
-        cudaGraphDebugDotFlagsExtSemasWaitNodeParams = 256
-        cudaGraphDebugDotFlagsKernelNodeAttributes = 512
-        cudaGraphDebugDotFlagsHandles = 1024
-        cudaGraphDebugDotFlagsConditionalNodeParams = 32768
-
-    cdef enum cudaGraphInstantiateFlags:
-        cudaGraphInstantiateFlagAutoFreeOnLaunch = 1
-        cudaGraphInstantiateFlagUpload = 2
-        cudaGraphInstantiateFlagDeviceLaunch = 4
-        cudaGraphInstantiateFlagUseNodePriority = 8
-
-    cdef enum cudaDeviceNumaConfig:
-        cudaDeviceNumaConfigNone = 0
-        cudaDeviceNumaConfigNumaNode = 1
-
-cdef extern from "surface_types.h":
-
-    ctypedef unsigned long long cudaSurfaceObject_t
-
-    cdef enum cudaSurfaceBoundaryMode:
-        cudaBoundaryModeZero = 0
-        cudaBoundaryModeClamp = 1
-        cudaBoundaryModeTrap = 2
-
-    cdef enum cudaSurfaceFormatMode:
-        cudaFormatModeForced = 0
-        cudaFormatModeAuto = 1
-
-cdef extern from "texture_types.h":
-
-    cdef struct cudaTextureDesc:
-        cudaTextureAddressMode addressMode[3]
-        cudaTextureFilterMode filterMode
-        cudaTextureReadMode readMode
-        int sRGB
-        float borderColor[4]
-        int normalizedCoords
-        unsigned int maxAnisotropy
-        cudaTextureFilterMode mipmapFilterMode
-        float mipmapLevelBias
-        float minMipmapLevelClamp
-        float maxMipmapLevelClamp
-        int disableTrilinearOptimization
-        int seamlessCubemap
-
-    ctypedef unsigned long long cudaTextureObject_t
-
-    cdef enum cudaTextureAddressMode:
-        cudaAddressModeWrap = 0
-        cudaAddressModeClamp = 1
-        cudaAddressModeMirror = 2
-        cudaAddressModeBorder = 3
-
-    cdef enum cudaTextureFilterMode:
-        cudaFilterModePoint = 0
-        cudaFilterModeLinear = 1
-
-    cdef enum cudaTextureReadMode:
-        cudaReadModeElementType = 0
-        cudaReadModeNormalizedFloat = 1
-
-cdef extern from "library_types.h":
-
-    cdef enum cudaDataType_t:
-        CUDA_R_32F = 0
-        CUDA_R_64F = 1
-        CUDA_R_16F = 2
-        CUDA_R_8I = 3
-        CUDA_C_32F = 4
-        CUDA_C_64F = 5
-        CUDA_C_16F = 6
-        CUDA_C_8I = 7
-        CUDA_R_8U = 8
-        CUDA_C_8U = 9
-        CUDA_R_32I = 10
-        CUDA_C_32I = 11
-        CUDA_R_32U = 12
-        CUDA_C_32U = 13
-        CUDA_R_16BF = 14
-        CUDA_C_16BF = 15
-        CUDA_R_4I = 16
-        CUDA_C_4I = 17
-        CUDA_R_4U = 18
-        CUDA_C_4U = 19
-        CUDA_R_16I = 20
-        CUDA_C_16I = 21
-        CUDA_R_16U = 22
-        CUDA_C_16U = 23
-        CUDA_R_64I = 24
-        CUDA_C_64I = 25
-        CUDA_R_64U = 26
-        CUDA_C_64U = 27
-        CUDA_R_8F_E4M3 = 28
-        CUDA_R_8F_E5M2 = 29
-
-    ctypedef cudaDataType_t cudaDataType
-
-    cdef enum libraryPropertyType_t:
-        MAJOR_VERSION = 0
-        MINOR_VERSION = 1
-        PATCH_LEVEL = 2
-
-    ctypedef libraryPropertyType_t libraryPropertyType
-
-cdef extern from "cuda_runtime_api.h":
-
-    ctypedef void (*cudaStreamCallback_t)(cudaStream_t stream, cudaError_t status, void* userData)
-
-cdef extern from "device_types.h":
-
-    cdef enum cudaRoundMode:
-        cudaRoundNearest = 0
-        cudaRoundZero = 1
-        cudaRoundPosInf = 2
-        cudaRoundMinInf = 3
-
-ctypedef unsigned int GLenum
-
-ctypedef unsigned int GLuint
-
-cdef extern from "":
-    cdef struct void:
-        pass
-ctypedef void* EGLImageKHR
-
-cdef extern from "":
-    cdef struct void:
-        pass
-ctypedef void* EGLStreamKHR
-
-ctypedef unsigned int EGLint
-
-cdef extern from "":
-    cdef struct void:
-        pass
-ctypedef void* EGLSyncKHR
-
-ctypedef uint32_t VdpDevice
-
-ctypedef unsigned long long VdpGetProcAddress
-
-ctypedef uint32_t VdpVideoSurface
-
-ctypedef uint32_t VdpOutputSurface
-
-ctypedef cudaLaunchAttributeID cudaStreamAttrID
-
-ctypedef cudaLaunchAttributeID cudaKernelNodeAttrID
-
-ctypedef cudaLaunchAttributeValue cudaStreamAttrValue
-
-ctypedef cudaLaunchAttributeValue cudaKernelNodeAttrValue
-
-cdef enum cudaEglFrameType_enum:
-    cudaEglFrameTypeArray = 0
-    cudaEglFrameTypePitch = 1
-
-ctypedef cudaEglFrameType_enum cudaEglFrameType
-
-cdef enum cudaEglResourceLocationFlags_enum:
-    cudaEglResourceLocationSysmem = 0
-    cudaEglResourceLocationVidmem = 1
-
-ctypedef cudaEglResourceLocationFlags_enum cudaEglResourceLocationFlags
-
-cdef enum cudaEglColorFormat_enum:
-    cudaEglColorFormatYUV420Planar = 0
-    cudaEglColorFormatYUV420SemiPlanar = 1
-    cudaEglColorFormatYUV422Planar = 2
-    cudaEglColorFormatYUV422SemiPlanar = 3
-    cudaEglColorFormatARGB = 6
-    cudaEglColorFormatRGBA = 7
-    cudaEglColorFormatL = 8
-    cudaEglColorFormatR = 9
-    cudaEglColorFormatYUV444Planar = 10
-    cudaEglColorFormatYUV444SemiPlanar = 11
-    cudaEglColorFormatYUYV422 = 12
-    cudaEglColorFormatUYVY422 = 13
-    cudaEglColorFormatABGR = 14
-    cudaEglColorFormatBGRA = 15
-    cudaEglColorFormatA = 16
-    cudaEglColorFormatRG = 17
-    cudaEglColorFormatAYUV = 18
-    cudaEglColorFormatYVU444SemiPlanar = 19
-    cudaEglColorFormatYVU422SemiPlanar = 20
-    cudaEglColorFormatYVU420SemiPlanar = 21
-    cudaEglColorFormatY10V10U10_444SemiPlanar = 22
-    cudaEglColorFormatY10V10U10_420SemiPlanar = 23
-    cudaEglColorFormatY12V12U12_444SemiPlanar = 24
-    cudaEglColorFormatY12V12U12_420SemiPlanar = 25
-    cudaEglColorFormatVYUY_ER = 26
-    cudaEglColorFormatUYVY_ER = 27
-    cudaEglColorFormatYUYV_ER = 28
-    cudaEglColorFormatYVYU_ER = 29
-    cudaEglColorFormatYUVA_ER = 31
-    cudaEglColorFormatAYUV_ER = 32
-    cudaEglColorFormatYUV444Planar_ER = 33
-    cudaEglColorFormatYUV422Planar_ER = 34
-    cudaEglColorFormatYUV420Planar_ER = 35
-    cudaEglColorFormatYUV444SemiPlanar_ER = 36
-    cudaEglColorFormatYUV422SemiPlanar_ER = 37
-    cudaEglColorFormatYUV420SemiPlanar_ER = 38
-    cudaEglColorFormatYVU444Planar_ER = 39
-    cudaEglColorFormatYVU422Planar_ER = 40
-    cudaEglColorFormatYVU420Planar_ER = 41
-    cudaEglColorFormatYVU444SemiPlanar_ER = 42
-    cudaEglColorFormatYVU422SemiPlanar_ER = 43
-    cudaEglColorFormatYVU420SemiPlanar_ER = 44
-    cudaEglColorFormatBayerRGGB = 45
-    cudaEglColorFormatBayerBGGR = 46
-    cudaEglColorFormatBayerGRBG = 47
-    cudaEglColorFormatBayerGBRG = 48
-    cudaEglColorFormatBayer10RGGB = 49
-    cudaEglColorFormatBayer10BGGR = 50
-    cudaEglColorFormatBayer10GRBG = 51
-    cudaEglColorFormatBayer10GBRG = 52
-    cudaEglColorFormatBayer12RGGB = 53
-    cudaEglColorFormatBayer12BGGR = 54
-    cudaEglColorFormatBayer12GRBG = 55
-    cudaEglColorFormatBayer12GBRG = 56
-    cudaEglColorFormatBayer14RGGB = 57
-    cudaEglColorFormatBayer14BGGR = 58
-    cudaEglColorFormatBayer14GRBG = 59
-    cudaEglColorFormatBayer14GBRG = 60
-    cudaEglColorFormatBayer20RGGB = 61
-    cudaEglColorFormatBayer20BGGR = 62
-    cudaEglColorFormatBayer20GRBG = 63
-    cudaEglColorFormatBayer20GBRG = 64
-    cudaEglColorFormatYVU444Planar = 65
-    cudaEglColorFormatYVU422Planar = 66
-    cudaEglColorFormatYVU420Planar = 67
-    cudaEglColorFormatBayerIspRGGB = 68
-    cudaEglColorFormatBayerIspBGGR = 69
-    cudaEglColorFormatBayerIspGRBG = 70
-    cudaEglColorFormatBayerIspGBRG = 71
-    cudaEglColorFormatBayerBCCR = 72
-    cudaEglColorFormatBayerRCCB = 73
-    cudaEglColorFormatBayerCRBC = 74
-    cudaEglColorFormatBayerCBRC = 75
-    cudaEglColorFormatBayer10CCCC = 76
-    cudaEglColorFormatBayer12BCCR = 77
-    cudaEglColorFormatBayer12RCCB = 78
-    cudaEglColorFormatBayer12CRBC = 79
-    cudaEglColorFormatBayer12CBRC = 80
-    cudaEglColorFormatBayer12CCCC = 81
-    cudaEglColorFormatY = 82
-    cudaEglColorFormatYUV420SemiPlanar_2020 = 83
-    cudaEglColorFormatYVU420SemiPlanar_2020 = 84
-    cudaEglColorFormatYUV420Planar_2020 = 85
-    cudaEglColorFormatYVU420Planar_2020 = 86
-    cudaEglColorFormatYUV420SemiPlanar_709 = 87
-    cudaEglColorFormatYVU420SemiPlanar_709 = 88
-    cudaEglColorFormatYUV420Planar_709 = 89
-    cudaEglColorFormatYVU420Planar_709 = 90
-    cudaEglColorFormatY10V10U10_420SemiPlanar_709 = 91
-    cudaEglColorFormatY10V10U10_420SemiPlanar_2020 = 92
-    cudaEglColorFormatY10V10U10_422SemiPlanar_2020 = 93
-    cudaEglColorFormatY10V10U10_422SemiPlanar = 94
-    cudaEglColorFormatY10V10U10_422SemiPlanar_709 = 95
-    cudaEglColorFormatY_ER = 96
-    cudaEglColorFormatY_709_ER = 97
-    cudaEglColorFormatY10_ER = 98
-    cudaEglColorFormatY10_709_ER = 99
-    cudaEglColorFormatY12_ER = 100
-    cudaEglColorFormatY12_709_ER = 101
-    cudaEglColorFormatYUVA = 102
-    cudaEglColorFormatYVYU = 104
-    cudaEglColorFormatVYUY = 105
-    cudaEglColorFormatY10V10U10_420SemiPlanar_ER = 106
-    cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER = 107
-    cudaEglColorFormatY10V10U10_444SemiPlanar_ER = 108
-    cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER = 109
-    cudaEglColorFormatY12V12U12_420SemiPlanar_ER = 110
-    cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER = 111
-    cudaEglColorFormatY12V12U12_444SemiPlanar_ER = 112
-    cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER = 113
-
-ctypedef cudaEglColorFormat_enum cudaEglColorFormat
-
-cdef struct cudaEglPlaneDesc_st:
-    unsigned int width
-    unsigned int height
-    unsigned int depth
-    unsigned int pitch
-    unsigned int numChannels
-    cudaChannelFormatDesc channelDesc
-    unsigned int reserved[4]
-
-ctypedef cudaEglPlaneDesc_st cudaEglPlaneDesc
-
-cdef union anon_union10:
-    cudaArray_t pArray[3]
-    cudaPitchedPtr pPitch[3]
-
-cdef struct cudaEglFrame_st:
-    anon_union10 frame
-    cudaEglPlaneDesc planeDesc[3]
-    unsigned int planeCount
-    cudaEglFrameType frameType
-    cudaEglColorFormat eglColorFormat
-
-ctypedef cudaEglFrame_st cudaEglFrame
-
-cdef extern from "":
-    cdef struct CUeglStreamConnection_st:
-        pass
-ctypedef CUeglStreamConnection_st* cudaEglStreamConnection
-
-cdef enum cudaGLDeviceList:
-    cudaGLDeviceListAll = 1
-    cudaGLDeviceListCurrentFrame = 2
-    cudaGLDeviceListNextFrame = 3
-
-cdef enum cudaGLMapFlags:
-    cudaGLMapFlagsNone = 0
-    cudaGLMapFlagsReadOnly = 1
-    cudaGLMapFlagsWriteDiscard = 2
-
-{{if 'cudaDeviceReset' in found_functions}}
-
-cdef cudaError_t cudaDeviceReset() except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceSynchronize' in found_functions}}
-
-cdef cudaError_t cudaDeviceSynchronize() except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceSetLimit' in found_functions}}
-
-cdef cudaError_t cudaDeviceSetLimit(cudaLimit limit, size_t value) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetLimit' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetLimit(size_t* pValue, cudaLimit limit) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetTexture1DLinearMaxWidth(size_t* maxWidthInElements, const cudaChannelFormatDesc* fmtDesc, int device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetCacheConfig' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetCacheConfig(cudaFuncCache* pCacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetStreamPriorityRange' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceSetCacheConfig' in found_functions}}
-
-cdef cudaError_t cudaDeviceSetCacheConfig(cudaFuncCache cacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetByPCIBusId' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetByPCIBusId(int* device, const char* pciBusId) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetPCIBusId' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetPCIBusId(char* pciBusId, int length, int device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaIpcGetEventHandle' in found_functions}}
-
-cdef cudaError_t cudaIpcGetEventHandle(cudaIpcEventHandle_t* handle, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaIpcOpenEventHandle' in found_functions}}
-
-cdef cudaError_t cudaIpcOpenEventHandle(cudaEvent_t* event, cudaIpcEventHandle_t handle) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaIpcGetMemHandle' in found_functions}}
-
-cdef cudaError_t cudaIpcGetMemHandle(cudaIpcMemHandle_t* handle, void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaIpcOpenMemHandle' in found_functions}}
-
-cdef cudaError_t cudaIpcOpenMemHandle(void** devPtr, cudaIpcMemHandle_t handle, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaIpcCloseMemHandle' in found_functions}}
-
-cdef cudaError_t cudaIpcCloseMemHandle(void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceFlushGPUDirectRDMAWrites' in found_functions}}
-
-cdef cudaError_t cudaDeviceFlushGPUDirectRDMAWrites(cudaFlushGPUDirectRDMAWritesTarget target, cudaFlushGPUDirectRDMAWritesScope scope) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceRegisterAsyncNotification' in found_functions}}
-
-cdef cudaError_t cudaDeviceRegisterAsyncNotification(int device, cudaAsyncCallback callbackFunc, void* userData, cudaAsyncCallbackHandle_t* callback) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceUnregisterAsyncNotification' in found_functions}}
-
-cdef cudaError_t cudaDeviceUnregisterAsyncNotification(int device, cudaAsyncCallbackHandle_t callback) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetSharedMemConfig' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetSharedMemConfig(cudaSharedMemConfig* pConfig) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceSetSharedMemConfig' in found_functions}}
-
-cdef cudaError_t cudaDeviceSetSharedMemConfig(cudaSharedMemConfig config) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetLastError' in found_functions}}
-
-cdef cudaError_t cudaGetLastError() except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaPeekAtLastError' in found_functions}}
-
-cdef cudaError_t cudaPeekAtLastError() except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetErrorName' in found_functions}}
-
-cdef const char* cudaGetErrorName(cudaError_t error) except ?NULL nogil
-{{endif}}
-
-{{if 'cudaGetErrorString' in found_functions}}
-
-cdef const char* cudaGetErrorString(cudaError_t error) except ?NULL nogil
-{{endif}}
-
-{{if 'cudaGetDeviceCount' in found_functions}}
-
-cdef cudaError_t cudaGetDeviceCount(int* count) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetDeviceProperties_v2' in found_functions}}
-
-cdef cudaError_t cudaGetDeviceProperties(cudaDeviceProp* prop, int device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetAttribute' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetAttribute(int* value, cudaDeviceAttr attr, int device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetDefaultMemPool' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetDefaultMemPool(cudaMemPool_t* memPool, int device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceSetMemPool' in found_functions}}
-
-cdef cudaError_t cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetMemPool' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetMemPool(cudaMemPool_t* memPool, int device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetNvSciSyncAttributes' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetNvSciSyncAttributes(void* nvSciSyncAttrList, int device, int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetP2PAttribute' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetP2PAttribute(int* value, cudaDeviceP2PAttr attr, int srcDevice, int dstDevice) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaChooseDevice' in found_functions}}
-
-cdef cudaError_t cudaChooseDevice(int* device, const cudaDeviceProp* prop) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaInitDevice' in found_functions}}
-
-cdef cudaError_t cudaInitDevice(int device, unsigned int deviceFlags, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaSetDevice' in found_functions}}
-
-cdef cudaError_t cudaSetDevice(int device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetDevice' in found_functions}}
-
-cdef cudaError_t cudaGetDevice(int* device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaSetDeviceFlags' in found_functions}}
-
-cdef cudaError_t cudaSetDeviceFlags(unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetDeviceFlags' in found_functions}}
-
-cdef cudaError_t cudaGetDeviceFlags(unsigned int* flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamCreate' in found_functions}}
-
-cdef cudaError_t cudaStreamCreate(cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamCreateWithFlags' in found_functions}}
-
-cdef cudaError_t cudaStreamCreateWithFlags(cudaStream_t* pStream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamCreateWithPriority' in found_functions}}
-
-cdef cudaError_t cudaStreamCreateWithPriority(cudaStream_t* pStream, unsigned int flags, int priority) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamGetPriority' in found_functions}}
-
-cdef cudaError_t cudaStreamGetPriority(cudaStream_t hStream, int* priority) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamGetFlags' in found_functions}}
-
-cdef cudaError_t cudaStreamGetFlags(cudaStream_t hStream, unsigned int* flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamGetId' in found_functions}}
-
-cdef cudaError_t cudaStreamGetId(cudaStream_t hStream, unsigned long long* streamId) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaCtxResetPersistingL2Cache' in found_functions}}
-
-cdef cudaError_t cudaCtxResetPersistingL2Cache() except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamCopyAttributes' in found_functions}}
-
-cdef cudaError_t cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamGetAttribute' in found_functions}}
-
-cdef cudaError_t cudaStreamGetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, cudaStreamAttrValue* value_out) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamSetAttribute' in found_functions}}
-
-cdef cudaError_t cudaStreamSetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, const cudaStreamAttrValue* value) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamDestroy' in found_functions}}
-
-cdef cudaError_t cudaStreamDestroy(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamWaitEvent' in found_functions}}
-
-cdef cudaError_t cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamAddCallback' in found_functions}}
-
-cdef cudaError_t cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, void* userData, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamSynchronize' in found_functions}}
-
-cdef cudaError_t cudaStreamSynchronize(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamQuery' in found_functions}}
-
-cdef cudaError_t cudaStreamQuery(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamAttachMemAsync' in found_functions}}
-
-cdef cudaError_t cudaStreamAttachMemAsync(cudaStream_t stream, void* devPtr, size_t length, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamBeginCapture' in found_functions}}
-
-cdef cudaError_t cudaStreamBeginCapture(cudaStream_t stream, cudaStreamCaptureMode mode) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamBeginCaptureToGraph' in found_functions}}
-
-cdef cudaError_t cudaStreamBeginCaptureToGraph(cudaStream_t stream, cudaGraph_t graph, const cudaGraphNode_t* dependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, cudaStreamCaptureMode mode) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaThreadExchangeStreamCaptureMode' in found_functions}}
-
-cdef cudaError_t cudaThreadExchangeStreamCaptureMode(cudaStreamCaptureMode* mode) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamEndCapture' in found_functions}}
-
-cdef cudaError_t cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t* pGraph) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamIsCapturing' in found_functions}}
-
-cdef cudaError_t cudaStreamIsCapturing(cudaStream_t stream, cudaStreamCaptureStatus* pCaptureStatus) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamGetCaptureInfo_v2' in found_functions}}
-
-cdef cudaError_t cudaStreamGetCaptureInfo(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, cudaGraph_t* graph_out, const cudaGraphNode_t** dependencies_out, size_t* numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamGetCaptureInfo_v3' in found_functions}}
-
-cdef cudaError_t cudaStreamGetCaptureInfo_v3(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, cudaGraph_t* graph_out, const cudaGraphNode_t** dependencies_out, const cudaGraphEdgeData** edgeData_out, size_t* numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamUpdateCaptureDependencies' in found_functions}}
-
-cdef cudaError_t cudaStreamUpdateCaptureDependencies(cudaStream_t stream, cudaGraphNode_t* dependencies, size_t numDependencies, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaStreamUpdateCaptureDependencies_v2' in found_functions}}
-
-cdef cudaError_t cudaStreamUpdateCaptureDependencies_v2(cudaStream_t stream, cudaGraphNode_t* dependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaEventCreate' in found_functions}}
-
-cdef cudaError_t cudaEventCreate(cudaEvent_t* event) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaEventCreateWithFlags' in found_functions}}
-
-cdef cudaError_t cudaEventCreateWithFlags(cudaEvent_t* event, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaEventRecord' in found_functions}}
-
-cdef cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaEventRecordWithFlags' in found_functions}}
-
-cdef cudaError_t cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaEventQuery' in found_functions}}
-
-cdef cudaError_t cudaEventQuery(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaEventSynchronize' in found_functions}}
-
-cdef cudaError_t cudaEventSynchronize(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaEventDestroy' in found_functions}}
-
-cdef cudaError_t cudaEventDestroy(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaEventElapsedTime' in found_functions}}
-
-cdef cudaError_t cudaEventElapsedTime(float* ms, cudaEvent_t start, cudaEvent_t end) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaImportExternalMemory' in found_functions}}
-
-cdef cudaError_t cudaImportExternalMemory(cudaExternalMemory_t* extMem_out, const cudaExternalMemoryHandleDesc* memHandleDesc) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaExternalMemoryGetMappedBuffer' in found_functions}}
-
-cdef cudaError_t cudaExternalMemoryGetMappedBuffer(void** devPtr, cudaExternalMemory_t extMem, const cudaExternalMemoryBufferDesc* bufferDesc) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaExternalMemoryGetMappedMipmappedArray' in found_functions}}
-
-cdef cudaError_t cudaExternalMemoryGetMappedMipmappedArray(cudaMipmappedArray_t* mipmap, cudaExternalMemory_t extMem, const cudaExternalMemoryMipmappedArrayDesc* mipmapDesc) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDestroyExternalMemory' in found_functions}}
-
-cdef cudaError_t cudaDestroyExternalMemory(cudaExternalMemory_t extMem) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaImportExternalSemaphore' in found_functions}}
-
-cdef cudaError_t cudaImportExternalSemaphore(cudaExternalSemaphore_t* extSem_out, const cudaExternalSemaphoreHandleDesc* semHandleDesc) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaSignalExternalSemaphoresAsync_v2' in found_functions}}
-
-cdef cudaError_t cudaSignalExternalSemaphoresAsync(const cudaExternalSemaphore_t* extSemArray, const cudaExternalSemaphoreSignalParams* paramsArray, unsigned int numExtSems, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaWaitExternalSemaphoresAsync_v2' in found_functions}}
-
-cdef cudaError_t cudaWaitExternalSemaphoresAsync(const cudaExternalSemaphore_t* extSemArray, const cudaExternalSemaphoreWaitParams* paramsArray, unsigned int numExtSems, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDestroyExternalSemaphore' in found_functions}}
-
-cdef cudaError_t cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaFuncSetCacheConfig' in found_functions}}
-
-cdef cudaError_t cudaFuncSetCacheConfig(const void* func, cudaFuncCache cacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaFuncGetAttributes' in found_functions}}
-
-cdef cudaError_t cudaFuncGetAttributes(cudaFuncAttributes* attr, const void* func) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaFuncSetAttribute' in found_functions}}
-
-cdef cudaError_t cudaFuncSetAttribute(const void* func, cudaFuncAttribute attr, int value) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaLaunchHostFunc' in found_functions}}
-
-cdef cudaError_t cudaLaunchHostFunc(cudaStream_t stream, cudaHostFn_t fn, void* userData) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaFuncSetSharedMemConfig' in found_functions}}
-
-cdef cudaError_t cudaFuncSetSharedMemConfig(const void* func, cudaSharedMemConfig config) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-
-cdef cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* func, int blockSize, size_t dynamicSMemSize) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-
-cdef cudaError_t cudaOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, const void* func, int numBlocks, int blockSize) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-
-cdef cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, const void* func, int blockSize, size_t dynamicSMemSize, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMallocManaged' in found_functions}}
-
-cdef cudaError_t cudaMallocManaged(void** devPtr, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMalloc' in found_functions}}
-
-cdef cudaError_t cudaMalloc(void** devPtr, size_t size) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMallocHost' in found_functions}}
-
-cdef cudaError_t cudaMallocHost(void** ptr, size_t size) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMallocPitch' in found_functions}}
-
-cdef cudaError_t cudaMallocPitch(void** devPtr, size_t* pitch, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMallocArray' in found_functions}}
-
-cdef cudaError_t cudaMallocArray(cudaArray_t* array, const cudaChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaFree' in found_functions}}
-
-cdef cudaError_t cudaFree(void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaFreeHost' in found_functions}}
-
-cdef cudaError_t cudaFreeHost(void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaFreeArray' in found_functions}}
-
-cdef cudaError_t cudaFreeArray(cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaFreeMipmappedArray' in found_functions}}
-
-cdef cudaError_t cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaHostAlloc' in found_functions}}
-
-cdef cudaError_t cudaHostAlloc(void** pHost, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaHostRegister' in found_functions}}
-
-cdef cudaError_t cudaHostRegister(void* ptr, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaHostUnregister' in found_functions}}
-
-cdef cudaError_t cudaHostUnregister(void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaHostGetDevicePointer' in found_functions}}
-
-cdef cudaError_t cudaHostGetDevicePointer(void** pDevice, void* pHost, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaHostGetFlags' in found_functions}}
-
-cdef cudaError_t cudaHostGetFlags(unsigned int* pFlags, void* pHost) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMalloc3D' in found_functions}}
-
-cdef cudaError_t cudaMalloc3D(cudaPitchedPtr* pitchedDevPtr, cudaExtent extent) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMalloc3DArray' in found_functions}}
-
-cdef cudaError_t cudaMalloc3DArray(cudaArray_t* array, const cudaChannelFormatDesc* desc, cudaExtent extent, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMallocMipmappedArray' in found_functions}}
-
-cdef cudaError_t cudaMallocMipmappedArray(cudaMipmappedArray_t* mipmappedArray, const cudaChannelFormatDesc* desc, cudaExtent extent, unsigned int numLevels, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetMipmappedArrayLevel' in found_functions}}
-
-cdef cudaError_t cudaGetMipmappedArrayLevel(cudaArray_t* levelArray, cudaMipmappedArray_const_t mipmappedArray, unsigned int level) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy3D' in found_functions}}
-
-cdef cudaError_t cudaMemcpy3D(const cudaMemcpy3DParms* p) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy3DPeer' in found_functions}}
-
-cdef cudaError_t cudaMemcpy3DPeer(const cudaMemcpy3DPeerParms* p) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy3DAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpy3DAsync(const cudaMemcpy3DParms* p, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy3DPeerAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpy3DPeerAsync(const cudaMemcpy3DPeerParms* p, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemGetInfo' in found_functions}}
-
-cdef cudaError_t cudaMemGetInfo(size_t* free, size_t* total) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaArrayGetInfo' in found_functions}}
-
-cdef cudaError_t cudaArrayGetInfo(cudaChannelFormatDesc* desc, cudaExtent* extent, unsigned int* flags, cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaArrayGetPlane' in found_functions}}
-
-cdef cudaError_t cudaArrayGetPlane(cudaArray_t* pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaArrayGetMemoryRequirements' in found_functions}}
-
-cdef cudaError_t cudaArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaArray_t array, int device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMipmappedArrayGetMemoryRequirements' in found_functions}}
-
-cdef cudaError_t cudaMipmappedArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaMipmappedArray_t mipmap, int device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaArrayGetSparseProperties' in found_functions}}
-
-cdef cudaError_t cudaArrayGetSparseProperties(cudaArraySparseProperties* sparseProperties, cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMipmappedArrayGetSparseProperties' in found_functions}}
-
-cdef cudaError_t cudaMipmappedArrayGetSparseProperties(cudaArraySparseProperties* sparseProperties, cudaMipmappedArray_t mipmap) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy' in found_functions}}
-
-cdef cudaError_t cudaMemcpy(void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpyPeer' in found_functions}}
-
-cdef cudaError_t cudaMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy2D' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy2DToArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy2DFromArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DFromArray(void* dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy2DArrayToArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpyAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpyPeerAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, size_t count, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy2DAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy2DToArrayAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpy2DFromArrayAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DFromArrayAsync(void* dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemset' in found_functions}}
-
-cdef cudaError_t cudaMemset(void* devPtr, int value, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemset2D' in found_functions}}
-
-cdef cudaError_t cudaMemset2D(void* devPtr, size_t pitch, int value, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemset3D' in found_functions}}
-
-cdef cudaError_t cudaMemset3D(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemsetAsync' in found_functions}}
-
-cdef cudaError_t cudaMemsetAsync(void* devPtr, int value, size_t count, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemset2DAsync' in found_functions}}
-
-cdef cudaError_t cudaMemset2DAsync(void* devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemset3DAsync' in found_functions}}
-
-cdef cudaError_t cudaMemset3DAsync(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPrefetchAsync' in found_functions}}
-
-cdef cudaError_t cudaMemPrefetchAsync(const void* devPtr, size_t count, int dstDevice, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPrefetchAsync_v2' in found_functions}}
-
-cdef cudaError_t cudaMemPrefetchAsync_v2(const void* devPtr, size_t count, cudaMemLocation location, unsigned int flags, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemAdvise' in found_functions}}
-
-cdef cudaError_t cudaMemAdvise(const void* devPtr, size_t count, cudaMemoryAdvise advice, int device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemAdvise_v2' in found_functions}}
-
-cdef cudaError_t cudaMemAdvise_v2(const void* devPtr, size_t count, cudaMemoryAdvise advice, cudaMemLocation location) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemRangeGetAttribute' in found_functions}}
-
-cdef cudaError_t cudaMemRangeGetAttribute(void* data, size_t dataSize, cudaMemRangeAttribute attribute, const void* devPtr, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemRangeGetAttributes' in found_functions}}
-
-cdef cudaError_t cudaMemRangeGetAttributes(void** data, size_t* dataSizes, cudaMemRangeAttribute* attributes, size_t numAttributes, const void* devPtr, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpyToArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpyFromArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpyFromArray(void* dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpyArrayToArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpyArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpyToArrayAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpyToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemcpyFromArrayAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpyFromArrayAsync(void* dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMallocAsync' in found_functions}}
-
-cdef cudaError_t cudaMallocAsync(void** devPtr, size_t size, cudaStream_t hStream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaFreeAsync' in found_functions}}
-
-cdef cudaError_t cudaFreeAsync(void* devPtr, cudaStream_t hStream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPoolTrimTo' in found_functions}}
-
-cdef cudaError_t cudaMemPoolTrimTo(cudaMemPool_t memPool, size_t minBytesToKeep) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPoolSetAttribute' in found_functions}}
-
-cdef cudaError_t cudaMemPoolSetAttribute(cudaMemPool_t memPool, cudaMemPoolAttr attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPoolGetAttribute' in found_functions}}
-
-cdef cudaError_t cudaMemPoolGetAttribute(cudaMemPool_t memPool, cudaMemPoolAttr attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPoolSetAccess' in found_functions}}
-
-cdef cudaError_t cudaMemPoolSetAccess(cudaMemPool_t memPool, const cudaMemAccessDesc* descList, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPoolGetAccess' in found_functions}}
-
-cdef cudaError_t cudaMemPoolGetAccess(cudaMemAccessFlags* flags, cudaMemPool_t memPool, cudaMemLocation* location) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPoolCreate' in found_functions}}
-
-cdef cudaError_t cudaMemPoolCreate(cudaMemPool_t* memPool, const cudaMemPoolProps* poolProps) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPoolDestroy' in found_functions}}
-
-cdef cudaError_t cudaMemPoolDestroy(cudaMemPool_t memPool) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMallocFromPoolAsync' in found_functions}}
-
-cdef cudaError_t cudaMallocFromPoolAsync(void** ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPoolExportToShareableHandle' in found_functions}}
-
-cdef cudaError_t cudaMemPoolExportToShareableHandle(void* shareableHandle, cudaMemPool_t memPool, cudaMemAllocationHandleType handleType, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPoolImportFromShareableHandle' in found_functions}}
-
-cdef cudaError_t cudaMemPoolImportFromShareableHandle(cudaMemPool_t* memPool, void* shareableHandle, cudaMemAllocationHandleType handleType, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPoolExportPointer' in found_functions}}
-
-cdef cudaError_t cudaMemPoolExportPointer(cudaMemPoolPtrExportData* exportData, void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaMemPoolImportPointer' in found_functions}}
-
-cdef cudaError_t cudaMemPoolImportPointer(void** ptr, cudaMemPool_t memPool, cudaMemPoolPtrExportData* exportData) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaPointerGetAttributes' in found_functions}}
-
-cdef cudaError_t cudaPointerGetAttributes(cudaPointerAttributes* attributes, const void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceCanAccessPeer' in found_functions}}
-
-cdef cudaError_t cudaDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceEnablePeerAccess' in found_functions}}
-
-cdef cudaError_t cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceDisablePeerAccess' in found_functions}}
-
-cdef cudaError_t cudaDeviceDisablePeerAccess(int peerDevice) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphicsUnregisterResource' in found_functions}}
-
-cdef cudaError_t cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphicsResourceSetMapFlags' in found_functions}}
-
-cdef cudaError_t cudaGraphicsResourceSetMapFlags(cudaGraphicsResource_t resource, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphicsMapResources' in found_functions}}
-
-cdef cudaError_t cudaGraphicsMapResources(int count, cudaGraphicsResource_t* resources, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphicsUnmapResources' in found_functions}}
-
-cdef cudaError_t cudaGraphicsUnmapResources(int count, cudaGraphicsResource_t* resources, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphicsResourceGetMappedPointer' in found_functions}}
-
-cdef cudaError_t cudaGraphicsResourceGetMappedPointer(void** devPtr, size_t* size, cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphicsSubResourceGetMappedArray' in found_functions}}
-
-cdef cudaError_t cudaGraphicsSubResourceGetMappedArray(cudaArray_t* array, cudaGraphicsResource_t resource, unsigned int arrayIndex, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-
-cdef cudaError_t cudaGraphicsResourceGetMappedMipmappedArray(cudaMipmappedArray_t* mipmappedArray, cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetChannelDesc' in found_functions}}
-
-cdef cudaError_t cudaGetChannelDesc(cudaChannelFormatDesc* desc, cudaArray_const_t array) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaCreateChannelDesc' in found_functions}}
-
-cdef cudaChannelFormatDesc cudaCreateChannelDesc(int x, int y, int z, int w, cudaChannelFormatKind f) nogil
-{{endif}}
-
-{{if 'cudaCreateTextureObject' in found_functions}}
-
-cdef cudaError_t cudaCreateTextureObject(cudaTextureObject_t* pTexObject, const cudaResourceDesc* pResDesc, const cudaTextureDesc* pTexDesc, const cudaResourceViewDesc* pResViewDesc) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDestroyTextureObject' in found_functions}}
-
-cdef cudaError_t cudaDestroyTextureObject(cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetTextureObjectResourceDesc' in found_functions}}
-
-cdef cudaError_t cudaGetTextureObjectResourceDesc(cudaResourceDesc* pResDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetTextureObjectTextureDesc' in found_functions}}
-
-cdef cudaError_t cudaGetTextureObjectTextureDesc(cudaTextureDesc* pTexDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetTextureObjectResourceViewDesc' in found_functions}}
-
-cdef cudaError_t cudaGetTextureObjectResourceViewDesc(cudaResourceViewDesc* pResViewDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaCreateSurfaceObject' in found_functions}}
-
-cdef cudaError_t cudaCreateSurfaceObject(cudaSurfaceObject_t* pSurfObject, const cudaResourceDesc* pResDesc) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDestroySurfaceObject' in found_functions}}
-
-cdef cudaError_t cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetSurfaceObjectResourceDesc' in found_functions}}
-
-cdef cudaError_t cudaGetSurfaceObjectResourceDesc(cudaResourceDesc* pResDesc, cudaSurfaceObject_t surfObject) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDriverGetVersion' in found_functions}}
-
-cdef cudaError_t cudaDriverGetVersion(int* driverVersion) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaRuntimeGetVersion' in found_functions}}
-
-cdef cudaError_t cudaRuntimeGetVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphCreate' in found_functions}}
-
-cdef cudaError_t cudaGraphCreate(cudaGraph_t* pGraph, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddKernelNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddKernelNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphKernelNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphKernelNodeGetParams(cudaGraphNode_t node, cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphKernelNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphKernelNodeSetParams(cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphKernelNodeCopyAttributes' in found_functions}}
-
-cdef cudaError_t cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphKernelNodeGetAttribute' in found_functions}}
-
-cdef cudaError_t cudaGraphKernelNodeGetAttribute(cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, cudaKernelNodeAttrValue* value_out) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphKernelNodeSetAttribute' in found_functions}}
-
-cdef cudaError_t cudaGraphKernelNodeSetAttribute(cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, const cudaKernelNodeAttrValue* value) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddMemcpyNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddMemcpyNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemcpy3DParms* pCopyParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddMemcpyNode1D' in found_functions}}
-
-cdef cudaError_t cudaGraphAddMemcpyNode1D(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphMemcpyNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemcpyNodeGetParams(cudaGraphNode_t node, cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphMemcpyNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemcpyNodeSetParams(cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphMemcpyNodeSetParams1D' in found_functions}}
-
-cdef cudaError_t cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddMemsetNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddMemsetNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemsetParams* pMemsetParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphMemsetNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemsetNodeGetParams(cudaGraphNode_t node, cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphMemsetNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemsetNodeSetParams(cudaGraphNode_t node, const cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddHostNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddHostNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphHostNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphHostNodeGetParams(cudaGraphNode_t node, cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphHostNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphHostNodeSetParams(cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddChildGraphNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddChildGraphNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraph_t childGraph) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphChildGraphNodeGetGraph' in found_functions}}
-
-cdef cudaError_t cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t* pGraph) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddEmptyNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddEmptyNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddEventRecordNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddEventRecordNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphEventRecordNodeGetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t* event_out) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphEventRecordNodeSetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddEventWaitNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddEventWaitNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphEventWaitNodeGetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t* event_out) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphEventWaitNodeSetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddExternalSemaphoresSignalNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddExternalSemaphoresSignalNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExternalSemaphoresSignalNodeGetParams(cudaGraphNode_t hNode, cudaExternalSemaphoreSignalNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExternalSemaphoresSignalNodeSetParams(cudaGraphNode_t hNode, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddExternalSemaphoresWaitNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddExternalSemaphoresWaitNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExternalSemaphoresWaitNodeGetParams(cudaGraphNode_t hNode, cudaExternalSemaphoreWaitNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExternalSemaphoresWaitNodeSetParams(cudaGraphNode_t hNode, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddMemAllocNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddMemAllocNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaMemAllocNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphMemAllocNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemAllocNodeGetParams(cudaGraphNode_t node, cudaMemAllocNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddMemFreeNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddMemFreeNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dptr) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphMemFreeNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void* dptr_out) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGraphMemTrim' in found_functions}}
-
-cdef cudaError_t cudaDeviceGraphMemTrim(int device) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceGetGraphMemAttribute' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetGraphMemAttribute(int device, cudaGraphMemAttributeType attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaDeviceSetGraphMemAttribute' in found_functions}}
-
-cdef cudaError_t cudaDeviceSetGraphMemAttribute(int device, cudaGraphMemAttributeType attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphClone' in found_functions}}
-
-cdef cudaError_t cudaGraphClone(cudaGraph_t* pGraphClone, cudaGraph_t originalGraph) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphNodeFindInClone' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeFindInClone(cudaGraphNode_t* pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphNodeGetType' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetType(cudaGraphNode_t node, cudaGraphNodeType* pType) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphGetNodes' in found_functions}}
-
-cdef cudaError_t cudaGraphGetNodes(cudaGraph_t graph, cudaGraphNode_t* nodes, size_t* numNodes) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphGetRootNodes' in found_functions}}
-
-cdef cudaError_t cudaGraphGetRootNodes(cudaGraph_t graph, cudaGraphNode_t* pRootNodes, size_t* pNumRootNodes) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphGetEdges' in found_functions}}
-
-cdef cudaError_t cudaGraphGetEdges(cudaGraph_t graph, cudaGraphNode_t* from_, cudaGraphNode_t* to, size_t* numEdges) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphGetEdges_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphGetEdges_v2(cudaGraph_t graph, cudaGraphNode_t* from_, cudaGraphNode_t* to, cudaGraphEdgeData* edgeData, size_t* numEdges) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependencies' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetDependencies(cudaGraphNode_t node, cudaGraphNode_t* pDependencies, size_t* pNumDependencies) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependencies_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetDependencies_v2(cudaGraphNode_t node, cudaGraphNode_t* pDependencies, cudaGraphEdgeData* edgeData, size_t* pNumDependencies) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependentNodes' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetDependentNodes(cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, size_t* pNumDependentNodes) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependentNodes_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetDependentNodes_v2(cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, cudaGraphEdgeData* edgeData, size_t* pNumDependentNodes) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddDependencies' in found_functions}}
-
-cdef cudaError_t cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddDependencies_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphAddDependencies_v2(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, const cudaGraphEdgeData* edgeData, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphRemoveDependencies' in found_functions}}
-
-cdef cudaError_t cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphRemoveDependencies_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphRemoveDependencies_v2(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, const cudaGraphEdgeData* edgeData, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphDestroyNode' in found_functions}}
-
-cdef cudaError_t cudaGraphDestroyNode(cudaGraphNode_t node) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphInstantiate' in found_functions}}
-
-cdef cudaError_t cudaGraphInstantiate(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, unsigned long long flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphInstantiateWithFlags' in found_functions}}
-
-cdef cudaError_t cudaGraphInstantiateWithFlags(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, unsigned long long flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphInstantiateWithParams' in found_functions}}
-
-cdef cudaError_t cudaGraphInstantiateWithParams(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, cudaGraphInstantiateParams* instantiateParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecGetFlags' in found_functions}}
-
-cdef cudaError_t cudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long long* flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecKernelNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecKernelNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecMemcpyNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecMemcpyNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecMemcpyNodeSetParams1D' in found_functions}}
-
-cdef cudaError_t cudaGraphExecMemcpyNodeSetParams1D(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecMemsetNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecMemsetNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecHostNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecChildGraphNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecChildGraphNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecEventRecordNodeSetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphExecEventRecordNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecEventWaitNodeSetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphExecEventWaitNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecExternalSemaphoresSignalNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecExternalSemaphoresWaitNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphNodeSetEnabled' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeSetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphNodeGetEnabled' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int* isEnabled) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecUpdate' in found_functions}}
-
-cdef cudaError_t cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, cudaGraphExecUpdateResultInfo* resultInfo) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphUpload' in found_functions}}
-
-cdef cudaError_t cudaGraphUpload(cudaGraphExec_t graphExec, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphLaunch' in found_functions}}
-
-cdef cudaError_t cudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecDestroy' in found_functions}}
-
-cdef cudaError_t cudaGraphExecDestroy(cudaGraphExec_t graphExec) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphDestroy' in found_functions}}
-
-cdef cudaError_t cudaGraphDestroy(cudaGraph_t graph) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphDebugDotPrint' in found_functions}}
-
-cdef cudaError_t cudaGraphDebugDotPrint(cudaGraph_t graph, const char* path, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaUserObjectCreate' in found_functions}}
-
-cdef cudaError_t cudaUserObjectCreate(cudaUserObject_t* object_out, void* ptr, cudaHostFn_t destroy, unsigned int initialRefcount, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaUserObjectRetain' in found_functions}}
-
-cdef cudaError_t cudaUserObjectRetain(cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaUserObjectRelease' in found_functions}}
-
-cdef cudaError_t cudaUserObjectRelease(cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphRetainUserObject' in found_functions}}
-
-cdef cudaError_t cudaGraphRetainUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphReleaseUserObject' in found_functions}}
-
-cdef cudaError_t cudaGraphReleaseUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphAddNode_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphAddNode_v2(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeSetParams(cudaGraphNode_t node, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphExecNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecNodeSetParams(cudaGraphExec_t graphExec, cudaGraphNode_t node, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGraphConditionalHandleCreate' in found_functions}}
-
-cdef cudaError_t cudaGraphConditionalHandleCreate(cudaGraphConditionalHandle* pHandle_out, cudaGraph_t graph, unsigned int defaultLaunchValue, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetDriverEntryPoint' in found_functions}}
-
-cdef cudaError_t cudaGetDriverEntryPoint(const char* symbol, void** funcPtr, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetDriverEntryPointByVersion' in found_functions}}
-
-cdef cudaError_t cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetExportTable' in found_functions}}
-
-cdef cudaError_t cudaGetExportTable(const void** ppExportTable, const cudaUUID_t* pExportTableId) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'cudaGetKernel' in found_functions}}
-
-cdef cudaError_t cudaGetKernel(cudaKernel_t* kernelPtr, const void* entryFuncAddr) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if 'make_cudaPitchedPtr' in found_functions}}
-
-cdef cudaPitchedPtr make_cudaPitchedPtr(void* d, size_t p, size_t xsz, size_t ysz) nogil
-{{endif}}
-
-{{if 'make_cudaPos' in found_functions}}
-
-cdef cudaPos make_cudaPos(size_t x, size_t y, size_t z) nogil
-{{endif}}
-
-{{if 'make_cudaExtent' in found_functions}}
-
-cdef cudaExtent make_cudaExtent(size_t w, size_t h, size_t d) nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsEGLRegisterImage(cudaGraphicsResource** pCudaResource, EGLImageKHR image, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamConsumerConnect(cudaEglStreamConnection* conn, EGLStreamKHR eglStream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamConsumerConnectWithFlags(cudaEglStreamConnection* conn, EGLStreamKHR eglStream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamConsumerDisconnect(cudaEglStreamConnection* conn) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamConsumerAcquireFrame(cudaEglStreamConnection* conn, cudaGraphicsResource_t* pCudaResource, cudaStream_t* pStream, unsigned int timeout) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamConsumerReleaseFrame(cudaEglStreamConnection* conn, cudaGraphicsResource_t pCudaResource, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamProducerConnect(cudaEglStreamConnection* conn, EGLStreamKHR eglStream, EGLint width, EGLint height) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamProducerDisconnect(cudaEglStreamConnection* conn) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* conn, cudaEglFrame eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn, cudaEglFrame* eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEventCreateFromEGLSync(cudaEvent_t* phEvent, EGLSyncKHR eglSync, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaProfilerStart() except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaProfilerStop() except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGLGetDevices(unsigned int* pCudaDeviceCount, int* pCudaDevices, unsigned int cudaDeviceCount, cudaGLDeviceList deviceList) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsGLRegisterImage(cudaGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsGLRegisterBuffer(cudaGraphicsResource** resource, GLuint buffer, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaVDPAUGetDevice(int* device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaVDPAUSetVDPAUDevice(int device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsVDPAURegisterVideoSurface(cudaGraphicsResource** resource, VdpVideoSurface vdpSurface, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsVDPAURegisterOutputSurface(cudaGraphicsResource** resource, VdpOutputSurface vdpSurface, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t getLocalRuntimeVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil
-{{endif}}
-
-cdef enum: cudaHostAllocDefault = 0
-
-cdef enum: cudaHostAllocPortable = 1
-
-cdef enum: cudaHostAllocMapped = 2
-
-cdef enum: cudaHostAllocWriteCombined = 4
-
-cdef enum: cudaHostRegisterDefault = 0
-
-cdef enum: cudaHostRegisterPortable = 1
-
-cdef enum: cudaHostRegisterMapped = 2
-
-cdef enum: cudaHostRegisterIoMemory = 4
-
-cdef enum: cudaHostRegisterReadOnly = 8
-
-cdef enum: cudaPeerAccessDefault = 0
-
-cdef enum: cudaStreamDefault = 0
-
-cdef enum: cudaStreamNonBlocking = 1
-
-cdef enum: cudaStreamLegacy = 1
-
-cdef enum: cudaStreamPerThread = 2
-
-cdef enum: cudaEventDefault = 0
-
-cdef enum: cudaEventBlockingSync = 1
-
-cdef enum: cudaEventDisableTiming = 2
-
-cdef enum: cudaEventInterprocess = 4
-
-cdef enum: cudaEventRecordDefault = 0
-
-cdef enum: cudaEventRecordExternal = 1
-
-cdef enum: cudaEventWaitDefault = 0
-
-cdef enum: cudaEventWaitExternal = 1
-
-cdef enum: cudaDeviceScheduleAuto = 0
-
-cdef enum: cudaDeviceScheduleSpin = 1
-
-cdef enum: cudaDeviceScheduleYield = 2
-
-cdef enum: cudaDeviceScheduleBlockingSync = 4
-
-cdef enum: cudaDeviceBlockingSync = 4
-
-cdef enum: cudaDeviceScheduleMask = 7
-
-cdef enum: cudaDeviceMapHost = 8
-
-cdef enum: cudaDeviceLmemResizeToMax = 16
-
-cdef enum: cudaDeviceSyncMemops = 128
-
-cdef enum: cudaDeviceMask = 255
-
-cdef enum: cudaArrayDefault = 0
-
-cdef enum: cudaArrayLayered = 1
-
-cdef enum: cudaArraySurfaceLoadStore = 2
-
-cdef enum: cudaArrayCubemap = 4
-
-cdef enum: cudaArrayTextureGather = 8
-
-cdef enum: cudaArrayColorAttachment = 32
-
-cdef enum: cudaArraySparse = 64
-
-cdef enum: cudaArrayDeferredMapping = 128
-
-cdef enum: cudaIpcMemLazyEnablePeerAccess = 1
-
-cdef enum: cudaMemAttachGlobal = 1
-
-cdef enum: cudaMemAttachHost = 2
-
-cdef enum: cudaMemAttachSingle = 4
-
-cdef enum: cudaOccupancyDefault = 0
-
-cdef enum: cudaOccupancyDisableCachingOverride = 1
-
-cdef enum: cudaCpuDeviceId = -1
-
-cdef enum: cudaInvalidDeviceId = -2
-
-cdef enum: cudaInitDeviceFlagsAreValid = 1
-
-cdef enum: cudaCooperativeLaunchMultiDeviceNoPreSync = 1
-
-cdef enum: cudaCooperativeLaunchMultiDeviceNoPostSync = 2
-
-cdef enum: cudaArraySparsePropertiesSingleMipTail = 1
-
-cdef enum: CUDA_IPC_HANDLE_SIZE = 64
-
-cdef enum: cudaExternalMemoryDedicated = 1
-
-cdef enum: cudaExternalSemaphoreSignalSkipNvSciBufMemSync = 1
-
-cdef enum: cudaExternalSemaphoreWaitSkipNvSciBufMemSync = 2
-
-cdef enum: cudaNvSciSyncAttrSignal = 1
-
-cdef enum: cudaNvSciSyncAttrWait = 2
-
-cdef enum: cudaGraphKernelNodePortDefault = 0
-
-cdef enum: cudaGraphKernelNodePortProgrammatic = 1
-
-cdef enum: cudaGraphKernelNodePortLaunchCompletion = 2
-
-cdef enum: cudaStreamAttributeAccessPolicyWindow = 1
-
-cdef enum: cudaStreamAttributeSynchronizationPolicy = 3
-
-cdef enum: cudaStreamAttributeMemSyncDomainMap = 9
-
-cdef enum: cudaStreamAttributeMemSyncDomain = 10
-
-cdef enum: cudaStreamAttributePriority = 8
-
-cdef enum: cudaKernelNodeAttributeAccessPolicyWindow = 1
-
-cdef enum: cudaKernelNodeAttributeCooperative = 2
-
-cdef enum: cudaKernelNodeAttributePriority = 8
-
-cdef enum: cudaKernelNodeAttributeClusterDimension = 4
-
-cdef enum: cudaKernelNodeAttributeClusterSchedulingPolicyPreference = 5
-
-cdef enum: cudaKernelNodeAttributeMemSyncDomainMap = 9
-
-cdef enum: cudaKernelNodeAttributeMemSyncDomain = 10
-
-cdef enum: cudaKernelNodeAttributePreferredSharedMemoryCarveout = 14
-
-cdef enum: cudaKernelNodeAttributeDeviceUpdatableKernelNode = 13
-
-cdef enum: cudaSurfaceType1D = 1
-
-cdef enum: cudaSurfaceType2D = 2
-
-cdef enum: cudaSurfaceType3D = 3
-
-cdef enum: cudaSurfaceTypeCubemap = 12
-
-cdef enum: cudaSurfaceType1DLayered = 241
-
-cdef enum: cudaSurfaceType2DLayered = 242
-
-cdef enum: cudaSurfaceTypeCubemapLayered = 252
-
-cdef enum: cudaTextureType1D = 1
-
-cdef enum: cudaTextureType2D = 2
-
-cdef enum: cudaTextureType3D = 3
-
-cdef enum: cudaTextureTypeCubemap = 12
-
-cdef enum: cudaTextureType1DLayered = 241
-
-cdef enum: cudaTextureType2DLayered = 242
-
-cdef enum: cudaTextureTypeCubemapLayered = 252
-
-cdef enum: CUDART_VERSION = 12060
-
-cdef enum: __CUDART_API_VERSION = 12060
-
-cdef enum: CUDA_EGL_MAX_PLANES = 3
\ No newline at end of file
diff --git a/cuda_bindings/cuda/bindings/cyruntime.pyx.in b/cuda_bindings/cuda/bindings/cyruntime.pyx.in
deleted file mode 100644
index 18b9fe40..00000000
--- a/cuda_bindings/cuda/bindings/cyruntime.pyx.in
+++ /dev/null
@@ -1,2501 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-
-# cython: show_performance_hints=False
-
-cimport cuda.bindings._bindings.cydriver as cydriver
-from cuda.bindings._lib.cyruntime.cyruntime cimport *
-from cuda.bindings._lib.cyruntime.utils cimport *
-from libc.stdlib cimport malloc, free, calloc
-from libc cimport string
-from libcpp cimport bool
-
-cdef cudaPythonGlobal m_global = globalGetInstance()
-
-{{if 'cudaDeviceReset' in found_functions}}
-
-cdef cudaError_t cudaDeviceReset() except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceReset()
-
-{{endif}}
-
-{{if 'cudaDeviceSynchronize' in found_functions}}
-
-cdef cudaError_t cudaDeviceSynchronize() except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceSynchronize()
-
-{{endif}}
-
-{{if 'cudaDeviceSetLimit' in found_functions}}
-
-cdef cudaError_t cudaDeviceSetLimit(cudaLimit limit, size_t value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceSetLimit(limit, value)
-
-{{endif}}
-
-{{if 'cudaDeviceGetLimit' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetLimit(size_t* pValue, cudaLimit limit) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetLimit(pValue, limit)
-
-{{endif}}
-
-{{if 'cudaDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetTexture1DLinearMaxWidth(size_t* maxWidthInElements, const cudaChannelFormatDesc* fmtDesc, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetTexture1DLinearMaxWidth(maxWidthInElements, fmtDesc, device)
-
-{{endif}}
-
-{{if 'cudaDeviceGetCacheConfig' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetCacheConfig(cudaFuncCache* pCacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetCacheConfig(pCacheConfig)
-
-{{endif}}
-
-{{if 'cudaDeviceGetStreamPriorityRange' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority)
-
-{{endif}}
-
-{{if 'cudaDeviceSetCacheConfig' in found_functions}}
-
-cdef cudaError_t cudaDeviceSetCacheConfig(cudaFuncCache cacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceSetCacheConfig(cacheConfig)
-
-{{endif}}
-
-{{if 'cudaDeviceGetByPCIBusId' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetByPCIBusId(int* device, const char* pciBusId) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetByPCIBusId(device, pciBusId)
-
-{{endif}}
-
-{{if 'cudaDeviceGetPCIBusId' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetPCIBusId(char* pciBusId, int length, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetPCIBusId(pciBusId, length, device)
-
-{{endif}}
-
-{{if 'cudaIpcGetEventHandle' in found_functions}}
-
-cdef cudaError_t cudaIpcGetEventHandle(cudaIpcEventHandle_t* handle, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaIpcGetEventHandle(handle, event)
-
-{{endif}}
-
-{{if 'cudaIpcOpenEventHandle' in found_functions}}
-
-cdef cudaError_t cudaIpcOpenEventHandle(cudaEvent_t* event, cudaIpcEventHandle_t handle) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaIpcOpenEventHandle(event, handle)
-
-{{endif}}
-
-{{if 'cudaIpcGetMemHandle' in found_functions}}
-
-cdef cudaError_t cudaIpcGetMemHandle(cudaIpcMemHandle_t* handle, void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaIpcGetMemHandle(handle, devPtr)
-
-{{endif}}
-
-{{if 'cudaIpcOpenMemHandle' in found_functions}}
-
-cdef cudaError_t cudaIpcOpenMemHandle(void** devPtr, cudaIpcMemHandle_t handle, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaIpcOpenMemHandle(devPtr, handle, flags)
-
-{{endif}}
-
-{{if 'cudaIpcCloseMemHandle' in found_functions}}
-
-cdef cudaError_t cudaIpcCloseMemHandle(void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaIpcCloseMemHandle(devPtr)
-
-{{endif}}
-
-{{if 'cudaDeviceFlushGPUDirectRDMAWrites' in found_functions}}
-
-cdef cudaError_t cudaDeviceFlushGPUDirectRDMAWrites(cudaFlushGPUDirectRDMAWritesTarget target, cudaFlushGPUDirectRDMAWritesScope scope) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceFlushGPUDirectRDMAWrites(target, scope)
-
-{{endif}}
-
-{{if 'cudaDeviceRegisterAsyncNotification' in found_functions}}
-
-cdef cudaError_t cudaDeviceRegisterAsyncNotification(int device, cudaAsyncCallback callbackFunc, void* userData, cudaAsyncCallbackHandle_t* callback) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceRegisterAsyncNotification(device, callbackFunc, userData, callback)
-
-{{endif}}
-
-{{if 'cudaDeviceUnregisterAsyncNotification' in found_functions}}
-
-cdef cudaError_t cudaDeviceUnregisterAsyncNotification(int device, cudaAsyncCallbackHandle_t callback) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceUnregisterAsyncNotification(device, callback)
-
-{{endif}}
-
-{{if 'cudaDeviceGetSharedMemConfig' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetSharedMemConfig(cudaSharedMemConfig* pConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetSharedMemConfig(pConfig)
-
-{{endif}}
-
-{{if 'cudaDeviceSetSharedMemConfig' in found_functions}}
-
-cdef cudaError_t cudaDeviceSetSharedMemConfig(cudaSharedMemConfig config) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceSetSharedMemConfig(config)
-
-{{endif}}
-
-{{if 'cudaGetLastError' in found_functions}}
-
-cdef cudaError_t cudaGetLastError() except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetLastError()
-
-{{endif}}
-
-{{if 'cudaPeekAtLastError' in found_functions}}
-
-cdef cudaError_t cudaPeekAtLastError() except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaPeekAtLastError()
-
-{{endif}}
-
-{{if 'cudaGetErrorName' in found_functions}}
-
-cdef const char* cudaGetErrorName(cudaError_t error) except ?NULL nogil:
-    cdef const char* pStr = "unrecognized error code"
-    {{if 'cudaSuccess' in found_values}}
-    if error == cudaSuccess:
-        return "cudaSuccess"{{endif}}
-    {{if 'cudaErrorInvalidValue' in found_values}}
-    if error == cudaErrorInvalidValue:
-        return "cudaErrorInvalidValue"{{endif}}
-    {{if 'cudaErrorMemoryAllocation' in found_values}}
-    if error == cudaErrorMemoryAllocation:
-        return "cudaErrorMemoryAllocation"{{endif}}
-    {{if 'cudaErrorInitializationError' in found_values}}
-    if error == cudaErrorInitializationError:
-        return "cudaErrorInitializationError"{{endif}}
-    {{if 'cudaErrorCudartUnloading' in found_values}}
-    if error == cudaErrorCudartUnloading:
-        return "cudaErrorCudartUnloading"{{endif}}
-    {{if 'cudaErrorProfilerDisabled' in found_values}}
-    if error == cudaErrorProfilerDisabled:
-        return "cudaErrorProfilerDisabled"{{endif}}
-    {{if 'cudaErrorProfilerNotInitialized' in found_values}}
-    if error == cudaErrorProfilerNotInitialized:
-        return "cudaErrorProfilerNotInitialized"{{endif}}
-    {{if 'cudaErrorProfilerAlreadyStarted' in found_values}}
-    if error == cudaErrorProfilerAlreadyStarted:
-        return "cudaErrorProfilerAlreadyStarted"{{endif}}
-    {{if 'cudaErrorProfilerAlreadyStopped' in found_values}}
-    if error == cudaErrorProfilerAlreadyStopped:
-        return "cudaErrorProfilerAlreadyStopped"{{endif}}
-    {{if 'cudaErrorInvalidConfiguration' in found_values}}
-    if error == cudaErrorInvalidConfiguration:
-        return "cudaErrorInvalidConfiguration"{{endif}}
-    {{if 'cudaErrorInvalidPitchValue' in found_values}}
-    if error == cudaErrorInvalidPitchValue:
-        return "cudaErrorInvalidPitchValue"{{endif}}
-    {{if 'cudaErrorInvalidSymbol' in found_values}}
-    if error == cudaErrorInvalidSymbol:
-        return "cudaErrorInvalidSymbol"{{endif}}
-    {{if 'cudaErrorInvalidHostPointer' in found_values}}
-    if error == cudaErrorInvalidHostPointer:
-        return "cudaErrorInvalidHostPointer"{{endif}}
-    {{if 'cudaErrorInvalidDevicePointer' in found_values}}
-    if error == cudaErrorInvalidDevicePointer:
-        return "cudaErrorInvalidDevicePointer"{{endif}}
-    {{if 'cudaErrorInvalidTexture' in found_values}}
-    if error == cudaErrorInvalidTexture:
-        return "cudaErrorInvalidTexture"{{endif}}
-    {{if 'cudaErrorInvalidTextureBinding' in found_values}}
-    if error == cudaErrorInvalidTextureBinding:
-        return "cudaErrorInvalidTextureBinding"{{endif}}
-    {{if 'cudaErrorInvalidChannelDescriptor' in found_values}}
-    if error == cudaErrorInvalidChannelDescriptor:
-        return "cudaErrorInvalidChannelDescriptor"{{endif}}
-    {{if 'cudaErrorInvalidMemcpyDirection' in found_values}}
-    if error == cudaErrorInvalidMemcpyDirection:
-        return "cudaErrorInvalidMemcpyDirection"{{endif}}
-    {{if 'cudaErrorAddressOfConstant' in found_values}}
-    if error == cudaErrorAddressOfConstant:
-        return "cudaErrorAddressOfConstant"{{endif}}
-    {{if 'cudaErrorTextureFetchFailed' in found_values}}
-    if error == cudaErrorTextureFetchFailed:
-        return "cudaErrorTextureFetchFailed"{{endif}}
-    {{if 'cudaErrorTextureNotBound' in found_values}}
-    if error == cudaErrorTextureNotBound:
-        return "cudaErrorTextureNotBound"{{endif}}
-    {{if 'cudaErrorSynchronizationError' in found_values}}
-    if error == cudaErrorSynchronizationError:
-        return "cudaErrorSynchronizationError"{{endif}}
-    {{if 'cudaErrorInvalidFilterSetting' in found_values}}
-    if error == cudaErrorInvalidFilterSetting:
-        return "cudaErrorInvalidFilterSetting"{{endif}}
-    {{if 'cudaErrorInvalidNormSetting' in found_values}}
-    if error == cudaErrorInvalidNormSetting:
-        return "cudaErrorInvalidNormSetting"{{endif}}
-    {{if 'cudaErrorMixedDeviceExecution' in found_values}}
-    if error == cudaErrorMixedDeviceExecution:
-        return "cudaErrorMixedDeviceExecution"{{endif}}
-    {{if 'cudaErrorNotYetImplemented' in found_values}}
-    if error == cudaErrorNotYetImplemented:
-        return "cudaErrorNotYetImplemented"{{endif}}
-    {{if 'cudaErrorMemoryValueTooLarge' in found_values}}
-    if error == cudaErrorMemoryValueTooLarge:
-        return "cudaErrorMemoryValueTooLarge"{{endif}}
-    {{if 'cudaErrorStubLibrary' in found_values}}
-    if error == cudaErrorStubLibrary:
-        return "cudaErrorStubLibrary"{{endif}}
-    {{if 'cudaErrorInsufficientDriver' in found_values}}
-    if error == cudaErrorInsufficientDriver:
-        return "cudaErrorInsufficientDriver"{{endif}}
-    {{if 'cudaErrorCallRequiresNewerDriver' in found_values}}
-    if error == cudaErrorCallRequiresNewerDriver:
-        return "cudaErrorCallRequiresNewerDriver"{{endif}}
-    {{if 'cudaErrorInvalidSurface' in found_values}}
-    if error == cudaErrorInvalidSurface:
-        return "cudaErrorInvalidSurface"{{endif}}
-    {{if 'cudaErrorDuplicateVariableName' in found_values}}
-    if error == cudaErrorDuplicateVariableName:
-        return "cudaErrorDuplicateVariableName"{{endif}}
-    {{if 'cudaErrorDuplicateTextureName' in found_values}}
-    if error == cudaErrorDuplicateTextureName:
-        return "cudaErrorDuplicateTextureName"{{endif}}
-    {{if 'cudaErrorDuplicateSurfaceName' in found_values}}
-    if error == cudaErrorDuplicateSurfaceName:
-        return "cudaErrorDuplicateSurfaceName"{{endif}}
-    {{if 'cudaErrorDevicesUnavailable' in found_values}}
-    if error == cudaErrorDevicesUnavailable:
-        return "cudaErrorDevicesUnavailable"{{endif}}
-    {{if 'cudaErrorIncompatibleDriverContext' in found_values}}
-    if error == cudaErrorIncompatibleDriverContext:
-        return "cudaErrorIncompatibleDriverContext"{{endif}}
-    {{if 'cudaErrorMissingConfiguration' in found_values}}
-    if error == cudaErrorMissingConfiguration:
-        return "cudaErrorMissingConfiguration"{{endif}}
-    {{if 'cudaErrorPriorLaunchFailure' in found_values}}
-    if error == cudaErrorPriorLaunchFailure:
-        return "cudaErrorPriorLaunchFailure"{{endif}}
-    {{if 'cudaErrorLaunchMaxDepthExceeded' in found_values}}
-    if error == cudaErrorLaunchMaxDepthExceeded:
-        return "cudaErrorLaunchMaxDepthExceeded"{{endif}}
-    {{if 'cudaErrorLaunchFileScopedTex' in found_values}}
-    if error == cudaErrorLaunchFileScopedTex:
-        return "cudaErrorLaunchFileScopedTex"{{endif}}
-    {{if 'cudaErrorLaunchFileScopedSurf' in found_values}}
-    if error == cudaErrorLaunchFileScopedSurf:
-        return "cudaErrorLaunchFileScopedSurf"{{endif}}
-    {{if 'cudaErrorSyncDepthExceeded' in found_values}}
-    if error == cudaErrorSyncDepthExceeded:
-        return "cudaErrorSyncDepthExceeded"{{endif}}
-    {{if 'cudaErrorLaunchPendingCountExceeded' in found_values}}
-    if error == cudaErrorLaunchPendingCountExceeded:
-        return "cudaErrorLaunchPendingCountExceeded"{{endif}}
-    {{if 'cudaErrorInvalidDeviceFunction' in found_values}}
-    if error == cudaErrorInvalidDeviceFunction:
-        return "cudaErrorInvalidDeviceFunction"{{endif}}
-    {{if 'cudaErrorNoDevice' in found_values}}
-    if error == cudaErrorNoDevice:
-        return "cudaErrorNoDevice"{{endif}}
-    {{if 'cudaErrorInvalidDevice' in found_values}}
-    if error == cudaErrorInvalidDevice:
-        return "cudaErrorInvalidDevice"{{endif}}
-    {{if 'cudaErrorDeviceNotLicensed' in found_values}}
-    if error == cudaErrorDeviceNotLicensed:
-        return "cudaErrorDeviceNotLicensed"{{endif}}
-    {{if 'cudaErrorSoftwareValidityNotEstablished' in found_values}}
-    if error == cudaErrorSoftwareValidityNotEstablished:
-        return "cudaErrorSoftwareValidityNotEstablished"{{endif}}
-    {{if 'cudaErrorStartupFailure' in found_values}}
-    if error == cudaErrorStartupFailure:
-        return "cudaErrorStartupFailure"{{endif}}
-    {{if 'cudaErrorInvalidKernelImage' in found_values}}
-    if error == cudaErrorInvalidKernelImage:
-        return "cudaErrorInvalidKernelImage"{{endif}}
-    {{if 'cudaErrorDeviceUninitialized' in found_values}}
-    if error == cudaErrorDeviceUninitialized:
-        return "cudaErrorDeviceUninitialized"{{endif}}
-    {{if 'cudaErrorMapBufferObjectFailed' in found_values}}
-    if error == cudaErrorMapBufferObjectFailed:
-        return "cudaErrorMapBufferObjectFailed"{{endif}}
-    {{if 'cudaErrorUnmapBufferObjectFailed' in found_values}}
-    if error == cudaErrorUnmapBufferObjectFailed:
-        return "cudaErrorUnmapBufferObjectFailed"{{endif}}
-    {{if 'cudaErrorArrayIsMapped' in found_values}}
-    if error == cudaErrorArrayIsMapped:
-        return "cudaErrorArrayIsMapped"{{endif}}
-    {{if 'cudaErrorAlreadyMapped' in found_values}}
-    if error == cudaErrorAlreadyMapped:
-        return "cudaErrorAlreadyMapped"{{endif}}
-    {{if 'cudaErrorNoKernelImageForDevice' in found_values}}
-    if error == cudaErrorNoKernelImageForDevice:
-        return "cudaErrorNoKernelImageForDevice"{{endif}}
-    {{if 'cudaErrorAlreadyAcquired' in found_values}}
-    if error == cudaErrorAlreadyAcquired:
-        return "cudaErrorAlreadyAcquired"{{endif}}
-    {{if 'cudaErrorNotMapped' in found_values}}
-    if error == cudaErrorNotMapped:
-        return "cudaErrorNotMapped"{{endif}}
-    {{if 'cudaErrorNotMappedAsArray' in found_values}}
-    if error == cudaErrorNotMappedAsArray:
-        return "cudaErrorNotMappedAsArray"{{endif}}
-    {{if 'cudaErrorNotMappedAsPointer' in found_values}}
-    if error == cudaErrorNotMappedAsPointer:
-        return "cudaErrorNotMappedAsPointer"{{endif}}
-    {{if 'cudaErrorECCUncorrectable' in found_values}}
-    if error == cudaErrorECCUncorrectable:
-        return "cudaErrorECCUncorrectable"{{endif}}
-    {{if 'cudaErrorUnsupportedLimit' in found_values}}
-    if error == cudaErrorUnsupportedLimit:
-        return "cudaErrorUnsupportedLimit"{{endif}}
-    {{if 'cudaErrorDeviceAlreadyInUse' in found_values}}
-    if error == cudaErrorDeviceAlreadyInUse:
-        return "cudaErrorDeviceAlreadyInUse"{{endif}}
-    {{if 'cudaErrorPeerAccessUnsupported' in found_values}}
-    if error == cudaErrorPeerAccessUnsupported:
-        return "cudaErrorPeerAccessUnsupported"{{endif}}
-    {{if 'cudaErrorInvalidPtx' in found_values}}
-    if error == cudaErrorInvalidPtx:
-        return "cudaErrorInvalidPtx"{{endif}}
-    {{if 'cudaErrorInvalidGraphicsContext' in found_values}}
-    if error == cudaErrorInvalidGraphicsContext:
-        return "cudaErrorInvalidGraphicsContext"{{endif}}
-    {{if 'cudaErrorNvlinkUncorrectable' in found_values}}
-    if error == cudaErrorNvlinkUncorrectable:
-        return "cudaErrorNvlinkUncorrectable"{{endif}}
-    {{if 'cudaErrorJitCompilerNotFound' in found_values}}
-    if error == cudaErrorJitCompilerNotFound:
-        return "cudaErrorJitCompilerNotFound"{{endif}}
-    {{if 'cudaErrorUnsupportedPtxVersion' in found_values}}
-    if error == cudaErrorUnsupportedPtxVersion:
-        return "cudaErrorUnsupportedPtxVersion"{{endif}}
-    {{if 'cudaErrorJitCompilationDisabled' in found_values}}
-    if error == cudaErrorJitCompilationDisabled:
-        return "cudaErrorJitCompilationDisabled"{{endif}}
-    {{if 'cudaErrorUnsupportedExecAffinity' in found_values}}
-    if error == cudaErrorUnsupportedExecAffinity:
-        return "cudaErrorUnsupportedExecAffinity"{{endif}}
-    {{if 'cudaErrorUnsupportedDevSideSync' in found_values}}
-    if error == cudaErrorUnsupportedDevSideSync:
-        return "cudaErrorUnsupportedDevSideSync"{{endif}}
-    {{if 'cudaErrorInvalidSource' in found_values}}
-    if error == cudaErrorInvalidSource:
-        return "cudaErrorInvalidSource"{{endif}}
-    {{if 'cudaErrorFileNotFound' in found_values}}
-    if error == cudaErrorFileNotFound:
-        return "cudaErrorFileNotFound"{{endif}}
-    {{if 'cudaErrorSharedObjectSymbolNotFound' in found_values}}
-    if error == cudaErrorSharedObjectSymbolNotFound:
-        return "cudaErrorSharedObjectSymbolNotFound"{{endif}}
-    {{if 'cudaErrorSharedObjectInitFailed' in found_values}}
-    if error == cudaErrorSharedObjectInitFailed:
-        return "cudaErrorSharedObjectInitFailed"{{endif}}
-    {{if 'cudaErrorOperatingSystem' in found_values}}
-    if error == cudaErrorOperatingSystem:
-        return "cudaErrorOperatingSystem"{{endif}}
-    {{if 'cudaErrorInvalidResourceHandle' in found_values}}
-    if error == cudaErrorInvalidResourceHandle:
-        return "cudaErrorInvalidResourceHandle"{{endif}}
-    {{if 'cudaErrorIllegalState' in found_values}}
-    if error == cudaErrorIllegalState:
-        return "cudaErrorIllegalState"{{endif}}
-    {{if 'cudaErrorLossyQuery' in found_values}}
-    if error == cudaErrorLossyQuery:
-        return "cudaErrorLossyQuery"{{endif}}
-    {{if 'cudaErrorSymbolNotFound' in found_values}}
-    if error == cudaErrorSymbolNotFound:
-        return "cudaErrorSymbolNotFound"{{endif}}
-    {{if 'cudaErrorNotReady' in found_values}}
-    if error == cudaErrorNotReady:
-        return "cudaErrorNotReady"{{endif}}
-    {{if 'cudaErrorIllegalAddress' in found_values}}
-    if error == cudaErrorIllegalAddress:
-        return "cudaErrorIllegalAddress"{{endif}}
-    {{if 'cudaErrorLaunchOutOfResources' in found_values}}
-    if error == cudaErrorLaunchOutOfResources:
-        return "cudaErrorLaunchOutOfResources"{{endif}}
-    {{if 'cudaErrorLaunchTimeout' in found_values}}
-    if error == cudaErrorLaunchTimeout:
-        return "cudaErrorLaunchTimeout"{{endif}}
-    {{if 'cudaErrorLaunchIncompatibleTexturing' in found_values}}
-    if error == cudaErrorLaunchIncompatibleTexturing:
-        return "cudaErrorLaunchIncompatibleTexturing"{{endif}}
-    {{if 'cudaErrorPeerAccessAlreadyEnabled' in found_values}}
-    if error == cudaErrorPeerAccessAlreadyEnabled:
-        return "cudaErrorPeerAccessAlreadyEnabled"{{endif}}
-    {{if 'cudaErrorPeerAccessNotEnabled' in found_values}}
-    if error == cudaErrorPeerAccessNotEnabled:
-        return "cudaErrorPeerAccessNotEnabled"{{endif}}
-    {{if 'cudaErrorSetOnActiveProcess' in found_values}}
-    if error == cudaErrorSetOnActiveProcess:
-        return "cudaErrorSetOnActiveProcess"{{endif}}
-    {{if 'cudaErrorContextIsDestroyed' in found_values}}
-    if error == cudaErrorContextIsDestroyed:
-        return "cudaErrorContextIsDestroyed"{{endif}}
-    {{if 'cudaErrorAssert' in found_values}}
-    if error == cudaErrorAssert:
-        return "cudaErrorAssert"{{endif}}
-    {{if 'cudaErrorTooManyPeers' in found_values}}
-    if error == cudaErrorTooManyPeers:
-        return "cudaErrorTooManyPeers"{{endif}}
-    {{if 'cudaErrorHostMemoryAlreadyRegistered' in found_values}}
-    if error == cudaErrorHostMemoryAlreadyRegistered:
-        return "cudaErrorHostMemoryAlreadyRegistered"{{endif}}
-    {{if 'cudaErrorHostMemoryNotRegistered' in found_values}}
-    if error == cudaErrorHostMemoryNotRegistered:
-        return "cudaErrorHostMemoryNotRegistered"{{endif}}
-    {{if 'cudaErrorHardwareStackError' in found_values}}
-    if error == cudaErrorHardwareStackError:
-        return "cudaErrorHardwareStackError"{{endif}}
-    {{if 'cudaErrorIllegalInstruction' in found_values}}
-    if error == cudaErrorIllegalInstruction:
-        return "cudaErrorIllegalInstruction"{{endif}}
-    {{if 'cudaErrorMisalignedAddress' in found_values}}
-    if error == cudaErrorMisalignedAddress:
-        return "cudaErrorMisalignedAddress"{{endif}}
-    {{if 'cudaErrorInvalidAddressSpace' in found_values}}
-    if error == cudaErrorInvalidAddressSpace:
-        return "cudaErrorInvalidAddressSpace"{{endif}}
-    {{if 'cudaErrorInvalidPc' in found_values}}
-    if error == cudaErrorInvalidPc:
-        return "cudaErrorInvalidPc"{{endif}}
-    {{if 'cudaErrorLaunchFailure' in found_values}}
-    if error == cudaErrorLaunchFailure:
-        return "cudaErrorLaunchFailure"{{endif}}
-    {{if 'cudaErrorCooperativeLaunchTooLarge' in found_values}}
-    if error == cudaErrorCooperativeLaunchTooLarge:
-        return "cudaErrorCooperativeLaunchTooLarge"{{endif}}
-    {{if 'cudaErrorNotPermitted' in found_values}}
-    if error == cudaErrorNotPermitted:
-        return "cudaErrorNotPermitted"{{endif}}
-    {{if 'cudaErrorNotSupported' in found_values}}
-    if error == cudaErrorNotSupported:
-        return "cudaErrorNotSupported"{{endif}}
-    {{if 'cudaErrorSystemNotReady' in found_values}}
-    if error == cudaErrorSystemNotReady:
-        return "cudaErrorSystemNotReady"{{endif}}
-    {{if 'cudaErrorSystemDriverMismatch' in found_values}}
-    if error == cudaErrorSystemDriverMismatch:
-        return "cudaErrorSystemDriverMismatch"{{endif}}
-    {{if 'cudaErrorCompatNotSupportedOnDevice' in found_values}}
-    if error == cudaErrorCompatNotSupportedOnDevice:
-        return "cudaErrorCompatNotSupportedOnDevice"{{endif}}
-    {{if 'cudaErrorMpsConnectionFailed' in found_values}}
-    if error == cudaErrorMpsConnectionFailed:
-        return "cudaErrorMpsConnectionFailed"{{endif}}
-    {{if 'cudaErrorMpsRpcFailure' in found_values}}
-    if error == cudaErrorMpsRpcFailure:
-        return "cudaErrorMpsRpcFailure"{{endif}}
-    {{if 'cudaErrorMpsServerNotReady' in found_values}}
-    if error == cudaErrorMpsServerNotReady:
-        return "cudaErrorMpsServerNotReady"{{endif}}
-    {{if 'cudaErrorMpsMaxClientsReached' in found_values}}
-    if error == cudaErrorMpsMaxClientsReached:
-        return "cudaErrorMpsMaxClientsReached"{{endif}}
-    {{if 'cudaErrorMpsMaxConnectionsReached' in found_values}}
-    if error == cudaErrorMpsMaxConnectionsReached:
-        return "cudaErrorMpsMaxConnectionsReached"{{endif}}
-    {{if 'cudaErrorMpsClientTerminated' in found_values}}
-    if error == cudaErrorMpsClientTerminated:
-        return "cudaErrorMpsClientTerminated"{{endif}}
-    {{if 'cudaErrorCdpNotSupported' in found_values}}
-    if error == cudaErrorCdpNotSupported:
-        return "cudaErrorCdpNotSupported"{{endif}}
-    {{if 'cudaErrorCdpVersionMismatch' in found_values}}
-    if error == cudaErrorCdpVersionMismatch:
-        return "cudaErrorCdpVersionMismatch"{{endif}}
-    {{if 'cudaErrorStreamCaptureUnsupported' in found_values}}
-    if error == cudaErrorStreamCaptureUnsupported:
-        return "cudaErrorStreamCaptureUnsupported"{{endif}}
-    {{if 'cudaErrorStreamCaptureInvalidated' in found_values}}
-    if error == cudaErrorStreamCaptureInvalidated:
-        return "cudaErrorStreamCaptureInvalidated"{{endif}}
-    {{if 'cudaErrorStreamCaptureMerge' in found_values}}
-    if error == cudaErrorStreamCaptureMerge:
-        return "cudaErrorStreamCaptureMerge"{{endif}}
-    {{if 'cudaErrorStreamCaptureUnmatched' in found_values}}
-    if error == cudaErrorStreamCaptureUnmatched:
-        return "cudaErrorStreamCaptureUnmatched"{{endif}}
-    {{if 'cudaErrorStreamCaptureUnjoined' in found_values}}
-    if error == cudaErrorStreamCaptureUnjoined:
-        return "cudaErrorStreamCaptureUnjoined"{{endif}}
-    {{if 'cudaErrorStreamCaptureIsolation' in found_values}}
-    if error == cudaErrorStreamCaptureIsolation:
-        return "cudaErrorStreamCaptureIsolation"{{endif}}
-    {{if 'cudaErrorStreamCaptureImplicit' in found_values}}
-    if error == cudaErrorStreamCaptureImplicit:
-        return "cudaErrorStreamCaptureImplicit"{{endif}}
-    {{if 'cudaErrorCapturedEvent' in found_values}}
-    if error == cudaErrorCapturedEvent:
-        return "cudaErrorCapturedEvent"{{endif}}
-    {{if 'cudaErrorStreamCaptureWrongThread' in found_values}}
-    if error == cudaErrorStreamCaptureWrongThread:
-        return "cudaErrorStreamCaptureWrongThread"{{endif}}
-    {{if 'cudaErrorTimeout' in found_values}}
-    if error == cudaErrorTimeout:
-        return "cudaErrorTimeout"{{endif}}
-    {{if 'cudaErrorGraphExecUpdateFailure' in found_values}}
-    if error == cudaErrorGraphExecUpdateFailure:
-        return "cudaErrorGraphExecUpdateFailure"{{endif}}
-    {{if 'cudaErrorExternalDevice' in found_values}}
-    if error == cudaErrorExternalDevice:
-        return "cudaErrorExternalDevice"{{endif}}
-    {{if 'cudaErrorInvalidClusterSize' in found_values}}
-    if error == cudaErrorInvalidClusterSize:
-        return "cudaErrorInvalidClusterSize"{{endif}}
-    {{if 'cudaErrorFunctionNotLoaded' in found_values}}
-    if error == cudaErrorFunctionNotLoaded:
-        return "cudaErrorFunctionNotLoaded"{{endif}}
-    {{if 'cudaErrorInvalidResourceType' in found_values}}
-    if error == cudaErrorInvalidResourceType:
-        return "cudaErrorInvalidResourceType"{{endif}}
-    {{if 'cudaErrorInvalidResourceConfiguration' in found_values}}
-    if error == cudaErrorInvalidResourceConfiguration:
-        return "cudaErrorInvalidResourceConfiguration"{{endif}}
-    {{if 'cudaErrorUnknown' in found_values}}
-    if error == cudaErrorUnknown:
-        return "cudaErrorUnknown"{{endif}}
-    {{if 'cudaErrorApiFailureBase' in found_values}}
-    if error == cudaErrorApiFailureBase:
-        return "cudaErrorApiFailureBase"{{endif}}
-    return pStr
-
-{{endif}}
-
-{{if 'cudaGetErrorString' in found_functions}}
-
-cdef const char* cudaGetErrorString(cudaError_t error) except ?NULL nogil:
-    return _cudaGetErrorString(error)
-
-{{endif}}
-
-{{if 'cudaGetDeviceCount' in found_functions}}
-
-cdef cudaError_t cudaGetDeviceCount(int* count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetDeviceCount(count)
-
-{{endif}}
-
-{{if 'cudaGetDeviceProperties_v2' in found_functions}}
-
-cdef cudaError_t cudaGetDeviceProperties(cudaDeviceProp* prop, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetDeviceProperties_v2(prop, device)
-
-{{endif}}
-
-{{if 'cudaDeviceGetAttribute' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetAttribute(int* value, cudaDeviceAttr attr, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetAttribute(value, attr, device)
-
-{{endif}}
-
-{{if 'cudaDeviceGetDefaultMemPool' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetDefaultMemPool(cudaMemPool_t* memPool, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetDefaultMemPool(memPool, device)
-
-{{endif}}
-
-{{if 'cudaDeviceSetMemPool' in found_functions}}
-
-cdef cudaError_t cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceSetMemPool(device, memPool)
-
-{{endif}}
-
-{{if 'cudaDeviceGetMemPool' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetMemPool(cudaMemPool_t* memPool, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetMemPool(memPool, device)
-
-{{endif}}
-
-{{if 'cudaDeviceGetNvSciSyncAttributes' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetNvSciSyncAttributes(void* nvSciSyncAttrList, int device, int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, device, flags)
-
-{{endif}}
-
-{{if 'cudaDeviceGetP2PAttribute' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetP2PAttribute(int* value, cudaDeviceP2PAttr attr, int srcDevice, int dstDevice) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice)
-
-{{endif}}
-
-{{if 'cudaChooseDevice' in found_functions}}
-
-cdef cudaError_t cudaChooseDevice(int* device, const cudaDeviceProp* prop) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaChooseDevice(device, prop)
-
-{{endif}}
-
-{{if 'cudaInitDevice' in found_functions}}
-
-cdef cudaError_t cudaInitDevice(int device, unsigned int deviceFlags, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaInitDevice(device, deviceFlags, flags)
-
-{{endif}}
-
-{{if 'cudaSetDevice' in found_functions}}
-
-cdef cudaError_t cudaSetDevice(int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaSetDevice(device)
-
-{{endif}}
-
-{{if 'cudaGetDevice' in found_functions}}
-
-cdef cudaError_t cudaGetDevice(int* device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetDevice(device)
-
-{{endif}}
-
-{{if 'cudaSetDeviceFlags' in found_functions}}
-
-cdef cudaError_t cudaSetDeviceFlags(unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaSetDeviceFlags(flags)
-
-{{endif}}
-
-{{if 'cudaGetDeviceFlags' in found_functions}}
-
-cdef cudaError_t cudaGetDeviceFlags(unsigned int* flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetDeviceFlags(flags)
-
-{{endif}}
-
-{{if 'cudaStreamCreate' in found_functions}}
-
-cdef cudaError_t cudaStreamCreate(cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamCreate(pStream)
-
-{{endif}}
-
-{{if 'cudaStreamCreateWithFlags' in found_functions}}
-
-cdef cudaError_t cudaStreamCreateWithFlags(cudaStream_t* pStream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamCreateWithFlags(pStream, flags)
-
-{{endif}}
-
-{{if 'cudaStreamCreateWithPriority' in found_functions}}
-
-cdef cudaError_t cudaStreamCreateWithPriority(cudaStream_t* pStream, unsigned int flags, int priority) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamCreateWithPriority(pStream, flags, priority)
-
-{{endif}}
-
-{{if 'cudaStreamGetPriority' in found_functions}}
-
-cdef cudaError_t cudaStreamGetPriority(cudaStream_t hStream, int* priority) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamGetPriority(hStream, priority)
-
-{{endif}}
-
-{{if 'cudaStreamGetFlags' in found_functions}}
-
-cdef cudaError_t cudaStreamGetFlags(cudaStream_t hStream, unsigned int* flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamGetFlags(hStream, flags)
-
-{{endif}}
-
-{{if 'cudaStreamGetId' in found_functions}}
-
-cdef cudaError_t cudaStreamGetId(cudaStream_t hStream, unsigned long long* streamId) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamGetId(hStream, streamId)
-
-{{endif}}
-
-{{if 'cudaCtxResetPersistingL2Cache' in found_functions}}
-
-cdef cudaError_t cudaCtxResetPersistingL2Cache() except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaCtxResetPersistingL2Cache()
-
-{{endif}}
-
-{{if 'cudaStreamCopyAttributes' in found_functions}}
-
-cdef cudaError_t cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamCopyAttributes(dst, src)
-
-{{endif}}
-
-{{if 'cudaStreamGetAttribute' in found_functions}}
-
-cdef cudaError_t cudaStreamGetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, cudaStreamAttrValue* value_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamGetAttribute(hStream, attr, value_out)
-
-{{endif}}
-
-{{if 'cudaStreamSetAttribute' in found_functions}}
-
-cdef cudaError_t cudaStreamSetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, const cudaStreamAttrValue* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamSetAttribute(hStream, attr, value)
-
-{{endif}}
-
-{{if 'cudaStreamDestroy' in found_functions}}
-
-cdef cudaError_t cudaStreamDestroy(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamDestroy(stream)
-
-{{endif}}
-
-{{if 'cudaStreamWaitEvent' in found_functions}}
-
-cdef cudaError_t cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamWaitEvent(stream, event, flags)
-
-{{endif}}
-
-{{if 'cudaStreamAddCallback' in found_functions}}
-
-cdef cudaError_t cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, void* userData, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamAddCallback(stream, callback, userData, flags)
-
-{{endif}}
-
-{{if 'cudaStreamSynchronize' in found_functions}}
-
-cdef cudaError_t cudaStreamSynchronize(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamSynchronize(stream)
-
-{{endif}}
-
-{{if 'cudaStreamQuery' in found_functions}}
-
-cdef cudaError_t cudaStreamQuery(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamQuery(stream)
-
-{{endif}}
-
-{{if 'cudaStreamAttachMemAsync' in found_functions}}
-
-cdef cudaError_t cudaStreamAttachMemAsync(cudaStream_t stream, void* devPtr, size_t length, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamAttachMemAsync(stream, devPtr, length, flags)
-
-{{endif}}
-
-{{if 'cudaStreamBeginCapture' in found_functions}}
-
-cdef cudaError_t cudaStreamBeginCapture(cudaStream_t stream, cudaStreamCaptureMode mode) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamBeginCapture(stream, mode)
-
-{{endif}}
-
-{{if 'cudaStreamBeginCaptureToGraph' in found_functions}}
-
-cdef cudaError_t cudaStreamBeginCaptureToGraph(cudaStream_t stream, cudaGraph_t graph, const cudaGraphNode_t* dependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, cudaStreamCaptureMode mode) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamBeginCaptureToGraph(stream, graph, dependencies, dependencyData, numDependencies, mode)
-
-{{endif}}
-
-{{if 'cudaThreadExchangeStreamCaptureMode' in found_functions}}
-
-cdef cudaError_t cudaThreadExchangeStreamCaptureMode(cudaStreamCaptureMode* mode) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaThreadExchangeStreamCaptureMode(mode)
-
-{{endif}}
-
-{{if 'cudaStreamEndCapture' in found_functions}}
-
-cdef cudaError_t cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t* pGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamEndCapture(stream, pGraph)
-
-{{endif}}
-
-{{if 'cudaStreamIsCapturing' in found_functions}}
-
-cdef cudaError_t cudaStreamIsCapturing(cudaStream_t stream, cudaStreamCaptureStatus* pCaptureStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamIsCapturing(stream, pCaptureStatus)
-
-{{endif}}
-
-{{if 'cudaStreamGetCaptureInfo_v2' in found_functions}}
-
-cdef cudaError_t cudaStreamGetCaptureInfo(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, cudaGraph_t* graph_out, const cudaGraphNode_t** dependencies_out, size_t* numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamGetCaptureInfo_v2(stream, captureStatus_out, id_out, graph_out, dependencies_out, numDependencies_out)
-
-{{endif}}
-
-{{if 'cudaStreamGetCaptureInfo_v3' in found_functions}}
-
-cdef cudaError_t cudaStreamGetCaptureInfo_v3(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, cudaGraph_t* graph_out, const cudaGraphNode_t** dependencies_out, const cudaGraphEdgeData** edgeData_out, size_t* numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamGetCaptureInfo_v3(stream, captureStatus_out, id_out, graph_out, dependencies_out, edgeData_out, numDependencies_out)
-
-{{endif}}
-
-{{if 'cudaStreamUpdateCaptureDependencies' in found_functions}}
-
-cdef cudaError_t cudaStreamUpdateCaptureDependencies(cudaStream_t stream, cudaGraphNode_t* dependencies, size_t numDependencies, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamUpdateCaptureDependencies(stream, dependencies, numDependencies, flags)
-
-{{endif}}
-
-{{if 'cudaStreamUpdateCaptureDependencies_v2' in found_functions}}
-
-cdef cudaError_t cudaStreamUpdateCaptureDependencies_v2(cudaStream_t stream, cudaGraphNode_t* dependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaStreamUpdateCaptureDependencies_v2(stream, dependencies, dependencyData, numDependencies, flags)
-
-{{endif}}
-
-{{if 'cudaEventCreate' in found_functions}}
-
-cdef cudaError_t cudaEventCreate(cudaEvent_t* event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEventCreate(event)
-
-{{endif}}
-
-{{if 'cudaEventCreateWithFlags' in found_functions}}
-
-cdef cudaError_t cudaEventCreateWithFlags(cudaEvent_t* event, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEventCreateWithFlags(event, flags)
-
-{{endif}}
-
-{{if 'cudaEventRecord' in found_functions}}
-
-cdef cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEventRecord(event, stream)
-
-{{endif}}
-
-{{if 'cudaEventRecordWithFlags' in found_functions}}
-
-cdef cudaError_t cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEventRecordWithFlags(event, stream, flags)
-
-{{endif}}
-
-{{if 'cudaEventQuery' in found_functions}}
-
-cdef cudaError_t cudaEventQuery(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEventQuery(event)
-
-{{endif}}
-
-{{if 'cudaEventSynchronize' in found_functions}}
-
-cdef cudaError_t cudaEventSynchronize(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEventSynchronize(event)
-
-{{endif}}
-
-{{if 'cudaEventDestroy' in found_functions}}
-
-cdef cudaError_t cudaEventDestroy(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEventDestroy(event)
-
-{{endif}}
-
-{{if 'cudaEventElapsedTime' in found_functions}}
-
-cdef cudaError_t cudaEventElapsedTime(float* ms, cudaEvent_t start, cudaEvent_t end) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEventElapsedTime(ms, start, end)
-
-{{endif}}
-
-{{if 'cudaImportExternalMemory' in found_functions}}
-
-cdef cudaError_t cudaImportExternalMemory(cudaExternalMemory_t* extMem_out, const cudaExternalMemoryHandleDesc* memHandleDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaImportExternalMemory(extMem_out, memHandleDesc)
-
-{{endif}}
-
-{{if 'cudaExternalMemoryGetMappedBuffer' in found_functions}}
-
-cdef cudaError_t cudaExternalMemoryGetMappedBuffer(void** devPtr, cudaExternalMemory_t extMem, const cudaExternalMemoryBufferDesc* bufferDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaExternalMemoryGetMappedBuffer(devPtr, extMem, bufferDesc)
-
-{{endif}}
-
-{{if 'cudaExternalMemoryGetMappedMipmappedArray' in found_functions}}
-
-cdef cudaError_t cudaExternalMemoryGetMappedMipmappedArray(cudaMipmappedArray_t* mipmap, cudaExternalMemory_t extMem, const cudaExternalMemoryMipmappedArrayDesc* mipmapDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaExternalMemoryGetMappedMipmappedArray(mipmap, extMem, mipmapDesc)
-
-{{endif}}
-
-{{if 'cudaDestroyExternalMemory' in found_functions}}
-
-cdef cudaError_t cudaDestroyExternalMemory(cudaExternalMemory_t extMem) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDestroyExternalMemory(extMem)
-
-{{endif}}
-
-{{if 'cudaImportExternalSemaphore' in found_functions}}
-
-cdef cudaError_t cudaImportExternalSemaphore(cudaExternalSemaphore_t* extSem_out, const cudaExternalSemaphoreHandleDesc* semHandleDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaImportExternalSemaphore(extSem_out, semHandleDesc)
-
-{{endif}}
-
-{{if 'cudaSignalExternalSemaphoresAsync_v2' in found_functions}}
-
-cdef cudaError_t cudaSignalExternalSemaphoresAsync(const cudaExternalSemaphore_t* extSemArray, const cudaExternalSemaphoreSignalParams* paramsArray, unsigned int numExtSems, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaSignalExternalSemaphoresAsync_v2(extSemArray, paramsArray, numExtSems, stream)
-
-{{endif}}
-
-{{if 'cudaWaitExternalSemaphoresAsync_v2' in found_functions}}
-
-cdef cudaError_t cudaWaitExternalSemaphoresAsync(const cudaExternalSemaphore_t* extSemArray, const cudaExternalSemaphoreWaitParams* paramsArray, unsigned int numExtSems, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaWaitExternalSemaphoresAsync_v2(extSemArray, paramsArray, numExtSems, stream)
-
-{{endif}}
-
-{{if 'cudaDestroyExternalSemaphore' in found_functions}}
-
-cdef cudaError_t cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDestroyExternalSemaphore(extSem)
-
-{{endif}}
-
-{{if 'cudaFuncSetCacheConfig' in found_functions}}
-
-cdef cudaError_t cudaFuncSetCacheConfig(const void* func, cudaFuncCache cacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaFuncSetCacheConfig(func, cacheConfig)
-
-{{endif}}
-
-{{if 'cudaFuncGetAttributes' in found_functions}}
-
-cdef cudaError_t cudaFuncGetAttributes(cudaFuncAttributes* attr, const void* func) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaFuncGetAttributes(attr, func)
-
-{{endif}}
-
-{{if 'cudaFuncSetAttribute' in found_functions}}
-
-cdef cudaError_t cudaFuncSetAttribute(const void* func, cudaFuncAttribute attr, int value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaFuncSetAttribute(func, attr, value)
-
-{{endif}}
-
-{{if 'cudaLaunchHostFunc' in found_functions}}
-
-cdef cudaError_t cudaLaunchHostFunc(cudaStream_t stream, cudaHostFn_t fn, void* userData) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaLaunchHostFunc(stream, fn, userData)
-
-{{endif}}
-
-{{if 'cudaFuncSetSharedMemConfig' in found_functions}}
-
-cdef cudaError_t cudaFuncSetSharedMemConfig(const void* func, cudaSharedMemConfig config) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaFuncSetSharedMemConfig(func, config)
-
-{{endif}}
-
-{{if 'cudaOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-
-cdef cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* func, int blockSize, size_t dynamicSMemSize) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, blockSize, dynamicSMemSize)
-
-{{endif}}
-
-{{if 'cudaOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-
-cdef cudaError_t cudaOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, const void* func, int numBlocks, int blockSize) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaOccupancyAvailableDynamicSMemPerBlock(dynamicSmemSize, func, numBlocks, blockSize)
-
-{{endif}}
-
-{{if 'cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-
-cdef cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, const void* func, int blockSize, size_t dynamicSMemSize, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func, blockSize, dynamicSMemSize, flags)
-
-{{endif}}
-
-{{if 'cudaMallocManaged' in found_functions}}
-
-cdef cudaError_t cudaMallocManaged(void** devPtr, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMallocManaged(devPtr, size, flags)
-
-{{endif}}
-
-{{if 'cudaMalloc' in found_functions}}
-
-cdef cudaError_t cudaMalloc(void** devPtr, size_t size) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMalloc(devPtr, size)
-
-{{endif}}
-
-{{if 'cudaMallocHost' in found_functions}}
-
-cdef cudaError_t cudaMallocHost(void** ptr, size_t size) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMallocHost(ptr, size)
-
-{{endif}}
-
-{{if 'cudaMallocPitch' in found_functions}}
-
-cdef cudaError_t cudaMallocPitch(void** devPtr, size_t* pitch, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMallocPitch(devPtr, pitch, width, height)
-
-{{endif}}
-
-{{if 'cudaMallocArray' in found_functions}}
-
-cdef cudaError_t cudaMallocArray(cudaArray_t* array, const cudaChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMallocArray(array, desc, width, height, flags)
-
-{{endif}}
-
-{{if 'cudaFree' in found_functions}}
-
-cdef cudaError_t cudaFree(void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaFree(devPtr)
-
-{{endif}}
-
-{{if 'cudaFreeHost' in found_functions}}
-
-cdef cudaError_t cudaFreeHost(void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaFreeHost(ptr)
-
-{{endif}}
-
-{{if 'cudaFreeArray' in found_functions}}
-
-cdef cudaError_t cudaFreeArray(cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaFreeArray(array)
-
-{{endif}}
-
-{{if 'cudaFreeMipmappedArray' in found_functions}}
-
-cdef cudaError_t cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaFreeMipmappedArray(mipmappedArray)
-
-{{endif}}
-
-{{if 'cudaHostAlloc' in found_functions}}
-
-cdef cudaError_t cudaHostAlloc(void** pHost, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaHostAlloc(pHost, size, flags)
-
-{{endif}}
-
-{{if 'cudaHostRegister' in found_functions}}
-
-cdef cudaError_t cudaHostRegister(void* ptr, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaHostRegister(ptr, size, flags)
-
-{{endif}}
-
-{{if 'cudaHostUnregister' in found_functions}}
-
-cdef cudaError_t cudaHostUnregister(void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaHostUnregister(ptr)
-
-{{endif}}
-
-{{if 'cudaHostGetDevicePointer' in found_functions}}
-
-cdef cudaError_t cudaHostGetDevicePointer(void** pDevice, void* pHost, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaHostGetDevicePointer(pDevice, pHost, flags)
-
-{{endif}}
-
-{{if 'cudaHostGetFlags' in found_functions}}
-
-cdef cudaError_t cudaHostGetFlags(unsigned int* pFlags, void* pHost) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaHostGetFlags(pFlags, pHost)
-
-{{endif}}
-
-{{if 'cudaMalloc3D' in found_functions}}
-
-cdef cudaError_t cudaMalloc3D(cudaPitchedPtr* pitchedDevPtr, cudaExtent extent) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMalloc3D(pitchedDevPtr, extent)
-
-{{endif}}
-
-{{if 'cudaMalloc3DArray' in found_functions}}
-
-cdef cudaError_t cudaMalloc3DArray(cudaArray_t* array, const cudaChannelFormatDesc* desc, cudaExtent extent, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMalloc3DArray(array, desc, extent, flags)
-
-{{endif}}
-
-{{if 'cudaMallocMipmappedArray' in found_functions}}
-
-cdef cudaError_t cudaMallocMipmappedArray(cudaMipmappedArray_t* mipmappedArray, const cudaChannelFormatDesc* desc, cudaExtent extent, unsigned int numLevels, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMallocMipmappedArray(mipmappedArray, desc, extent, numLevels, flags)
-
-{{endif}}
-
-{{if 'cudaGetMipmappedArrayLevel' in found_functions}}
-
-cdef cudaError_t cudaGetMipmappedArrayLevel(cudaArray_t* levelArray, cudaMipmappedArray_const_t mipmappedArray, unsigned int level) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetMipmappedArrayLevel(levelArray, mipmappedArray, level)
-
-{{endif}}
-
-{{if 'cudaMemcpy3D' in found_functions}}
-
-cdef cudaError_t cudaMemcpy3D(const cudaMemcpy3DParms* p) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy3D(p)
-
-{{endif}}
-
-{{if 'cudaMemcpy3DPeer' in found_functions}}
-
-cdef cudaError_t cudaMemcpy3DPeer(const cudaMemcpy3DPeerParms* p) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy3DPeer(p)
-
-{{endif}}
-
-{{if 'cudaMemcpy3DAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpy3DAsync(const cudaMemcpy3DParms* p, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy3DAsync(p, stream)
-
-{{endif}}
-
-{{if 'cudaMemcpy3DPeerAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpy3DPeerAsync(const cudaMemcpy3DPeerParms* p, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy3DPeerAsync(p, stream)
-
-{{endif}}
-
-{{if 'cudaMemGetInfo' in found_functions}}
-
-cdef cudaError_t cudaMemGetInfo(size_t* free, size_t* total) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemGetInfo(free, total)
-
-{{endif}}
-
-{{if 'cudaArrayGetInfo' in found_functions}}
-
-cdef cudaError_t cudaArrayGetInfo(cudaChannelFormatDesc* desc, cudaExtent* extent, unsigned int* flags, cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaArrayGetInfo(desc, extent, flags, array)
-
-{{endif}}
-
-{{if 'cudaArrayGetPlane' in found_functions}}
-
-cdef cudaError_t cudaArrayGetPlane(cudaArray_t* pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaArrayGetPlane(pPlaneArray, hArray, planeIdx)
-
-{{endif}}
-
-{{if 'cudaArrayGetMemoryRequirements' in found_functions}}
-
-cdef cudaError_t cudaArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaArray_t array, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaArrayGetMemoryRequirements(memoryRequirements, array, device)
-
-{{endif}}
-
-{{if 'cudaMipmappedArrayGetMemoryRequirements' in found_functions}}
-
-cdef cudaError_t cudaMipmappedArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaMipmappedArray_t mipmap, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMipmappedArrayGetMemoryRequirements(memoryRequirements, mipmap, device)
-
-{{endif}}
-
-{{if 'cudaArrayGetSparseProperties' in found_functions}}
-
-cdef cudaError_t cudaArrayGetSparseProperties(cudaArraySparseProperties* sparseProperties, cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaArrayGetSparseProperties(sparseProperties, array)
-
-{{endif}}
-
-{{if 'cudaMipmappedArrayGetSparseProperties' in found_functions}}
-
-cdef cudaError_t cudaMipmappedArrayGetSparseProperties(cudaArraySparseProperties* sparseProperties, cudaMipmappedArray_t mipmap) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMipmappedArrayGetSparseProperties(sparseProperties, mipmap)
-
-{{endif}}
-
-{{if 'cudaMemcpy' in found_functions}}
-
-cdef cudaError_t cudaMemcpy(void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy(dst, src, count, kind)
-
-{{endif}}
-
-{{if 'cudaMemcpyPeer' in found_functions}}
-
-cdef cudaError_t cudaMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count)
-
-{{endif}}
-
-{{if 'cudaMemcpy2D' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy2D(dst, dpitch, src, spitch, width, height, kind)
-
-{{endif}}
-
-{{if 'cudaMemcpy2DToArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, height, kind)
-
-{{endif}}
-
-{{if 'cudaMemcpy2DFromArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DFromArray(void* dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy2DFromArray(dst, dpitch, src, wOffset, hOffset, width, height, kind)
-
-{{endif}}
-
-{{if 'cudaMemcpy2DArrayToArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy2DArrayToArray(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, width, height, kind)
-
-{{endif}}
-
-{{if 'cudaMemcpyAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpyAsync(void* dst, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpyAsync(dst, src, count, kind, stream)
-
-{{endif}}
-
-{{if 'cudaMemcpyPeerAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, size_t count, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream)
-
-{{endif}}
-
-{{if 'cudaMemcpy2DAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream)
-
-{{endif}}
-
-{{if 'cudaMemcpy2DToArrayAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy2DToArrayAsync(dst, wOffset, hOffset, src, spitch, width, height, kind, stream)
-
-{{endif}}
-
-{{if 'cudaMemcpy2DFromArrayAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpy2DFromArrayAsync(void* dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpy2DFromArrayAsync(dst, dpitch, src, wOffset, hOffset, width, height, kind, stream)
-
-{{endif}}
-
-{{if 'cudaMemset' in found_functions}}
-
-cdef cudaError_t cudaMemset(void* devPtr, int value, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemset(devPtr, value, count)
-
-{{endif}}
-
-{{if 'cudaMemset2D' in found_functions}}
-
-cdef cudaError_t cudaMemset2D(void* devPtr, size_t pitch, int value, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemset2D(devPtr, pitch, value, width, height)
-
-{{endif}}
-
-{{if 'cudaMemset3D' in found_functions}}
-
-cdef cudaError_t cudaMemset3D(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemset3D(pitchedDevPtr, value, extent)
-
-{{endif}}
-
-{{if 'cudaMemsetAsync' in found_functions}}
-
-cdef cudaError_t cudaMemsetAsync(void* devPtr, int value, size_t count, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemsetAsync(devPtr, value, count, stream)
-
-{{endif}}
-
-{{if 'cudaMemset2DAsync' in found_functions}}
-
-cdef cudaError_t cudaMemset2DAsync(void* devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemset2DAsync(devPtr, pitch, value, width, height, stream)
-
-{{endif}}
-
-{{if 'cudaMemset3DAsync' in found_functions}}
-
-cdef cudaError_t cudaMemset3DAsync(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemset3DAsync(pitchedDevPtr, value, extent, stream)
-
-{{endif}}
-
-{{if 'cudaMemPrefetchAsync' in found_functions}}
-
-cdef cudaError_t cudaMemPrefetchAsync(const void* devPtr, size_t count, int dstDevice, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPrefetchAsync(devPtr, count, dstDevice, stream)
-
-{{endif}}
-
-{{if 'cudaMemPrefetchAsync_v2' in found_functions}}
-
-cdef cudaError_t cudaMemPrefetchAsync_v2(const void* devPtr, size_t count, cudaMemLocation location, unsigned int flags, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPrefetchAsync_v2(devPtr, count, location, flags, stream)
-
-{{endif}}
-
-{{if 'cudaMemAdvise' in found_functions}}
-
-cdef cudaError_t cudaMemAdvise(const void* devPtr, size_t count, cudaMemoryAdvise advice, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemAdvise(devPtr, count, advice, device)
-
-{{endif}}
-
-{{if 'cudaMemAdvise_v2' in found_functions}}
-
-cdef cudaError_t cudaMemAdvise_v2(const void* devPtr, size_t count, cudaMemoryAdvise advice, cudaMemLocation location) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemAdvise_v2(devPtr, count, advice, location)
-
-{{endif}}
-
-{{if 'cudaMemRangeGetAttribute' in found_functions}}
-
-cdef cudaError_t cudaMemRangeGetAttribute(void* data, size_t dataSize, cudaMemRangeAttribute attribute, const void* devPtr, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemRangeGetAttribute(data, dataSize, attribute, devPtr, count)
-
-{{endif}}
-
-{{if 'cudaMemRangeGetAttributes' in found_functions}}
-
-cdef cudaError_t cudaMemRangeGetAttributes(void** data, size_t* dataSizes, cudaMemRangeAttribute* attributes, size_t numAttributes, const void* devPtr, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemRangeGetAttributes(data, dataSizes, attributes, numAttributes, devPtr, count)
-
-{{endif}}
-
-{{if 'cudaMemcpyToArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpyToArray(dst, wOffset, hOffset, src, count, kind)
-
-{{endif}}
-
-{{if 'cudaMemcpyFromArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpyFromArray(void* dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpyFromArray(dst, src, wOffset, hOffset, count, kind)
-
-{{endif}}
-
-{{if 'cudaMemcpyArrayToArray' in found_functions}}
-
-cdef cudaError_t cudaMemcpyArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpyArrayToArray(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, count, kind)
-
-{{endif}}
-
-{{if 'cudaMemcpyToArrayAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpyToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpyToArrayAsync(dst, wOffset, hOffset, src, count, kind, stream)
-
-{{endif}}
-
-{{if 'cudaMemcpyFromArrayAsync' in found_functions}}
-
-cdef cudaError_t cudaMemcpyFromArrayAsync(void* dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemcpyFromArrayAsync(dst, src, wOffset, hOffset, count, kind, stream)
-
-{{endif}}
-
-{{if 'cudaMallocAsync' in found_functions}}
-
-cdef cudaError_t cudaMallocAsync(void** devPtr, size_t size, cudaStream_t hStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMallocAsync(devPtr, size, hStream)
-
-{{endif}}
-
-{{if 'cudaFreeAsync' in found_functions}}
-
-cdef cudaError_t cudaFreeAsync(void* devPtr, cudaStream_t hStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaFreeAsync(devPtr, hStream)
-
-{{endif}}
-
-{{if 'cudaMemPoolTrimTo' in found_functions}}
-
-cdef cudaError_t cudaMemPoolTrimTo(cudaMemPool_t memPool, size_t minBytesToKeep) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPoolTrimTo(memPool, minBytesToKeep)
-
-{{endif}}
-
-{{if 'cudaMemPoolSetAttribute' in found_functions}}
-
-cdef cudaError_t cudaMemPoolSetAttribute(cudaMemPool_t memPool, cudaMemPoolAttr attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPoolSetAttribute(memPool, attr, value)
-
-{{endif}}
-
-{{if 'cudaMemPoolGetAttribute' in found_functions}}
-
-cdef cudaError_t cudaMemPoolGetAttribute(cudaMemPool_t memPool, cudaMemPoolAttr attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPoolGetAttribute(memPool, attr, value)
-
-{{endif}}
-
-{{if 'cudaMemPoolSetAccess' in found_functions}}
-
-cdef cudaError_t cudaMemPoolSetAccess(cudaMemPool_t memPool, const cudaMemAccessDesc* descList, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPoolSetAccess(memPool, descList, count)
-
-{{endif}}
-
-{{if 'cudaMemPoolGetAccess' in found_functions}}
-
-cdef cudaError_t cudaMemPoolGetAccess(cudaMemAccessFlags* flags, cudaMemPool_t memPool, cudaMemLocation* location) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPoolGetAccess(flags, memPool, location)
-
-{{endif}}
-
-{{if 'cudaMemPoolCreate' in found_functions}}
-
-cdef cudaError_t cudaMemPoolCreate(cudaMemPool_t* memPool, const cudaMemPoolProps* poolProps) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPoolCreate(memPool, poolProps)
-
-{{endif}}
-
-{{if 'cudaMemPoolDestroy' in found_functions}}
-
-cdef cudaError_t cudaMemPoolDestroy(cudaMemPool_t memPool) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPoolDestroy(memPool)
-
-{{endif}}
-
-{{if 'cudaMallocFromPoolAsync' in found_functions}}
-
-cdef cudaError_t cudaMallocFromPoolAsync(void** ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMallocFromPoolAsync(ptr, size, memPool, stream)
-
-{{endif}}
-
-{{if 'cudaMemPoolExportToShareableHandle' in found_functions}}
-
-cdef cudaError_t cudaMemPoolExportToShareableHandle(void* shareableHandle, cudaMemPool_t memPool, cudaMemAllocationHandleType handleType, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPoolExportToShareableHandle(shareableHandle, memPool, handleType, flags)
-
-{{endif}}
-
-{{if 'cudaMemPoolImportFromShareableHandle' in found_functions}}
-
-cdef cudaError_t cudaMemPoolImportFromShareableHandle(cudaMemPool_t* memPool, void* shareableHandle, cudaMemAllocationHandleType handleType, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPoolImportFromShareableHandle(memPool, shareableHandle, handleType, flags)
-
-{{endif}}
-
-{{if 'cudaMemPoolExportPointer' in found_functions}}
-
-cdef cudaError_t cudaMemPoolExportPointer(cudaMemPoolPtrExportData* exportData, void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPoolExportPointer(exportData, ptr)
-
-{{endif}}
-
-{{if 'cudaMemPoolImportPointer' in found_functions}}
-
-cdef cudaError_t cudaMemPoolImportPointer(void** ptr, cudaMemPool_t memPool, cudaMemPoolPtrExportData* exportData) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaMemPoolImportPointer(ptr, memPool, exportData)
-
-{{endif}}
-
-{{if 'cudaPointerGetAttributes' in found_functions}}
-
-cdef cudaError_t cudaPointerGetAttributes(cudaPointerAttributes* attributes, const void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaPointerGetAttributes(attributes, ptr)
-
-{{endif}}
-
-{{if 'cudaDeviceCanAccessPeer' in found_functions}}
-
-cdef cudaError_t cudaDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice)
-
-{{endif}}
-
-{{if 'cudaDeviceEnablePeerAccess' in found_functions}}
-
-cdef cudaError_t cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceEnablePeerAccess(peerDevice, flags)
-
-{{endif}}
-
-{{if 'cudaDeviceDisablePeerAccess' in found_functions}}
-
-cdef cudaError_t cudaDeviceDisablePeerAccess(int peerDevice) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceDisablePeerAccess(peerDevice)
-
-{{endif}}
-
-{{if 'cudaGraphicsUnregisterResource' in found_functions}}
-
-cdef cudaError_t cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsUnregisterResource(resource)
-
-{{endif}}
-
-{{if 'cudaGraphicsResourceSetMapFlags' in found_functions}}
-
-cdef cudaError_t cudaGraphicsResourceSetMapFlags(cudaGraphicsResource_t resource, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsResourceSetMapFlags(resource, flags)
-
-{{endif}}
-
-{{if 'cudaGraphicsMapResources' in found_functions}}
-
-cdef cudaError_t cudaGraphicsMapResources(int count, cudaGraphicsResource_t* resources, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsMapResources(count, resources, stream)
-
-{{endif}}
-
-{{if 'cudaGraphicsUnmapResources' in found_functions}}
-
-cdef cudaError_t cudaGraphicsUnmapResources(int count, cudaGraphicsResource_t* resources, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsUnmapResources(count, resources, stream)
-
-{{endif}}
-
-{{if 'cudaGraphicsResourceGetMappedPointer' in found_functions}}
-
-cdef cudaError_t cudaGraphicsResourceGetMappedPointer(void** devPtr, size_t* size, cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsResourceGetMappedPointer(devPtr, size, resource)
-
-{{endif}}
-
-{{if 'cudaGraphicsSubResourceGetMappedArray' in found_functions}}
-
-cdef cudaError_t cudaGraphicsSubResourceGetMappedArray(cudaArray_t* array, cudaGraphicsResource_t resource, unsigned int arrayIndex, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsSubResourceGetMappedArray(array, resource, arrayIndex, mipLevel)
-
-{{endif}}
-
-{{if 'cudaGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-
-cdef cudaError_t cudaGraphicsResourceGetMappedMipmappedArray(cudaMipmappedArray_t* mipmappedArray, cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsResourceGetMappedMipmappedArray(mipmappedArray, resource)
-
-{{endif}}
-
-{{if 'cudaGetChannelDesc' in found_functions}}
-
-cdef cudaError_t cudaGetChannelDesc(cudaChannelFormatDesc* desc, cudaArray_const_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetChannelDesc(desc, array)
-
-{{endif}}
-
-{{if 'cudaCreateChannelDesc' in found_functions}}
-
-cdef cudaChannelFormatDesc cudaCreateChannelDesc(int x, int y, int z, int w, cudaChannelFormatKind f) nogil:
-    return _cudaCreateChannelDesc(x, y, z, w, f)
-
-{{endif}}
-
-{{if 'cudaCreateTextureObject' in found_functions}}
-
-cdef cudaError_t cudaCreateTextureObject(cudaTextureObject_t* pTexObject, const cudaResourceDesc* pResDesc, const cudaTextureDesc* pTexDesc, const cudaResourceViewDesc* pResViewDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc)
-
-{{endif}}
-
-{{if 'cudaDestroyTextureObject' in found_functions}}
-
-cdef cudaError_t cudaDestroyTextureObject(cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDestroyTextureObject(texObject)
-
-{{endif}}
-
-{{if 'cudaGetTextureObjectResourceDesc' in found_functions}}
-
-cdef cudaError_t cudaGetTextureObjectResourceDesc(cudaResourceDesc* pResDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetTextureObjectResourceDesc(pResDesc, texObject)
-
-{{endif}}
-
-{{if 'cudaGetTextureObjectTextureDesc' in found_functions}}
-
-cdef cudaError_t cudaGetTextureObjectTextureDesc(cudaTextureDesc* pTexDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetTextureObjectTextureDesc(pTexDesc, texObject)
-
-{{endif}}
-
-{{if 'cudaGetTextureObjectResourceViewDesc' in found_functions}}
-
-cdef cudaError_t cudaGetTextureObjectResourceViewDesc(cudaResourceViewDesc* pResViewDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetTextureObjectResourceViewDesc(pResViewDesc, texObject)
-
-{{endif}}
-
-{{if 'cudaCreateSurfaceObject' in found_functions}}
-
-cdef cudaError_t cudaCreateSurfaceObject(cudaSurfaceObject_t* pSurfObject, const cudaResourceDesc* pResDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaCreateSurfaceObject(pSurfObject, pResDesc)
-
-{{endif}}
-
-{{if 'cudaDestroySurfaceObject' in found_functions}}
-
-cdef cudaError_t cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDestroySurfaceObject(surfObject)
-
-{{endif}}
-
-{{if 'cudaGetSurfaceObjectResourceDesc' in found_functions}}
-
-cdef cudaError_t cudaGetSurfaceObjectResourceDesc(cudaResourceDesc* pResDesc, cudaSurfaceObject_t surfObject) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetSurfaceObjectResourceDesc(pResDesc, surfObject)
-
-{{endif}}
-
-{{if 'cudaDriverGetVersion' in found_functions}}
-
-cdef cudaError_t cudaDriverGetVersion(int* driverVersion) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDriverGetVersion(driverVersion)
-
-{{endif}}
-
-{{if 'cudaRuntimeGetVersion' in found_functions}}
-
-cdef cudaError_t cudaRuntimeGetVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaRuntimeGetVersion(runtimeVersion)
-
-{{endif}}
-
-{{if 'cudaGraphCreate' in found_functions}}
-
-cdef cudaError_t cudaGraphCreate(cudaGraph_t* pGraph, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphCreate(pGraph, flags)
-
-{{endif}}
-
-{{if 'cudaGraphAddKernelNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddKernelNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddKernelNode(pGraphNode, graph, pDependencies, numDependencies, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphKernelNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphKernelNodeGetParams(cudaGraphNode_t node, cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphKernelNodeGetParams(node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphKernelNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphKernelNodeSetParams(cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphKernelNodeSetParams(node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphKernelNodeCopyAttributes' in found_functions}}
-
-cdef cudaError_t cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphKernelNodeCopyAttributes(hSrc, hDst)
-
-{{endif}}
-
-{{if 'cudaGraphKernelNodeGetAttribute' in found_functions}}
-
-cdef cudaError_t cudaGraphKernelNodeGetAttribute(cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, cudaKernelNodeAttrValue* value_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphKernelNodeGetAttribute(hNode, attr, value_out)
-
-{{endif}}
-
-{{if 'cudaGraphKernelNodeSetAttribute' in found_functions}}
-
-cdef cudaError_t cudaGraphKernelNodeSetAttribute(cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, const cudaKernelNodeAttrValue* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphKernelNodeSetAttribute(hNode, attr, value)
-
-{{endif}}
-
-{{if 'cudaGraphAddMemcpyNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddMemcpyNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemcpy3DParms* pCopyParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddMemcpyNode(pGraphNode, graph, pDependencies, numDependencies, pCopyParams)
-
-{{endif}}
-
-{{if 'cudaGraphAddMemcpyNode1D' in found_functions}}
-
-cdef cudaError_t cudaGraphAddMemcpyNode1D(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddMemcpyNode1D(pGraphNode, graph, pDependencies, numDependencies, dst, src, count, kind)
-
-{{endif}}
-
-{{if 'cudaGraphMemcpyNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemcpyNodeGetParams(cudaGraphNode_t node, cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphMemcpyNodeGetParams(node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphMemcpyNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemcpyNodeSetParams(cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphMemcpyNodeSetParams(node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphMemcpyNodeSetParams1D' in found_functions}}
-
-cdef cudaError_t cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphMemcpyNodeSetParams1D(node, dst, src, count, kind)
-
-{{endif}}
-
-{{if 'cudaGraphAddMemsetNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddMemsetNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemsetParams* pMemsetParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddMemsetNode(pGraphNode, graph, pDependencies, numDependencies, pMemsetParams)
-
-{{endif}}
-
-{{if 'cudaGraphMemsetNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemsetNodeGetParams(cudaGraphNode_t node, cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphMemsetNodeGetParams(node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphMemsetNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemsetNodeSetParams(cudaGraphNode_t node, const cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphMemsetNodeSetParams(node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphAddHostNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddHostNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddHostNode(pGraphNode, graph, pDependencies, numDependencies, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphHostNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphHostNodeGetParams(cudaGraphNode_t node, cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphHostNodeGetParams(node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphHostNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphHostNodeSetParams(cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphHostNodeSetParams(node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphAddChildGraphNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddChildGraphNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraph_t childGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddChildGraphNode(pGraphNode, graph, pDependencies, numDependencies, childGraph)
-
-{{endif}}
-
-{{if 'cudaGraphChildGraphNodeGetGraph' in found_functions}}
-
-cdef cudaError_t cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t* pGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphChildGraphNodeGetGraph(node, pGraph)
-
-{{endif}}
-
-{{if 'cudaGraphAddEmptyNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddEmptyNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddEmptyNode(pGraphNode, graph, pDependencies, numDependencies)
-
-{{endif}}
-
-{{if 'cudaGraphAddEventRecordNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddEventRecordNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddEventRecordNode(pGraphNode, graph, pDependencies, numDependencies, event)
-
-{{endif}}
-
-{{if 'cudaGraphEventRecordNodeGetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t* event_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphEventRecordNodeGetEvent(node, event_out)
-
-{{endif}}
-
-{{if 'cudaGraphEventRecordNodeSetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphEventRecordNodeSetEvent(node, event)
-
-{{endif}}
-
-{{if 'cudaGraphAddEventWaitNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddEventWaitNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddEventWaitNode(pGraphNode, graph, pDependencies, numDependencies, event)
-
-{{endif}}
-
-{{if 'cudaGraphEventWaitNodeGetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t* event_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphEventWaitNodeGetEvent(node, event_out)
-
-{{endif}}
-
-{{if 'cudaGraphEventWaitNodeSetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphEventWaitNodeSetEvent(node, event)
-
-{{endif}}
-
-{{if 'cudaGraphAddExternalSemaphoresSignalNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddExternalSemaphoresSignalNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddExternalSemaphoresSignalNode(pGraphNode, graph, pDependencies, numDependencies, nodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExternalSemaphoresSignalNodeGetParams(cudaGraphNode_t hNode, cudaExternalSemaphoreSignalNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExternalSemaphoresSignalNodeGetParams(hNode, params_out)
-
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExternalSemaphoresSignalNodeSetParams(cudaGraphNode_t hNode, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphAddExternalSemaphoresWaitNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddExternalSemaphoresWaitNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddExternalSemaphoresWaitNode(pGraphNode, graph, pDependencies, numDependencies, nodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExternalSemaphoresWaitNodeGetParams(cudaGraphNode_t hNode, cudaExternalSemaphoreWaitNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExternalSemaphoresWaitNodeGetParams(hNode, params_out)
-
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExternalSemaphoresWaitNodeSetParams(cudaGraphNode_t hNode, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphAddMemAllocNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddMemAllocNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaMemAllocNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddMemAllocNode(pGraphNode, graph, pDependencies, numDependencies, nodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphMemAllocNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemAllocNodeGetParams(cudaGraphNode_t node, cudaMemAllocNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphMemAllocNodeGetParams(node, params_out)
-
-{{endif}}
-
-{{if 'cudaGraphAddMemFreeNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddMemFreeNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dptr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddMemFreeNode(pGraphNode, graph, pDependencies, numDependencies, dptr)
-
-{{endif}}
-
-{{if 'cudaGraphMemFreeNodeGetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void* dptr_out) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphMemFreeNodeGetParams(node, dptr_out)
-
-{{endif}}
-
-{{if 'cudaDeviceGraphMemTrim' in found_functions}}
-
-cdef cudaError_t cudaDeviceGraphMemTrim(int device) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGraphMemTrim(device)
-
-{{endif}}
-
-{{if 'cudaDeviceGetGraphMemAttribute' in found_functions}}
-
-cdef cudaError_t cudaDeviceGetGraphMemAttribute(int device, cudaGraphMemAttributeType attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceGetGraphMemAttribute(device, attr, value)
-
-{{endif}}
-
-{{if 'cudaDeviceSetGraphMemAttribute' in found_functions}}
-
-cdef cudaError_t cudaDeviceSetGraphMemAttribute(int device, cudaGraphMemAttributeType attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaDeviceSetGraphMemAttribute(device, attr, value)
-
-{{endif}}
-
-{{if 'cudaGraphClone' in found_functions}}
-
-cdef cudaError_t cudaGraphClone(cudaGraph_t* pGraphClone, cudaGraph_t originalGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphClone(pGraphClone, originalGraph)
-
-{{endif}}
-
-{{if 'cudaGraphNodeFindInClone' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeFindInClone(cudaGraphNode_t* pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphNodeFindInClone(pNode, originalNode, clonedGraph)
-
-{{endif}}
-
-{{if 'cudaGraphNodeGetType' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetType(cudaGraphNode_t node, cudaGraphNodeType* pType) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphNodeGetType(node, pType)
-
-{{endif}}
-
-{{if 'cudaGraphGetNodes' in found_functions}}
-
-cdef cudaError_t cudaGraphGetNodes(cudaGraph_t graph, cudaGraphNode_t* nodes, size_t* numNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphGetNodes(graph, nodes, numNodes)
-
-{{endif}}
-
-{{if 'cudaGraphGetRootNodes' in found_functions}}
-
-cdef cudaError_t cudaGraphGetRootNodes(cudaGraph_t graph, cudaGraphNode_t* pRootNodes, size_t* pNumRootNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphGetRootNodes(graph, pRootNodes, pNumRootNodes)
-
-{{endif}}
-
-{{if 'cudaGraphGetEdges' in found_functions}}
-
-cdef cudaError_t cudaGraphGetEdges(cudaGraph_t graph, cudaGraphNode_t* from_, cudaGraphNode_t* to, size_t* numEdges) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphGetEdges(graph, from_, to, numEdges)
-
-{{endif}}
-
-{{if 'cudaGraphGetEdges_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphGetEdges_v2(cudaGraph_t graph, cudaGraphNode_t* from_, cudaGraphNode_t* to, cudaGraphEdgeData* edgeData, size_t* numEdges) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphGetEdges_v2(graph, from_, to, edgeData, numEdges)
-
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependencies' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetDependencies(cudaGraphNode_t node, cudaGraphNode_t* pDependencies, size_t* pNumDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphNodeGetDependencies(node, pDependencies, pNumDependencies)
-
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependencies_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetDependencies_v2(cudaGraphNode_t node, cudaGraphNode_t* pDependencies, cudaGraphEdgeData* edgeData, size_t* pNumDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphNodeGetDependencies_v2(node, pDependencies, edgeData, pNumDependencies)
-
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependentNodes' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetDependentNodes(cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, size_t* pNumDependentNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphNodeGetDependentNodes(node, pDependentNodes, pNumDependentNodes)
-
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependentNodes_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetDependentNodes_v2(cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, cudaGraphEdgeData* edgeData, size_t* pNumDependentNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphNodeGetDependentNodes_v2(node, pDependentNodes, edgeData, pNumDependentNodes)
-
-{{endif}}
-
-{{if 'cudaGraphAddDependencies' in found_functions}}
-
-cdef cudaError_t cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddDependencies(graph, from_, to, numDependencies)
-
-{{endif}}
-
-{{if 'cudaGraphAddDependencies_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphAddDependencies_v2(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, const cudaGraphEdgeData* edgeData, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddDependencies_v2(graph, from_, to, edgeData, numDependencies)
-
-{{endif}}
-
-{{if 'cudaGraphRemoveDependencies' in found_functions}}
-
-cdef cudaError_t cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphRemoveDependencies(graph, from_, to, numDependencies)
-
-{{endif}}
-
-{{if 'cudaGraphRemoveDependencies_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphRemoveDependencies_v2(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, const cudaGraphEdgeData* edgeData, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphRemoveDependencies_v2(graph, from_, to, edgeData, numDependencies)
-
-{{endif}}
-
-{{if 'cudaGraphDestroyNode' in found_functions}}
-
-cdef cudaError_t cudaGraphDestroyNode(cudaGraphNode_t node) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphDestroyNode(node)
-
-{{endif}}
-
-{{if 'cudaGraphInstantiate' in found_functions}}
-
-cdef cudaError_t cudaGraphInstantiate(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, unsigned long long flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphInstantiate(pGraphExec, graph, flags)
-
-{{endif}}
-
-{{if 'cudaGraphInstantiateWithFlags' in found_functions}}
-
-cdef cudaError_t cudaGraphInstantiateWithFlags(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, unsigned long long flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphInstantiateWithFlags(pGraphExec, graph, flags)
-
-{{endif}}
-
-{{if 'cudaGraphInstantiateWithParams' in found_functions}}
-
-cdef cudaError_t cudaGraphInstantiateWithParams(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, cudaGraphInstantiateParams* instantiateParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphInstantiateWithParams(pGraphExec, graph, instantiateParams)
-
-{{endif}}
-
-{{if 'cudaGraphExecGetFlags' in found_functions}}
-
-cdef cudaError_t cudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long long* flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecGetFlags(graphExec, flags)
-
-{{endif}}
-
-{{if 'cudaGraphExecKernelNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecKernelNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecKernelNodeSetParams(hGraphExec, node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphExecMemcpyNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecMemcpyNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecMemcpyNodeSetParams(hGraphExec, node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphExecMemcpyNodeSetParams1D' in found_functions}}
-
-cdef cudaError_t cudaGraphExecMemcpyNodeSetParams1D(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecMemcpyNodeSetParams1D(hGraphExec, node, dst, src, count, kind)
-
-{{endif}}
-
-{{if 'cudaGraphExecMemsetNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecMemsetNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecMemsetNodeSetParams(hGraphExec, node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphExecHostNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecHostNodeSetParams(hGraphExec, node, pNodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphExecChildGraphNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecChildGraphNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecChildGraphNodeSetParams(hGraphExec, node, childGraph)
-
-{{endif}}
-
-{{if 'cudaGraphExecEventRecordNodeSetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphExecEventRecordNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event)
-
-{{endif}}
-
-{{if 'cudaGraphExecEventWaitNodeSetEvent' in found_functions}}
-
-cdef cudaError_t cudaGraphExecEventWaitNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event)
-
-{{endif}}
-
-{{if 'cudaGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecExternalSemaphoresSignalNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecExternalSemaphoresWaitNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphNodeSetEnabled' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeSetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphNodeSetEnabled(hGraphExec, hNode, isEnabled)
-
-{{endif}}
-
-{{if 'cudaGraphNodeGetEnabled' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int* isEnabled) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphNodeGetEnabled(hGraphExec, hNode, isEnabled)
-
-{{endif}}
-
-{{if 'cudaGraphExecUpdate' in found_functions}}
-
-cdef cudaError_t cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, cudaGraphExecUpdateResultInfo* resultInfo) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecUpdate(hGraphExec, hGraph, resultInfo)
-
-{{endif}}
-
-{{if 'cudaGraphUpload' in found_functions}}
-
-cdef cudaError_t cudaGraphUpload(cudaGraphExec_t graphExec, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphUpload(graphExec, stream)
-
-{{endif}}
-
-{{if 'cudaGraphLaunch' in found_functions}}
-
-cdef cudaError_t cudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphLaunch(graphExec, stream)
-
-{{endif}}
-
-{{if 'cudaGraphExecDestroy' in found_functions}}
-
-cdef cudaError_t cudaGraphExecDestroy(cudaGraphExec_t graphExec) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecDestroy(graphExec)
-
-{{endif}}
-
-{{if 'cudaGraphDestroy' in found_functions}}
-
-cdef cudaError_t cudaGraphDestroy(cudaGraph_t graph) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphDestroy(graph)
-
-{{endif}}
-
-{{if 'cudaGraphDebugDotPrint' in found_functions}}
-
-cdef cudaError_t cudaGraphDebugDotPrint(cudaGraph_t graph, const char* path, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphDebugDotPrint(graph, path, flags)
-
-{{endif}}
-
-{{if 'cudaUserObjectCreate' in found_functions}}
-
-cdef cudaError_t cudaUserObjectCreate(cudaUserObject_t* object_out, void* ptr, cudaHostFn_t destroy, unsigned int initialRefcount, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaUserObjectCreate(object_out, ptr, destroy, initialRefcount, flags)
-
-{{endif}}
-
-{{if 'cudaUserObjectRetain' in found_functions}}
-
-cdef cudaError_t cudaUserObjectRetain(cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaUserObjectRetain(object, count)
-
-{{endif}}
-
-{{if 'cudaUserObjectRelease' in found_functions}}
-
-cdef cudaError_t cudaUserObjectRelease(cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaUserObjectRelease(object, count)
-
-{{endif}}
-
-{{if 'cudaGraphRetainUserObject' in found_functions}}
-
-cdef cudaError_t cudaGraphRetainUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphRetainUserObject(graph, object, count, flags)
-
-{{endif}}
-
-{{if 'cudaGraphReleaseUserObject' in found_functions}}
-
-cdef cudaError_t cudaGraphReleaseUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphReleaseUserObject(graph, object, count)
-
-{{endif}}
-
-{{if 'cudaGraphAddNode' in found_functions}}
-
-cdef cudaError_t cudaGraphAddNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddNode(pGraphNode, graph, pDependencies, numDependencies, nodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphAddNode_v2' in found_functions}}
-
-cdef cudaError_t cudaGraphAddNode_v2(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphAddNode_v2(pGraphNode, graph, pDependencies, dependencyData, numDependencies, nodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphNodeSetParams(cudaGraphNode_t node, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphNodeSetParams(node, nodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphExecNodeSetParams' in found_functions}}
-
-cdef cudaError_t cudaGraphExecNodeSetParams(cudaGraphExec_t graphExec, cudaGraphNode_t node, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphExecNodeSetParams(graphExec, node, nodeParams)
-
-{{endif}}
-
-{{if 'cudaGraphConditionalHandleCreate' in found_functions}}
-
-cdef cudaError_t cudaGraphConditionalHandleCreate(cudaGraphConditionalHandle* pHandle_out, cudaGraph_t graph, unsigned int defaultLaunchValue, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphConditionalHandleCreate(pHandle_out, graph, defaultLaunchValue, flags)
-
-{{endif}}
-
-{{if 'cudaGetDriverEntryPoint' in found_functions}}
-
-cdef cudaError_t cudaGetDriverEntryPoint(const char* symbol, void** funcPtr, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetDriverEntryPoint(symbol, funcPtr, flags, driverStatus)
-
-{{endif}}
-
-{{if 'cudaGetDriverEntryPointByVersion' in found_functions}}
-
-cdef cudaError_t cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetDriverEntryPointByVersion(symbol, funcPtr, cudaVersion, flags, driverStatus)
-
-{{endif}}
-
-{{if 'cudaGetExportTable' in found_functions}}
-
-cdef cudaError_t cudaGetExportTable(const void** ppExportTable, const cudaUUID_t* pExportTableId) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetExportTable(ppExportTable, pExportTableId)
-
-{{endif}}
-
-{{if 'cudaGetKernel' in found_functions}}
-
-cdef cudaError_t cudaGetKernel(cudaKernel_t* kernelPtr, const void* entryFuncAddr) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGetKernel(kernelPtr, entryFuncAddr)
-
-{{endif}}
-
-{{if 'make_cudaPitchedPtr' in found_functions}}
-
-cdef cudaPitchedPtr make_cudaPitchedPtr(void* d, size_t p, size_t xsz, size_t ysz) nogil:
-    return _make_cudaPitchedPtr(d, p, xsz, ysz)
-
-{{endif}}
-
-{{if 'make_cudaPos' in found_functions}}
-
-cdef cudaPos make_cudaPos(size_t x, size_t y, size_t z) nogil:
-    return _make_cudaPos(x, y, z)
-
-{{endif}}
-
-{{if 'make_cudaExtent' in found_functions}}
-
-cdef cudaExtent make_cudaExtent(size_t w, size_t h, size_t d) nogil:
-    return _make_cudaExtent(w, h, d)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsEGLRegisterImage(cudaGraphicsResource** pCudaResource, EGLImageKHR image, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsEGLRegisterImage(pCudaResource, image, flags)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamConsumerConnect(cudaEglStreamConnection* conn, EGLStreamKHR eglStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEGLStreamConsumerConnect(conn, eglStream)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamConsumerConnectWithFlags(cudaEglStreamConnection* conn, EGLStreamKHR eglStream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEGLStreamConsumerConnectWithFlags(conn, eglStream, flags)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamConsumerDisconnect(cudaEglStreamConnection* conn) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEGLStreamConsumerDisconnect(conn)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamConsumerAcquireFrame(cudaEglStreamConnection* conn, cudaGraphicsResource_t* pCudaResource, cudaStream_t* pStream, unsigned int timeout) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, timeout)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamConsumerReleaseFrame(cudaEglStreamConnection* conn, cudaGraphicsResource_t pCudaResource, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamProducerConnect(cudaEglStreamConnection* conn, EGLStreamKHR eglStream, EGLint width, EGLint height) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEGLStreamProducerConnect(conn, eglStream, width, height)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamProducerDisconnect(cudaEglStreamConnection* conn) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEGLStreamProducerDisconnect(conn)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* conn, cudaEglFrame eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEGLStreamProducerPresentFrame(conn, eglframe, pStream)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn, cudaEglFrame* eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEGLStreamProducerReturnFrame(conn, eglframe, pStream)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsResourceGetMappedEglFrame(eglFrame, resource, index, mipLevel)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaEventCreateFromEGLSync(cudaEvent_t* phEvent, EGLSyncKHR eglSync, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaEventCreateFromEGLSync(phEvent, eglSync, flags)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaProfilerStart() except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaProfilerStart()
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaProfilerStop() except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaProfilerStop()
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGLGetDevices(unsigned int* pCudaDeviceCount, int* pCudaDevices, unsigned int cudaDeviceCount, cudaGLDeviceList deviceList) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGLGetDevices(pCudaDeviceCount, pCudaDevices, cudaDeviceCount, deviceList)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsGLRegisterImage(cudaGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsGLRegisterImage(resource, image, target, flags)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsGLRegisterBuffer(cudaGraphicsResource** resource, GLuint buffer, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsGLRegisterBuffer(resource, buffer, flags)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaVDPAUGetDevice(int* device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaVDPAUGetDevice(device, vdpDevice, vdpGetProcAddress)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaVDPAUSetVDPAUDevice(int device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaVDPAUSetVDPAUDevice(device, vdpDevice, vdpGetProcAddress)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsVDPAURegisterVideoSurface(cudaGraphicsResource** resource, VdpVideoSurface vdpSurface, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsVDPAURegisterVideoSurface(resource, vdpSurface, flags)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t cudaGraphicsVDPAURegisterOutputSurface(cudaGraphicsResource** resource, VdpOutputSurface vdpSurface, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _cudaGraphicsVDPAURegisterOutputSurface(resource, vdpSurface, flags)
-
-{{endif}}
-
-{{if True}}
-
-cdef cudaError_t getLocalRuntimeVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil:
-    return _getLocalRuntimeVersion(runtimeVersion)
-
-{{endif}}
diff --git a/cuda_bindings/cuda/bindings/driver.pxd.in b/cuda_bindings/cuda/bindings/driver.pxd.in
deleted file mode 100644
index 9be46cb2..00000000
--- a/cuda_bindings/cuda/bindings/driver.pxd.in
+++ /dev/null
@@ -1,7587 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-cimport cuda.bindings.cydriver as cydriver
-cimport cuda.bindings._lib.utils as utils
-
-{{if 'CUcontext' in found_types}}
-
-cdef class CUcontext:
-    """
-
-    A regular context handle
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUcontext  __val
-    cdef cydriver.CUcontext* _ptr
-{{endif}}
-
-{{if 'CUmodule' in found_types}}
-
-cdef class CUmodule:
-    """
-
-    CUDA module
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmodule  __val
-    cdef cydriver.CUmodule* _ptr
-{{endif}}
-
-{{if 'CUfunction' in found_types}}
-
-cdef class CUfunction:
-    """
-
-    CUDA function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUfunction  __val
-    cdef cydriver.CUfunction* _ptr
-{{endif}}
-
-{{if 'CUlibrary' in found_types}}
-
-cdef class CUlibrary:
-    """
-
-    CUDA library
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUlibrary  __val
-    cdef cydriver.CUlibrary* _ptr
-{{endif}}
-
-{{if 'CUkernel' in found_types}}
-
-cdef class CUkernel:
-    """
-
-    CUDA kernel
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUkernel  __val
-    cdef cydriver.CUkernel* _ptr
-{{endif}}
-
-{{if 'CUarray' in found_types}}
-
-cdef class CUarray:
-    """
-
-    CUDA array
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUarray  __val
-    cdef cydriver.CUarray* _ptr
-{{endif}}
-
-{{if 'CUmipmappedArray' in found_types}}
-
-cdef class CUmipmappedArray:
-    """
-
-    CUDA mipmapped array
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmipmappedArray  __val
-    cdef cydriver.CUmipmappedArray* _ptr
-{{endif}}
-
-{{if 'CUtexref' in found_types}}
-
-cdef class CUtexref:
-    """
-
-    CUDA texture reference
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUtexref  __val
-    cdef cydriver.CUtexref* _ptr
-{{endif}}
-
-{{if 'CUsurfref' in found_types}}
-
-cdef class CUsurfref:
-    """
-
-    CUDA surface reference
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUsurfref  __val
-    cdef cydriver.CUsurfref* _ptr
-{{endif}}
-
-{{if 'CUevent' in found_types}}
-
-cdef class CUevent:
-    """
-
-    CUDA event
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUevent  __val
-    cdef cydriver.CUevent* _ptr
-{{endif}}
-
-{{if 'CUstream' in found_types}}
-
-cdef class CUstream:
-    """
-
-    CUDA stream
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUstream  __val
-    cdef cydriver.CUstream* _ptr
-{{endif}}
-
-{{if 'CUgraphicsResource' in found_types}}
-
-cdef class CUgraphicsResource:
-    """
-
-    CUDA graphics interop resource
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUgraphicsResource  __val
-    cdef cydriver.CUgraphicsResource* _ptr
-{{endif}}
-
-{{if 'CUexternalMemory' in found_types}}
-
-cdef class CUexternalMemory:
-    """
-
-    CUDA external memory
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUexternalMemory  __val
-    cdef cydriver.CUexternalMemory* _ptr
-{{endif}}
-
-{{if 'CUexternalSemaphore' in found_types}}
-
-cdef class CUexternalSemaphore:
-    """
-
-    CUDA external semaphore
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUexternalSemaphore  __val
-    cdef cydriver.CUexternalSemaphore* _ptr
-{{endif}}
-
-{{if 'CUgraph' in found_types}}
-
-cdef class CUgraph:
-    """
-
-    CUDA graph
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUgraph  __val
-    cdef cydriver.CUgraph* _ptr
-{{endif}}
-
-{{if 'CUgraphNode' in found_types}}
-
-cdef class CUgraphNode:
-    """
-
-    CUDA graph node
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUgraphNode  __val
-    cdef cydriver.CUgraphNode* _ptr
-{{endif}}
-
-{{if 'CUgraphExec' in found_types}}
-
-cdef class CUgraphExec:
-    """
-
-    CUDA executable graph
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUgraphExec  __val
-    cdef cydriver.CUgraphExec* _ptr
-{{endif}}
-
-{{if 'CUmemoryPool' in found_types}}
-
-cdef class CUmemoryPool:
-    """
-
-    CUDA memory pool
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmemoryPool  __val
-    cdef cydriver.CUmemoryPool* _ptr
-{{endif}}
-
-{{if 'CUuserObject' in found_types}}
-
-cdef class CUuserObject:
-    """
-
-    CUDA user object for graphs
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUuserObject  __val
-    cdef cydriver.CUuserObject* _ptr
-{{endif}}
-
-{{if 'CUgraphDeviceNode' in found_types}}
-
-cdef class CUgraphDeviceNode:
-    """
-
-    CUDA graph device node handle
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUgraphDeviceNode  __val
-    cdef cydriver.CUgraphDeviceNode* _ptr
-{{endif}}
-
-{{if 'CUasyncCallbackHandle' in found_types}}
-
-cdef class CUasyncCallbackHandle:
-    """
-
-    CUDA async notification callback handle
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUasyncCallbackHandle  __val
-    cdef cydriver.CUasyncCallbackHandle* _ptr
-{{endif}}
-
-{{if 'CUgreenCtx' in found_types}}
-
-cdef class CUgreenCtx:
-    """
-
-    A green context handle. This handle can be used safely from only one CPU thread at a time. Created via cuGreenCtxCreate
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUgreenCtx  __val
-    cdef cydriver.CUgreenCtx* _ptr
-{{endif}}
-
-{{if 'CUlinkState' in found_types}}
-
-cdef class CUlinkState:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUlinkState  __val
-    cdef cydriver.CUlinkState* _ptr
-    cdef list _keepalive
-{{endif}}
-
-{{if 'CUdevResourceDesc' in found_types}}
-
-cdef class CUdevResourceDesc:
-    """
-
-    An opaque descriptor handle. The descriptor encapsulates multiple created and configured resources. Created via cuDevResourceGenerateDesc
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUdevResourceDesc  __val
-    cdef cydriver.CUdevResourceDesc* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class CUeglStreamConnection:
-    """
-
-    CUDA EGLSream Connection
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUeglStreamConnection  __val
-    cdef cydriver.CUeglStreamConnection* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLImageKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.EGLImageKHR  __val
-    cdef cydriver.EGLImageKHR* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLStreamKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.EGLStreamKHR  __val
-    cdef cydriver.EGLStreamKHR* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLSyncKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.EGLSyncKHR  __val
-    cdef cydriver.EGLSyncKHR* _ptr
-{{endif}}
-
-{{if 'CUasyncCallback' in found_types}}
-
-cdef class CUasyncCallback:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUasyncCallback  __val
-    cdef cydriver.CUasyncCallback* _ptr
-{{endif}}
-
-{{if 'CUhostFn' in found_types}}
-
-cdef class CUhostFn:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUhostFn  __val
-    cdef cydriver.CUhostFn* _ptr
-{{endif}}
-
-{{if 'CUstreamCallback' in found_types}}
-
-cdef class CUstreamCallback:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUstreamCallback  __val
-    cdef cydriver.CUstreamCallback* _ptr
-{{endif}}
-
-{{if 'CUoccupancyB2DSize' in found_types}}
-
-cdef class CUoccupancyB2DSize:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUoccupancyB2DSize  __val
-    cdef cydriver.CUoccupancyB2DSize* _ptr
-{{endif}}
-
-{{if 'struct CUuuid_st' in found_types}}
-
-cdef class CUuuid_st:
-    """
-    Attributes
-    ----------
-    bytes : bytes
-        < CUDA definition of UUID
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUuuid_st __val
-    cdef cydriver.CUuuid_st* _ptr
-{{endif}}
-{{if 'struct CUmemFabricHandle_st' in found_types}}
-
-cdef class CUmemFabricHandle_st:
-    """
-    Fabric handle - An opaque handle representing a memory allocation
-    that can be exported to processes in same or different nodes. For
-    IPC between processes on different nodes they must be connected via
-    the NVSwitch fabric.
-
-    Attributes
-    ----------
-    data : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmemFabricHandle_st __val
-    cdef cydriver.CUmemFabricHandle_st* _ptr
-{{endif}}
-{{if 'struct CUipcEventHandle_st' in found_types}}
-
-cdef class CUipcEventHandle_st:
-    """
-    CUDA IPC event handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUipcEventHandle_st __val
-    cdef cydriver.CUipcEventHandle_st* _ptr
-{{endif}}
-{{if 'struct CUipcMemHandle_st' in found_types}}
-
-cdef class CUipcMemHandle_st:
-    """
-    CUDA IPC mem handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUipcMemHandle_st __val
-    cdef cydriver.CUipcMemHandle_st* _ptr
-{{endif}}
-{{if 'union CUstreamBatchMemOpParams_union' in found_types}}
-
-cdef class CUstreamMemOpWaitValueParams_st:
-    """
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-
-    address : CUdeviceptr
-
-    value : cuuint32_t
-
-    value64 : cuuint64_t
-
-    flags : unsigned int
-
-    alias : CUdeviceptr
-        For driver internal use. Initial value is unimportant.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUstreamBatchMemOpParams_union* _ptr
-    cdef CUdeviceptr _address
-    cdef cuuint32_t _value
-    cdef cuuint64_t _value64
-    cdef CUdeviceptr _alias
-{{endif}}
-{{if 'union CUstreamBatchMemOpParams_union' in found_types}}
-
-cdef class CUstreamMemOpWriteValueParams_st:
-    """
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-
-    address : CUdeviceptr
-
-    value : cuuint32_t
-
-    value64 : cuuint64_t
-
-    flags : unsigned int
-
-    alias : CUdeviceptr
-        For driver internal use. Initial value is unimportant.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUstreamBatchMemOpParams_union* _ptr
-    cdef CUdeviceptr _address
-    cdef cuuint32_t _value
-    cdef cuuint64_t _value64
-    cdef CUdeviceptr _alias
-{{endif}}
-{{if 'union CUstreamBatchMemOpParams_union' in found_types}}
-
-cdef class CUstreamMemOpFlushRemoteWritesParams_st:
-    """
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-
-    flags : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUstreamBatchMemOpParams_union* _ptr
-{{endif}}
-{{if 'union CUstreamBatchMemOpParams_union' in found_types}}
-
-cdef class CUstreamMemOpMemoryBarrierParams_st:
-    """
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-        < Only supported in the _v2 API
-    flags : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUstreamBatchMemOpParams_union* _ptr
-{{endif}}
-{{if 'union CUstreamBatchMemOpParams_union' in found_types}}
-
-cdef class CUstreamBatchMemOpParams_union:
-    """
-    Per-operation parameters for cuStreamBatchMemOp
-
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-
-    waitValue : CUstreamMemOpWaitValueParams_st
-
-    writeValue : CUstreamMemOpWriteValueParams_st
-
-    flushRemoteWrites : CUstreamMemOpFlushRemoteWritesParams_st
-
-    memoryBarrier : CUstreamMemOpMemoryBarrierParams_st
-
-    pad : List[cuuint64_t]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUstreamBatchMemOpParams_union __val
-    cdef cydriver.CUstreamBatchMemOpParams_union* _ptr
-    cdef CUstreamMemOpWaitValueParams_st _waitValue
-    cdef CUstreamMemOpWriteValueParams_st _writeValue
-    cdef CUstreamMemOpFlushRemoteWritesParams_st _flushRemoteWrites
-    cdef CUstreamMemOpMemoryBarrierParams_st _memoryBarrier
-{{endif}}
-{{if 'struct CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st' in found_types}}
-
-cdef class CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st:
-    """
-    Attributes
-    ----------
-    ctx : CUcontext
-
-    count : unsigned int
-
-    paramArray : CUstreamBatchMemOpParams
-
-    flags : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st __val
-    cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st* _ptr
-    cdef CUcontext _ctx
-    cdef size_t _paramArray_length
-    cdef cydriver.CUstreamBatchMemOpParams* _paramArray
-
-{{endif}}
-{{if 'struct CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st:
-    """
-    Batch memory operation node parameters
-
-    Attributes
-    ----------
-    ctx : CUcontext
-        Context to use for the operations.
-    count : unsigned int
-        Number of operations in paramArray.
-    paramArray : CUstreamBatchMemOpParams
-        Array of batch memory operations.
-    flags : unsigned int
-        Flags to control the node.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st __val
-    cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st* _ptr
-    cdef CUcontext _ctx
-    cdef size_t _paramArray_length
-    cdef cydriver.CUstreamBatchMemOpParams* _paramArray
-
-{{endif}}
-{{if 'struct CUasyncNotificationInfo_st' in found_types}}
-
-cdef class anon_struct0:
-    """
-    Attributes
-    ----------
-    bytesOverBudget : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUasyncNotificationInfo_st* _ptr
-{{endif}}
-{{if 'struct CUasyncNotificationInfo_st' in found_types}}
-
-cdef class anon_union2:
-    """
-    Attributes
-    ----------
-    overBudget : anon_struct0
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUasyncNotificationInfo_st* _ptr
-    cdef anon_struct0 _overBudget
-{{endif}}
-{{if 'struct CUasyncNotificationInfo_st' in found_types}}
-
-cdef class CUasyncNotificationInfo_st:
-    """
-    Information passed to the user via the async notification callback
-
-    Attributes
-    ----------
-    type : CUasyncNotificationType
-
-    info : anon_union2
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUasyncNotificationInfo_st* _val_ptr
-    cdef cydriver.CUasyncNotificationInfo_st* _ptr
-    cdef anon_union2 _info
-{{endif}}
-{{if 'struct CUdevprop_st' in found_types}}
-
-cdef class CUdevprop_st:
-    """
-    Legacy device properties
-
-    Attributes
-    ----------
-    maxThreadsPerBlock : int
-        Maximum number of threads per block
-    maxThreadsDim : List[int]
-        Maximum size of each dimension of a block
-    maxGridSize : List[int]
-        Maximum size of each dimension of a grid
-    sharedMemPerBlock : int
-        Shared memory available per block in bytes
-    totalConstantMemory : int
-        Constant memory available on device in bytes
-    SIMDWidth : int
-        Warp size in threads
-    memPitch : int
-        Maximum pitch in bytes allowed by memory copies
-    regsPerBlock : int
-        32-bit registers available per block
-    clockRate : int
-        Clock frequency in kilohertz
-    textureAlign : int
-        Alignment requirement for textures
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUdevprop_st __val
-    cdef cydriver.CUdevprop_st* _ptr
-{{endif}}
-{{if 'struct CUaccessPolicyWindow_st' in found_types}}
-
-cdef class CUaccessPolicyWindow_st:
-    """
-    Specifies an access policy for a window, a contiguous extent of
-    memory beginning at base_ptr and ending at base_ptr + num_bytes.
-    num_bytes is limited by
-    CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE. Partition into
-    many segments and assign segments such that: sum of "hit segments"
-    / window == approx. ratio. sum of "miss segments" / window ==
-    approx 1-ratio. Segments and ratio specifications are fitted to the
-    capabilities of the architecture. Accesses in a hit segment apply
-    the hitProp access policy. Accesses in a miss segment apply the
-    missProp access policy.
-
-    Attributes
-    ----------
-    base_ptr : Any
-        Starting address of the access policy window. CUDA driver may align
-        it.
-    num_bytes : size_t
-        Size in bytes of the window policy. CUDA driver may restrict the
-        maximum size and alignment.
-    hitRatio : float
-        hitRatio specifies percentage of lines assigned hitProp, rest are
-        assigned missProp.
-    hitProp : CUaccessProperty
-        CUaccessProperty set for hit.
-    missProp : CUaccessProperty
-        CUaccessProperty set for miss. Must be either NORMAL or STREAMING
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUaccessPolicyWindow_st __val
-    cdef cydriver.CUaccessPolicyWindow_st* _ptr
-{{endif}}
-{{if 'struct CUDA_KERNEL_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_KERNEL_NODE_PARAMS_st:
-    """
-    GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : Any
-        Extra options
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS_st __val
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS_st* _ptr
-    cdef CUfunction _func
-    cdef utils.HelperKernelParams _cykernelParams
-{{endif}}
-{{if 'struct CUDA_KERNEL_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_KERNEL_NODE_PARAMS_v2_st:
-    """
-    GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : Any
-        Extra options
-    kern : CUkernel
-        Kernel to launch, will only be referenced if func is NULL
-    ctx : CUcontext
-        Context for the kernel task to run in. The value NULL will indicate
-        the current context should be used by the api. This field is
-        ignored if func is set.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS_v2_st __val
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS_v2_st* _ptr
-    cdef CUfunction _func
-    cdef utils.HelperKernelParams _cykernelParams
-    cdef CUkernel _kern
-    cdef CUcontext _ctx
-{{endif}}
-{{if 'struct CUDA_KERNEL_NODE_PARAMS_v3_st' in found_types}}
-
-cdef class CUDA_KERNEL_NODE_PARAMS_v3_st:
-    """
-    GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : Any
-        Extra options
-    kern : CUkernel
-        Kernel to launch, will only be referenced if func is NULL
-    ctx : CUcontext
-        Context for the kernel task to run in. The value NULL will indicate
-        the current context should be used by the api. This field is
-        ignored if func is set.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS_v3_st __val
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS_v3_st* _ptr
-    cdef CUfunction _func
-    cdef utils.HelperKernelParams _cykernelParams
-    cdef CUkernel _kern
-    cdef CUcontext _ctx
-{{endif}}
-{{if 'struct CUDA_MEMSET_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_MEMSET_NODE_PARAMS_st:
-    """
-    Memset node parameters
-
-    Attributes
-    ----------
-    dst : CUdeviceptr
-        Destination device pointer
-    pitch : size_t
-        Pitch of destination device pointer. Unused if height is 1
-    value : unsigned int
-        Value to be set
-    elementSize : unsigned int
-        Size of each element in bytes. Must be 1, 2, or 4.
-    width : size_t
-        Width of the row in elements
-    height : size_t
-        Number of rows
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_MEMSET_NODE_PARAMS_st __val
-    cdef cydriver.CUDA_MEMSET_NODE_PARAMS_st* _ptr
-    cdef CUdeviceptr _dst
-{{endif}}
-{{if 'struct CUDA_MEMSET_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_MEMSET_NODE_PARAMS_v2_st:
-    """
-    Memset node parameters
-
-    Attributes
-    ----------
-    dst : CUdeviceptr
-        Destination device pointer
-    pitch : size_t
-        Pitch of destination device pointer. Unused if height is 1
-    value : unsigned int
-        Value to be set
-    elementSize : unsigned int
-        Size of each element in bytes. Must be 1, 2, or 4.
-    width : size_t
-        Width of the row in elements
-    height : size_t
-        Number of rows
-    ctx : CUcontext
-        Context on which to run the node
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_MEMSET_NODE_PARAMS_v2_st __val
-    cdef cydriver.CUDA_MEMSET_NODE_PARAMS_v2_st* _ptr
-    cdef CUdeviceptr _dst
-    cdef CUcontext _ctx
-{{endif}}
-{{if 'struct CUDA_HOST_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_HOST_NODE_PARAMS_st:
-    """
-    Host node parameters
-
-    Attributes
-    ----------
-    fn : CUhostFn
-        The function to call when the node executes
-    userData : Any
-        Argument to pass to the function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_HOST_NODE_PARAMS_st __val
-    cdef cydriver.CUDA_HOST_NODE_PARAMS_st* _ptr
-    cdef CUhostFn _fn
-{{endif}}
-{{if 'struct CUDA_HOST_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_HOST_NODE_PARAMS_v2_st:
-    """
-    Host node parameters
-
-    Attributes
-    ----------
-    fn : CUhostFn
-        The function to call when the node executes
-    userData : Any
-        Argument to pass to the function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_HOST_NODE_PARAMS_v2_st __val
-    cdef cydriver.CUDA_HOST_NODE_PARAMS_v2_st* _ptr
-    cdef CUhostFn _fn
-{{endif}}
-{{if 'struct CUDA_CONDITIONAL_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_CONDITIONAL_NODE_PARAMS:
-    """
-    Conditional node parameters
-
-    Attributes
-    ----------
-    handle : CUgraphConditionalHandle
-        Conditional node handle. Handles must be created in advance of
-        creating the node using cuGraphConditionalHandleCreate.
-    type : CUgraphConditionalNodeType
-        Type of conditional node.
-    size : unsigned int
-        Size of graph output array. Must be 1.
-    phGraph_out : CUgraph
-        CUDA-owned array populated with conditional node child graphs
-        during creation of the node. Valid for the lifetime of the
-        conditional node. The contents of the graph(s) are subject to the
-        following constraints:   - Allowed node types are kernel nodes,
-        empty nodes, child graphs, memsets, memcopies, and conditionals.
-        This applies recursively to child graphs and conditional bodies.
-        - All kernels, including kernels in nested conditionals or child
-        graphs at any level, must belong to the same CUDA context.
-        These graphs may be populated using graph node creation APIs or
-        cuStreamBeginCaptureToGraph.
-    ctx : CUcontext
-        Context on which to run the node. Must match context used to create
-        the handle and all body nodes.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_CONDITIONAL_NODE_PARAMS __val
-    cdef cydriver.CUDA_CONDITIONAL_NODE_PARAMS* _ptr
-    cdef CUgraphConditionalHandle _handle
-    cdef size_t _phGraph_out_length
-    cdef cydriver.CUgraph* _phGraph_out
-
-    cdef CUcontext _ctx
-{{endif}}
-{{if 'struct CUgraphEdgeData_st' in found_types}}
-
-cdef class CUgraphEdgeData_st:
-    """
-    Optional annotation for edges in a CUDA graph. Note, all edges
-    implicitly have annotations and default to a zero-initialized value
-    if not specified. A zero-initialized struct indicates a standard
-    full serialization of two nodes with memory visibility.
-
-    Attributes
-    ----------
-    from_port : bytes
-        This indicates when the dependency is triggered from the upstream
-        node on the edge. The meaning is specfic to the node type. A value
-        of 0 in all cases means full completion of the upstream node, with
-        memory visibility to the downstream node or portion thereof
-        (indicated by `to_port`).   Only kernel nodes define non-zero
-        ports. A kernel node can use the following output port types:
-        CU_GRAPH_KERNEL_NODE_PORT_DEFAULT,
-        CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC, or
-        CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER.
-    to_port : bytes
-        This indicates what portion of the downstream node is dependent on
-        the upstream node or portion thereof (indicated by `from_port`).
-        The meaning is specific to the node type. A value of 0 in all cases
-        means the entirety of the downstream node is dependent on the
-        upstream work.   Currently no node types define non-zero ports.
-        Accordingly, this field must be set to zero.
-    type : bytes
-        This should be populated with a value from CUgraphDependencyType.
-        (It is typed as char due to compiler-specific layout of bitfields.)
-        See CUgraphDependencyType.
-    reserved : bytes
-        These bytes are unused and must be zeroed. This ensures
-        compatibility if additional fields are added in the future.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUgraphEdgeData_st __val
-    cdef cydriver.CUgraphEdgeData_st* _ptr
-{{endif}}
-{{if 'struct CUDA_GRAPH_INSTANTIATE_PARAMS_st' in found_types}}
-
-cdef class CUDA_GRAPH_INSTANTIATE_PARAMS_st:
-    """
-    Graph instantiation parameters
-
-    Attributes
-    ----------
-    flags : cuuint64_t
-        Instantiation flags
-    hUploadStream : CUstream
-        Upload stream
-    hErrNode_out : CUgraphNode
-        The node which caused instantiation to fail, if any
-    result_out : CUgraphInstantiateResult
-        Whether instantiation was successful. If it failed, the reason why
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_GRAPH_INSTANTIATE_PARAMS_st __val
-    cdef cydriver.CUDA_GRAPH_INSTANTIATE_PARAMS_st* _ptr
-    cdef cuuint64_t _flags
-    cdef CUstream _hUploadStream
-    cdef CUgraphNode _hErrNode_out
-{{endif}}
-{{if 'struct CUlaunchMemSyncDomainMap_st' in found_types}}
-
-cdef class CUlaunchMemSyncDomainMap_st:
-    """
-    Memory Synchronization Domain map  See ::cudaLaunchMemSyncDomain.
-    By default, kernels are launched in domain 0. Kernel launched with
-    CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE will have a different domain ID.
-    User may also alter the domain ID with CUlaunchMemSyncDomainMap for
-    a specific stream / graph node / kernel launch. See
-    CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP.  Domain ID range is
-    available through CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT.
-
-    Attributes
-    ----------
-    default_ : bytes
-        The default domain ID to use for designated kernels
-    remote : bytes
-        The remote domain ID to use for designated kernels
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUlaunchMemSyncDomainMap_st __val
-    cdef cydriver.CUlaunchMemSyncDomainMap_st* _ptr
-{{endif}}
-{{if 'union CUlaunchAttributeValue_union' in found_types}}
-
-cdef class anon_struct1:
-    """
-    Attributes
-    ----------
-    x : unsigned int
-
-    y : unsigned int
-
-    z : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUlaunchAttributeValue_union* _ptr
-{{endif}}
-{{if 'union CUlaunchAttributeValue_union' in found_types}}
-
-cdef class anon_struct2:
-    """
-    Attributes
-    ----------
-    event : CUevent
-
-    flags : int
-
-    triggerAtBlockStart : int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUlaunchAttributeValue_union* _ptr
-    cdef CUevent _event
-{{endif}}
-{{if 'union CUlaunchAttributeValue_union' in found_types}}
-
-cdef class anon_struct3:
-    """
-    Attributes
-    ----------
-    event : CUevent
-
-    flags : int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUlaunchAttributeValue_union* _ptr
-    cdef CUevent _event
-{{endif}}
-{{if 'union CUlaunchAttributeValue_union' in found_types}}
-
-cdef class anon_struct4:
-    """
-    Attributes
-    ----------
-    deviceUpdatable : int
-
-    devNode : CUgraphDeviceNode
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUlaunchAttributeValue_union* _ptr
-    cdef CUgraphDeviceNode _devNode
-{{endif}}
-{{if 'union CUlaunchAttributeValue_union' in found_types}}
-
-cdef class CUlaunchAttributeValue_union:
-    """
-    Launch attributes union; used as value field of CUlaunchAttribute
-
-    Attributes
-    ----------
-    pad : bytes
-
-    accessPolicyWindow : CUaccessPolicyWindow
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW.
-    cooperative : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_COOPERATIVE. Nonzero
-        indicates a cooperative kernel (see cuLaunchCooperativeKernel).
-    syncPolicy : CUsynchronizationPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY.
-        ::CUsynchronizationPolicy for work queued up in this stream
-    clusterDim : anon_struct1
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
-        that represents the desired cluster dimensions for the kernel.
-        Opaque type with the following fields: - `x` - The X dimension of
-        the cluster, in blocks. Must be a divisor of the grid X dimension.
-        - `y` - The Y dimension of the cluster, in blocks. Must be a
-        divisor of the grid Y dimension.    - `z` - The Z dimension of the
-        cluster, in blocks. Must be a divisor of the grid Z dimension.
-    clusterSchedulingPolicyPreference : CUclusterSchedulingPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE. Cluster
-        scheduling policy preference for the kernel.
-    programmaticStreamSerializationAllowed : int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION.
-    programmaticEvent : anon_struct2
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT
-        with the following fields: - `CUevent` event - Event to fire when
-        all blocks trigger it.    - `Event` record flags, see
-        cuEventRecordWithFlags. Does not accept :CU_EVENT_RECORD_EXTERNAL.
-        - `triggerAtBlockStart` - If this is set to non-0, each block
-        launch will automatically trigger the event.
-    launchCompletionEvent : anon_struct3
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT with the following
-        fields: - `CUevent` event - Event to fire when the last block
-        launches    - `int` flags; - Event record flags, see
-        cuEventRecordWithFlags. Does not accept CU_EVENT_RECORD_EXTERNAL.
-    priority : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PRIORITY. Execution
-        priority of the kernel.
-    memSyncDomainMap : CUlaunchMemSyncDomainMap
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP.
-        See CUlaunchMemSyncDomainMap.
-    memSyncDomain : CUlaunchMemSyncDomain
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN.
-        See::CUlaunchMemSyncDomain
-    deviceUpdatableKernelNode : anon_struct4
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. with the
-        following fields: - `int` deviceUpdatable - Whether or not the
-        resulting kernel node should be device-updatable.    -
-        `CUgraphDeviceNode` devNode - Returns a handle to pass to the
-        various device-side update functions.
-    sharedMemCarveout : unsigned int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUlaunchAttributeValue_union __val
-    cdef cydriver.CUlaunchAttributeValue_union* _ptr
-    cdef CUaccessPolicyWindow _accessPolicyWindow
-    cdef anon_struct1 _clusterDim
-    cdef anon_struct2 _programmaticEvent
-    cdef anon_struct3 _launchCompletionEvent
-    cdef CUlaunchMemSyncDomainMap _memSyncDomainMap
-    cdef anon_struct4 _deviceUpdatableKernelNode
-{{endif}}
-{{if 'struct CUlaunchAttribute_st' in found_types}}
-
-cdef class CUlaunchAttribute_st:
-    """
-    Launch attribute
-
-    Attributes
-    ----------
-    id : CUlaunchAttributeID
-        Attribute to set
-    value : CUlaunchAttributeValue
-        Value of the attribute
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUlaunchAttribute_st __val
-    cdef cydriver.CUlaunchAttribute_st* _ptr
-    cdef CUlaunchAttributeValue _value
-{{endif}}
-{{if 'struct CUlaunchConfig_st' in found_types}}
-
-cdef class CUlaunchConfig_st:
-    """
-    CUDA extensible launch configuration
-
-    Attributes
-    ----------
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    hStream : CUstream
-        Stream identifier
-    attrs : CUlaunchAttribute
-        List of attributes; nullable if CUlaunchConfig::numAttrs == 0
-    numAttrs : unsigned int
-        Number of attributes populated in CUlaunchConfig::attrs
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUlaunchConfig_st __val
-    cdef cydriver.CUlaunchConfig_st* _ptr
-    cdef CUstream _hStream
-    cdef size_t _attrs_length
-    cdef cydriver.CUlaunchAttribute* _attrs
-
-{{endif}}
-{{if 'struct CUexecAffinitySmCount_st' in found_types}}
-
-cdef class CUexecAffinitySmCount_st:
-    """
-    Value for CU_EXEC_AFFINITY_TYPE_SM_COUNT
-
-    Attributes
-    ----------
-    val : unsigned int
-        The number of SMs the context is limited to use.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUexecAffinitySmCount_st __val
-    cdef cydriver.CUexecAffinitySmCount_st* _ptr
-{{endif}}
-{{if 'struct CUexecAffinityParam_st' in found_types}}
-
-cdef class anon_union3:
-    """
-    Attributes
-    ----------
-    smCount : CUexecAffinitySmCount
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUexecAffinityParam_st* _ptr
-    cdef CUexecAffinitySmCount _smCount
-{{endif}}
-{{if 'struct CUexecAffinityParam_st' in found_types}}
-
-cdef class CUexecAffinityParam_st:
-    """
-    Execution Affinity Parameters
-
-    Attributes
-    ----------
-    type : CUexecAffinityType
-
-    param : anon_union3
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUexecAffinityParam_st* _val_ptr
-    cdef cydriver.CUexecAffinityParam_st* _ptr
-    cdef anon_union3 _param
-{{endif}}
-{{if 'struct CUctxCigParam_st' in found_types}}
-
-cdef class CUctxCigParam_st:
-    """
-    CIG Context Create Params
-
-    Attributes
-    ----------
-    sharedDataType : CUcigDataType
-
-    sharedData : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUctxCigParam_st __val
-    cdef cydriver.CUctxCigParam_st* _ptr
-{{endif}}
-{{if 'struct CUctxCreateParams_st' in found_types}}
-
-cdef class CUctxCreateParams_st:
-    """
-    Params for creating CUDA context Exactly one of execAffinityParams
-    and cigParams must be non-NULL.
-
-    Attributes
-    ----------
-    execAffinityParams : CUexecAffinityParam
-
-    numExecAffinityParams : int
-
-    cigParams : CUctxCigParam
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUctxCreateParams_st __val
-    cdef cydriver.CUctxCreateParams_st* _ptr
-    cdef size_t _execAffinityParams_length
-    cdef cydriver.CUexecAffinityParam* _execAffinityParams
-
-    cdef size_t _cigParams_length
-    cdef cydriver.CUctxCigParam* _cigParams
-
-{{endif}}
-{{if 'struct CUlibraryHostUniversalFunctionAndDataTable_st' in found_types}}
-
-cdef class CUlibraryHostUniversalFunctionAndDataTable_st:
-    """
-    Attributes
-    ----------
-    functionTable : Any
-
-    functionWindowSize : size_t
-
-    dataTable : Any
-
-    dataWindowSize : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUlibraryHostUniversalFunctionAndDataTable_st __val
-    cdef cydriver.CUlibraryHostUniversalFunctionAndDataTable_st* _ptr
-{{endif}}
-{{if 'struct CUDA_MEMCPY2D_st' in found_types}}
-
-cdef class CUDA_MEMCPY2D_st:
-    """
-    2D memory copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    WidthInBytes : size_t
-        Width of 2D memory copy in bytes
-    Height : size_t
-        Height of 2D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_MEMCPY2D_st __val
-    cdef cydriver.CUDA_MEMCPY2D_st* _ptr
-    cdef CUdeviceptr _srcDevice
-    cdef CUarray _srcArray
-    cdef CUdeviceptr _dstDevice
-    cdef CUarray _dstArray
-{{endif}}
-{{if 'struct CUDA_MEMCPY3D_st' in found_types}}
-
-cdef class CUDA_MEMCPY3D_st:
-    """
-    3D memory copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcZ : size_t
-        Source Z
-    srcLOD : size_t
-        Source LOD
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    reserved0 : Any
-        Must be NULL
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    srcHeight : size_t
-        Source height (ignored when src is array; may be 0 if Depth==1)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstZ : size_t
-        Destination Z
-    dstLOD : size_t
-        Destination LOD
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    reserved1 : Any
-        Must be NULL
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    dstHeight : size_t
-        Destination height (ignored when dst is array; may be 0 if
-        Depth==1)
-    WidthInBytes : size_t
-        Width of 3D memory copy in bytes
-    Height : size_t
-        Height of 3D memory copy
-    Depth : size_t
-        Depth of 3D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_MEMCPY3D_st __val
-    cdef cydriver.CUDA_MEMCPY3D_st* _ptr
-    cdef CUdeviceptr _srcDevice
-    cdef CUarray _srcArray
-    cdef CUdeviceptr _dstDevice
-    cdef CUarray _dstArray
-{{endif}}
-{{if 'struct CUDA_MEMCPY3D_PEER_st' in found_types}}
-
-cdef class CUDA_MEMCPY3D_PEER_st:
-    """
-    3D memory cross-context copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcZ : size_t
-        Source Z
-    srcLOD : size_t
-        Source LOD
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    srcContext : CUcontext
-        Source context (ignored with srcMemoryType is CU_MEMORYTYPE_ARRAY)
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    srcHeight : size_t
-        Source height (ignored when src is array; may be 0 if Depth==1)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstZ : size_t
-        Destination Z
-    dstLOD : size_t
-        Destination LOD
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    dstContext : CUcontext
-        Destination context (ignored with dstMemoryType is
-        CU_MEMORYTYPE_ARRAY)
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    dstHeight : size_t
-        Destination height (ignored when dst is array; may be 0 if
-        Depth==1)
-    WidthInBytes : size_t
-        Width of 3D memory copy in bytes
-    Height : size_t
-        Height of 3D memory copy
-    Depth : size_t
-        Depth of 3D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_MEMCPY3D_PEER_st __val
-    cdef cydriver.CUDA_MEMCPY3D_PEER_st* _ptr
-    cdef CUdeviceptr _srcDevice
-    cdef CUarray _srcArray
-    cdef CUcontext _srcContext
-    cdef CUdeviceptr _dstDevice
-    cdef CUarray _dstArray
-    cdef CUcontext _dstContext
-{{endif}}
-{{if 'struct CUDA_MEMCPY_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_MEMCPY_NODE_PARAMS_st:
-    """
-    Memcpy node parameters
-
-    Attributes
-    ----------
-    flags : int
-        Must be zero
-    reserved : int
-        Must be zero
-    copyCtx : CUcontext
-        Context on which to run the node
-    copyParams : CUDA_MEMCPY3D
-        Parameters for the memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_MEMCPY_NODE_PARAMS_st __val
-    cdef cydriver.CUDA_MEMCPY_NODE_PARAMS_st* _ptr
-    cdef CUcontext _copyCtx
-    cdef CUDA_MEMCPY3D _copyParams
-{{endif}}
-{{if 'struct CUDA_ARRAY_DESCRIPTOR_st' in found_types}}
-
-cdef class CUDA_ARRAY_DESCRIPTOR_st:
-    """
-    Array descriptor
-
-    Attributes
-    ----------
-    Width : size_t
-        Width of array
-    Height : size_t
-        Height of array
-    Format : CUarray_format
-        Array format
-    NumChannels : unsigned int
-        Channels per array element
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_ARRAY_DESCRIPTOR_st __val
-    cdef cydriver.CUDA_ARRAY_DESCRIPTOR_st* _ptr
-{{endif}}
-{{if 'struct CUDA_ARRAY3D_DESCRIPTOR_st' in found_types}}
-
-cdef class CUDA_ARRAY3D_DESCRIPTOR_st:
-    """
-    3D array descriptor
-
-    Attributes
-    ----------
-    Width : size_t
-        Width of 3D array
-    Height : size_t
-        Height of 3D array
-    Depth : size_t
-        Depth of 3D array
-    Format : CUarray_format
-        Array format
-    NumChannels : unsigned int
-        Channels per array element
-    Flags : unsigned int
-        Flags
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR_st __val
-    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR_st* _ptr
-{{endif}}
-{{if 'struct CUDA_ARRAY_SPARSE_PROPERTIES_st' in found_types}}
-
-cdef class anon_struct5:
-    """
-    Attributes
-    ----------
-    width : unsigned int
-
-    height : unsigned int
-
-    depth : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_ARRAY_SPARSE_PROPERTIES_st* _ptr
-{{endif}}
-{{if 'struct CUDA_ARRAY_SPARSE_PROPERTIES_st' in found_types}}
-
-cdef class CUDA_ARRAY_SPARSE_PROPERTIES_st:
-    """
-    CUDA array sparse properties
-
-    Attributes
-    ----------
-    tileExtent : anon_struct5
-
-    miptailFirstLevel : unsigned int
-        First mip level at which the mip tail begins.
-    miptailSize : unsigned long long
-        Total size of the mip tail.
-    flags : unsigned int
-        Flags will either be zero or
-        CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_ARRAY_SPARSE_PROPERTIES_st __val
-    cdef cydriver.CUDA_ARRAY_SPARSE_PROPERTIES_st* _ptr
-    cdef anon_struct5 _tileExtent
-{{endif}}
-{{if 'struct CUDA_ARRAY_MEMORY_REQUIREMENTS_st' in found_types}}
-
-cdef class CUDA_ARRAY_MEMORY_REQUIREMENTS_st:
-    """
-    CUDA array memory requirements
-
-    Attributes
-    ----------
-    size : size_t
-        Total required memory size
-    alignment : size_t
-        alignment requirement
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS_st __val
-    cdef cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS_st* _ptr
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_struct6:
-    """
-    Attributes
-    ----------
-    hArray : CUarray
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_RESOURCE_DESC_st* _ptr
-    cdef CUarray _hArray
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_struct7:
-    """
-    Attributes
-    ----------
-    hMipmappedArray : CUmipmappedArray
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_RESOURCE_DESC_st* _ptr
-    cdef CUmipmappedArray _hMipmappedArray
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_struct8:
-    """
-    Attributes
-    ----------
-    devPtr : CUdeviceptr
-
-    format : CUarray_format
-
-    numChannels : unsigned int
-
-    sizeInBytes : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_RESOURCE_DESC_st* _ptr
-    cdef CUdeviceptr _devPtr
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_struct9:
-    """
-    Attributes
-    ----------
-    devPtr : CUdeviceptr
-
-    format : CUarray_format
-
-    numChannels : unsigned int
-
-    width : size_t
-
-    height : size_t
-
-    pitchInBytes : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_RESOURCE_DESC_st* _ptr
-    cdef CUdeviceptr _devPtr
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_struct10:
-    """
-    Attributes
-    ----------
-    reserved : List[int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_RESOURCE_DESC_st* _ptr
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_union4:
-    """
-    Attributes
-    ----------
-    array : anon_struct6
-
-    mipmap : anon_struct7
-
-    linear : anon_struct8
-
-    pitch2D : anon_struct9
-
-    reserved : anon_struct10
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_RESOURCE_DESC_st* _ptr
-    cdef anon_struct6 _array
-    cdef anon_struct7 _mipmap
-    cdef anon_struct8 _linear
-    cdef anon_struct9 _pitch2D
-    cdef anon_struct10 _reserved
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class CUDA_RESOURCE_DESC_st:
-    """
-    CUDA Resource descriptor
-
-    Attributes
-    ----------
-    resType : CUresourcetype
-        Resource type
-    res : anon_union4
-
-    flags : unsigned int
-        Flags (must be zero)
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_RESOURCE_DESC_st* _val_ptr
-    cdef cydriver.CUDA_RESOURCE_DESC_st* _ptr
-    cdef anon_union4 _res
-{{endif}}
-{{if 'struct CUDA_TEXTURE_DESC_st' in found_types}}
-
-cdef class CUDA_TEXTURE_DESC_st:
-    """
-    Texture descriptor
-
-    Attributes
-    ----------
-    addressMode : List[CUaddress_mode]
-        Address modes
-    filterMode : CUfilter_mode
-        Filter mode
-    flags : unsigned int
-        Flags
-    maxAnisotropy : unsigned int
-        Maximum anisotropy ratio
-    mipmapFilterMode : CUfilter_mode
-        Mipmap filter mode
-    mipmapLevelBias : float
-        Mipmap level bias
-    minMipmapLevelClamp : float
-        Mipmap minimum level clamp
-    maxMipmapLevelClamp : float
-        Mipmap maximum level clamp
-    borderColor : List[float]
-        Border Color
-    reserved : List[int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_TEXTURE_DESC_st __val
-    cdef cydriver.CUDA_TEXTURE_DESC_st* _ptr
-{{endif}}
-{{if 'struct CUDA_RESOURCE_VIEW_DESC_st' in found_types}}
-
-cdef class CUDA_RESOURCE_VIEW_DESC_st:
-    """
-    Resource view descriptor
-
-    Attributes
-    ----------
-    format : CUresourceViewFormat
-        Resource view format
-    width : size_t
-        Width of the resource view
-    height : size_t
-        Height of the resource view
-    depth : size_t
-        Depth of the resource view
-    firstMipmapLevel : unsigned int
-        First defined mipmap level
-    lastMipmapLevel : unsigned int
-        Last defined mipmap level
-    firstLayer : unsigned int
-        First layer index
-    lastLayer : unsigned int
-        Last layer index
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_RESOURCE_VIEW_DESC_st __val
-    cdef cydriver.CUDA_RESOURCE_VIEW_DESC_st* _ptr
-{{endif}}
-{{if 'struct CUtensorMap_st' in found_types}}
-
-cdef class CUtensorMap_st:
-    """
-    Tensor map descriptor. Requires compiler support for aligning to 64
-    bytes.
-
-    Attributes
-    ----------
-    opaque : List[cuuint64_t]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUtensorMap_st __val
-    cdef cydriver.CUtensorMap_st* _ptr
-{{endif}}
-{{if 'struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st' in found_types}}
-
-cdef class CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st:
-    """
-    GPU Direct v3 tokens
-
-    Attributes
-    ----------
-    p2pToken : unsigned long long
-
-    vaSpaceToken : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st __val
-    cdef cydriver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st* _ptr
-{{endif}}
-{{if 'struct CUDA_LAUNCH_PARAMS_st' in found_types}}
-
-cdef class CUDA_LAUNCH_PARAMS_st:
-    """
-    Kernel launch parameters
-
-    Attributes
-    ----------
-    function : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    hStream : CUstream
-        Stream identifier
-    kernelParams : Any
-        Array of pointers to kernel parameters
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_LAUNCH_PARAMS_st __val
-    cdef cydriver.CUDA_LAUNCH_PARAMS_st* _ptr
-    cdef CUfunction _function
-    cdef CUstream _hStream
-    cdef utils.HelperKernelParams _cykernelParams
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st' in found_types}}
-
-cdef class anon_struct11:
-    """
-    Attributes
-    ----------
-    handle : Any
-
-    name : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st* _ptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st' in found_types}}
-
-cdef class anon_union5:
-    """
-    Attributes
-    ----------
-    fd : int
-
-    win32 : anon_struct11
-
-    nvSciBufObject : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st* _ptr
-    cdef anon_struct11 _win32
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st:
-    """
-    External memory handle descriptor
-
-    Attributes
-    ----------
-    type : CUexternalMemoryHandleType
-        Type of the handle
-    handle : anon_union5
-
-    size : unsigned long long
-        Size of the memory allocation
-    flags : unsigned int
-        Flags must either be zero or CUDA_EXTERNAL_MEMORY_DEDICATED
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st* _val_ptr
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st* _ptr
-    cdef anon_union5 _handle
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st:
-    """
-    External memory buffer descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the buffer's base is
-    size : unsigned long long
-        Size of the buffer
-    flags : unsigned int
-        Flags reserved for future use. Must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st __val
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st* _ptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st:
-    """
-    External memory mipmap descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the base level of the mipmap
-        chain is.
-    arrayDesc : CUDA_ARRAY3D_DESCRIPTOR
-        Format, dimension and type of base level of the mipmap chain
-    numLevels : unsigned int
-        Total number of levels in the mipmap chain
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st __val
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st* _ptr
-    cdef CUDA_ARRAY3D_DESCRIPTOR _arrayDesc
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st' in found_types}}
-
-cdef class anon_struct12:
-    """
-    Attributes
-    ----------
-    handle : Any
-
-    name : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st* _ptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st' in found_types}}
-
-cdef class anon_union6:
-    """
-    Attributes
-    ----------
-    fd : int
-
-    win32 : anon_struct12
-
-    nvSciSyncObj : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st* _ptr
-    cdef anon_struct12 _win32
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st:
-    """
-    External semaphore handle descriptor
-
-    Attributes
-    ----------
-    type : CUexternalSemaphoreHandleType
-        Type of the handle
-    handle : anon_union6
-
-    flags : unsigned int
-        Flags reserved for the future. Must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st* _val_ptr
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st* _ptr
-    cdef anon_union6 _handle
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st' in found_types}}
-
-cdef class anon_struct13:
-    """
-    Attributes
-    ----------
-    value : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st* _ptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st' in found_types}}
-
-cdef class anon_union7:
-    """
-    Attributes
-    ----------
-    fence : Any
-
-    reserved : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st* _ptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st' in found_types}}
-
-cdef class anon_struct14:
-    """
-    Attributes
-    ----------
-    key : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st* _ptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st' in found_types}}
-
-cdef class anon_struct15:
-    """
-    Attributes
-    ----------
-    fence : anon_struct13
-
-    nvSciSync : anon_union7
-
-    keyedMutex : anon_struct14
-
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st* _ptr
-    cdef anon_struct13 _fence
-    cdef anon_union7 _nvSciSync
-    cdef anon_struct14 _keyedMutex
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st:
-    """
-    External semaphore signal parameters
-
-    Attributes
-    ----------
-    params : anon_struct15
-
-    flags : unsigned int
-        Only when ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS is used to signal
-        a CUexternalSemaphore of type
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, the valid flag is
-        CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC which
-        indicates that while signaling the CUexternalSemaphore, no memory
-        synchronization operations should be performed for any external
-        memory object imported as CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF.
-        For all other types of CUexternalSemaphore, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st __val
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st* _ptr
-    cdef anon_struct15 _params
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st' in found_types}}
-
-cdef class anon_struct16:
-    """
-    Attributes
-    ----------
-    value : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st* _ptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st' in found_types}}
-
-cdef class anon_union8:
-    """
-    Attributes
-    ----------
-    fence : Any
-
-    reserved : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st* _ptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st' in found_types}}
-
-cdef class anon_struct17:
-    """
-    Attributes
-    ----------
-    key : unsigned long long
-
-    timeoutMs : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st* _ptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st' in found_types}}
-
-cdef class anon_struct18:
-    """
-    Attributes
-    ----------
-    fence : anon_struct16
-
-    nvSciSync : anon_union8
-
-    keyedMutex : anon_struct17
-
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st* _ptr
-    cdef anon_struct16 _fence
-    cdef anon_union8 _nvSciSync
-    cdef anon_struct17 _keyedMutex
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st:
-    """
-    External semaphore wait parameters
-
-    Attributes
-    ----------
-    params : anon_struct18
-
-    flags : unsigned int
-        Only when ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS is used to wait on
-        a CUexternalSemaphore of type
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, the valid flag is
-        CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC which indicates
-        that while waiting for the CUexternalSemaphore, no memory
-        synchronization operations should be performed for any external
-        memory object imported as CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF.
-        For all other types of CUexternalSemaphore, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st __val
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st* _ptr
-    cdef anon_struct18 _params
-{{endif}}
-{{if 'struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st:
-    """
-    Semaphore signal node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS
-        Array of external semaphore signal parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st __val
-    cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st* _ptr
-    cdef size_t _extSemArray_length
-    cdef cydriver.CUexternalSemaphore* _extSemArray
-
-    cdef size_t _paramsArray_length
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* _paramsArray
-
-{{endif}}
-{{if 'struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st:
-    """
-    Semaphore signal node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS
-        Array of external semaphore signal parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st __val
-    cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st* _ptr
-    cdef size_t _extSemArray_length
-    cdef cydriver.CUexternalSemaphore* _extSemArray
-
-    cdef size_t _paramsArray_length
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* _paramsArray
-
-{{endif}}
-{{if 'struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_EXT_SEM_WAIT_NODE_PARAMS_st:
-    """
-    Semaphore wait node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS
-        Array of external semaphore wait parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_st __val
-    cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_st* _ptr
-    cdef size_t _extSemArray_length
-    cdef cydriver.CUexternalSemaphore* _extSemArray
-
-    cdef size_t _paramsArray_length
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* _paramsArray
-
-{{endif}}
-{{if 'struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st:
-    """
-    Semaphore wait node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS
-        Array of external semaphore wait parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st __val
-    cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st* _ptr
-    cdef size_t _extSemArray_length
-    cdef cydriver.CUexternalSemaphore* _extSemArray
-
-    cdef size_t _paramsArray_length
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* _paramsArray
-
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class anon_union9:
-    """
-    Attributes
-    ----------
-    mipmap : CUmipmappedArray
-
-    array : CUarray
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUarrayMapInfo_st* _ptr
-    cdef CUmipmappedArray _mipmap
-    cdef CUarray _array
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class anon_struct19:
-    """
-    Attributes
-    ----------
-    level : unsigned int
-
-    layer : unsigned int
-
-    offsetX : unsigned int
-
-    offsetY : unsigned int
-
-    offsetZ : unsigned int
-
-    extentWidth : unsigned int
-
-    extentHeight : unsigned int
-
-    extentDepth : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUarrayMapInfo_st* _ptr
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class anon_struct20:
-    """
-    Attributes
-    ----------
-    layer : unsigned int
-
-    offset : unsigned long long
-
-    size : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUarrayMapInfo_st* _ptr
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class anon_union10:
-    """
-    Attributes
-    ----------
-    sparseLevel : anon_struct19
-
-    miptail : anon_struct20
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUarrayMapInfo_st* _ptr
-    cdef anon_struct19 _sparseLevel
-    cdef anon_struct20 _miptail
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class anon_union11:
-    """
-    Attributes
-    ----------
-    memHandle : CUmemGenericAllocationHandle
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUarrayMapInfo_st* _ptr
-    cdef CUmemGenericAllocationHandle _memHandle
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class CUarrayMapInfo_st:
-    """
-    Specifies the CUDA array or CUDA mipmapped array memory mapping
-    information
-
-    Attributes
-    ----------
-    resourceType : CUresourcetype
-        Resource type
-    resource : anon_union9
-
-    subresourceType : CUarraySparseSubresourceType
-        Sparse subresource type
-    subresource : anon_union10
-
-    memOperationType : CUmemOperationType
-        Memory operation type
-    memHandleType : CUmemHandleType
-        Memory handle type
-    memHandle : anon_union11
-
-    offset : unsigned long long
-        Offset within mip tail  Offset within the memory
-    deviceBitMask : unsigned int
-        Device ordinal bit mask
-    flags : unsigned int
-        flags for future use, must be zero now.
-    reserved : List[unsigned int]
-        Reserved for future use, must be zero now.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUarrayMapInfo_st* _val_ptr
-    cdef cydriver.CUarrayMapInfo_st* _ptr
-    cdef anon_union9 _resource
-    cdef anon_union10 _subresource
-    cdef anon_union11 _memHandle
-{{endif}}
-{{if 'struct CUmemLocation_st' in found_types}}
-
-cdef class CUmemLocation_st:
-    """
-    Specifies a memory location.
-
-    Attributes
-    ----------
-    type : CUmemLocationType
-        Specifies the location type, which modifies the meaning of id.
-    id : int
-        identifier for a given this location's CUmemLocationType.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmemLocation_st __val
-    cdef cydriver.CUmemLocation_st* _ptr
-{{endif}}
-{{if 'struct CUmemAllocationProp_st' in found_types}}
-
-cdef class anon_struct21:
-    """
-    Attributes
-    ----------
-    compressionType : bytes
-
-    gpuDirectRDMACapable : bytes
-
-    usage : unsigned short
-
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmemAllocationProp_st* _ptr
-{{endif}}
-{{if 'struct CUmemAllocationProp_st' in found_types}}
-
-cdef class CUmemAllocationProp_st:
-    """
-    Specifies the allocation properties for a allocation.
-
-    Attributes
-    ----------
-    type : CUmemAllocationType
-        Allocation type
-    requestedHandleTypes : CUmemAllocationHandleType
-        requested CUmemAllocationHandleType
-    location : CUmemLocation
-        Location of allocation
-    win32HandleMetaData : Any
-        Windows-specific POBJECT_ATTRIBUTES required when
-        CU_MEM_HANDLE_TYPE_WIN32 is specified. This object attributes
-        structure includes security attributes that define the scope of
-        which exported allocations may be transferred to other processes.
-        In all other cases, this field is required to be zero.
-    allocFlags : anon_struct21
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmemAllocationProp_st __val
-    cdef cydriver.CUmemAllocationProp_st* _ptr
-    cdef CUmemLocation _location
-    cdef anon_struct21 _allocFlags
-{{endif}}
-{{if 'struct CUmulticastObjectProp_st' in found_types}}
-
-cdef class CUmulticastObjectProp_st:
-    """
-    Specifies the properties for a multicast object.
-
-    Attributes
-    ----------
-    numDevices : unsigned int
-        The number of devices in the multicast team that will bind memory
-        to this object
-    size : size_t
-        The maximum amount of memory that can be bound to this multicast
-        object per device
-    handleTypes : unsigned long long
-        Bitmask of exportable handle types (see CUmemAllocationHandleType)
-        for this object
-    flags : unsigned long long
-        Flags for future use, must be zero now
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmulticastObjectProp_st __val
-    cdef cydriver.CUmulticastObjectProp_st* _ptr
-{{endif}}
-{{if 'struct CUmemAccessDesc_st' in found_types}}
-
-cdef class CUmemAccessDesc_st:
-    """
-    Memory access descriptor
-
-    Attributes
-    ----------
-    location : CUmemLocation
-        Location on which the request is to change it's accessibility
-    flags : CUmemAccess_flags
-        ::CUmemProt accessibility flags to set on the request
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmemAccessDesc_st __val
-    cdef cydriver.CUmemAccessDesc_st* _ptr
-    cdef CUmemLocation _location
-{{endif}}
-{{if 'struct CUgraphExecUpdateResultInfo_st' in found_types}}
-
-cdef class CUgraphExecUpdateResultInfo_st:
-    """
-    Result information returned by cuGraphExecUpdate
-
-    Attributes
-    ----------
-    result : CUgraphExecUpdateResult
-        Gives more specific detail when a cuda graph update fails.
-    errorNode : CUgraphNode
-        The "to node" of the error edge when the topologies do not match.
-        The error node when the error is associated with a specific node.
-        NULL when the error is generic.
-    errorFromNode : CUgraphNode
-        The from node of error edge when the topologies do not match.
-        Otherwise NULL.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUgraphExecUpdateResultInfo_st __val
-    cdef cydriver.CUgraphExecUpdateResultInfo_st* _ptr
-    cdef CUgraphNode _errorNode
-    cdef CUgraphNode _errorFromNode
-{{endif}}
-{{if 'struct CUmemPoolProps_st' in found_types}}
-
-cdef class CUmemPoolProps_st:
-    """
-    Specifies the properties of allocations made from the pool.
-
-    Attributes
-    ----------
-    allocType : CUmemAllocationType
-        Allocation type. Currently must be specified as
-        CU_MEM_ALLOCATION_TYPE_PINNED
-    handleTypes : CUmemAllocationHandleType
-        Handle types that will be supported by allocations from the pool.
-    location : CUmemLocation
-        Location where allocations should reside.
-    win32SecurityAttributes : Any
-        Windows-specific LPSECURITYATTRIBUTES required when
-        CU_MEM_HANDLE_TYPE_WIN32 is specified. This security attribute
-        defines the scope of which exported allocations may be transferred
-        to other processes. In all other cases, this field is required to
-        be zero.
-    maxSize : size_t
-        Maximum pool size. When set to 0, defaults to a system dependent
-        value.
-    usage : unsigned short
-        Bitmask indicating intended usage for the pool.
-    reserved : bytes
-        reserved for future use, must be 0
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmemPoolProps_st __val
-    cdef cydriver.CUmemPoolProps_st* _ptr
-    cdef CUmemLocation _location
-{{endif}}
-{{if 'struct CUmemPoolPtrExportData_st' in found_types}}
-
-cdef class CUmemPoolPtrExportData_st:
-    """
-    Opaque data for exporting a pool allocation
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmemPoolPtrExportData_st __val
-    cdef cydriver.CUmemPoolPtrExportData_st* _ptr
-{{endif}}
-{{if 'struct CUDA_MEM_ALLOC_NODE_PARAMS_v1_st' in found_types}}
-
-cdef class CUDA_MEM_ALLOC_NODE_PARAMS_v1_st:
-    """
-    Memory allocation node parameters
-
-    Attributes
-    ----------
-    poolProps : CUmemPoolProps
-        in: location where the allocation should reside (specified in
-        ::location). ::handleTypes must be CU_MEM_HANDLE_TYPE_NONE. IPC is
-        not supported.
-    accessDescs : CUmemAccessDesc
-        in: array of memory access descriptors. Used to describe peer GPU
-        access
-    accessDescCount : size_t
-        in: number of memory access descriptors. Must not exceed the number
-        of GPUs.
-    bytesize : size_t
-        in: size in bytes of the requested allocation
-    dptr : CUdeviceptr
-        out: address of the allocation returned by CUDA
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v1_st __val
-    cdef cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v1_st* _ptr
-    cdef CUmemPoolProps _poolProps
-    cdef size_t _accessDescs_length
-    cdef cydriver.CUmemAccessDesc* _accessDescs
-
-    cdef CUdeviceptr _dptr
-{{endif}}
-{{if 'struct CUDA_MEM_ALLOC_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_MEM_ALLOC_NODE_PARAMS_v2_st:
-    """
-    Memory allocation node parameters
-
-    Attributes
-    ----------
-    poolProps : CUmemPoolProps
-        in: location where the allocation should reside (specified in
-        ::location). ::handleTypes must be CU_MEM_HANDLE_TYPE_NONE. IPC is
-        not supported.
-    accessDescs : CUmemAccessDesc
-        in: array of memory access descriptors. Used to describe peer GPU
-        access
-    accessDescCount : size_t
-        in: number of memory access descriptors. Must not exceed the number
-        of GPUs.
-    bytesize : size_t
-        in: size in bytes of the requested allocation
-    dptr : CUdeviceptr
-        out: address of the allocation returned by CUDA
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v2_st __val
-    cdef cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v2_st* _ptr
-    cdef CUmemPoolProps _poolProps
-    cdef size_t _accessDescs_length
-    cdef cydriver.CUmemAccessDesc* _accessDescs
-
-    cdef CUdeviceptr _dptr
-{{endif}}
-{{if 'struct CUDA_MEM_FREE_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_MEM_FREE_NODE_PARAMS_st:
-    """
-    Memory free node parameters
-
-    Attributes
-    ----------
-    dptr : CUdeviceptr
-        in: the pointer to free
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_MEM_FREE_NODE_PARAMS_st __val
-    cdef cydriver.CUDA_MEM_FREE_NODE_PARAMS_st* _ptr
-    cdef CUdeviceptr _dptr
-{{endif}}
-{{if 'struct CUDA_CHILD_GRAPH_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_CHILD_GRAPH_NODE_PARAMS_st:
-    """
-    Child graph node parameters
-
-    Attributes
-    ----------
-    graph : CUgraph
-        The child graph to clone into the node for node creation, or a
-        handle to the graph owned by the node for node query
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_CHILD_GRAPH_NODE_PARAMS_st __val
-    cdef cydriver.CUDA_CHILD_GRAPH_NODE_PARAMS_st* _ptr
-    cdef CUgraph _graph
-{{endif}}
-{{if 'struct CUDA_EVENT_RECORD_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_EVENT_RECORD_NODE_PARAMS_st:
-    """
-    Event record node parameters
-
-    Attributes
-    ----------
-    event : CUevent
-        The event to record when the node executes
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EVENT_RECORD_NODE_PARAMS_st __val
-    cdef cydriver.CUDA_EVENT_RECORD_NODE_PARAMS_st* _ptr
-    cdef CUevent _event
-{{endif}}
-{{if 'struct CUDA_EVENT_WAIT_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_EVENT_WAIT_NODE_PARAMS_st:
-    """
-    Event wait node parameters
-
-    Attributes
-    ----------
-    event : CUevent
-        The event to wait on from the node
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUDA_EVENT_WAIT_NODE_PARAMS_st __val
-    cdef cydriver.CUDA_EVENT_WAIT_NODE_PARAMS_st* _ptr
-    cdef CUevent _event
-{{endif}}
-{{if 'struct CUgraphNodeParams_st' in found_types}}
-
-cdef class CUgraphNodeParams_st:
-    """
-    Graph node parameters. See cuGraphAddNode.
-
-    Attributes
-    ----------
-    type : CUgraphNodeType
-        Type of the node
-    reserved0 : List[int]
-        Reserved. Must be zero.
-    reserved1 : List[long long]
-        Padding. Unused bytes must be zero.
-    kernel : CUDA_KERNEL_NODE_PARAMS_v3
-        Kernel node parameters.
-    memcpy : CUDA_MEMCPY_NODE_PARAMS
-        Memcpy node parameters.
-    memset : CUDA_MEMSET_NODE_PARAMS_v2
-        Memset node parameters.
-    host : CUDA_HOST_NODE_PARAMS_v2
-        Host node parameters.
-    graph : CUDA_CHILD_GRAPH_NODE_PARAMS
-        Child graph node parameters.
-    eventWait : CUDA_EVENT_WAIT_NODE_PARAMS
-        Event wait node parameters.
-    eventRecord : CUDA_EVENT_RECORD_NODE_PARAMS
-        Event record node parameters.
-    extSemSignal : CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2
-        External semaphore signal node parameters.
-    extSemWait : CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2
-        External semaphore wait node parameters.
-    alloc : CUDA_MEM_ALLOC_NODE_PARAMS_v2
-        Memory allocation node parameters.
-    free : CUDA_MEM_FREE_NODE_PARAMS
-        Memory free node parameters.
-    memOp : CUDA_BATCH_MEM_OP_NODE_PARAMS_v2
-        MemOp node parameters.
-    conditional : CUDA_CONDITIONAL_NODE_PARAMS
-        Conditional node parameters.
-    reserved2 : long long
-        Reserved bytes. Must be zero.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUgraphNodeParams_st* _val_ptr
-    cdef cydriver.CUgraphNodeParams_st* _ptr
-    cdef CUDA_KERNEL_NODE_PARAMS_v3 _kernel
-    cdef CUDA_MEMCPY_NODE_PARAMS _memcpy
-    cdef CUDA_MEMSET_NODE_PARAMS_v2 _memset
-    cdef CUDA_HOST_NODE_PARAMS_v2 _host
-    cdef CUDA_CHILD_GRAPH_NODE_PARAMS _graph
-    cdef CUDA_EVENT_WAIT_NODE_PARAMS _eventWait
-    cdef CUDA_EVENT_RECORD_NODE_PARAMS _eventRecord
-    cdef CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2 _extSemSignal
-    cdef CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2 _extSemWait
-    cdef CUDA_MEM_ALLOC_NODE_PARAMS_v2 _alloc
-    cdef CUDA_MEM_FREE_NODE_PARAMS _free
-    cdef CUDA_BATCH_MEM_OP_NODE_PARAMS_v2 _memOp
-    cdef CUDA_CONDITIONAL_NODE_PARAMS _conditional
-{{endif}}
-{{if 'struct CUdevSmResource_st' in found_types}}
-
-cdef class CUdevSmResource_st:
-    """
-    Attributes
-    ----------
-    smCount : unsigned int
-        The amount of streaming multiprocessors available in this resource.
-        This is an output parameter only, do not write to this field.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUdevSmResource_st __val
-    cdef cydriver.CUdevSmResource_st* _ptr
-{{endif}}
-{{if 'struct CUdevResource_st' in found_types}}
-
-cdef class CUdevResource_st:
-    """
-    Attributes
-    ----------
-    type : CUdevResourceType
-        Type of resource, dictates which union field was last set
-    _internal_padding : bytes
-
-    sm : CUdevSmResource
-        Resource corresponding to CU_DEV_RESOURCE_TYPE_SM ``. type.
-    _oversize : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUdevResource_st* _val_ptr
-    cdef cydriver.CUdevResource_st* _ptr
-    cdef CUdevSmResource _sm
-{{endif}}
-{{if True}}
-
-cdef class anon_union14:
-    """
-    Attributes
-    ----------
-    pArray : List[CUarray]
-
-    pPitch : List[Any]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUeglFrame_st* _ptr
-{{endif}}
-{{if True}}
-
-cdef class CUeglFrame_st:
-    """
-    CUDA EGLFrame structure Descriptor - structure defining one frame
-    of EGL.  Each frame may contain one or more planes depending on
-    whether the surface * is Multiplanar or not.
-
-    Attributes
-    ----------
-    frame : anon_union14
-
-    width : unsigned int
-        Width of first plane
-    height : unsigned int
-        Height of first plane
-    depth : unsigned int
-        Depth of first plane
-    pitch : unsigned int
-        Pitch of first plane
-    planeCount : unsigned int
-        Number of planes
-    numChannels : unsigned int
-        Number of channels for the plane
-    frameType : CUeglFrameType
-        Array or Pitch
-    eglColorFormat : CUeglColorFormat
-        CUDA EGL Color Format
-    cuFormat : CUarray_format
-        CUDA Array Format
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUeglFrame_st* _val_ptr
-    cdef cydriver.CUeglFrame_st* _ptr
-    cdef anon_union14 _frame
-{{endif}}
-{{if 'CUdeviceptr' in found_types}}
-
-cdef class CUdeviceptr:
-    """
-
-    CUDA device pointer CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUdeviceptr __val
-    cdef cydriver.CUdeviceptr* _ptr
-{{endif}}
-{{if 'CUdevice' in found_types}}
-
-cdef class CUdevice:
-    """
-
-    CUDA device
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUdevice __val
-    cdef cydriver.CUdevice* _ptr
-{{endif}}
-{{if 'CUtexObject' in found_types}}
-
-cdef class CUtexObject:
-    """
-
-    An opaque value that represents a CUDA texture object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUtexObject __val
-    cdef cydriver.CUtexObject* _ptr
-{{endif}}
-{{if 'CUsurfObject' in found_types}}
-
-cdef class CUsurfObject:
-    """
-
-    An opaque value that represents a CUDA surface object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUsurfObject __val
-    cdef cydriver.CUsurfObject* _ptr
-{{endif}}
-{{if 'CUgraphConditionalHandle' in found_types}}
-
-cdef class CUgraphConditionalHandle:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUgraphConditionalHandle __val
-    cdef cydriver.CUgraphConditionalHandle* _ptr
-{{endif}}
-{{if 'CUuuid' in found_types}}
-
-cdef class CUuuid(CUuuid_st):
-    """
-    Attributes
-    ----------
-    bytes : bytes
-        < CUDA definition of UUID
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemFabricHandle_v1' in found_types}}
-
-cdef class CUmemFabricHandle_v1(CUmemFabricHandle_st):
-    """
-    Fabric handle - An opaque handle representing a memory allocation
-    that can be exported to processes in same or different nodes. For
-    IPC between processes on different nodes they must be connected via
-    the NVSwitch fabric.
-
-    Attributes
-    ----------
-    data : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemFabricHandle' in found_types}}
-
-cdef class CUmemFabricHandle(CUmemFabricHandle_v1):
-    """
-    Fabric handle - An opaque handle representing a memory allocation
-    that can be exported to processes in same or different nodes. For
-    IPC between processes on different nodes they must be connected via
-    the NVSwitch fabric.
-
-    Attributes
-    ----------
-    data : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUipcEventHandle_v1' in found_types}}
-
-cdef class CUipcEventHandle_v1(CUipcEventHandle_st):
-    """
-    CUDA IPC event handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUipcEventHandle' in found_types}}
-
-cdef class CUipcEventHandle(CUipcEventHandle_v1):
-    """
-    CUDA IPC event handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUipcMemHandle_v1' in found_types}}
-
-cdef class CUipcMemHandle_v1(CUipcMemHandle_st):
-    """
-    CUDA IPC mem handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUipcMemHandle' in found_types}}
-
-cdef class CUipcMemHandle(CUipcMemHandle_v1):
-    """
-    CUDA IPC mem handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUstreamBatchMemOpParams_v1' in found_types}}
-
-cdef class CUstreamBatchMemOpParams_v1(CUstreamBatchMemOpParams_union):
-    """
-    Per-operation parameters for cuStreamBatchMemOp
-
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-
-    waitValue : CUstreamMemOpWaitValueParams_st
-
-    writeValue : CUstreamMemOpWriteValueParams_st
-
-    flushRemoteWrites : CUstreamMemOpFlushRemoteWritesParams_st
-
-    memoryBarrier : CUstreamMemOpMemoryBarrierParams_st
-
-    pad : List[cuuint64_t]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUstreamBatchMemOpParams' in found_types}}
-
-cdef class CUstreamBatchMemOpParams(CUstreamBatchMemOpParams_v1):
-    """
-    Per-operation parameters for cuStreamBatchMemOp
-
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-
-    waitValue : CUstreamMemOpWaitValueParams_st
-
-    writeValue : CUstreamMemOpWriteValueParams_st
-
-    flushRemoteWrites : CUstreamMemOpFlushRemoteWritesParams_st
-
-    memoryBarrier : CUstreamMemOpMemoryBarrierParams_st
-
-    pad : List[cuuint64_t]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_BATCH_MEM_OP_NODE_PARAMS_v1' in found_types}}
-
-cdef class CUDA_BATCH_MEM_OP_NODE_PARAMS_v1(CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st):
-    """
-    Attributes
-    ----------
-    ctx : CUcontext
-
-    count : unsigned int
-
-    paramArray : CUstreamBatchMemOpParams
-
-    flags : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_BATCH_MEM_OP_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_BATCH_MEM_OP_NODE_PARAMS(CUDA_BATCH_MEM_OP_NODE_PARAMS_v1):
-    """
-    Attributes
-    ----------
-    ctx : CUcontext
-
-    count : unsigned int
-
-    paramArray : CUstreamBatchMemOpParams
-
-    flags : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_BATCH_MEM_OP_NODE_PARAMS_v2' in found_types}}
-
-cdef class CUDA_BATCH_MEM_OP_NODE_PARAMS_v2(CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st):
-    """
-    Batch memory operation node parameters
-
-    Attributes
-    ----------
-    ctx : CUcontext
-        Context to use for the operations.
-    count : unsigned int
-        Number of operations in paramArray.
-    paramArray : CUstreamBatchMemOpParams
-        Array of batch memory operations.
-    flags : unsigned int
-        Flags to control the node.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUasyncNotificationInfo' in found_types}}
-
-cdef class CUasyncNotificationInfo(CUasyncNotificationInfo_st):
-    """
-    Information passed to the user via the async notification callback
-
-    Attributes
-    ----------
-    type : CUasyncNotificationType
-
-    info : anon_union2
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUdevprop_v1' in found_types}}
-
-cdef class CUdevprop_v1(CUdevprop_st):
-    """
-    Legacy device properties
-
-    Attributes
-    ----------
-    maxThreadsPerBlock : int
-        Maximum number of threads per block
-    maxThreadsDim : List[int]
-        Maximum size of each dimension of a block
-    maxGridSize : List[int]
-        Maximum size of each dimension of a grid
-    sharedMemPerBlock : int
-        Shared memory available per block in bytes
-    totalConstantMemory : int
-        Constant memory available on device in bytes
-    SIMDWidth : int
-        Warp size in threads
-    memPitch : int
-        Maximum pitch in bytes allowed by memory copies
-    regsPerBlock : int
-        32-bit registers available per block
-    clockRate : int
-        Clock frequency in kilohertz
-    textureAlign : int
-        Alignment requirement for textures
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUdevprop' in found_types}}
-
-cdef class CUdevprop(CUdevprop_v1):
-    """
-    Legacy device properties
-
-    Attributes
-    ----------
-    maxThreadsPerBlock : int
-        Maximum number of threads per block
-    maxThreadsDim : List[int]
-        Maximum size of each dimension of a block
-    maxGridSize : List[int]
-        Maximum size of each dimension of a grid
-    sharedMemPerBlock : int
-        Shared memory available per block in bytes
-    totalConstantMemory : int
-        Constant memory available on device in bytes
-    SIMDWidth : int
-        Warp size in threads
-    memPitch : int
-        Maximum pitch in bytes allowed by memory copies
-    regsPerBlock : int
-        32-bit registers available per block
-    clockRate : int
-        Clock frequency in kilohertz
-    textureAlign : int
-        Alignment requirement for textures
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUaccessPolicyWindow_v1' in found_types}}
-
-cdef class CUaccessPolicyWindow_v1(CUaccessPolicyWindow_st):
-    """
-    Specifies an access policy for a window, a contiguous extent of
-    memory beginning at base_ptr and ending at base_ptr + num_bytes.
-    num_bytes is limited by
-    CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE. Partition into
-    many segments and assign segments such that: sum of "hit segments"
-    / window == approx. ratio. sum of "miss segments" / window ==
-    approx 1-ratio. Segments and ratio specifications are fitted to the
-    capabilities of the architecture. Accesses in a hit segment apply
-    the hitProp access policy. Accesses in a miss segment apply the
-    missProp access policy.
-
-    Attributes
-    ----------
-    base_ptr : Any
-        Starting address of the access policy window. CUDA driver may align
-        it.
-    num_bytes : size_t
-        Size in bytes of the window policy. CUDA driver may restrict the
-        maximum size and alignment.
-    hitRatio : float
-        hitRatio specifies percentage of lines assigned hitProp, rest are
-        assigned missProp.
-    hitProp : CUaccessProperty
-        CUaccessProperty set for hit.
-    missProp : CUaccessProperty
-        CUaccessProperty set for miss. Must be either NORMAL or STREAMING
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUaccessPolicyWindow' in found_types}}
-
-cdef class CUaccessPolicyWindow(CUaccessPolicyWindow_v1):
-    """
-    Specifies an access policy for a window, a contiguous extent of
-    memory beginning at base_ptr and ending at base_ptr + num_bytes.
-    num_bytes is limited by
-    CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE. Partition into
-    many segments and assign segments such that: sum of "hit segments"
-    / window == approx. ratio. sum of "miss segments" / window ==
-    approx 1-ratio. Segments and ratio specifications are fitted to the
-    capabilities of the architecture. Accesses in a hit segment apply
-    the hitProp access policy. Accesses in a miss segment apply the
-    missProp access policy.
-
-    Attributes
-    ----------
-    base_ptr : Any
-        Starting address of the access policy window. CUDA driver may align
-        it.
-    num_bytes : size_t
-        Size in bytes of the window policy. CUDA driver may restrict the
-        maximum size and alignment.
-    hitRatio : float
-        hitRatio specifies percentage of lines assigned hitProp, rest are
-        assigned missProp.
-    hitProp : CUaccessProperty
-        CUaccessProperty set for hit.
-    missProp : CUaccessProperty
-        CUaccessProperty set for miss. Must be either NORMAL or STREAMING
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_KERNEL_NODE_PARAMS_v1' in found_types}}
-
-cdef class CUDA_KERNEL_NODE_PARAMS_v1(CUDA_KERNEL_NODE_PARAMS_st):
-    """
-    GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : Any
-        Extra options
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_KERNEL_NODE_PARAMS_v2' in found_types}}
-
-cdef class CUDA_KERNEL_NODE_PARAMS_v2(CUDA_KERNEL_NODE_PARAMS_v2_st):
-    """
-    GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : Any
-        Extra options
-    kern : CUkernel
-        Kernel to launch, will only be referenced if func is NULL
-    ctx : CUcontext
-        Context for the kernel task to run in. The value NULL will indicate
-        the current context should be used by the api. This field is
-        ignored if func is set.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_KERNEL_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_KERNEL_NODE_PARAMS(CUDA_KERNEL_NODE_PARAMS_v2):
-    """
-    GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : Any
-        Extra options
-    kern : CUkernel
-        Kernel to launch, will only be referenced if func is NULL
-    ctx : CUcontext
-        Context for the kernel task to run in. The value NULL will indicate
-        the current context should be used by the api. This field is
-        ignored if func is set.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_KERNEL_NODE_PARAMS_v3' in found_types}}
-
-cdef class CUDA_KERNEL_NODE_PARAMS_v3(CUDA_KERNEL_NODE_PARAMS_v3_st):
-    """
-    GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : Any
-        Extra options
-    kern : CUkernel
-        Kernel to launch, will only be referenced if func is NULL
-    ctx : CUcontext
-        Context for the kernel task to run in. The value NULL will indicate
-        the current context should be used by the api. This field is
-        ignored if func is set.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEMSET_NODE_PARAMS_v1' in found_types}}
-
-cdef class CUDA_MEMSET_NODE_PARAMS_v1(CUDA_MEMSET_NODE_PARAMS_st):
-    """
-    Memset node parameters
-
-    Attributes
-    ----------
-    dst : CUdeviceptr
-        Destination device pointer
-    pitch : size_t
-        Pitch of destination device pointer. Unused if height is 1
-    value : unsigned int
-        Value to be set
-    elementSize : unsigned int
-        Size of each element in bytes. Must be 1, 2, or 4.
-    width : size_t
-        Width of the row in elements
-    height : size_t
-        Number of rows
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEMSET_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_MEMSET_NODE_PARAMS(CUDA_MEMSET_NODE_PARAMS_v1):
-    """
-    Memset node parameters
-
-    Attributes
-    ----------
-    dst : CUdeviceptr
-        Destination device pointer
-    pitch : size_t
-        Pitch of destination device pointer. Unused if height is 1
-    value : unsigned int
-        Value to be set
-    elementSize : unsigned int
-        Size of each element in bytes. Must be 1, 2, or 4.
-    width : size_t
-        Width of the row in elements
-    height : size_t
-        Number of rows
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEMSET_NODE_PARAMS_v2' in found_types}}
-
-cdef class CUDA_MEMSET_NODE_PARAMS_v2(CUDA_MEMSET_NODE_PARAMS_v2_st):
-    """
-    Memset node parameters
-
-    Attributes
-    ----------
-    dst : CUdeviceptr
-        Destination device pointer
-    pitch : size_t
-        Pitch of destination device pointer. Unused if height is 1
-    value : unsigned int
-        Value to be set
-    elementSize : unsigned int
-        Size of each element in bytes. Must be 1, 2, or 4.
-    width : size_t
-        Width of the row in elements
-    height : size_t
-        Number of rows
-    ctx : CUcontext
-        Context on which to run the node
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_HOST_NODE_PARAMS_v1' in found_types}}
-
-cdef class CUDA_HOST_NODE_PARAMS_v1(CUDA_HOST_NODE_PARAMS_st):
-    """
-    Host node parameters
-
-    Attributes
-    ----------
-    fn : CUhostFn
-        The function to call when the node executes
-    userData : Any
-        Argument to pass to the function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_HOST_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_HOST_NODE_PARAMS(CUDA_HOST_NODE_PARAMS_v1):
-    """
-    Host node parameters
-
-    Attributes
-    ----------
-    fn : CUhostFn
-        The function to call when the node executes
-    userData : Any
-        Argument to pass to the function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_HOST_NODE_PARAMS_v2' in found_types}}
-
-cdef class CUDA_HOST_NODE_PARAMS_v2(CUDA_HOST_NODE_PARAMS_v2_st):
-    """
-    Host node parameters
-
-    Attributes
-    ----------
-    fn : CUhostFn
-        The function to call when the node executes
-    userData : Any
-        Argument to pass to the function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUgraphEdgeData' in found_types}}
-
-cdef class CUgraphEdgeData(CUgraphEdgeData_st):
-    """
-    Optional annotation for edges in a CUDA graph. Note, all edges
-    implicitly have annotations and default to a zero-initialized value
-    if not specified. A zero-initialized struct indicates a standard
-    full serialization of two nodes with memory visibility.
-
-    Attributes
-    ----------
-    from_port : bytes
-        This indicates when the dependency is triggered from the upstream
-        node on the edge. The meaning is specfic to the node type. A value
-        of 0 in all cases means full completion of the upstream node, with
-        memory visibility to the downstream node or portion thereof
-        (indicated by `to_port`).   Only kernel nodes define non-zero
-        ports. A kernel node can use the following output port types:
-        CU_GRAPH_KERNEL_NODE_PORT_DEFAULT,
-        CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC, or
-        CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER.
-    to_port : bytes
-        This indicates what portion of the downstream node is dependent on
-        the upstream node or portion thereof (indicated by `from_port`).
-        The meaning is specific to the node type. A value of 0 in all cases
-        means the entirety of the downstream node is dependent on the
-        upstream work.   Currently no node types define non-zero ports.
-        Accordingly, this field must be set to zero.
-    type : bytes
-        This should be populated with a value from CUgraphDependencyType.
-        (It is typed as char due to compiler-specific layout of bitfields.)
-        See CUgraphDependencyType.
-    reserved : bytes
-        These bytes are unused and must be zeroed. This ensures
-        compatibility if additional fields are added in the future.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_GRAPH_INSTANTIATE_PARAMS' in found_types}}
-
-cdef class CUDA_GRAPH_INSTANTIATE_PARAMS(CUDA_GRAPH_INSTANTIATE_PARAMS_st):
-    """
-    Graph instantiation parameters
-
-    Attributes
-    ----------
-    flags : cuuint64_t
-        Instantiation flags
-    hUploadStream : CUstream
-        Upload stream
-    hErrNode_out : CUgraphNode
-        The node which caused instantiation to fail, if any
-    result_out : CUgraphInstantiateResult
-        Whether instantiation was successful. If it failed, the reason why
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUlaunchMemSyncDomainMap' in found_types}}
-
-cdef class CUlaunchMemSyncDomainMap(CUlaunchMemSyncDomainMap_st):
-    """
-    Memory Synchronization Domain map  See ::cudaLaunchMemSyncDomain.
-    By default, kernels are launched in domain 0. Kernel launched with
-    CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE will have a different domain ID.
-    User may also alter the domain ID with CUlaunchMemSyncDomainMap for
-    a specific stream / graph node / kernel launch. See
-    CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP.  Domain ID range is
-    available through CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT.
-
-    Attributes
-    ----------
-    default_ : bytes
-        The default domain ID to use for designated kernels
-    remote : bytes
-        The remote domain ID to use for designated kernels
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUlaunchAttributeValue' in found_types}}
-
-cdef class CUlaunchAttributeValue(CUlaunchAttributeValue_union):
-    """
-    Launch attributes union; used as value field of CUlaunchAttribute
-
-    Attributes
-    ----------
-    pad : bytes
-
-    accessPolicyWindow : CUaccessPolicyWindow
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW.
-    cooperative : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_COOPERATIVE. Nonzero
-        indicates a cooperative kernel (see cuLaunchCooperativeKernel).
-    syncPolicy : CUsynchronizationPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY.
-        ::CUsynchronizationPolicy for work queued up in this stream
-    clusterDim : anon_struct1
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
-        that represents the desired cluster dimensions for the kernel.
-        Opaque type with the following fields: - `x` - The X dimension of
-        the cluster, in blocks. Must be a divisor of the grid X dimension.
-        - `y` - The Y dimension of the cluster, in blocks. Must be a
-        divisor of the grid Y dimension.    - `z` - The Z dimension of the
-        cluster, in blocks. Must be a divisor of the grid Z dimension.
-    clusterSchedulingPolicyPreference : CUclusterSchedulingPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE. Cluster
-        scheduling policy preference for the kernel.
-    programmaticStreamSerializationAllowed : int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION.
-    programmaticEvent : anon_struct2
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT
-        with the following fields: - `CUevent` event - Event to fire when
-        all blocks trigger it.    - `Event` record flags, see
-        cuEventRecordWithFlags. Does not accept :CU_EVENT_RECORD_EXTERNAL.
-        - `triggerAtBlockStart` - If this is set to non-0, each block
-        launch will automatically trigger the event.
-    launchCompletionEvent : anon_struct3
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT with the following
-        fields: - `CUevent` event - Event to fire when the last block
-        launches    - `int` flags; - Event record flags, see
-        cuEventRecordWithFlags. Does not accept CU_EVENT_RECORD_EXTERNAL.
-    priority : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PRIORITY. Execution
-        priority of the kernel.
-    memSyncDomainMap : CUlaunchMemSyncDomainMap
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP.
-        See CUlaunchMemSyncDomainMap.
-    memSyncDomain : CUlaunchMemSyncDomain
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN.
-        See::CUlaunchMemSyncDomain
-    deviceUpdatableKernelNode : anon_struct4
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. with the
-        following fields: - `int` deviceUpdatable - Whether or not the
-        resulting kernel node should be device-updatable.    -
-        `CUgraphDeviceNode` devNode - Returns a handle to pass to the
-        various device-side update functions.
-    sharedMemCarveout : unsigned int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUlaunchAttribute' in found_types}}
-
-cdef class CUlaunchAttribute(CUlaunchAttribute_st):
-    """
-    Launch attribute
-
-    Attributes
-    ----------
-    id : CUlaunchAttributeID
-        Attribute to set
-    value : CUlaunchAttributeValue
-        Value of the attribute
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUlaunchConfig' in found_types}}
-
-cdef class CUlaunchConfig(CUlaunchConfig_st):
-    """
-    CUDA extensible launch configuration
-
-    Attributes
-    ----------
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    hStream : CUstream
-        Stream identifier
-    attrs : CUlaunchAttribute
-        List of attributes; nullable if CUlaunchConfig::numAttrs == 0
-    numAttrs : unsigned int
-        Number of attributes populated in CUlaunchConfig::attrs
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUkernelNodeAttrValue_v1' in found_types}}
-
-cdef class CUkernelNodeAttrValue_v1(CUlaunchAttributeValue):
-    """
-    Launch attributes union; used as value field of CUlaunchAttribute
-
-    Attributes
-    ----------
-    pad : bytes
-
-    accessPolicyWindow : CUaccessPolicyWindow
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW.
-    cooperative : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_COOPERATIVE. Nonzero
-        indicates a cooperative kernel (see cuLaunchCooperativeKernel).
-    syncPolicy : CUsynchronizationPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY.
-        ::CUsynchronizationPolicy for work queued up in this stream
-    clusterDim : anon_struct1
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
-        that represents the desired cluster dimensions for the kernel.
-        Opaque type with the following fields: - `x` - The X dimension of
-        the cluster, in blocks. Must be a divisor of the grid X dimension.
-        - `y` - The Y dimension of the cluster, in blocks. Must be a
-        divisor of the grid Y dimension.    - `z` - The Z dimension of the
-        cluster, in blocks. Must be a divisor of the grid Z dimension.
-    clusterSchedulingPolicyPreference : CUclusterSchedulingPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE. Cluster
-        scheduling policy preference for the kernel.
-    programmaticStreamSerializationAllowed : int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION.
-    programmaticEvent : anon_struct2
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT
-        with the following fields: - `CUevent` event - Event to fire when
-        all blocks trigger it.    - `Event` record flags, see
-        cuEventRecordWithFlags. Does not accept :CU_EVENT_RECORD_EXTERNAL.
-        - `triggerAtBlockStart` - If this is set to non-0, each block
-        launch will automatically trigger the event.
-    launchCompletionEvent : anon_struct3
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT with the following
-        fields: - `CUevent` event - Event to fire when the last block
-        launches    - `int` flags; - Event record flags, see
-        cuEventRecordWithFlags. Does not accept CU_EVENT_RECORD_EXTERNAL.
-    priority : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PRIORITY. Execution
-        priority of the kernel.
-    memSyncDomainMap : CUlaunchMemSyncDomainMap
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP.
-        See CUlaunchMemSyncDomainMap.
-    memSyncDomain : CUlaunchMemSyncDomain
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN.
-        See::CUlaunchMemSyncDomain
-    deviceUpdatableKernelNode : anon_struct4
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. with the
-        following fields: - `int` deviceUpdatable - Whether or not the
-        resulting kernel node should be device-updatable.    -
-        `CUgraphDeviceNode` devNode - Returns a handle to pass to the
-        various device-side update functions.
-    sharedMemCarveout : unsigned int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUkernelNodeAttrValue' in found_types}}
-
-cdef class CUkernelNodeAttrValue(CUkernelNodeAttrValue_v1):
-    """
-    Launch attributes union; used as value field of CUlaunchAttribute
-
-    Attributes
-    ----------
-    pad : bytes
-
-    accessPolicyWindow : CUaccessPolicyWindow
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW.
-    cooperative : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_COOPERATIVE. Nonzero
-        indicates a cooperative kernel (see cuLaunchCooperativeKernel).
-    syncPolicy : CUsynchronizationPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY.
-        ::CUsynchronizationPolicy for work queued up in this stream
-    clusterDim : anon_struct1
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
-        that represents the desired cluster dimensions for the kernel.
-        Opaque type with the following fields: - `x` - The X dimension of
-        the cluster, in blocks. Must be a divisor of the grid X dimension.
-        - `y` - The Y dimension of the cluster, in blocks. Must be a
-        divisor of the grid Y dimension.    - `z` - The Z dimension of the
-        cluster, in blocks. Must be a divisor of the grid Z dimension.
-    clusterSchedulingPolicyPreference : CUclusterSchedulingPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE. Cluster
-        scheduling policy preference for the kernel.
-    programmaticStreamSerializationAllowed : int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION.
-    programmaticEvent : anon_struct2
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT
-        with the following fields: - `CUevent` event - Event to fire when
-        all blocks trigger it.    - `Event` record flags, see
-        cuEventRecordWithFlags. Does not accept :CU_EVENT_RECORD_EXTERNAL.
-        - `triggerAtBlockStart` - If this is set to non-0, each block
-        launch will automatically trigger the event.
-    launchCompletionEvent : anon_struct3
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT with the following
-        fields: - `CUevent` event - Event to fire when the last block
-        launches    - `int` flags; - Event record flags, see
-        cuEventRecordWithFlags. Does not accept CU_EVENT_RECORD_EXTERNAL.
-    priority : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PRIORITY. Execution
-        priority of the kernel.
-    memSyncDomainMap : CUlaunchMemSyncDomainMap
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP.
-        See CUlaunchMemSyncDomainMap.
-    memSyncDomain : CUlaunchMemSyncDomain
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN.
-        See::CUlaunchMemSyncDomain
-    deviceUpdatableKernelNode : anon_struct4
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. with the
-        following fields: - `int` deviceUpdatable - Whether or not the
-        resulting kernel node should be device-updatable.    -
-        `CUgraphDeviceNode` devNode - Returns a handle to pass to the
-        various device-side update functions.
-    sharedMemCarveout : unsigned int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUstreamAttrValue_v1' in found_types}}
-
-cdef class CUstreamAttrValue_v1(CUlaunchAttributeValue):
-    """
-    Launch attributes union; used as value field of CUlaunchAttribute
-
-    Attributes
-    ----------
-    pad : bytes
-
-    accessPolicyWindow : CUaccessPolicyWindow
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW.
-    cooperative : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_COOPERATIVE. Nonzero
-        indicates a cooperative kernel (see cuLaunchCooperativeKernel).
-    syncPolicy : CUsynchronizationPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY.
-        ::CUsynchronizationPolicy for work queued up in this stream
-    clusterDim : anon_struct1
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
-        that represents the desired cluster dimensions for the kernel.
-        Opaque type with the following fields: - `x` - The X dimension of
-        the cluster, in blocks. Must be a divisor of the grid X dimension.
-        - `y` - The Y dimension of the cluster, in blocks. Must be a
-        divisor of the grid Y dimension.    - `z` - The Z dimension of the
-        cluster, in blocks. Must be a divisor of the grid Z dimension.
-    clusterSchedulingPolicyPreference : CUclusterSchedulingPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE. Cluster
-        scheduling policy preference for the kernel.
-    programmaticStreamSerializationAllowed : int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION.
-    programmaticEvent : anon_struct2
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT
-        with the following fields: - `CUevent` event - Event to fire when
-        all blocks trigger it.    - `Event` record flags, see
-        cuEventRecordWithFlags. Does not accept :CU_EVENT_RECORD_EXTERNAL.
-        - `triggerAtBlockStart` - If this is set to non-0, each block
-        launch will automatically trigger the event.
-    launchCompletionEvent : anon_struct3
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT with the following
-        fields: - `CUevent` event - Event to fire when the last block
-        launches    - `int` flags; - Event record flags, see
-        cuEventRecordWithFlags. Does not accept CU_EVENT_RECORD_EXTERNAL.
-    priority : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PRIORITY. Execution
-        priority of the kernel.
-    memSyncDomainMap : CUlaunchMemSyncDomainMap
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP.
-        See CUlaunchMemSyncDomainMap.
-    memSyncDomain : CUlaunchMemSyncDomain
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN.
-        See::CUlaunchMemSyncDomain
-    deviceUpdatableKernelNode : anon_struct4
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. with the
-        following fields: - `int` deviceUpdatable - Whether or not the
-        resulting kernel node should be device-updatable.    -
-        `CUgraphDeviceNode` devNode - Returns a handle to pass to the
-        various device-side update functions.
-    sharedMemCarveout : unsigned int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUstreamAttrValue' in found_types}}
-
-cdef class CUstreamAttrValue(CUstreamAttrValue_v1):
-    """
-    Launch attributes union; used as value field of CUlaunchAttribute
-
-    Attributes
-    ----------
-    pad : bytes
-
-    accessPolicyWindow : CUaccessPolicyWindow
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW.
-    cooperative : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_COOPERATIVE. Nonzero
-        indicates a cooperative kernel (see cuLaunchCooperativeKernel).
-    syncPolicy : CUsynchronizationPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY.
-        ::CUsynchronizationPolicy for work queued up in this stream
-    clusterDim : anon_struct1
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
-        that represents the desired cluster dimensions for the kernel.
-        Opaque type with the following fields: - `x` - The X dimension of
-        the cluster, in blocks. Must be a divisor of the grid X dimension.
-        - `y` - The Y dimension of the cluster, in blocks. Must be a
-        divisor of the grid Y dimension.    - `z` - The Z dimension of the
-        cluster, in blocks. Must be a divisor of the grid Z dimension.
-    clusterSchedulingPolicyPreference : CUclusterSchedulingPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE. Cluster
-        scheduling policy preference for the kernel.
-    programmaticStreamSerializationAllowed : int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION.
-    programmaticEvent : anon_struct2
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT
-        with the following fields: - `CUevent` event - Event to fire when
-        all blocks trigger it.    - `Event` record flags, see
-        cuEventRecordWithFlags. Does not accept :CU_EVENT_RECORD_EXTERNAL.
-        - `triggerAtBlockStart` - If this is set to non-0, each block
-        launch will automatically trigger the event.
-    launchCompletionEvent : anon_struct3
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT with the following
-        fields: - `CUevent` event - Event to fire when the last block
-        launches    - `int` flags; - Event record flags, see
-        cuEventRecordWithFlags. Does not accept CU_EVENT_RECORD_EXTERNAL.
-    priority : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PRIORITY. Execution
-        priority of the kernel.
-    memSyncDomainMap : CUlaunchMemSyncDomainMap
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP.
-        See CUlaunchMemSyncDomainMap.
-    memSyncDomain : CUlaunchMemSyncDomain
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN.
-        See::CUlaunchMemSyncDomain
-    deviceUpdatableKernelNode : anon_struct4
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. with the
-        following fields: - `int` deviceUpdatable - Whether or not the
-        resulting kernel node should be device-updatable.    -
-        `CUgraphDeviceNode` devNode - Returns a handle to pass to the
-        various device-side update functions.
-    sharedMemCarveout : unsigned int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUexecAffinitySmCount_v1' in found_types}}
-
-cdef class CUexecAffinitySmCount_v1(CUexecAffinitySmCount_st):
-    """
-    Value for CU_EXEC_AFFINITY_TYPE_SM_COUNT
-
-    Attributes
-    ----------
-    val : unsigned int
-        The number of SMs the context is limited to use.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUexecAffinitySmCount' in found_types}}
-
-cdef class CUexecAffinitySmCount(CUexecAffinitySmCount_v1):
-    """
-    Value for CU_EXEC_AFFINITY_TYPE_SM_COUNT
-
-    Attributes
-    ----------
-    val : unsigned int
-        The number of SMs the context is limited to use.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUexecAffinityParam_v1' in found_types}}
-
-cdef class CUexecAffinityParam_v1(CUexecAffinityParam_st):
-    """
-    Execution Affinity Parameters
-
-    Attributes
-    ----------
-    type : CUexecAffinityType
-
-    param : anon_union3
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUexecAffinityParam' in found_types}}
-
-cdef class CUexecAffinityParam(CUexecAffinityParam_v1):
-    """
-    Execution Affinity Parameters
-
-    Attributes
-    ----------
-    type : CUexecAffinityType
-
-    param : anon_union3
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUctxCigParam' in found_types}}
-
-cdef class CUctxCigParam(CUctxCigParam_st):
-    """
-    CIG Context Create Params
-
-    Attributes
-    ----------
-    sharedDataType : CUcigDataType
-
-    sharedData : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUctxCreateParams' in found_types}}
-
-cdef class CUctxCreateParams(CUctxCreateParams_st):
-    """
-    Params for creating CUDA context Exactly one of execAffinityParams
-    and cigParams must be non-NULL.
-
-    Attributes
-    ----------
-    execAffinityParams : CUexecAffinityParam
-
-    numExecAffinityParams : int
-
-    cigParams : CUctxCigParam
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUlibraryHostUniversalFunctionAndDataTable' in found_types}}
-
-cdef class CUlibraryHostUniversalFunctionAndDataTable(CUlibraryHostUniversalFunctionAndDataTable_st):
-    """
-    Attributes
-    ----------
-    functionTable : Any
-
-    functionWindowSize : size_t
-
-    dataTable : Any
-
-    dataWindowSize : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEMCPY2D_v2' in found_types}}
-
-cdef class CUDA_MEMCPY2D_v2(CUDA_MEMCPY2D_st):
-    """
-    2D memory copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    WidthInBytes : size_t
-        Width of 2D memory copy in bytes
-    Height : size_t
-        Height of 2D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEMCPY2D' in found_types}}
-
-cdef class CUDA_MEMCPY2D(CUDA_MEMCPY2D_v2):
-    """
-    2D memory copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    WidthInBytes : size_t
-        Width of 2D memory copy in bytes
-    Height : size_t
-        Height of 2D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEMCPY3D_v2' in found_types}}
-
-cdef class CUDA_MEMCPY3D_v2(CUDA_MEMCPY3D_st):
-    """
-    3D memory copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcZ : size_t
-        Source Z
-    srcLOD : size_t
-        Source LOD
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    reserved0 : Any
-        Must be NULL
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    srcHeight : size_t
-        Source height (ignored when src is array; may be 0 if Depth==1)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstZ : size_t
-        Destination Z
-    dstLOD : size_t
-        Destination LOD
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    reserved1 : Any
-        Must be NULL
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    dstHeight : size_t
-        Destination height (ignored when dst is array; may be 0 if
-        Depth==1)
-    WidthInBytes : size_t
-        Width of 3D memory copy in bytes
-    Height : size_t
-        Height of 3D memory copy
-    Depth : size_t
-        Depth of 3D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEMCPY3D' in found_types}}
-
-cdef class CUDA_MEMCPY3D(CUDA_MEMCPY3D_v2):
-    """
-    3D memory copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcZ : size_t
-        Source Z
-    srcLOD : size_t
-        Source LOD
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    reserved0 : Any
-        Must be NULL
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    srcHeight : size_t
-        Source height (ignored when src is array; may be 0 if Depth==1)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstZ : size_t
-        Destination Z
-    dstLOD : size_t
-        Destination LOD
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    reserved1 : Any
-        Must be NULL
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    dstHeight : size_t
-        Destination height (ignored when dst is array; may be 0 if
-        Depth==1)
-    WidthInBytes : size_t
-        Width of 3D memory copy in bytes
-    Height : size_t
-        Height of 3D memory copy
-    Depth : size_t
-        Depth of 3D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEMCPY3D_PEER_v1' in found_types}}
-
-cdef class CUDA_MEMCPY3D_PEER_v1(CUDA_MEMCPY3D_PEER_st):
-    """
-    3D memory cross-context copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcZ : size_t
-        Source Z
-    srcLOD : size_t
-        Source LOD
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    srcContext : CUcontext
-        Source context (ignored with srcMemoryType is CU_MEMORYTYPE_ARRAY)
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    srcHeight : size_t
-        Source height (ignored when src is array; may be 0 if Depth==1)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstZ : size_t
-        Destination Z
-    dstLOD : size_t
-        Destination LOD
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    dstContext : CUcontext
-        Destination context (ignored with dstMemoryType is
-        CU_MEMORYTYPE_ARRAY)
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    dstHeight : size_t
-        Destination height (ignored when dst is array; may be 0 if
-        Depth==1)
-    WidthInBytes : size_t
-        Width of 3D memory copy in bytes
-    Height : size_t
-        Height of 3D memory copy
-    Depth : size_t
-        Depth of 3D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEMCPY3D_PEER' in found_types}}
-
-cdef class CUDA_MEMCPY3D_PEER(CUDA_MEMCPY3D_PEER_v1):
-    """
-    3D memory cross-context copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcZ : size_t
-        Source Z
-    srcLOD : size_t
-        Source LOD
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    srcContext : CUcontext
-        Source context (ignored with srcMemoryType is CU_MEMORYTYPE_ARRAY)
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    srcHeight : size_t
-        Source height (ignored when src is array; may be 0 if Depth==1)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstZ : size_t
-        Destination Z
-    dstLOD : size_t
-        Destination LOD
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    dstContext : CUcontext
-        Destination context (ignored with dstMemoryType is
-        CU_MEMORYTYPE_ARRAY)
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    dstHeight : size_t
-        Destination height (ignored when dst is array; may be 0 if
-        Depth==1)
-    WidthInBytes : size_t
-        Width of 3D memory copy in bytes
-    Height : size_t
-        Height of 3D memory copy
-    Depth : size_t
-        Depth of 3D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEMCPY_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_MEMCPY_NODE_PARAMS(CUDA_MEMCPY_NODE_PARAMS_st):
-    """
-    Memcpy node parameters
-
-    Attributes
-    ----------
-    flags : int
-        Must be zero
-    reserved : int
-        Must be zero
-    copyCtx : CUcontext
-        Context on which to run the node
-    copyParams : CUDA_MEMCPY3D
-        Parameters for the memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_ARRAY_DESCRIPTOR_v2' in found_types}}
-
-cdef class CUDA_ARRAY_DESCRIPTOR_v2(CUDA_ARRAY_DESCRIPTOR_st):
-    """
-    Array descriptor
-
-    Attributes
-    ----------
-    Width : size_t
-        Width of array
-    Height : size_t
-        Height of array
-    Format : CUarray_format
-        Array format
-    NumChannels : unsigned int
-        Channels per array element
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_ARRAY_DESCRIPTOR' in found_types}}
-
-cdef class CUDA_ARRAY_DESCRIPTOR(CUDA_ARRAY_DESCRIPTOR_v2):
-    """
-    Array descriptor
-
-    Attributes
-    ----------
-    Width : size_t
-        Width of array
-    Height : size_t
-        Height of array
-    Format : CUarray_format
-        Array format
-    NumChannels : unsigned int
-        Channels per array element
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_ARRAY3D_DESCRIPTOR_v2' in found_types}}
-
-cdef class CUDA_ARRAY3D_DESCRIPTOR_v2(CUDA_ARRAY3D_DESCRIPTOR_st):
-    """
-    3D array descriptor
-
-    Attributes
-    ----------
-    Width : size_t
-        Width of 3D array
-    Height : size_t
-        Height of 3D array
-    Depth : size_t
-        Depth of 3D array
-    Format : CUarray_format
-        Array format
-    NumChannels : unsigned int
-        Channels per array element
-    Flags : unsigned int
-        Flags
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_ARRAY3D_DESCRIPTOR' in found_types}}
-
-cdef class CUDA_ARRAY3D_DESCRIPTOR(CUDA_ARRAY3D_DESCRIPTOR_v2):
-    """
-    3D array descriptor
-
-    Attributes
-    ----------
-    Width : size_t
-        Width of 3D array
-    Height : size_t
-        Height of 3D array
-    Depth : size_t
-        Depth of 3D array
-    Format : CUarray_format
-        Array format
-    NumChannels : unsigned int
-        Channels per array element
-    Flags : unsigned int
-        Flags
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_ARRAY_SPARSE_PROPERTIES_v1' in found_types}}
-
-cdef class CUDA_ARRAY_SPARSE_PROPERTIES_v1(CUDA_ARRAY_SPARSE_PROPERTIES_st):
-    """
-    CUDA array sparse properties
-
-    Attributes
-    ----------
-    tileExtent : anon_struct5
-
-    miptailFirstLevel : unsigned int
-        First mip level at which the mip tail begins.
-    miptailSize : unsigned long long
-        Total size of the mip tail.
-    flags : unsigned int
-        Flags will either be zero or
-        CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_ARRAY_SPARSE_PROPERTIES' in found_types}}
-
-cdef class CUDA_ARRAY_SPARSE_PROPERTIES(CUDA_ARRAY_SPARSE_PROPERTIES_v1):
-    """
-    CUDA array sparse properties
-
-    Attributes
-    ----------
-    tileExtent : anon_struct5
-
-    miptailFirstLevel : unsigned int
-        First mip level at which the mip tail begins.
-    miptailSize : unsigned long long
-        Total size of the mip tail.
-    flags : unsigned int
-        Flags will either be zero or
-        CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_ARRAY_MEMORY_REQUIREMENTS_v1' in found_types}}
-
-cdef class CUDA_ARRAY_MEMORY_REQUIREMENTS_v1(CUDA_ARRAY_MEMORY_REQUIREMENTS_st):
-    """
-    CUDA array memory requirements
-
-    Attributes
-    ----------
-    size : size_t
-        Total required memory size
-    alignment : size_t
-        alignment requirement
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_ARRAY_MEMORY_REQUIREMENTS' in found_types}}
-
-cdef class CUDA_ARRAY_MEMORY_REQUIREMENTS(CUDA_ARRAY_MEMORY_REQUIREMENTS_v1):
-    """
-    CUDA array memory requirements
-
-    Attributes
-    ----------
-    size : size_t
-        Total required memory size
-    alignment : size_t
-        alignment requirement
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_RESOURCE_DESC_v1' in found_types}}
-
-cdef class CUDA_RESOURCE_DESC_v1(CUDA_RESOURCE_DESC_st):
-    """
-    CUDA Resource descriptor
-
-    Attributes
-    ----------
-    resType : CUresourcetype
-        Resource type
-    res : anon_union4
-
-    flags : unsigned int
-        Flags (must be zero)
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_RESOURCE_DESC' in found_types}}
-
-cdef class CUDA_RESOURCE_DESC(CUDA_RESOURCE_DESC_v1):
-    """
-    CUDA Resource descriptor
-
-    Attributes
-    ----------
-    resType : CUresourcetype
-        Resource type
-    res : anon_union4
-
-    flags : unsigned int
-        Flags (must be zero)
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_TEXTURE_DESC_v1' in found_types}}
-
-cdef class CUDA_TEXTURE_DESC_v1(CUDA_TEXTURE_DESC_st):
-    """
-    Texture descriptor
-
-    Attributes
-    ----------
-    addressMode : List[CUaddress_mode]
-        Address modes
-    filterMode : CUfilter_mode
-        Filter mode
-    flags : unsigned int
-        Flags
-    maxAnisotropy : unsigned int
-        Maximum anisotropy ratio
-    mipmapFilterMode : CUfilter_mode
-        Mipmap filter mode
-    mipmapLevelBias : float
-        Mipmap level bias
-    minMipmapLevelClamp : float
-        Mipmap minimum level clamp
-    maxMipmapLevelClamp : float
-        Mipmap maximum level clamp
-    borderColor : List[float]
-        Border Color
-    reserved : List[int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_TEXTURE_DESC' in found_types}}
-
-cdef class CUDA_TEXTURE_DESC(CUDA_TEXTURE_DESC_v1):
-    """
-    Texture descriptor
-
-    Attributes
-    ----------
-    addressMode : List[CUaddress_mode]
-        Address modes
-    filterMode : CUfilter_mode
-        Filter mode
-    flags : unsigned int
-        Flags
-    maxAnisotropy : unsigned int
-        Maximum anisotropy ratio
-    mipmapFilterMode : CUfilter_mode
-        Mipmap filter mode
-    mipmapLevelBias : float
-        Mipmap level bias
-    minMipmapLevelClamp : float
-        Mipmap minimum level clamp
-    maxMipmapLevelClamp : float
-        Mipmap maximum level clamp
-    borderColor : List[float]
-        Border Color
-    reserved : List[int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_RESOURCE_VIEW_DESC_v1' in found_types}}
-
-cdef class CUDA_RESOURCE_VIEW_DESC_v1(CUDA_RESOURCE_VIEW_DESC_st):
-    """
-    Resource view descriptor
-
-    Attributes
-    ----------
-    format : CUresourceViewFormat
-        Resource view format
-    width : size_t
-        Width of the resource view
-    height : size_t
-        Height of the resource view
-    depth : size_t
-        Depth of the resource view
-    firstMipmapLevel : unsigned int
-        First defined mipmap level
-    lastMipmapLevel : unsigned int
-        Last defined mipmap level
-    firstLayer : unsigned int
-        First layer index
-    lastLayer : unsigned int
-        Last layer index
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_RESOURCE_VIEW_DESC' in found_types}}
-
-cdef class CUDA_RESOURCE_VIEW_DESC(CUDA_RESOURCE_VIEW_DESC_v1):
-    """
-    Resource view descriptor
-
-    Attributes
-    ----------
-    format : CUresourceViewFormat
-        Resource view format
-    width : size_t
-        Width of the resource view
-    height : size_t
-        Height of the resource view
-    depth : size_t
-        Depth of the resource view
-    firstMipmapLevel : unsigned int
-        First defined mipmap level
-    lastMipmapLevel : unsigned int
-        Last defined mipmap level
-    firstLayer : unsigned int
-        First layer index
-    lastLayer : unsigned int
-        Last layer index
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUtensorMap' in found_types}}
-
-cdef class CUtensorMap(CUtensorMap_st):
-    """
-    Tensor map descriptor. Requires compiler support for aligning to 64
-    bytes.
-
-    Attributes
-    ----------
-    opaque : List[cuuint64_t]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1' in found_types}}
-
-cdef class CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1(CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st):
-    """
-    GPU Direct v3 tokens
-
-    Attributes
-    ----------
-    p2pToken : unsigned long long
-
-    vaSpaceToken : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_POINTER_ATTRIBUTE_P2P_TOKENS' in found_types}}
-
-cdef class CUDA_POINTER_ATTRIBUTE_P2P_TOKENS(CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1):
-    """
-    GPU Direct v3 tokens
-
-    Attributes
-    ----------
-    p2pToken : unsigned long long
-
-    vaSpaceToken : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_LAUNCH_PARAMS_v1' in found_types}}
-
-cdef class CUDA_LAUNCH_PARAMS_v1(CUDA_LAUNCH_PARAMS_st):
-    """
-    Kernel launch parameters
-
-    Attributes
-    ----------
-    function : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    hStream : CUstream
-        Stream identifier
-    kernelParams : Any
-        Array of pointers to kernel parameters
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_LAUNCH_PARAMS' in found_types}}
-
-cdef class CUDA_LAUNCH_PARAMS(CUDA_LAUNCH_PARAMS_v1):
-    """
-    Kernel launch parameters
-
-    Attributes
-    ----------
-    function : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    hStream : CUstream
-        Stream identifier
-    kernelParams : Any
-        Array of pointers to kernel parameters
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st):
-    """
-    External memory handle descriptor
-
-    Attributes
-    ----------
-    type : CUexternalMemoryHandleType
-        Type of the handle
-    handle : anon_union5
-
-    size : unsigned long long
-        Size of the memory allocation
-    flags : unsigned int
-        Flags must either be zero or CUDA_EXTERNAL_MEMORY_DEDICATED
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_MEMORY_HANDLE_DESC' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_HANDLE_DESC(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1):
-    """
-    External memory handle descriptor
-
-    Attributes
-    ----------
-    type : CUexternalMemoryHandleType
-        Type of the handle
-    handle : anon_union5
-
-    size : unsigned long long
-        Size of the memory allocation
-    flags : unsigned int
-        Flags must either be zero or CUDA_EXTERNAL_MEMORY_DEDICATED
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1(CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st):
-    """
-    External memory buffer descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the buffer's base is
-    size : unsigned long long
-        Size of the buffer
-    flags : unsigned int
-        Flags reserved for future use. Must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_MEMORY_BUFFER_DESC' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_BUFFER_DESC(CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1):
-    """
-    External memory buffer descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the buffer's base is
-    size : unsigned long long
-        Size of the buffer
-    flags : unsigned int
-        Flags reserved for future use. Must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1(CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st):
-    """
-    External memory mipmap descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the base level of the mipmap
-        chain is.
-    arrayDesc : CUDA_ARRAY3D_DESCRIPTOR
-        Format, dimension and type of base level of the mipmap chain
-    numLevels : unsigned int
-        Total number of levels in the mipmap chain
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC(CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1):
-    """
-    External memory mipmap descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the base level of the mipmap
-        chain is.
-    arrayDesc : CUDA_ARRAY3D_DESCRIPTOR
-        Format, dimension and type of base level of the mipmap chain
-    numLevels : unsigned int
-        Total number of levels in the mipmap chain
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st):
-    """
-    External semaphore handle descriptor
-
-    Attributes
-    ----------
-    type : CUexternalSemaphoreHandleType
-        Type of the handle
-    handle : anon_union6
-
-    flags : unsigned int
-        Flags reserved for the future. Must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1):
-    """
-    External semaphore handle descriptor
-
-    Attributes
-    ----------
-    type : CUexternalSemaphoreHandleType
-        Type of the handle
-    handle : anon_union6
-
-    flags : unsigned int
-        Flags reserved for the future. Must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st):
-    """
-    External semaphore signal parameters
-
-    Attributes
-    ----------
-    params : anon_struct15
-
-    flags : unsigned int
-        Only when ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS is used to signal
-        a CUexternalSemaphore of type
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, the valid flag is
-        CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC which
-        indicates that while signaling the CUexternalSemaphore, no memory
-        synchronization operations should be performed for any external
-        memory object imported as CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF.
-        For all other types of CUexternalSemaphore, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1):
-    """
-    External semaphore signal parameters
-
-    Attributes
-    ----------
-    params : anon_struct15
-
-    flags : unsigned int
-        Only when ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS is used to signal
-        a CUexternalSemaphore of type
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, the valid flag is
-        CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC which
-        indicates that while signaling the CUexternalSemaphore, no memory
-        synchronization operations should be performed for any external
-        memory object imported as CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF.
-        For all other types of CUexternalSemaphore, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st):
-    """
-    External semaphore wait parameters
-
-    Attributes
-    ----------
-    params : anon_struct18
-
-    flags : unsigned int
-        Only when ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS is used to wait on
-        a CUexternalSemaphore of type
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, the valid flag is
-        CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC which indicates
-        that while waiting for the CUexternalSemaphore, no memory
-        synchronization operations should be performed for any external
-        memory object imported as CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF.
-        For all other types of CUexternalSemaphore, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1):
-    """
-    External semaphore wait parameters
-
-    Attributes
-    ----------
-    params : anon_struct18
-
-    flags : unsigned int
-        Only when ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS is used to wait on
-        a CUexternalSemaphore of type
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, the valid flag is
-        CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC which indicates
-        that while waiting for the CUexternalSemaphore, no memory
-        synchronization operations should be performed for any external
-        memory object imported as CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF.
-        For all other types of CUexternalSemaphore, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1' in found_types}}
-
-cdef class CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1(CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st):
-    """
-    Semaphore signal node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS
-        Array of external semaphore signal parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXT_SEM_SIGNAL_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_EXT_SEM_SIGNAL_NODE_PARAMS(CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1):
-    """
-    Semaphore signal node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS
-        Array of external semaphore signal parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2' in found_types}}
-
-cdef class CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2(CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st):
-    """
-    Semaphore signal node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS
-        Array of external semaphore signal parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1' in found_types}}
-
-cdef class CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1(CUDA_EXT_SEM_WAIT_NODE_PARAMS_st):
-    """
-    Semaphore wait node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS
-        Array of external semaphore wait parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXT_SEM_WAIT_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_EXT_SEM_WAIT_NODE_PARAMS(CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1):
-    """
-    Semaphore wait node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS
-        Array of external semaphore wait parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2' in found_types}}
-
-cdef class CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2(CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st):
-    """
-    Semaphore wait node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS
-        Array of external semaphore wait parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemGenericAllocationHandle' in found_types}}
-
-cdef class CUmemGenericAllocationHandle:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmemGenericAllocationHandle __val
-    cdef cydriver.CUmemGenericAllocationHandle* _ptr
-{{endif}}
-{{if 'CUarrayMapInfo_v1' in found_types}}
-
-cdef class CUarrayMapInfo_v1(CUarrayMapInfo_st):
-    """
-    Specifies the CUDA array or CUDA mipmapped array memory mapping
-    information
-
-    Attributes
-    ----------
-    resourceType : CUresourcetype
-        Resource type
-    resource : anon_union9
-
-    subresourceType : CUarraySparseSubresourceType
-        Sparse subresource type
-    subresource : anon_union10
-
-    memOperationType : CUmemOperationType
-        Memory operation type
-    memHandleType : CUmemHandleType
-        Memory handle type
-    memHandle : anon_union11
-
-    offset : unsigned long long
-        Offset within mip tail  Offset within the memory
-    deviceBitMask : unsigned int
-        Device ordinal bit mask
-    flags : unsigned int
-        flags for future use, must be zero now.
-    reserved : List[unsigned int]
-        Reserved for future use, must be zero now.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUarrayMapInfo' in found_types}}
-
-cdef class CUarrayMapInfo(CUarrayMapInfo_v1):
-    """
-    Specifies the CUDA array or CUDA mipmapped array memory mapping
-    information
-
-    Attributes
-    ----------
-    resourceType : CUresourcetype
-        Resource type
-    resource : anon_union9
-
-    subresourceType : CUarraySparseSubresourceType
-        Sparse subresource type
-    subresource : anon_union10
-
-    memOperationType : CUmemOperationType
-        Memory operation type
-    memHandleType : CUmemHandleType
-        Memory handle type
-    memHandle : anon_union11
-
-    offset : unsigned long long
-        Offset within mip tail  Offset within the memory
-    deviceBitMask : unsigned int
-        Device ordinal bit mask
-    flags : unsigned int
-        flags for future use, must be zero now.
-    reserved : List[unsigned int]
-        Reserved for future use, must be zero now.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemLocation_v1' in found_types}}
-
-cdef class CUmemLocation_v1(CUmemLocation_st):
-    """
-    Specifies a memory location.
-
-    Attributes
-    ----------
-    type : CUmemLocationType
-        Specifies the location type, which modifies the meaning of id.
-    id : int
-        identifier for a given this location's CUmemLocationType.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemLocation' in found_types}}
-
-cdef class CUmemLocation(CUmemLocation_v1):
-    """
-    Specifies a memory location.
-
-    Attributes
-    ----------
-    type : CUmemLocationType
-        Specifies the location type, which modifies the meaning of id.
-    id : int
-        identifier for a given this location's CUmemLocationType.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemAllocationProp_v1' in found_types}}
-
-cdef class CUmemAllocationProp_v1(CUmemAllocationProp_st):
-    """
-    Specifies the allocation properties for a allocation.
-
-    Attributes
-    ----------
-    type : CUmemAllocationType
-        Allocation type
-    requestedHandleTypes : CUmemAllocationHandleType
-        requested CUmemAllocationHandleType
-    location : CUmemLocation
-        Location of allocation
-    win32HandleMetaData : Any
-        Windows-specific POBJECT_ATTRIBUTES required when
-        CU_MEM_HANDLE_TYPE_WIN32 is specified. This object attributes
-        structure includes security attributes that define the scope of
-        which exported allocations may be transferred to other processes.
-        In all other cases, this field is required to be zero.
-    allocFlags : anon_struct21
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemAllocationProp' in found_types}}
-
-cdef class CUmemAllocationProp(CUmemAllocationProp_v1):
-    """
-    Specifies the allocation properties for a allocation.
-
-    Attributes
-    ----------
-    type : CUmemAllocationType
-        Allocation type
-    requestedHandleTypes : CUmemAllocationHandleType
-        requested CUmemAllocationHandleType
-    location : CUmemLocation
-        Location of allocation
-    win32HandleMetaData : Any
-        Windows-specific POBJECT_ATTRIBUTES required when
-        CU_MEM_HANDLE_TYPE_WIN32 is specified. This object attributes
-        structure includes security attributes that define the scope of
-        which exported allocations may be transferred to other processes.
-        In all other cases, this field is required to be zero.
-    allocFlags : anon_struct21
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmulticastObjectProp_v1' in found_types}}
-
-cdef class CUmulticastObjectProp_v1(CUmulticastObjectProp_st):
-    """
-    Specifies the properties for a multicast object.
-
-    Attributes
-    ----------
-    numDevices : unsigned int
-        The number of devices in the multicast team that will bind memory
-        to this object
-    size : size_t
-        The maximum amount of memory that can be bound to this multicast
-        object per device
-    handleTypes : unsigned long long
-        Bitmask of exportable handle types (see CUmemAllocationHandleType)
-        for this object
-    flags : unsigned long long
-        Flags for future use, must be zero now
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmulticastObjectProp' in found_types}}
-
-cdef class CUmulticastObjectProp(CUmulticastObjectProp_v1):
-    """
-    Specifies the properties for a multicast object.
-
-    Attributes
-    ----------
-    numDevices : unsigned int
-        The number of devices in the multicast team that will bind memory
-        to this object
-    size : size_t
-        The maximum amount of memory that can be bound to this multicast
-        object per device
-    handleTypes : unsigned long long
-        Bitmask of exportable handle types (see CUmemAllocationHandleType)
-        for this object
-    flags : unsigned long long
-        Flags for future use, must be zero now
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemAccessDesc_v1' in found_types}}
-
-cdef class CUmemAccessDesc_v1(CUmemAccessDesc_st):
-    """
-    Memory access descriptor
-
-    Attributes
-    ----------
-    location : CUmemLocation
-        Location on which the request is to change it's accessibility
-    flags : CUmemAccess_flags
-        ::CUmemProt accessibility flags to set on the request
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemAccessDesc' in found_types}}
-
-cdef class CUmemAccessDesc(CUmemAccessDesc_v1):
-    """
-    Memory access descriptor
-
-    Attributes
-    ----------
-    location : CUmemLocation
-        Location on which the request is to change it's accessibility
-    flags : CUmemAccess_flags
-        ::CUmemProt accessibility flags to set on the request
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUgraphExecUpdateResultInfo_v1' in found_types}}
-
-cdef class CUgraphExecUpdateResultInfo_v1(CUgraphExecUpdateResultInfo_st):
-    """
-    Result information returned by cuGraphExecUpdate
-
-    Attributes
-    ----------
-    result : CUgraphExecUpdateResult
-        Gives more specific detail when a cuda graph update fails.
-    errorNode : CUgraphNode
-        The "to node" of the error edge when the topologies do not match.
-        The error node when the error is associated with a specific node.
-        NULL when the error is generic.
-    errorFromNode : CUgraphNode
-        The from node of error edge when the topologies do not match.
-        Otherwise NULL.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUgraphExecUpdateResultInfo' in found_types}}
-
-cdef class CUgraphExecUpdateResultInfo(CUgraphExecUpdateResultInfo_v1):
-    """
-    Result information returned by cuGraphExecUpdate
-
-    Attributes
-    ----------
-    result : CUgraphExecUpdateResult
-        Gives more specific detail when a cuda graph update fails.
-    errorNode : CUgraphNode
-        The "to node" of the error edge when the topologies do not match.
-        The error node when the error is associated with a specific node.
-        NULL when the error is generic.
-    errorFromNode : CUgraphNode
-        The from node of error edge when the topologies do not match.
-        Otherwise NULL.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemPoolProps_v1' in found_types}}
-
-cdef class CUmemPoolProps_v1(CUmemPoolProps_st):
-    """
-    Specifies the properties of allocations made from the pool.
-
-    Attributes
-    ----------
-    allocType : CUmemAllocationType
-        Allocation type. Currently must be specified as
-        CU_MEM_ALLOCATION_TYPE_PINNED
-    handleTypes : CUmemAllocationHandleType
-        Handle types that will be supported by allocations from the pool.
-    location : CUmemLocation
-        Location where allocations should reside.
-    win32SecurityAttributes : Any
-        Windows-specific LPSECURITYATTRIBUTES required when
-        CU_MEM_HANDLE_TYPE_WIN32 is specified. This security attribute
-        defines the scope of which exported allocations may be transferred
-        to other processes. In all other cases, this field is required to
-        be zero.
-    maxSize : size_t
-        Maximum pool size. When set to 0, defaults to a system dependent
-        value.
-    usage : unsigned short
-        Bitmask indicating intended usage for the pool.
-    reserved : bytes
-        reserved for future use, must be 0
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemPoolProps' in found_types}}
-
-cdef class CUmemPoolProps(CUmemPoolProps_v1):
-    """
-    Specifies the properties of allocations made from the pool.
-
-    Attributes
-    ----------
-    allocType : CUmemAllocationType
-        Allocation type. Currently must be specified as
-        CU_MEM_ALLOCATION_TYPE_PINNED
-    handleTypes : CUmemAllocationHandleType
-        Handle types that will be supported by allocations from the pool.
-    location : CUmemLocation
-        Location where allocations should reside.
-    win32SecurityAttributes : Any
-        Windows-specific LPSECURITYATTRIBUTES required when
-        CU_MEM_HANDLE_TYPE_WIN32 is specified. This security attribute
-        defines the scope of which exported allocations may be transferred
-        to other processes. In all other cases, this field is required to
-        be zero.
-    maxSize : size_t
-        Maximum pool size. When set to 0, defaults to a system dependent
-        value.
-    usage : unsigned short
-        Bitmask indicating intended usage for the pool.
-    reserved : bytes
-        reserved for future use, must be 0
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemPoolPtrExportData_v1' in found_types}}
-
-cdef class CUmemPoolPtrExportData_v1(CUmemPoolPtrExportData_st):
-    """
-    Opaque data for exporting a pool allocation
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUmemPoolPtrExportData' in found_types}}
-
-cdef class CUmemPoolPtrExportData(CUmemPoolPtrExportData_v1):
-    """
-    Opaque data for exporting a pool allocation
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEM_ALLOC_NODE_PARAMS_v1' in found_types}}
-
-cdef class CUDA_MEM_ALLOC_NODE_PARAMS_v1(CUDA_MEM_ALLOC_NODE_PARAMS_v1_st):
-    """
-    Memory allocation node parameters
-
-    Attributes
-    ----------
-    poolProps : CUmemPoolProps
-        in: location where the allocation should reside (specified in
-        ::location). ::handleTypes must be CU_MEM_HANDLE_TYPE_NONE. IPC is
-        not supported.
-    accessDescs : CUmemAccessDesc
-        in: array of memory access descriptors. Used to describe peer GPU
-        access
-    accessDescCount : size_t
-        in: number of memory access descriptors. Must not exceed the number
-        of GPUs.
-    bytesize : size_t
-        in: size in bytes of the requested allocation
-    dptr : CUdeviceptr
-        out: address of the allocation returned by CUDA
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEM_ALLOC_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_MEM_ALLOC_NODE_PARAMS(CUDA_MEM_ALLOC_NODE_PARAMS_v1):
-    """
-    Memory allocation node parameters
-
-    Attributes
-    ----------
-    poolProps : CUmemPoolProps
-        in: location where the allocation should reside (specified in
-        ::location). ::handleTypes must be CU_MEM_HANDLE_TYPE_NONE. IPC is
-        not supported.
-    accessDescs : CUmemAccessDesc
-        in: array of memory access descriptors. Used to describe peer GPU
-        access
-    accessDescCount : size_t
-        in: number of memory access descriptors. Must not exceed the number
-        of GPUs.
-    bytesize : size_t
-        in: size in bytes of the requested allocation
-    dptr : CUdeviceptr
-        out: address of the allocation returned by CUDA
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEM_ALLOC_NODE_PARAMS_v2' in found_types}}
-
-cdef class CUDA_MEM_ALLOC_NODE_PARAMS_v2(CUDA_MEM_ALLOC_NODE_PARAMS_v2_st):
-    """
-    Memory allocation node parameters
-
-    Attributes
-    ----------
-    poolProps : CUmemPoolProps
-        in: location where the allocation should reside (specified in
-        ::location). ::handleTypes must be CU_MEM_HANDLE_TYPE_NONE. IPC is
-        not supported.
-    accessDescs : CUmemAccessDesc
-        in: array of memory access descriptors. Used to describe peer GPU
-        access
-    accessDescCount : size_t
-        in: number of memory access descriptors. Must not exceed the number
-        of GPUs.
-    bytesize : size_t
-        in: size in bytes of the requested allocation
-    dptr : CUdeviceptr
-        out: address of the allocation returned by CUDA
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_MEM_FREE_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_MEM_FREE_NODE_PARAMS(CUDA_MEM_FREE_NODE_PARAMS_st):
-    """
-    Memory free node parameters
-
-    Attributes
-    ----------
-    dptr : CUdeviceptr
-        in: the pointer to free
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_CHILD_GRAPH_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_CHILD_GRAPH_NODE_PARAMS(CUDA_CHILD_GRAPH_NODE_PARAMS_st):
-    """
-    Child graph node parameters
-
-    Attributes
-    ----------
-    graph : CUgraph
-        The child graph to clone into the node for node creation, or a
-        handle to the graph owned by the node for node query
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EVENT_RECORD_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_EVENT_RECORD_NODE_PARAMS(CUDA_EVENT_RECORD_NODE_PARAMS_st):
-    """
-    Event record node parameters
-
-    Attributes
-    ----------
-    event : CUevent
-        The event to record when the node executes
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUDA_EVENT_WAIT_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_EVENT_WAIT_NODE_PARAMS(CUDA_EVENT_WAIT_NODE_PARAMS_st):
-    """
-    Event wait node parameters
-
-    Attributes
-    ----------
-    event : CUevent
-        The event to wait on from the node
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUgraphNodeParams' in found_types}}
-
-cdef class CUgraphNodeParams(CUgraphNodeParams_st):
-    """
-    Graph node parameters. See cuGraphAddNode.
-
-    Attributes
-    ----------
-    type : CUgraphNodeType
-        Type of the node
-    reserved0 : List[int]
-        Reserved. Must be zero.
-    reserved1 : List[long long]
-        Padding. Unused bytes must be zero.
-    kernel : CUDA_KERNEL_NODE_PARAMS_v3
-        Kernel node parameters.
-    memcpy : CUDA_MEMCPY_NODE_PARAMS
-        Memcpy node parameters.
-    memset : CUDA_MEMSET_NODE_PARAMS_v2
-        Memset node parameters.
-    host : CUDA_HOST_NODE_PARAMS_v2
-        Host node parameters.
-    graph : CUDA_CHILD_GRAPH_NODE_PARAMS
-        Child graph node parameters.
-    eventWait : CUDA_EVENT_WAIT_NODE_PARAMS
-        Event wait node parameters.
-    eventRecord : CUDA_EVENT_RECORD_NODE_PARAMS
-        Event record node parameters.
-    extSemSignal : CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2
-        External semaphore signal node parameters.
-    extSemWait : CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2
-        External semaphore wait node parameters.
-    alloc : CUDA_MEM_ALLOC_NODE_PARAMS_v2
-        Memory allocation node parameters.
-    free : CUDA_MEM_FREE_NODE_PARAMS
-        Memory free node parameters.
-    memOp : CUDA_BATCH_MEM_OP_NODE_PARAMS_v2
-        MemOp node parameters.
-    conditional : CUDA_CONDITIONAL_NODE_PARAMS
-        Conditional node parameters.
-    reserved2 : long long
-        Reserved bytes. Must be zero.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'CUdevSmResource' in found_types}}
-
-cdef class CUdevSmResource(CUdevSmResource_st):
-    """
-    Attributes
-    ----------
-    smCount : unsigned int
-        The amount of streaming multiprocessors available in this resource.
-        This is an output parameter only, do not write to this field.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'struct CUdevResource_st' in found_types}}
-
-cdef class CUdevResource_v1(CUdevResource_st):
-    """
-    Attributes
-    ----------
-    type : CUdevResourceType
-        Type of resource, dictates which union field was last set
-    _internal_padding : bytes
-
-    sm : CUdevSmResource
-        Resource corresponding to CU_DEV_RESOURCE_TYPE_SM ``. type.
-    _oversize : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'struct CUdevResource_st' in found_types}}
-
-cdef class CUdevResource(CUdevResource_v1):
-    """
-    Attributes
-    ----------
-    type : CUdevResourceType
-        Type of resource, dictates which union field was last set
-    _internal_padding : bytes
-
-    sm : CUdevSmResource
-        Resource corresponding to CU_DEV_RESOURCE_TYPE_SM ``. type.
-    _oversize : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if True}}
-
-cdef class CUeglFrame_v1(CUeglFrame_st):
-    """
-    CUDA EGLFrame structure Descriptor - structure defining one frame
-    of EGL.  Each frame may contain one or more planes depending on
-    whether the surface * is Multiplanar or not.
-
-    Attributes
-    ----------
-    frame : anon_union14
-
-    width : unsigned int
-        Width of first plane
-    height : unsigned int
-        Height of first plane
-    depth : unsigned int
-        Depth of first plane
-    pitch : unsigned int
-        Pitch of first plane
-    planeCount : unsigned int
-        Number of planes
-    numChannels : unsigned int
-        Number of channels for the plane
-    frameType : CUeglFrameType
-        Array or Pitch
-    eglColorFormat : CUeglColorFormat
-        CUDA EGL Color Format
-    cuFormat : CUarray_format
-        CUDA Array Format
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if True}}
-
-cdef class CUeglFrame(CUeglFrame_v1):
-    """
-    CUDA EGLFrame structure Descriptor - structure defining one frame
-    of EGL.  Each frame may contain one or more planes depending on
-    whether the surface * is Multiplanar or not.
-
-    Attributes
-    ----------
-    frame : anon_union14
-
-    width : unsigned int
-        Width of first plane
-    height : unsigned int
-        Height of first plane
-    depth : unsigned int
-        Depth of first plane
-    pitch : unsigned int
-        Pitch of first plane
-    planeCount : unsigned int
-        Number of planes
-    numChannels : unsigned int
-        Number of channels for the plane
-    frameType : CUeglFrameType
-        Array or Pitch
-    eglColorFormat : CUeglColorFormat
-        CUDA EGL Color Format
-    cuFormat : CUarray_format
-        CUDA Array Format
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cuuint32_t' in found_types}}
-
-cdef class cuuint32_t:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.cuuint32_t  __val
-    cdef cydriver.cuuint32_t* _ptr
-{{endif}}
-
-{{if 'cuuint64_t' in found_types}}
-
-cdef class cuuint64_t:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.cuuint64_t  __val
-    cdef cydriver.cuuint64_t* _ptr
-{{endif}}
-
-{{if 'CUdeviceptr_v2' in found_types}}
-
-cdef class CUdeviceptr_v2:
-    """
-
-    CUDA device pointer CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUdeviceptr_v2  __val
-    cdef cydriver.CUdeviceptr_v2* _ptr
-{{endif}}
-
-{{if 'CUdevice_v1' in found_types}}
-
-cdef class CUdevice_v1:
-    """
-
-    CUDA device
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUdevice_v1  __val
-    cdef cydriver.CUdevice_v1* _ptr
-{{endif}}
-
-{{if 'CUtexObject_v1' in found_types}}
-
-cdef class CUtexObject_v1:
-    """
-
-    An opaque value that represents a CUDA texture object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUtexObject_v1  __val
-    cdef cydriver.CUtexObject_v1* _ptr
-{{endif}}
-
-{{if 'CUsurfObject_v1' in found_types}}
-
-cdef class CUsurfObject_v1:
-    """
-
-    An opaque value that represents a CUDA surface object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUsurfObject_v1  __val
-    cdef cydriver.CUsurfObject_v1* _ptr
-{{endif}}
-
-{{if 'CUmemGenericAllocationHandle_v1' in found_types}}
-
-cdef class CUmemGenericAllocationHandle_v1:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.CUmemGenericAllocationHandle_v1  __val
-    cdef cydriver.CUmemGenericAllocationHandle_v1* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class GLenum:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.GLenum  __val
-    cdef cydriver.GLenum* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class GLuint:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.GLuint  __val
-    cdef cydriver.GLuint* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLint:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.EGLint  __val
-    cdef cydriver.EGLint* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpDevice:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.VdpDevice  __val
-    cdef cydriver.VdpDevice* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpGetProcAddress:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.VdpGetProcAddress  __val
-    cdef cydriver.VdpGetProcAddress* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpVideoSurface:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.VdpVideoSurface  __val
-    cdef cydriver.VdpVideoSurface* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpOutputSurface:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cydriver.VdpOutputSurface  __val
-    cdef cydriver.VdpOutputSurface* _ptr
-{{endif}}
diff --git a/cuda_bindings/cuda/bindings/driver.pyx.in b/cuda_bindings/cuda/bindings/driver.pyx.in
deleted file mode 100644
index 8bbdd980..00000000
--- a/cuda_bindings/cuda/bindings/driver.pyx.in
+++ /dev/null
@@ -1,46867 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from typing import List, Tuple, Any, Optional
-from enum import IntEnum
-import cython
-import ctypes
-from libc.stdlib cimport calloc, free
-from libc cimport string
-from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t
-from libc.stddef cimport wchar_t
-from libc.limits cimport CHAR_MIN
-from libcpp.vector cimport vector
-from cpython.buffer cimport PyObject_CheckBuffer, PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS
-from cpython.bytes cimport PyBytes_FromStringAndSize
-
-ctypedef unsigned long long signed_char_ptr
-ctypedef unsigned long long unsigned_char_ptr
-ctypedef unsigned long long char_ptr
-ctypedef unsigned long long short_ptr
-ctypedef unsigned long long unsigned_short_ptr
-ctypedef unsigned long long int_ptr
-ctypedef unsigned long long long_int_ptr
-ctypedef unsigned long long long_long_int_ptr
-ctypedef unsigned long long unsigned_int_ptr
-ctypedef unsigned long long unsigned_long_int_ptr
-ctypedef unsigned long long unsigned_long_long_int_ptr
-ctypedef unsigned long long uint32_t_ptr
-ctypedef unsigned long long uint64_t_ptr
-ctypedef unsigned long long int32_t_ptr
-ctypedef unsigned long long int64_t_ptr
-ctypedef unsigned long long unsigned_ptr
-ctypedef unsigned long long unsigned_long_long_ptr
-ctypedef unsigned long long long_long_ptr
-ctypedef unsigned long long size_t_ptr
-ctypedef unsigned long long float_ptr
-ctypedef unsigned long long double_ptr
-ctypedef unsigned long long void_ptr
-
-#: CUDA API version number
-CUDA_VERSION = cydriver.CUDA_VERSION
-
-#: CUDA IPC handle size
-CU_IPC_HANDLE_SIZE = cydriver.CU_IPC_HANDLE_SIZE
-
-#: Legacy stream handle
-#:
-#: Stream handle that can be passed as a CUstream to use an implicit stream
-#: with legacy synchronization behavior.
-#:
-#: See details of the \link_sync_behavior
-CU_STREAM_LEGACY = cydriver.CU_STREAM_LEGACY
-
-#: Per-thread stream handle
-#:
-#: Stream handle that can be passed as a CUstream to use an implicit stream
-#: with per-thread synchronization behavior.
-#:
-#: See details of the \link_sync_behavior
-CU_STREAM_PER_THREAD = cydriver.CU_STREAM_PER_THREAD
-
-CU_COMPUTE_ACCELERATED_TARGET_BASE = cydriver.CU_COMPUTE_ACCELERATED_TARGET_BASE
-
-#: Conditional node handle flags Default value is applied when graph is
-#: launched.
-CU_GRAPH_COND_ASSIGN_DEFAULT = cydriver.CU_GRAPH_COND_ASSIGN_DEFAULT
-
-#: This port activates when the kernel has finished executing.
-CU_GRAPH_KERNEL_NODE_PORT_DEFAULT = cydriver.CU_GRAPH_KERNEL_NODE_PORT_DEFAULT
-
-#: This port activates when all blocks of the kernel have performed
-#: cudaTriggerProgrammaticLaunchCompletion() or have terminated. It must be
-#: used with edge type :py:obj:`~.CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC`.
-#: See also :py:obj:`~.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT`.
-CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC = cydriver.CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC
-
-#: This port activates when all blocks of the kernel have begun execution.
-#: See also :py:obj:`~.CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT`.
-CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER = cydriver.CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER
-
-CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = cydriver.CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW
-
-CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE = cydriver.CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE
-
-CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION = cydriver.CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION
-
-CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = cydriver.CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
-
-CU_KERNEL_NODE_ATTRIBUTE_PRIORITY = cydriver.CU_KERNEL_NODE_ATTRIBUTE_PRIORITY
-
-CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = cydriver.CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP
-
-CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN = cydriver.CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN
-
-CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = cydriver.CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE
-
-CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = cydriver.CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
-
-CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = cydriver.CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW
-
-CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = cydriver.CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY
-
-CU_STREAM_ATTRIBUTE_PRIORITY = cydriver.CU_STREAM_ATTRIBUTE_PRIORITY
-
-CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = cydriver.CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP
-
-CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN = cydriver.CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN
-
-#: If set, host memory is portable between CUDA contexts. Flag for
-#: :py:obj:`~.cuMemHostAlloc()`
-CU_MEMHOSTALLOC_PORTABLE = cydriver.CU_MEMHOSTALLOC_PORTABLE
-
-#: If set, host memory is mapped into CUDA address space and
-#: :py:obj:`~.cuMemHostGetDevicePointer()` may be called on the host
-#: pointer. Flag for :py:obj:`~.cuMemHostAlloc()`
-CU_MEMHOSTALLOC_DEVICEMAP = cydriver.CU_MEMHOSTALLOC_DEVICEMAP
-
-#: If set, host memory is allocated as write-combined - fast to write,
-#: faster to DMA, slow to read except via SSE4 streaming load instruction
-#: (MOVNTDQA). Flag for :py:obj:`~.cuMemHostAlloc()`
-CU_MEMHOSTALLOC_WRITECOMBINED = cydriver.CU_MEMHOSTALLOC_WRITECOMBINED
-
-#: If set, host memory is portable between CUDA contexts. Flag for
-#: :py:obj:`~.cuMemHostRegister()`
-CU_MEMHOSTREGISTER_PORTABLE = cydriver.CU_MEMHOSTREGISTER_PORTABLE
-
-#: If set, host memory is mapped into CUDA address space and
-#: :py:obj:`~.cuMemHostGetDevicePointer()` may be called on the host
-#: pointer. Flag for :py:obj:`~.cuMemHostRegister()`
-CU_MEMHOSTREGISTER_DEVICEMAP = cydriver.CU_MEMHOSTREGISTER_DEVICEMAP
-
-#: If set, the passed memory pointer is treated as pointing to some memory-
-#: mapped I/O space, e.g. belonging to a third-party PCIe device. On
-#: Windows the flag is a no-op. On Linux that memory is marked as non
-#: cache-coherent for the GPU and is expected to be physically contiguous.
-#: It may return :py:obj:`~.CUDA_ERROR_NOT_PERMITTED` if run as an
-#: unprivileged user, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` on older Linux
-#: kernel versions. On all other platforms, it is not supported and
-#: :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` is returned. Flag for
-#: :py:obj:`~.cuMemHostRegister()`
-CU_MEMHOSTREGISTER_IOMEMORY = cydriver.CU_MEMHOSTREGISTER_IOMEMORY
-
-#: If set, the passed memory pointer is treated as pointing to memory that
-#: is considered read-only by the device. On platforms without
-#: :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`,
-#: this flag is required in order to register memory mapped to the CPU as
-#: read-only. Support for the use of this flag can be queried from the
-#: device attribute
-#: :py:obj:`~.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED`. Using
-#: this flag with a current context associated with a device that does not
-#: have this attribute set will cause :py:obj:`~.cuMemHostRegister` to
-#: error with :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`.
-CU_MEMHOSTREGISTER_READ_ONLY = cydriver.CU_MEMHOSTREGISTER_READ_ONLY
-
-#: Indicates that the layered sparse CUDA array or CUDA mipmapped array has
-#: a single mip tail region for all layers
-CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL = cydriver.CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL
-
-#: Size of tensor map descriptor
-CU_TENSOR_MAP_NUM_QWORDS = cydriver.CU_TENSOR_MAP_NUM_QWORDS
-
-#: Indicates that the external memory object is a dedicated resource
-CUDA_EXTERNAL_MEMORY_DEDICATED = cydriver.CUDA_EXTERNAL_MEMORY_DEDICATED
-
-#: When the `flags` parameter of
-#: :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS` contains this flag, it
-#: indicates that signaling an external semaphore object should skip
-#: performing appropriate memory synchronization operations over all the
-#: external memory objects that are imported as
-#: :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF`, which otherwise are
-#: performed by default to ensure data coherency with other importers of
-#: the same NvSciBuf memory objects.
-CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC = cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC
-
-#: When the `flags` parameter of
-#: :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS` contains this flag, it
-#: indicates that waiting on an external semaphore object should skip
-#: performing appropriate memory synchronization operations over all the
-#: external memory objects that are imported as
-#: :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF`, which otherwise are
-#: performed by default to ensure data coherency with other importers of
-#: the same NvSciBuf memory objects.
-CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC = cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC
-
-#: When `flags` of :py:obj:`~.cuDeviceGetNvSciSyncAttributes` is set to
-#: this, it indicates that application needs signaler specific
-#: NvSciSyncAttr to be filled by
-#: :py:obj:`~.cuDeviceGetNvSciSyncAttributes`.
-CUDA_NVSCISYNC_ATTR_SIGNAL = cydriver.CUDA_NVSCISYNC_ATTR_SIGNAL
-
-#: When `flags` of :py:obj:`~.cuDeviceGetNvSciSyncAttributes` is set to
-#: this, it indicates that application needs waiter specific NvSciSyncAttr
-#: to be filled by :py:obj:`~.cuDeviceGetNvSciSyncAttributes`.
-CUDA_NVSCISYNC_ATTR_WAIT = cydriver.CUDA_NVSCISYNC_ATTR_WAIT
-
-#: This flag if set indicates that the memory will be used as a tile pool.
-CU_MEM_CREATE_USAGE_TILE_POOL = cydriver.CU_MEM_CREATE_USAGE_TILE_POOL
-
-#: If set, each kernel launched as part of
-#: :py:obj:`~.cuLaunchCooperativeKernelMultiDevice` only waits for prior
-#: work in the stream corresponding to that GPU to complete before the
-#: kernel begins execution.
-CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC = cydriver.CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC
-
-#: If set, any subsequent work pushed in a stream that participated in a
-#: call to :py:obj:`~.cuLaunchCooperativeKernelMultiDevice` will only wait
-#: for the kernel launched on the GPU corresponding to that stream to
-#: complete before it begins execution.
-CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC = cydriver.CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC
-
-#: If set, the CUDA array is a collection of layers, where each layer is
-#: either a 1D or a 2D array and the Depth member of
-#: CUDA_ARRAY3D_DESCRIPTOR specifies the number of layers, not the depth of
-#: a 3D array.
-CUDA_ARRAY3D_LAYERED = cydriver.CUDA_ARRAY3D_LAYERED
-
-#: Deprecated, use CUDA_ARRAY3D_LAYERED
-CUDA_ARRAY3D_2DARRAY = cydriver.CUDA_ARRAY3D_2DARRAY
-
-#: This flag must be set in order to bind a surface reference to the CUDA
-#: array
-CUDA_ARRAY3D_SURFACE_LDST = cydriver.CUDA_ARRAY3D_SURFACE_LDST
-
-#: If set, the CUDA array is a collection of six 2D arrays, representing
-#: faces of a cube. The width of such a CUDA array must be equal to its
-#: height, and Depth must be six. If :py:obj:`~.CUDA_ARRAY3D_LAYERED` flag
-#: is also set, then the CUDA array is a collection of cubemaps and Depth
-#: must be a multiple of six.
-CUDA_ARRAY3D_CUBEMAP = cydriver.CUDA_ARRAY3D_CUBEMAP
-
-#: This flag must be set in order to perform texture gather operations on a
-#: CUDA array.
-CUDA_ARRAY3D_TEXTURE_GATHER = cydriver.CUDA_ARRAY3D_TEXTURE_GATHER
-
-#: This flag if set indicates that the CUDA array is a DEPTH_TEXTURE.
-CUDA_ARRAY3D_DEPTH_TEXTURE = cydriver.CUDA_ARRAY3D_DEPTH_TEXTURE
-
-#: This flag indicates that the CUDA array may be bound as a color target
-#: in an external graphics API
-CUDA_ARRAY3D_COLOR_ATTACHMENT = cydriver.CUDA_ARRAY3D_COLOR_ATTACHMENT
-
-#: This flag if set indicates that the CUDA array or CUDA mipmapped array
-#: is a sparse CUDA array or CUDA mipmapped array respectively
-CUDA_ARRAY3D_SPARSE = cydriver.CUDA_ARRAY3D_SPARSE
-
-#: This flag if set indicates that the CUDA array or CUDA mipmapped array
-#: will allow deferred memory mapping
-CUDA_ARRAY3D_DEFERRED_MAPPING = cydriver.CUDA_ARRAY3D_DEFERRED_MAPPING
-
-#: This flag indicates that the CUDA array will be used for hardware
-#: accelerated video encode/decode operations.
-CUDA_ARRAY3D_VIDEO_ENCODE_DECODE = cydriver.CUDA_ARRAY3D_VIDEO_ENCODE_DECODE
-
-#: Override the texref format with a format inferred from the array. Flag
-#: for :py:obj:`~.cuTexRefSetArray()`
-CU_TRSA_OVERRIDE_FORMAT = cydriver.CU_TRSA_OVERRIDE_FORMAT
-
-#: Read the texture as integers rather than promoting the values to floats
-#: in the range [0,1]. Flag for :py:obj:`~.cuTexRefSetFlags()` and
-#: :py:obj:`~.cuTexObjectCreate()`
-CU_TRSF_READ_AS_INTEGER = cydriver.CU_TRSF_READ_AS_INTEGER
-
-#: Use normalized texture coordinates in the range [0,1) instead of
-#: [0,dim). Flag for :py:obj:`~.cuTexRefSetFlags()` and
-#: :py:obj:`~.cuTexObjectCreate()`
-CU_TRSF_NORMALIZED_COORDINATES = cydriver.CU_TRSF_NORMALIZED_COORDINATES
-
-#: Perform sRGB->linear conversion during texture read. Flag for
-#: :py:obj:`~.cuTexRefSetFlags()` and :py:obj:`~.cuTexObjectCreate()`
-CU_TRSF_SRGB = cydriver.CU_TRSF_SRGB
-
-#: Disable any trilinear filtering optimizations. Flag for
-#: :py:obj:`~.cuTexRefSetFlags()` and :py:obj:`~.cuTexObjectCreate()`
-CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION = cydriver.CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION
-
-#: Enable seamless cube map filtering. Flag for
-#: :py:obj:`~.cuTexObjectCreate()`
-CU_TRSF_SEAMLESS_CUBEMAP = cydriver.CU_TRSF_SEAMLESS_CUBEMAP
-
-#: C++ compile time constant for CU_LAUNCH_PARAM_END
-CU_LAUNCH_PARAM_END_AS_INT = cydriver.CU_LAUNCH_PARAM_END_AS_INT
-
-#: End of array terminator for the `extra` parameter to
-#: :py:obj:`~.cuLaunchKernel`
-CU_LAUNCH_PARAM_END = cydriver.CU_LAUNCH_PARAM_END
-
-#: C++ compile time constant for CU_LAUNCH_PARAM_BUFFER_POINTER
-CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT = cydriver.CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT
-
-#: Indicator that the next value in the `extra` parameter to
-#: :py:obj:`~.cuLaunchKernel` will be a pointer to a buffer containing all
-#: kernel parameters used for launching kernel `f`. This buffer needs to
-#: honor all alignment/padding requirements of the individual parameters.
-#: If :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_SIZE` is not also specified in the
-#: `extra` array, then :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER` will have
-#: no effect.
-CU_LAUNCH_PARAM_BUFFER_POINTER = cydriver.CU_LAUNCH_PARAM_BUFFER_POINTER
-
-#: C++ compile time constant for CU_LAUNCH_PARAM_BUFFER_SIZE
-CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT = cydriver.CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT
-
-#: Indicator that the next value in the `extra` parameter to
-#: :py:obj:`~.cuLaunchKernel` will be a pointer to a size_t which contains
-#: the size of the buffer specified with
-#: :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`. It is required that
-#: :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER` also be specified in the
-#: `extra` array if the value associated with
-#: :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_SIZE` is not zero.
-CU_LAUNCH_PARAM_BUFFER_SIZE = cydriver.CU_LAUNCH_PARAM_BUFFER_SIZE
-
-#: For texture references loaded into the module, use default texunit from
-#: texture reference.
-CU_PARAM_TR_DEFAULT = cydriver.CU_PARAM_TR_DEFAULT
-
-#: Device that represents the CPU
-CU_DEVICE_CPU = cydriver.CU_DEVICE_CPU
-
-#: Device that represents an invalid device
-CU_DEVICE_INVALID = cydriver.CU_DEVICE_INVALID
-
-RESOURCE_ABI_VERSION = cydriver.RESOURCE_ABI_VERSION
-
-RESOURCE_ABI_EXTERNAL_BYTES = cydriver.RESOURCE_ABI_EXTERNAL_BYTES
-
-#: Maximum number of planes per frame
-MAX_PLANES = cydriver.MAX_PLANES
-
-#: Indicates that timeout for :py:obj:`~.cuEGLStreamConsumerAcquireFrame`
-#: is infinite.
-CUDA_EGL_INFINITE_TIMEOUT = cydriver.CUDA_EGL_INFINITE_TIMEOUT
-
-{{if 'CUipcMem_flags_enum' in found_types}}
-
-class CUipcMem_flags(IntEnum):
-    """
-    CUDA Ipc Mem Flags
-    """
-    {{if 'CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS' in found_values}}
-
-    #: Automatically enable peer access between remote devices as needed
-    CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = cydriver.CUipcMem_flags_enum.CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS{{endif}}
-{{endif}}
-{{if 'CUmemAttach_flags_enum' in found_types}}
-
-class CUmemAttach_flags(IntEnum):
-    """
-    CUDA Mem Attach Flags
-    """
-    {{if 'CU_MEM_ATTACH_GLOBAL' in found_values}}
-
-    #: Memory can be accessed by any stream on any device
-    CU_MEM_ATTACH_GLOBAL = cydriver.CUmemAttach_flags_enum.CU_MEM_ATTACH_GLOBAL{{endif}}
-    {{if 'CU_MEM_ATTACH_HOST' in found_values}}
-
-    #: Memory cannot be accessed by any stream on any device
-    CU_MEM_ATTACH_HOST = cydriver.CUmemAttach_flags_enum.CU_MEM_ATTACH_HOST{{endif}}
-    {{if 'CU_MEM_ATTACH_SINGLE' in found_values}}
-
-    #: Memory can only be accessed by a single stream on the associated
-    #: device
-    CU_MEM_ATTACH_SINGLE = cydriver.CUmemAttach_flags_enum.CU_MEM_ATTACH_SINGLE{{endif}}
-{{endif}}
-{{if 'CUctx_flags_enum' in found_types}}
-
-class CUctx_flags(IntEnum):
-    """
-    Context creation flags
-    """
-    {{if 'CU_CTX_SCHED_AUTO' in found_values}}
-
-    #: Automatic scheduling
-    CU_CTX_SCHED_AUTO = cydriver.CUctx_flags_enum.CU_CTX_SCHED_AUTO{{endif}}
-    {{if 'CU_CTX_SCHED_SPIN' in found_values}}
-
-    #: Set spin as default scheduling
-    CU_CTX_SCHED_SPIN = cydriver.CUctx_flags_enum.CU_CTX_SCHED_SPIN{{endif}}
-    {{if 'CU_CTX_SCHED_YIELD' in found_values}}
-
-    #: Set yield as default scheduling
-    CU_CTX_SCHED_YIELD = cydriver.CUctx_flags_enum.CU_CTX_SCHED_YIELD{{endif}}
-    {{if 'CU_CTX_SCHED_BLOCKING_SYNC' in found_values}}
-
-    #: Set blocking synchronization as default scheduling
-    CU_CTX_SCHED_BLOCKING_SYNC = cydriver.CUctx_flags_enum.CU_CTX_SCHED_BLOCKING_SYNC{{endif}}
-    {{if 'CU_CTX_BLOCKING_SYNC' in found_values}}
-
-    #: Set blocking synchronization as default scheduling [Deprecated]
-    CU_CTX_BLOCKING_SYNC = cydriver.CUctx_flags_enum.CU_CTX_BLOCKING_SYNC{{endif}}
-    {{if 'CU_CTX_SCHED_MASK' in found_values}}
-    CU_CTX_SCHED_MASK = cydriver.CUctx_flags_enum.CU_CTX_SCHED_MASK{{endif}}
-    {{if 'CU_CTX_MAP_HOST' in found_values}}
-
-    #: [Deprecated]
-    CU_CTX_MAP_HOST = cydriver.CUctx_flags_enum.CU_CTX_MAP_HOST{{endif}}
-    {{if 'CU_CTX_LMEM_RESIZE_TO_MAX' in found_values}}
-
-    #: Keep local memory allocation after launch
-    CU_CTX_LMEM_RESIZE_TO_MAX = cydriver.CUctx_flags_enum.CU_CTX_LMEM_RESIZE_TO_MAX{{endif}}
-    {{if 'CU_CTX_COREDUMP_ENABLE' in found_values}}
-
-    #: Trigger coredumps from exceptions in this context
-    CU_CTX_COREDUMP_ENABLE = cydriver.CUctx_flags_enum.CU_CTX_COREDUMP_ENABLE{{endif}}
-    {{if 'CU_CTX_USER_COREDUMP_ENABLE' in found_values}}
-
-    #: Enable user pipe to trigger coredumps in this context
-    CU_CTX_USER_COREDUMP_ENABLE = cydriver.CUctx_flags_enum.CU_CTX_USER_COREDUMP_ENABLE{{endif}}
-    {{if 'CU_CTX_SYNC_MEMOPS' in found_values}}
-
-    #: Ensure synchronous memory operations on this context will
-    #: synchronize
-    CU_CTX_SYNC_MEMOPS = cydriver.CUctx_flags_enum.CU_CTX_SYNC_MEMOPS{{endif}}
-    {{if 'CU_CTX_FLAGS_MASK' in found_values}}
-    CU_CTX_FLAGS_MASK = cydriver.CUctx_flags_enum.CU_CTX_FLAGS_MASK{{endif}}
-{{endif}}
-{{if 'CUevent_sched_flags_enum' in found_types}}
-
-class CUevent_sched_flags(IntEnum):
-    """
-    Event sched flags
-    """
-    {{if 'CU_EVENT_SCHED_AUTO' in found_values}}
-
-    #: Automatic scheduling
-    CU_EVENT_SCHED_AUTO = cydriver.CUevent_sched_flags_enum.CU_EVENT_SCHED_AUTO{{endif}}
-    {{if 'CU_EVENT_SCHED_SPIN' in found_values}}
-
-    #: Set spin as default scheduling
-    CU_EVENT_SCHED_SPIN = cydriver.CUevent_sched_flags_enum.CU_EVENT_SCHED_SPIN{{endif}}
-    {{if 'CU_EVENT_SCHED_YIELD' in found_values}}
-
-    #: Set yield as default scheduling
-    CU_EVENT_SCHED_YIELD = cydriver.CUevent_sched_flags_enum.CU_EVENT_SCHED_YIELD{{endif}}
-    {{if 'CU_EVENT_SCHED_BLOCKING_SYNC' in found_values}}
-
-    #: Set blocking synchronization as default scheduling
-    CU_EVENT_SCHED_BLOCKING_SYNC = cydriver.CUevent_sched_flags_enum.CU_EVENT_SCHED_BLOCKING_SYNC{{endif}}
-{{endif}}
-{{if 'cl_event_flags_enum' in found_types}}
-
-class cl_event_flags(IntEnum):
-    """
-    NVCL event scheduling flags
-    """
-    {{if 'NVCL_EVENT_SCHED_AUTO' in found_values}}
-
-    #: Automatic scheduling
-    NVCL_EVENT_SCHED_AUTO = cydriver.cl_event_flags_enum.NVCL_EVENT_SCHED_AUTO{{endif}}
-    {{if 'NVCL_EVENT_SCHED_SPIN' in found_values}}
-
-    #: Set spin as default scheduling
-    NVCL_EVENT_SCHED_SPIN = cydriver.cl_event_flags_enum.NVCL_EVENT_SCHED_SPIN{{endif}}
-    {{if 'NVCL_EVENT_SCHED_YIELD' in found_values}}
-
-    #: Set yield as default scheduling
-    NVCL_EVENT_SCHED_YIELD = cydriver.cl_event_flags_enum.NVCL_EVENT_SCHED_YIELD{{endif}}
-    {{if 'NVCL_EVENT_SCHED_BLOCKING_SYNC' in found_values}}
-
-    #: Set blocking synchronization as default scheduling
-    NVCL_EVENT_SCHED_BLOCKING_SYNC = cydriver.cl_event_flags_enum.NVCL_EVENT_SCHED_BLOCKING_SYNC{{endif}}
-{{endif}}
-{{if 'cl_context_flags_enum' in found_types}}
-
-class cl_context_flags(IntEnum):
-    """
-    NVCL context scheduling flags
-    """
-    {{if 'NVCL_CTX_SCHED_AUTO' in found_values}}
-
-    #: Automatic scheduling
-    NVCL_CTX_SCHED_AUTO = cydriver.cl_context_flags_enum.NVCL_CTX_SCHED_AUTO{{endif}}
-    {{if 'NVCL_CTX_SCHED_SPIN' in found_values}}
-
-    #: Set spin as default scheduling
-    NVCL_CTX_SCHED_SPIN = cydriver.cl_context_flags_enum.NVCL_CTX_SCHED_SPIN{{endif}}
-    {{if 'NVCL_CTX_SCHED_YIELD' in found_values}}
-
-    #: Set yield as default scheduling
-    NVCL_CTX_SCHED_YIELD = cydriver.cl_context_flags_enum.NVCL_CTX_SCHED_YIELD{{endif}}
-    {{if 'NVCL_CTX_SCHED_BLOCKING_SYNC' in found_values}}
-
-    #: Set blocking synchronization as default scheduling
-    NVCL_CTX_SCHED_BLOCKING_SYNC = cydriver.cl_context_flags_enum.NVCL_CTX_SCHED_BLOCKING_SYNC{{endif}}
-{{endif}}
-{{if 'CUstream_flags_enum' in found_types}}
-
-class CUstream_flags(IntEnum):
-    """
-    Stream creation flags
-    """
-    {{if 'CU_STREAM_DEFAULT' in found_values}}
-
-    #: Default stream flag
-    CU_STREAM_DEFAULT = cydriver.CUstream_flags_enum.CU_STREAM_DEFAULT{{endif}}
-    {{if 'CU_STREAM_NON_BLOCKING' in found_values}}
-
-    #: Stream does not synchronize with stream 0 (the NULL stream)
-    CU_STREAM_NON_BLOCKING = cydriver.CUstream_flags_enum.CU_STREAM_NON_BLOCKING{{endif}}
-{{endif}}
-{{if 'CUevent_flags_enum' in found_types}}
-
-class CUevent_flags(IntEnum):
-    """
-    Event creation flags
-    """
-    {{if 'CU_EVENT_DEFAULT' in found_values}}
-
-    #: Default event flag
-    CU_EVENT_DEFAULT = cydriver.CUevent_flags_enum.CU_EVENT_DEFAULT{{endif}}
-    {{if 'CU_EVENT_BLOCKING_SYNC' in found_values}}
-
-    #: Event uses blocking synchronization
-    CU_EVENT_BLOCKING_SYNC = cydriver.CUevent_flags_enum.CU_EVENT_BLOCKING_SYNC{{endif}}
-    {{if 'CU_EVENT_DISABLE_TIMING' in found_values}}
-
-    #: Event will not record timing data
-    CU_EVENT_DISABLE_TIMING = cydriver.CUevent_flags_enum.CU_EVENT_DISABLE_TIMING{{endif}}
-    {{if 'CU_EVENT_INTERPROCESS' in found_values}}
-
-    #: Event is suitable for interprocess use. CU_EVENT_DISABLE_TIMING must
-    #: be set
-    CU_EVENT_INTERPROCESS = cydriver.CUevent_flags_enum.CU_EVENT_INTERPROCESS{{endif}}
-{{endif}}
-{{if 'CUevent_record_flags_enum' in found_types}}
-
-class CUevent_record_flags(IntEnum):
-    """
-    Event record flags
-    """
-    {{if 'CU_EVENT_RECORD_DEFAULT' in found_values}}
-
-    #: Default event record flag
-    CU_EVENT_RECORD_DEFAULT = cydriver.CUevent_record_flags_enum.CU_EVENT_RECORD_DEFAULT{{endif}}
-    {{if 'CU_EVENT_RECORD_EXTERNAL' in found_values}}
-
-    #: When using stream capture, create an event record node instead of
-    #: the default behavior. This flag is invalid when used outside of
-    #: capture.
-    CU_EVENT_RECORD_EXTERNAL = cydriver.CUevent_record_flags_enum.CU_EVENT_RECORD_EXTERNAL{{endif}}
-{{endif}}
-{{if 'CUevent_wait_flags_enum' in found_types}}
-
-class CUevent_wait_flags(IntEnum):
-    """
-    Event wait flags
-    """
-    {{if 'CU_EVENT_WAIT_DEFAULT' in found_values}}
-
-    #: Default event wait flag
-    CU_EVENT_WAIT_DEFAULT = cydriver.CUevent_wait_flags_enum.CU_EVENT_WAIT_DEFAULT{{endif}}
-    {{if 'CU_EVENT_WAIT_EXTERNAL' in found_values}}
-
-    #: When using stream capture, create an event wait node instead of the
-    #: default behavior. This flag is invalid when used outside of capture.
-    CU_EVENT_WAIT_EXTERNAL = cydriver.CUevent_wait_flags_enum.CU_EVENT_WAIT_EXTERNAL{{endif}}
-{{endif}}
-{{if 'CUstreamWaitValue_flags_enum' in found_types}}
-
-class CUstreamWaitValue_flags(IntEnum):
-    """
-    Flags for :py:obj:`~.cuStreamWaitValue32` and
-    :py:obj:`~.cuStreamWaitValue64`
-    """
-    {{if 'CU_STREAM_WAIT_VALUE_GEQ' in found_values}}
-
-    #: Wait until (int32_t)(*addr - value) >= 0 (or int64_t for 64 bit
-    #: values). Note this is a cyclic comparison which ignores wraparound.
-    #: (Default behavior.)
-    CU_STREAM_WAIT_VALUE_GEQ = cydriver.CUstreamWaitValue_flags_enum.CU_STREAM_WAIT_VALUE_GEQ{{endif}}
-    {{if 'CU_STREAM_WAIT_VALUE_EQ' in found_values}}
-
-    #: Wait until *addr == value.
-    CU_STREAM_WAIT_VALUE_EQ = cydriver.CUstreamWaitValue_flags_enum.CU_STREAM_WAIT_VALUE_EQ{{endif}}
-    {{if 'CU_STREAM_WAIT_VALUE_AND' in found_values}}
-
-    #: Wait until (*addr & value) != 0.
-    CU_STREAM_WAIT_VALUE_AND = cydriver.CUstreamWaitValue_flags_enum.CU_STREAM_WAIT_VALUE_AND{{endif}}
-    {{if 'CU_STREAM_WAIT_VALUE_NOR' in found_values}}
-
-    #: Wait until ~(*addr | value) != 0. Support for this operation can be
-    #: queried with :py:obj:`~.cuDeviceGetAttribute()` and
-    #: :py:obj:`~.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR`.
-    CU_STREAM_WAIT_VALUE_NOR = cydriver.CUstreamWaitValue_flags_enum.CU_STREAM_WAIT_VALUE_NOR{{endif}}
-    {{if 'CU_STREAM_WAIT_VALUE_FLUSH' in found_values}}
-
-    #: Follow the wait operation with a flush of outstanding remote writes.
-    #: This means that, if a remote write operation is guaranteed to have
-    #: reached the device before the wait can be satisfied, that write is
-    #: guaranteed to be visible to downstream device work. The device is
-    #: permitted to reorder remote writes internally. For example, this
-    #: flag would be required if two remote writes arrive in a defined
-    #: order, the wait is satisfied by the second write, and downstream
-    #: work needs to observe the first write. Support for this operation is
-    #: restricted to selected platforms and can be queried with
-    #: :py:obj:`~.CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES`.
-    CU_STREAM_WAIT_VALUE_FLUSH = cydriver.CUstreamWaitValue_flags_enum.CU_STREAM_WAIT_VALUE_FLUSH{{endif}}
-{{endif}}
-{{if 'CUstreamWriteValue_flags_enum' in found_types}}
-
-class CUstreamWriteValue_flags(IntEnum):
-    """
-    Flags for :py:obj:`~.cuStreamWriteValue32`
-    """
-    {{if 'CU_STREAM_WRITE_VALUE_DEFAULT' in found_values}}
-
-    #: Default behavior
-    CU_STREAM_WRITE_VALUE_DEFAULT = cydriver.CUstreamWriteValue_flags_enum.CU_STREAM_WRITE_VALUE_DEFAULT{{endif}}
-    {{if 'CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER' in found_values}}
-
-    #: Permits the write to be reordered with writes which were issued
-    #: before it, as a performance optimization. Normally,
-    #: :py:obj:`~.cuStreamWriteValue32` will provide a memory fence before
-    #: the write, which has similar semantics to __threadfence_system() but
-    #: is scoped to the stream rather than a CUDA thread. This flag is not
-    #: supported in the v2 API.
-    CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = cydriver.CUstreamWriteValue_flags_enum.CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER{{endif}}
-{{endif}}
-{{if 'CUstreamBatchMemOpType_enum' in found_types}}
-
-class CUstreamBatchMemOpType(IntEnum):
-    """
-    Operations for :py:obj:`~.cuStreamBatchMemOp`
-    """
-    {{if 'CU_STREAM_MEM_OP_WAIT_VALUE_32' in found_values}}
-
-    #: Represents a :py:obj:`~.cuStreamWaitValue32` operation
-    CU_STREAM_MEM_OP_WAIT_VALUE_32 = cydriver.CUstreamBatchMemOpType_enum.CU_STREAM_MEM_OP_WAIT_VALUE_32{{endif}}
-    {{if 'CU_STREAM_MEM_OP_WRITE_VALUE_32' in found_values}}
-
-    #: Represents a :py:obj:`~.cuStreamWriteValue32` operation
-    CU_STREAM_MEM_OP_WRITE_VALUE_32 = cydriver.CUstreamBatchMemOpType_enum.CU_STREAM_MEM_OP_WRITE_VALUE_32{{endif}}
-    {{if 'CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES' in found_values}}
-
-    #: This has the same effect as :py:obj:`~.CU_STREAM_WAIT_VALUE_FLUSH`,
-    #: but as a standalone operation.
-    CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = cydriver.CUstreamBatchMemOpType_enum.CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES{{endif}}
-    {{if 'CU_STREAM_MEM_OP_WAIT_VALUE_64' in found_values}}
-
-    #: Represents a :py:obj:`~.cuStreamWaitValue64` operation
-    CU_STREAM_MEM_OP_WAIT_VALUE_64 = cydriver.CUstreamBatchMemOpType_enum.CU_STREAM_MEM_OP_WAIT_VALUE_64{{endif}}
-    {{if 'CU_STREAM_MEM_OP_WRITE_VALUE_64' in found_values}}
-
-    #: Represents a :py:obj:`~.cuStreamWriteValue64` operation
-    CU_STREAM_MEM_OP_WRITE_VALUE_64 = cydriver.CUstreamBatchMemOpType_enum.CU_STREAM_MEM_OP_WRITE_VALUE_64{{endif}}
-    {{if 'CU_STREAM_MEM_OP_BARRIER' in found_values}}
-
-    #: Insert a memory barrier of the specified type
-    CU_STREAM_MEM_OP_BARRIER = cydriver.CUstreamBatchMemOpType_enum.CU_STREAM_MEM_OP_BARRIER{{endif}}
-{{endif}}
-{{if 'CUstreamMemoryBarrier_flags_enum' in found_types}}
-
-class CUstreamMemoryBarrier_flags(IntEnum):
-    """
-    Flags for :py:obj:`~.cuStreamMemoryBarrier`
-    """
-    {{if 'CU_STREAM_MEMORY_BARRIER_TYPE_SYS' in found_values}}
-
-    #: System-wide memory barrier.
-    CU_STREAM_MEMORY_BARRIER_TYPE_SYS = cydriver.CUstreamMemoryBarrier_flags_enum.CU_STREAM_MEMORY_BARRIER_TYPE_SYS{{endif}}
-    {{if 'CU_STREAM_MEMORY_BARRIER_TYPE_GPU' in found_values}}
-
-    #: Limit memory barrier scope to the GPU.
-    CU_STREAM_MEMORY_BARRIER_TYPE_GPU = cydriver.CUstreamMemoryBarrier_flags_enum.CU_STREAM_MEMORY_BARRIER_TYPE_GPU{{endif}}
-{{endif}}
-{{if 'CUoccupancy_flags_enum' in found_types}}
-
-class CUoccupancy_flags(IntEnum):
-    """
-    Occupancy calculator flag
-    """
-    {{if 'CU_OCCUPANCY_DEFAULT' in found_values}}
-
-    #: Default behavior
-    CU_OCCUPANCY_DEFAULT = cydriver.CUoccupancy_flags_enum.CU_OCCUPANCY_DEFAULT{{endif}}
-    {{if 'CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE' in found_values}}
-
-    #: Assume global caching is enabled and cannot be automatically turned
-    #: off
-    CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = cydriver.CUoccupancy_flags_enum.CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE{{endif}}
-{{endif}}
-{{if 'CUstreamUpdateCaptureDependencies_flags_enum' in found_types}}
-
-class CUstreamUpdateCaptureDependencies_flags(IntEnum):
-    """
-    Flags for :py:obj:`~.cuStreamUpdateCaptureDependencies`
-    """
-    {{if 'CU_STREAM_ADD_CAPTURE_DEPENDENCIES' in found_values}}
-
-    #: Add new nodes to the dependency set
-    CU_STREAM_ADD_CAPTURE_DEPENDENCIES = cydriver.CUstreamUpdateCaptureDependencies_flags_enum.CU_STREAM_ADD_CAPTURE_DEPENDENCIES{{endif}}
-    {{if 'CU_STREAM_SET_CAPTURE_DEPENDENCIES' in found_values}}
-
-    #: Replace the dependency set with the new nodes
-    CU_STREAM_SET_CAPTURE_DEPENDENCIES = cydriver.CUstreamUpdateCaptureDependencies_flags_enum.CU_STREAM_SET_CAPTURE_DEPENDENCIES{{endif}}
-{{endif}}
-{{if 'CUasyncNotificationType_enum' in found_types}}
-
-class CUasyncNotificationType(IntEnum):
-    """
-    Types of async notification that can be sent
-    """
-    {{if 'CU_ASYNC_NOTIFICATION_TYPE_OVER_BUDGET' in found_values}}
-    CU_ASYNC_NOTIFICATION_TYPE_OVER_BUDGET = cydriver.CUasyncNotificationType_enum.CU_ASYNC_NOTIFICATION_TYPE_OVER_BUDGET{{endif}}
-{{endif}}
-{{if 'CUarray_format_enum' in found_types}}
-
-class CUarray_format(IntEnum):
-    """
-    Array formats
-    """
-    {{if 'CU_AD_FORMAT_UNSIGNED_INT8' in found_values}}
-
-    #: Unsigned 8-bit integers
-    CU_AD_FORMAT_UNSIGNED_INT8 = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT8{{endif}}
-    {{if 'CU_AD_FORMAT_UNSIGNED_INT16' in found_values}}
-
-    #: Unsigned 16-bit integers
-    CU_AD_FORMAT_UNSIGNED_INT16 = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT16{{endif}}
-    {{if 'CU_AD_FORMAT_UNSIGNED_INT32' in found_values}}
-
-    #: Unsigned 32-bit integers
-    CU_AD_FORMAT_UNSIGNED_INT32 = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNSIGNED_INT32{{endif}}
-    {{if 'CU_AD_FORMAT_SIGNED_INT8' in found_values}}
-
-    #: Signed 8-bit integers
-    CU_AD_FORMAT_SIGNED_INT8 = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT8{{endif}}
-    {{if 'CU_AD_FORMAT_SIGNED_INT16' in found_values}}
-
-    #: Signed 16-bit integers
-    CU_AD_FORMAT_SIGNED_INT16 = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT16{{endif}}
-    {{if 'CU_AD_FORMAT_SIGNED_INT32' in found_values}}
-
-    #: Signed 32-bit integers
-    CU_AD_FORMAT_SIGNED_INT32 = cydriver.CUarray_format_enum.CU_AD_FORMAT_SIGNED_INT32{{endif}}
-    {{if 'CU_AD_FORMAT_HALF' in found_values}}
-
-    #: 16-bit floating point
-    CU_AD_FORMAT_HALF = cydriver.CUarray_format_enum.CU_AD_FORMAT_HALF{{endif}}
-    {{if 'CU_AD_FORMAT_FLOAT' in found_values}}
-
-    #: 32-bit floating point
-    CU_AD_FORMAT_FLOAT = cydriver.CUarray_format_enum.CU_AD_FORMAT_FLOAT{{endif}}
-    {{if 'CU_AD_FORMAT_BC1_UNORM' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC1 compression)
-    #: format
-    CU_AD_FORMAT_BC1_UNORM = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC1_UNORM{{endif}}
-    {{if 'CU_AD_FORMAT_BC1_UNORM_SRGB' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC1 compression)
-    #: format with sRGB encoding
-    CU_AD_FORMAT_BC1_UNORM_SRGB = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC1_UNORM_SRGB{{endif}}
-    {{if 'CU_AD_FORMAT_BC2_UNORM' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC2 compression)
-    #: format
-    CU_AD_FORMAT_BC2_UNORM = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC2_UNORM{{endif}}
-    {{if 'CU_AD_FORMAT_BC2_UNORM_SRGB' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC2 compression)
-    #: format with sRGB encoding
-    CU_AD_FORMAT_BC2_UNORM_SRGB = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC2_UNORM_SRGB{{endif}}
-    {{if 'CU_AD_FORMAT_BC3_UNORM' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC3 compression)
-    #: format
-    CU_AD_FORMAT_BC3_UNORM = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC3_UNORM{{endif}}
-    {{if 'CU_AD_FORMAT_BC3_UNORM_SRGB' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC3 compression)
-    #: format with sRGB encoding
-    CU_AD_FORMAT_BC3_UNORM_SRGB = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC3_UNORM_SRGB{{endif}}
-    {{if 'CU_AD_FORMAT_BC4_UNORM' in found_values}}
-
-    #: 1 channel unsigned normalized block-compressed (BC4 compression)
-    #: format
-    CU_AD_FORMAT_BC4_UNORM = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC4_UNORM{{endif}}
-    {{if 'CU_AD_FORMAT_BC4_SNORM' in found_values}}
-
-    #: 1 channel signed normalized block-compressed (BC4 compression)
-    #: format
-    CU_AD_FORMAT_BC4_SNORM = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC4_SNORM{{endif}}
-    {{if 'CU_AD_FORMAT_BC5_UNORM' in found_values}}
-
-    #: 2 channel unsigned normalized block-compressed (BC5 compression)
-    #: format
-    CU_AD_FORMAT_BC5_UNORM = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC5_UNORM{{endif}}
-    {{if 'CU_AD_FORMAT_BC5_SNORM' in found_values}}
-
-    #: 2 channel signed normalized block-compressed (BC5 compression)
-    #: format
-    CU_AD_FORMAT_BC5_SNORM = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC5_SNORM{{endif}}
-    {{if 'CU_AD_FORMAT_BC6H_UF16' in found_values}}
-
-    #: 3 channel unsigned half-float block-compressed (BC6H compression)
-    #: format
-    CU_AD_FORMAT_BC6H_UF16 = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC6H_UF16{{endif}}
-    {{if 'CU_AD_FORMAT_BC6H_SF16' in found_values}}
-
-    #: 3 channel signed half-float block-compressed (BC6H compression)
-    #: format
-    CU_AD_FORMAT_BC6H_SF16 = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC6H_SF16{{endif}}
-    {{if 'CU_AD_FORMAT_BC7_UNORM' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC7 compression)
-    #: format
-    CU_AD_FORMAT_BC7_UNORM = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC7_UNORM{{endif}}
-    {{if 'CU_AD_FORMAT_BC7_UNORM_SRGB' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC7 compression)
-    #: format with sRGB encoding
-    CU_AD_FORMAT_BC7_UNORM_SRGB = cydriver.CUarray_format_enum.CU_AD_FORMAT_BC7_UNORM_SRGB{{endif}}
-    {{if 'CU_AD_FORMAT_P010' in found_values}}
-
-    #: 10-bit YUV planar format, with 4:2:0 sampling
-    CU_AD_FORMAT_P010 = cydriver.CUarray_format_enum.CU_AD_FORMAT_P010{{endif}}
-    {{if 'CU_AD_FORMAT_P016' in found_values}}
-
-    #: 16-bit YUV planar format, with 4:2:0 sampling
-    CU_AD_FORMAT_P016 = cydriver.CUarray_format_enum.CU_AD_FORMAT_P016{{endif}}
-    {{if 'CU_AD_FORMAT_NV16' in found_values}}
-
-    #: 8-bit YUV planar format, with 4:2:2 sampling
-    CU_AD_FORMAT_NV16 = cydriver.CUarray_format_enum.CU_AD_FORMAT_NV16{{endif}}
-    {{if 'CU_AD_FORMAT_P210' in found_values}}
-
-    #: 10-bit YUV planar format, with 4:2:2 sampling
-    CU_AD_FORMAT_P210 = cydriver.CUarray_format_enum.CU_AD_FORMAT_P210{{endif}}
-    {{if 'CU_AD_FORMAT_P216' in found_values}}
-
-    #: 16-bit YUV planar format, with 4:2:2 sampling
-    CU_AD_FORMAT_P216 = cydriver.CUarray_format_enum.CU_AD_FORMAT_P216{{endif}}
-    {{if 'CU_AD_FORMAT_YUY2' in found_values}}
-
-    #: 2 channel, 8-bit YUV packed planar format, with 4:2:2 sampling
-    CU_AD_FORMAT_YUY2 = cydriver.CUarray_format_enum.CU_AD_FORMAT_YUY2{{endif}}
-    {{if 'CU_AD_FORMAT_Y210' in found_values}}
-
-    #: 2 channel, 10-bit YUV packed planar format, with 4:2:2 sampling
-    CU_AD_FORMAT_Y210 = cydriver.CUarray_format_enum.CU_AD_FORMAT_Y210{{endif}}
-    {{if 'CU_AD_FORMAT_Y216' in found_values}}
-
-    #: 2 channel, 16-bit YUV packed planar format, with 4:2:2 sampling
-    CU_AD_FORMAT_Y216 = cydriver.CUarray_format_enum.CU_AD_FORMAT_Y216{{endif}}
-    {{if 'CU_AD_FORMAT_AYUV' in found_values}}
-
-    #: 4 channel, 8-bit YUV packed planar format, with 4:4:4 sampling
-    CU_AD_FORMAT_AYUV = cydriver.CUarray_format_enum.CU_AD_FORMAT_AYUV{{endif}}
-    {{if 'CU_AD_FORMAT_Y410' in found_values}}
-
-    #: 10-bit YUV packed planar format, with 4:4:4 sampling
-    CU_AD_FORMAT_Y410 = cydriver.CUarray_format_enum.CU_AD_FORMAT_Y410{{endif}}
-    {{if 'CU_AD_FORMAT_NV12' in found_values}}
-
-    #: 8-bit YUV planar format, with 4:2:0 sampling
-    CU_AD_FORMAT_NV12 = cydriver.CUarray_format_enum.CU_AD_FORMAT_NV12{{endif}}
-    {{if 'CU_AD_FORMAT_Y416' in found_values}}
-
-    #: 4 channel, 12-bit YUV packed planar format, with 4:4:4 sampling
-    CU_AD_FORMAT_Y416 = cydriver.CUarray_format_enum.CU_AD_FORMAT_Y416{{endif}}
-    {{if 'CU_AD_FORMAT_Y444_PLANAR8' in found_values}}
-
-    #: 3 channel 8-bit YUV planar format, with 4:4:4 sampling
-    CU_AD_FORMAT_Y444_PLANAR8 = cydriver.CUarray_format_enum.CU_AD_FORMAT_Y444_PLANAR8{{endif}}
-    {{if 'CU_AD_FORMAT_Y444_PLANAR10' in found_values}}
-
-    #: 3 channel 10-bit YUV planar format, with 4:4:4 sampling
-    CU_AD_FORMAT_Y444_PLANAR10 = cydriver.CUarray_format_enum.CU_AD_FORMAT_Y444_PLANAR10{{endif}}
-    {{if 'CU_AD_FORMAT_UNORM_INT8X1' in found_values}}
-
-    #: 1 channel unsigned 8-bit normalized integer
-    CU_AD_FORMAT_UNORM_INT8X1 = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNORM_INT8X1{{endif}}
-    {{if 'CU_AD_FORMAT_UNORM_INT8X2' in found_values}}
-
-    #: 2 channel unsigned 8-bit normalized integer
-    CU_AD_FORMAT_UNORM_INT8X2 = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNORM_INT8X2{{endif}}
-    {{if 'CU_AD_FORMAT_UNORM_INT8X4' in found_values}}
-
-    #: 4 channel unsigned 8-bit normalized integer
-    CU_AD_FORMAT_UNORM_INT8X4 = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNORM_INT8X4{{endif}}
-    {{if 'CU_AD_FORMAT_UNORM_INT16X1' in found_values}}
-
-    #: 1 channel unsigned 16-bit normalized integer
-    CU_AD_FORMAT_UNORM_INT16X1 = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNORM_INT16X1{{endif}}
-    {{if 'CU_AD_FORMAT_UNORM_INT16X2' in found_values}}
-
-    #: 2 channel unsigned 16-bit normalized integer
-    CU_AD_FORMAT_UNORM_INT16X2 = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNORM_INT16X2{{endif}}
-    {{if 'CU_AD_FORMAT_UNORM_INT16X4' in found_values}}
-
-    #: 4 channel unsigned 16-bit normalized integer
-    CU_AD_FORMAT_UNORM_INT16X4 = cydriver.CUarray_format_enum.CU_AD_FORMAT_UNORM_INT16X4{{endif}}
-    {{if 'CU_AD_FORMAT_SNORM_INT8X1' in found_values}}
-
-    #: 1 channel signed 8-bit normalized integer
-    CU_AD_FORMAT_SNORM_INT8X1 = cydriver.CUarray_format_enum.CU_AD_FORMAT_SNORM_INT8X1{{endif}}
-    {{if 'CU_AD_FORMAT_SNORM_INT8X2' in found_values}}
-
-    #: 2 channel signed 8-bit normalized integer
-    CU_AD_FORMAT_SNORM_INT8X2 = cydriver.CUarray_format_enum.CU_AD_FORMAT_SNORM_INT8X2{{endif}}
-    {{if 'CU_AD_FORMAT_SNORM_INT8X4' in found_values}}
-
-    #: 4 channel signed 8-bit normalized integer
-    CU_AD_FORMAT_SNORM_INT8X4 = cydriver.CUarray_format_enum.CU_AD_FORMAT_SNORM_INT8X4{{endif}}
-    {{if 'CU_AD_FORMAT_SNORM_INT16X1' in found_values}}
-
-    #: 1 channel signed 16-bit normalized integer
-    CU_AD_FORMAT_SNORM_INT16X1 = cydriver.CUarray_format_enum.CU_AD_FORMAT_SNORM_INT16X1{{endif}}
-    {{if 'CU_AD_FORMAT_SNORM_INT16X2' in found_values}}
-
-    #: 2 channel signed 16-bit normalized integer
-    CU_AD_FORMAT_SNORM_INT16X2 = cydriver.CUarray_format_enum.CU_AD_FORMAT_SNORM_INT16X2{{endif}}
-    {{if 'CU_AD_FORMAT_SNORM_INT16X4' in found_values}}
-
-    #: 4 channel signed 16-bit normalized integer
-    CU_AD_FORMAT_SNORM_INT16X4 = cydriver.CUarray_format_enum.CU_AD_FORMAT_SNORM_INT16X4{{endif}}
-    {{if 'CU_AD_FORMAT_MAX' in found_values}}
-    CU_AD_FORMAT_MAX = cydriver.CUarray_format_enum.CU_AD_FORMAT_MAX{{endif}}
-{{endif}}
-{{if 'CUaddress_mode_enum' in found_types}}
-
-class CUaddress_mode(IntEnum):
-    """
-    Texture reference addressing modes
-    """
-    {{if 'CU_TR_ADDRESS_MODE_WRAP' in found_values}}
-
-    #: Wrapping address mode
-    CU_TR_ADDRESS_MODE_WRAP = cydriver.CUaddress_mode_enum.CU_TR_ADDRESS_MODE_WRAP{{endif}}
-    {{if 'CU_TR_ADDRESS_MODE_CLAMP' in found_values}}
-
-    #: Clamp to edge address mode
-    CU_TR_ADDRESS_MODE_CLAMP = cydriver.CUaddress_mode_enum.CU_TR_ADDRESS_MODE_CLAMP{{endif}}
-    {{if 'CU_TR_ADDRESS_MODE_MIRROR' in found_values}}
-
-    #: Mirror address mode
-    CU_TR_ADDRESS_MODE_MIRROR = cydriver.CUaddress_mode_enum.CU_TR_ADDRESS_MODE_MIRROR{{endif}}
-    {{if 'CU_TR_ADDRESS_MODE_BORDER' in found_values}}
-
-    #: Border address mode
-    CU_TR_ADDRESS_MODE_BORDER = cydriver.CUaddress_mode_enum.CU_TR_ADDRESS_MODE_BORDER{{endif}}
-{{endif}}
-{{if 'CUfilter_mode_enum' in found_types}}
-
-class CUfilter_mode(IntEnum):
-    """
-    Texture reference filtering modes
-    """
-    {{if 'CU_TR_FILTER_MODE_POINT' in found_values}}
-
-    #: Point filter mode
-    CU_TR_FILTER_MODE_POINT = cydriver.CUfilter_mode_enum.CU_TR_FILTER_MODE_POINT{{endif}}
-    {{if 'CU_TR_FILTER_MODE_LINEAR' in found_values}}
-
-    #: Linear filter mode
-    CU_TR_FILTER_MODE_LINEAR = cydriver.CUfilter_mode_enum.CU_TR_FILTER_MODE_LINEAR{{endif}}
-{{endif}}
-{{if 'CUdevice_attribute_enum' in found_types}}
-
-class CUdevice_attribute(IntEnum):
-    """
-    Device properties
-    """
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK' in found_values}}
-
-    #: Maximum number of threads per block
-    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X' in found_values}}
-
-    #: Maximum block dimension X
-    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y' in found_values}}
-
-    #: Maximum block dimension Y
-    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z' in found_values}}
-
-    #: Maximum block dimension Z
-    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X' in found_values}}
-
-    #: Maximum grid dimension X
-    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y' in found_values}}
-
-    #: Maximum grid dimension Y
-    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z' in found_values}}
-
-    #: Maximum grid dimension Z
-    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK' in found_values}}
-
-    #: Maximum shared memory available per block in bytes
-    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK' in found_values}}
-
-    #: Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK
-    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY' in found_values}}
-
-    #: Memory available on device for constant variables in a CUDA C kernel
-    #: in bytes
-    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_WARP_SIZE' in found_values}}
-
-    #: Warp size in threads
-    CU_DEVICE_ATTRIBUTE_WARP_SIZE = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_WARP_SIZE{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_PITCH' in found_values}}
-
-    #: Maximum pitch in bytes allowed by memory copies
-    CU_DEVICE_ATTRIBUTE_MAX_PITCH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_PITCH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK' in found_values}}
-
-    #: Maximum number of 32-bit registers available per block
-    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK' in found_values}}
-
-    #: Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK
-    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CLOCK_RATE' in found_values}}
-
-    #: Typical clock frequency in kilohertz
-    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CLOCK_RATE{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT' in found_values}}
-
-    #: Alignment requirement for textures
-    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_GPU_OVERLAP' in found_values}}
-
-    #: Device can possibly copy memory and execute a kernel concurrently.
-    #: Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT.
-    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT' in found_values}}
-
-    #: Number of multiprocessors on device
-    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT' in found_values}}
-
-    #: Specifies whether there is a run time limit on kernels
-    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_INTEGRATED' in found_values}}
-
-    #: Device is integrated with host memory
-    CU_DEVICE_ATTRIBUTE_INTEGRATED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_INTEGRATED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY' in found_values}}
-
-    #: Device can map host memory into CUDA address space
-    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_COMPUTE_MODE' in found_values}}
-
-    #: Compute mode (See :py:obj:`~.CUcomputemode` for details)
-    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH' in found_values}}
-
-    #: Maximum 1D texture width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH' in found_values}}
-
-    #: Maximum 2D texture width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT' in found_values}}
-
-    #: Maximum 2D texture height
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH' in found_values}}
-
-    #: Maximum 3D texture width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT' in found_values}}
-
-    #: Maximum 3D texture height
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH' in found_values}}
-
-    #: Maximum 3D texture depth
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH' in found_values}}
-
-    #: Maximum 2D layered texture width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH' in found_values}}
-
-    #: Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT' in found_values}}
-
-    #: Maximum 2D layered texture height
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT' in found_values}}
-
-    #: Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS' in found_values}}
-
-    #: Maximum layers in a 2D layered texture
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES' in found_values}}
-
-    #: Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT' in found_values}}
-
-    #: Alignment requirement for surfaces
-    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS' in found_values}}
-
-    #: Device can possibly execute multiple kernels concurrently
-    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_ECC_ENABLED' in found_values}}
-
-    #: Device has ECC support enabled
-    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_ECC_ENABLED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_PCI_BUS_ID' in found_values}}
-
-    #: PCI bus ID of the device
-    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID' in found_values}}
-
-    #: PCI device ID of the device
-    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_TCC_DRIVER' in found_values}}
-
-    #: Device is using TCC driver model
-    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_TCC_DRIVER{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE' in found_values}}
-
-    #: Peak memory clock frequency in kilohertz
-    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH' in found_values}}
-
-    #: Global memory bus width in bits
-    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE' in found_values}}
-
-    #: Size of L2 cache in bytes
-    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR' in found_values}}
-
-    #: Maximum resident threads per multiprocessor
-    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT' in found_values}}
-
-    #: Number of asynchronous engines
-    CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING' in found_values}}
-
-    #: Device shares a unified address space with the host
-    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH' in found_values}}
-
-    #: Maximum 1D layered texture width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS' in found_values}}
-
-    #: Maximum layers in a 1D layered texture
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER' in found_values}}
-
-    #: Deprecated, do not use.
-    CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH' in found_values}}
-
-    #: Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT' in found_values}}
-
-    #: Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE' in found_values}}
-
-    #: Alternate maximum 3D texture width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE' in found_values}}
-
-    #: Alternate maximum 3D texture height
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE' in found_values}}
-
-    #: Alternate maximum 3D texture depth
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID' in found_values}}
-
-    #: PCI domain ID of the device
-    CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT' in found_values}}
-
-    #: Pitch alignment requirement for textures
-    CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH' in found_values}}
-
-    #: Maximum cubemap texture width/height
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH' in found_values}}
-
-    #: Maximum cubemap layered texture width/height
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS' in found_values}}
-
-    #: Maximum layers in a cubemap layered texture
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH' in found_values}}
-
-    #: Maximum 1D surface width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH' in found_values}}
-
-    #: Maximum 2D surface width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT' in found_values}}
-
-    #: Maximum 2D surface height
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH' in found_values}}
-
-    #: Maximum 3D surface width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT' in found_values}}
-
-    #: Maximum 3D surface height
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH' in found_values}}
-
-    #: Maximum 3D surface depth
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH' in found_values}}
-
-    #: Maximum 1D layered surface width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS' in found_values}}
-
-    #: Maximum layers in a 1D layered surface
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH' in found_values}}
-
-    #: Maximum 2D layered surface width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT' in found_values}}
-
-    #: Maximum 2D layered surface height
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS' in found_values}}
-
-    #: Maximum layers in a 2D layered surface
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH' in found_values}}
-
-    #: Maximum cubemap surface width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH' in found_values}}
-
-    #: Maximum cubemap layered surface width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS' in found_values}}
-
-    #: Maximum layers in a cubemap layered surface
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH' in found_values}}
-
-    #: Deprecated, do not use. Use cudaDeviceGetTexture1DLinearMaxWidth()
-    #: or :py:obj:`~.cuDeviceGetTexture1DLinearMaxWidth()` instead.
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH' in found_values}}
-
-    #: Maximum 2D linear texture width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT' in found_values}}
-
-    #: Maximum 2D linear texture height
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH' in found_values}}
-
-    #: Maximum 2D linear texture pitch in bytes
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH' in found_values}}
-
-    #: Maximum mipmapped 2D texture width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT' in found_values}}
-
-    #: Maximum mipmapped 2D texture height
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR' in found_values}}
-
-    #: Major compute capability version number
-    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR' in found_values}}
-
-    #: Minor compute capability version number
-    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH' in found_values}}
-
-    #: Maximum mipmapped 1D texture width
-    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED' in found_values}}
-
-    #: Device supports stream priorities
-    CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED' in found_values}}
-
-    #: Device supports caching globals in L1
-    CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED' in found_values}}
-
-    #: Device supports caching locals in L1
-    CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR' in found_values}}
-
-    #: Maximum shared memory available per multiprocessor in bytes
-    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR' in found_values}}
-
-    #: Maximum number of 32-bit registers available per multiprocessor
-    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY' in found_values}}
-
-    #: Device can allocate managed memory on this system
-    CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD' in found_values}}
-
-    #: Device is on a multi-GPU board
-    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID' in found_values}}
-
-    #: Unique id for a group of devices on the same multi-GPU board
-    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED' in found_values}}
-
-    #: Link between the device and the host supports native atomic
-    #: operations (this is a placeholder attribute, and is not supported on
-    #: any current hardware)
-    CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO' in found_values}}
-
-    #: Ratio of single precision performance (in floating-point operations
-    #: per second) to double precision performance
-    CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS' in found_values}}
-
-    #: Device supports coherently accessing pageable memory without calling
-    #: cudaHostRegister on it
-    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS' in found_values}}
-
-    #: Device can coherently access managed memory concurrently with the
-    #: CPU
-    CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED' in found_values}}
-
-    #: Device supports compute preemption.
-    CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM' in found_values}}
-
-    #: Device can access host registered memory at the same virtual address
-    #: as the CPU
-    CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS_V1' in found_values}}
-
-    #: Deprecated, along with v1 MemOps API, :py:obj:`~.cuStreamBatchMemOp`
-    #: and related APIs are supported.
-    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS_V1 = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS_V1{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V1' in found_values}}
-
-    #: Deprecated, along with v1 MemOps API, 64-bit operations are
-    #: supported in :py:obj:`~.cuStreamBatchMemOp` and related APIs.
-    CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V1 = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V1{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1' in found_values}}
-
-    #: Deprecated, along with v1 MemOps API,
-    #: :py:obj:`~.CU_STREAM_WAIT_VALUE_NOR` is supported.
-    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1 = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH' in found_values}}
-
-    #: Device supports launching cooperative kernels via
-    #: :py:obj:`~.cuLaunchCooperativeKernel`
-    CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH' in found_values}}
-
-    #: Deprecated, :py:obj:`~.cuLaunchCooperativeKernelMultiDevice` is
-    #: deprecated.
-    CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN' in found_values}}
-
-    #: Maximum optin shared memory per block
-    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES' in found_values}}
-
-    #: The :py:obj:`~.CU_STREAM_WAIT_VALUE_FLUSH` flag and the
-    #: :py:obj:`~.CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES` MemOp are supported
-    #: on the device. See :py:obj:`~.Stream Memory Operations` for
-    #: additional details.
-    CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED' in found_values}}
-
-    #: Device supports host memory registration via
-    #: :py:obj:`~.cudaHostRegister`.
-    CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES' in found_values}}
-
-    #: Device accesses pageable memory via the host's page tables.
-    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST' in found_values}}
-
-    #: The host can directly access managed memory on the device without
-    #: migration.
-    CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED' in found_values}}
-
-    #: Deprecated, Use
-    #: CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED
-    CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED' in found_values}}
-
-    #: Device supports virtual memory management APIs like
-    #: :py:obj:`~.cuMemAddressReserve`, :py:obj:`~.cuMemCreate`,
-    #: :py:obj:`~.cuMemMap` and related APIs
-    CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED' in found_values}}
-
-    #: Device supports exporting memory to a posix file descriptor with
-    #: :py:obj:`~.cuMemExportToShareableHandle`, if requested via
-    #: :py:obj:`~.cuMemCreate`
-    CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED' in found_values}}
-
-    #: Device supports exporting memory to a Win32 NT handle with
-    #: :py:obj:`~.cuMemExportToShareableHandle`, if requested via
-    #: :py:obj:`~.cuMemCreate`
-    CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED' in found_values}}
-
-    #: Device supports exporting memory to a Win32 KMT handle with
-    #: :py:obj:`~.cuMemExportToShareableHandle`, if requested via
-    #: :py:obj:`~.cuMemCreate`
-    CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR' in found_values}}
-
-    #: Maximum number of blocks per multiprocessor
-    CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED' in found_values}}
-
-    #: Device supports compression of memory
-    CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE' in found_values}}
-
-    #: Maximum L2 persisting lines capacity setting in bytes.
-    CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE' in found_values}}
-
-    #: Maximum value of :py:obj:`~.CUaccessPolicyWindow.num_bytes`.
-    CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED' in found_values}}
-
-    #: Device supports specifying the GPUDirect RDMA flag with
-    #: :py:obj:`~.cuMemCreate`
-    CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK' in found_values}}
-
-    #: Shared memory reserved by CUDA driver per block in bytes
-    CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED' in found_values}}
-
-    #: Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays
-    CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED' in found_values}}
-
-    #: Device supports using the :py:obj:`~.cuMemHostRegister` flag
-    #: :py:obj:`~.CU_MEMHOSTERGISTER_READ_ONLY` to register memory that
-    #: must be mapped as read-only to the GPU
-    CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED' in found_values}}
-
-    #: External timeline semaphore interop is supported on the device
-    CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED' in found_values}}
-
-    #: Device supports using the :py:obj:`~.cuMemAllocAsync` and
-    #: :py:obj:`~.cuMemPool` family of APIs
-    CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED' in found_values}}
-
-    #: Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see
-    #: https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
-    CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS' in found_values}}
-
-    #: The returned attribute shall be interpreted as a bitmask, where the
-    #: individual bits are described by the
-    #: :py:obj:`~.CUflushGPUDirectRDMAWritesOptions` enum
-    CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING' in found_values}}
-
-    #: GPUDirect RDMA writes to the device do not need to be flushed for
-    #: consumers within the scope indicated by the returned attribute. See
-    #: :py:obj:`~.CUGPUDirectRDMAWritesOrdering` for the numerical values
-    #: returned here.
-    CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES' in found_values}}
-
-    #: Handle types supported with mempool based IPC
-    CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH' in found_values}}
-
-    #: Indicates device supports cluster launch
-    CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED' in found_values}}
-
-    #: Device supports deferred mapping CUDA arrays and CUDA mipmapped
-    #: arrays
-    CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS' in found_values}}
-
-    #: 64-bit operations are supported in :py:obj:`~.cuStreamBatchMemOp`
-    #: and related MemOp APIs.
-    CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR' in found_values}}
-
-    #: :py:obj:`~.CU_STREAM_WAIT_VALUE_NOR` is supported by MemOp APIs.
-    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED' in found_values}}
-
-    #: Device supports buffer sharing with dma_buf mechanism.
-    CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED' in found_values}}
-
-    #: Device supports IPC Events.
-    CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT' in found_values}}
-
-    #: Number of memory domains the device supports.
-    CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED' in found_values}}
-
-    #: Device supports accessing memory using Tensor Map.
-    CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED' in found_values}}
-
-    #: Device supports exporting memory to a fabric handle with
-    #: :py:obj:`~.cuMemExportToShareableHandle()` or requested with
-    #: :py:obj:`~.cuMemCreate()`
-    CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS' in found_values}}
-
-    #: Device supports unified function pointers.
-    CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_NUMA_CONFIG' in found_values}}
-
-    #: NUMA configuration of a device: value is of type
-    #: :py:obj:`~.CUdeviceNumaConfig` enum
-    CU_DEVICE_ATTRIBUTE_NUMA_CONFIG = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_NUMA_ID' in found_values}}
-
-    #: NUMA node ID of the GPU memory
-    CU_DEVICE_ATTRIBUTE_NUMA_ID = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_NUMA_ID{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED' in found_values}}
-
-    #: Device supports switch multicast and reduction operations.
-    CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MPS_ENABLED' in found_values}}
-
-    #: Indicates if contexts created on this device will be shared via MPS
-    CU_DEVICE_ATTRIBUTE_MPS_ENABLED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MPS_ENABLED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID' in found_values}}
-
-    #: NUMA ID of the host node closest to the device. Returns -1 when
-    #: system does not support NUMA.
-    CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED' in found_values}}
-
-    #: Device supports CIG with D3D12.
-    CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_ATTRIBUTE_MAX' in found_values}}
-    CU_DEVICE_ATTRIBUTE_MAX = cydriver.CUdevice_attribute_enum.CU_DEVICE_ATTRIBUTE_MAX{{endif}}
-{{endif}}
-{{if 'CUpointer_attribute_enum' in found_types}}
-
-class CUpointer_attribute(IntEnum):
-    """
-    Pointer information
-    """
-    {{if 'CU_POINTER_ATTRIBUTE_CONTEXT' in found_values}}
-
-    #: The :py:obj:`~.CUcontext` on which a pointer was allocated or
-    #: registered
-    CU_POINTER_ATTRIBUTE_CONTEXT = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_CONTEXT{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_MEMORY_TYPE' in found_values}}
-
-    #: The :py:obj:`~.CUmemorytype` describing the physical location of a
-    #: pointer
-    CU_POINTER_ATTRIBUTE_MEMORY_TYPE = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MEMORY_TYPE{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_DEVICE_POINTER' in found_values}}
-
-    #: The address at which a pointer's memory may be accessed on the
-    #: device
-    CU_POINTER_ATTRIBUTE_DEVICE_POINTER = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_POINTER{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_HOST_POINTER' in found_values}}
-
-    #: The address at which a pointer's memory may be accessed on the host
-    CU_POINTER_ATTRIBUTE_HOST_POINTER = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_HOST_POINTER{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_P2P_TOKENS' in found_values}}
-
-    #: A pair of tokens for use with the nv-p2p.h Linux kernel interface
-    CU_POINTER_ATTRIBUTE_P2P_TOKENS = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_P2P_TOKENS{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_SYNC_MEMOPS' in found_values}}
-
-    #: Synchronize every synchronous memory operation initiated on this
-    #: region
-    CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_BUFFER_ID' in found_values}}
-
-    #: A process-wide unique ID for an allocated memory region
-    CU_POINTER_ATTRIBUTE_BUFFER_ID = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_BUFFER_ID{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_IS_MANAGED' in found_values}}
-
-    #: Indicates if the pointer points to managed memory
-    CU_POINTER_ATTRIBUTE_IS_MANAGED = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_MANAGED{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL' in found_values}}
-
-    #: A device ordinal of a device on which a pointer was allocated or
-    #: registered
-    CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE' in found_values}}
-
-    #: 1 if this pointer maps to an allocation that is suitable for
-    #: :py:obj:`~.cudaIpcGetMemHandle`, 0 otherwise
-    CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_RANGE_START_ADDR' in found_values}}
-
-    #: Starting address for this requested pointer
-    CU_POINTER_ATTRIBUTE_RANGE_START_ADDR = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_RANGE_START_ADDR{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_RANGE_SIZE' in found_values}}
-
-    #: Size of the address range for this requested pointer
-    CU_POINTER_ATTRIBUTE_RANGE_SIZE = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_RANGE_SIZE{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_MAPPED' in found_values}}
-
-    #: 1 if this pointer is in a valid address range that is mapped to a
-    #: backing allocation, 0 otherwise
-    CU_POINTER_ATTRIBUTE_MAPPED = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MAPPED{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES' in found_values}}
-
-    #: Bitmask of allowed :py:obj:`~.CUmemAllocationHandleType` for this
-    #: allocation
-    CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE' in found_values}}
-
-    #: 1 if the memory this pointer is referencing can be used with the
-    #: GPUDirect RDMA API
-    CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_ACCESS_FLAGS' in found_values}}
-
-    #: Returns the access flags the device associated with the current
-    #: context has on the corresponding memory referenced by the pointer
-    #: given
-    CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_ACCESS_FLAGS{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE' in found_values}}
-
-    #: Returns the mempool handle for the allocation if it was allocated
-    #: from a mempool. Otherwise returns NULL.
-    CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_MAPPING_SIZE' in found_values}}
-
-    #: Size of the actual underlying mapping that the pointer belongs to
-    CU_POINTER_ATTRIBUTE_MAPPING_SIZE = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MAPPING_SIZE{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR' in found_values}}
-
-    #: The start address of the mapping that the pointer belongs to
-    CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID' in found_values}}
-
-    #: A process-wide unique id corresponding to the physical allocation
-    #: the pointer belongs to
-    CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID{{endif}}
-{{endif}}
-{{if 'CUfunction_attribute_enum' in found_types}}
-
-class CUfunction_attribute(IntEnum):
-    """
-    Function properties
-    """
-    {{if 'CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK' in found_values}}
-
-    #: The maximum number of threads per block, beyond which a launch of
-    #: the function would fail. This number depends on both the function
-    #: and the device on which the function is currently loaded.
-    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES' in found_values}}
-
-    #: The size in bytes of statically-allocated shared memory required by
-    #: this function. This does not include dynamically-allocated shared
-    #: memory requested by the user at runtime.
-    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES' in found_values}}
-
-    #: The size in bytes of user-allocated constant memory required by this
-    #: function.
-    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES' in found_values}}
-
-    #: The size in bytes of local memory used by each thread of this
-    #: function.
-    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_NUM_REGS' in found_values}}
-
-    #: The number of registers used by each thread of this function.
-    CU_FUNC_ATTRIBUTE_NUM_REGS = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_NUM_REGS{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_PTX_VERSION' in found_values}}
-
-    #: The PTX virtual architecture version for which the function was
-    #: compiled. This value is the major PTX version * 10 + the minor PTX
-    #: version, so a PTX version 1.3 function would return the value 13.
-    #: Note that this may return the undefined value of 0 for cubins
-    #: compiled prior to CUDA 3.0.
-    CU_FUNC_ATTRIBUTE_PTX_VERSION = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_PTX_VERSION{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_BINARY_VERSION' in found_values}}
-
-    #: The binary architecture version for which the function was compiled.
-    #: This value is the major binary version * 10 + the minor binary
-    #: version, so a binary version 1.3 function would return the value 13.
-    #: Note that this will return a value of 10 for legacy cubins that do
-    #: not have a properly-encoded binary architecture version.
-    CU_FUNC_ATTRIBUTE_BINARY_VERSION = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_BINARY_VERSION{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_CACHE_MODE_CA' in found_values}}
-
-    #: The attribute to indicate whether the function has been compiled
-    #: with user specified option "-Xptxas --dlcm=ca" set .
-    CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_CACHE_MODE_CA{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES' in found_values}}
-
-    #: The maximum size in bytes of dynamically-allocated shared memory
-    #: that can be used by this function. If the user-specified dynamic
-    #: shared memory size is larger than this value, the launch will fail.
-    #: See :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-    CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT' in found_values}}
-
-    #: On devices where the L1 cache and shared memory use the same
-    #: hardware resources, this sets the shared memory carveout preference,
-    #: in percent of the total shared memory. Refer to
-    #: :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR`.
-    #: This is only a hint, and the driver can choose a different ratio if
-    #: required to execute the function. See
-    #: :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-    CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET' in found_values}}
-
-    #: If this attribute is set, the kernel must launch with a valid
-    #: cluster size specified. See :py:obj:`~.cuFuncSetAttribute`,
-    #: :py:obj:`~.cuKernelSetAttribute`
-    CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH' in found_values}}
-
-    #: The required cluster width in blocks. The values must either all be
-    #: 0 or all be positive. The validity of the cluster dimensions is
-    #: otherwise checked at launch time.
-    #:
-    #: If the value is set during compile time, it cannot be set at
-    #: runtime. Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED.
-    #: See :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-    CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT' in found_values}}
-
-    #: The required cluster height in blocks. The values must either all be
-    #: 0 or all be positive. The validity of the cluster dimensions is
-    #: otherwise checked at launch time.
-    #:
-    #: If the value is set during compile time, it cannot be set at
-    #: runtime. Setting it at runtime should return
-    #: CUDA_ERROR_NOT_PERMITTED. See :py:obj:`~.cuFuncSetAttribute`,
-    #: :py:obj:`~.cuKernelSetAttribute`
-    CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH' in found_values}}
-
-    #: The required cluster depth in blocks. The values must either all be
-    #: 0 or all be positive. The validity of the cluster dimensions is
-    #: otherwise checked at launch time.
-    #:
-    #: If the value is set during compile time, it cannot be set at
-    #: runtime. Setting it at runtime should return
-    #: CUDA_ERROR_NOT_PERMITTED. See :py:obj:`~.cuFuncSetAttribute`,
-    #: :py:obj:`~.cuKernelSetAttribute`
-    CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED' in found_values}}
-
-    #: Whether the function can be launched with non-portable cluster size.
-    #: 1 is allowed, 0 is disallowed. A non-portable cluster size may only
-    #: function on the specific SKUs the program is tested on. The launch
-    #: might fail if the program is run on a different hardware platform.
-    #:
-    #: CUDA API provides cudaOccupancyMaxActiveClusters to assist with
-    #: checking whether the desired size can be launched on the current
-    #: device.
-    #:
-    #: Portable Cluster Size
-    #:
-    #: A portable cluster size is guaranteed to be functional on all
-    #: compute capabilities higher than the target compute capability. The
-    #: portable cluster size for sm_90 is 8 blocks per cluster. This value
-    #: may increase for future compute capabilities.
-    #:
-    #: The specific hardware unit may support higher cluster sizes that’s
-    #: not guaranteed to be portable. See :py:obj:`~.cuFuncSetAttribute`,
-    #: :py:obj:`~.cuKernelSetAttribute`
-    CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE' in found_values}}
-
-    #: The block scheduling policy of a function. The value type is
-    #: CUclusterSchedulingPolicy / cudaClusterSchedulingPolicy. See
-    #: :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-    CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE{{endif}}
-    {{if 'CU_FUNC_ATTRIBUTE_MAX' in found_values}}
-    CU_FUNC_ATTRIBUTE_MAX = cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_MAX{{endif}}
-{{endif}}
-{{if 'CUfunc_cache_enum' in found_types}}
-
-class CUfunc_cache(IntEnum):
-    """
-    Function cache configurations
-    """
-    {{if 'CU_FUNC_CACHE_PREFER_NONE' in found_values}}
-
-    #: no preference for shared memory or L1 (default)
-    CU_FUNC_CACHE_PREFER_NONE = cydriver.CUfunc_cache_enum.CU_FUNC_CACHE_PREFER_NONE{{endif}}
-    {{if 'CU_FUNC_CACHE_PREFER_SHARED' in found_values}}
-
-    #: prefer larger shared memory and smaller L1 cache
-    CU_FUNC_CACHE_PREFER_SHARED = cydriver.CUfunc_cache_enum.CU_FUNC_CACHE_PREFER_SHARED{{endif}}
-    {{if 'CU_FUNC_CACHE_PREFER_L1' in found_values}}
-
-    #: prefer larger L1 cache and smaller shared memory
-    CU_FUNC_CACHE_PREFER_L1 = cydriver.CUfunc_cache_enum.CU_FUNC_CACHE_PREFER_L1{{endif}}
-    {{if 'CU_FUNC_CACHE_PREFER_EQUAL' in found_values}}
-
-    #: prefer equal sized L1 cache and shared memory
-    CU_FUNC_CACHE_PREFER_EQUAL = cydriver.CUfunc_cache_enum.CU_FUNC_CACHE_PREFER_EQUAL{{endif}}
-{{endif}}
-{{if 'CUsharedconfig_enum' in found_types}}
-
-class CUsharedconfig(IntEnum):
-    """
-    [Deprecated]  Shared memory configurations
-    """
-    {{if 'CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE' in found_values}}
-
-    #: set default shared memory bank size
-    CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = cydriver.CUsharedconfig_enum.CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE{{endif}}
-    {{if 'CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE' in found_values}}
-
-    #: set shared memory bank width to four bytes
-    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = cydriver.CUsharedconfig_enum.CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE{{endif}}
-    {{if 'CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE' in found_values}}
-
-    #: set shared memory bank width to eight bytes
-    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = cydriver.CUsharedconfig_enum.CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE{{endif}}
-{{endif}}
-{{if 'CUshared_carveout_enum' in found_types}}
-
-class CUshared_carveout(IntEnum):
-    """
-    Shared memory carveout configurations. These may be passed to
-    :py:obj:`~.cuFuncSetAttribute` or :py:obj:`~.cuKernelSetAttribute`
-    """
-    {{if 'CU_SHAREDMEM_CARVEOUT_DEFAULT' in found_values}}
-
-    #: No preference for shared memory or L1 (default)
-    CU_SHAREDMEM_CARVEOUT_DEFAULT = cydriver.CUshared_carveout_enum.CU_SHAREDMEM_CARVEOUT_DEFAULT{{endif}}
-    {{if 'CU_SHAREDMEM_CARVEOUT_MAX_L1' in found_values}}
-
-    #: Prefer maximum available L1 cache, minimum shared memory
-    CU_SHAREDMEM_CARVEOUT_MAX_L1 = cydriver.CUshared_carveout_enum.CU_SHAREDMEM_CARVEOUT_MAX_L1{{endif}}
-    {{if 'CU_SHAREDMEM_CARVEOUT_MAX_SHARED' in found_values}}
-
-    #: Prefer maximum available shared memory, minimum L1 cache
-    CU_SHAREDMEM_CARVEOUT_MAX_SHARED = cydriver.CUshared_carveout_enum.CU_SHAREDMEM_CARVEOUT_MAX_SHARED{{endif}}
-{{endif}}
-{{if 'CUmemorytype_enum' in found_types}}
-
-class CUmemorytype(IntEnum):
-    """
-    Memory types
-    """
-    {{if 'CU_MEMORYTYPE_HOST' in found_values}}
-
-    #: Host memory
-    CU_MEMORYTYPE_HOST = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST{{endif}}
-    {{if 'CU_MEMORYTYPE_DEVICE' in found_values}}
-
-    #: Device memory
-    CU_MEMORYTYPE_DEVICE = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE{{endif}}
-    {{if 'CU_MEMORYTYPE_ARRAY' in found_values}}
-
-    #: Array memory
-    CU_MEMORYTYPE_ARRAY = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_ARRAY{{endif}}
-    {{if 'CU_MEMORYTYPE_UNIFIED' in found_values}}
-
-    #: Unified device or host memory
-    CU_MEMORYTYPE_UNIFIED = cydriver.CUmemorytype_enum.CU_MEMORYTYPE_UNIFIED{{endif}}
-{{endif}}
-{{if 'CUcomputemode_enum' in found_types}}
-
-class CUcomputemode(IntEnum):
-    """
-    Compute Modes
-    """
-    {{if 'CU_COMPUTEMODE_DEFAULT' in found_values}}
-
-    #: Default compute mode (Multiple contexts allowed per device)
-    CU_COMPUTEMODE_DEFAULT = cydriver.CUcomputemode_enum.CU_COMPUTEMODE_DEFAULT{{endif}}
-    {{if 'CU_COMPUTEMODE_PROHIBITED' in found_values}}
-
-    #: Compute-prohibited mode (No contexts can be created on this device
-    #: at this time)
-    CU_COMPUTEMODE_PROHIBITED = cydriver.CUcomputemode_enum.CU_COMPUTEMODE_PROHIBITED{{endif}}
-    {{if 'CU_COMPUTEMODE_EXCLUSIVE_PROCESS' in found_values}}
-
-    #: Compute-exclusive-process mode (Only one context used by a single
-    #: process can be present on this device at a time)
-    CU_COMPUTEMODE_EXCLUSIVE_PROCESS = cydriver.CUcomputemode_enum.CU_COMPUTEMODE_EXCLUSIVE_PROCESS{{endif}}
-{{endif}}
-{{if 'CUmem_advise_enum' in found_types}}
-
-class CUmem_advise(IntEnum):
-    """
-    Memory advise values
-    """
-    {{if 'CU_MEM_ADVISE_SET_READ_MOSTLY' in found_values}}
-
-    #: Data will mostly be read and only occasionally be written to
-    CU_MEM_ADVISE_SET_READ_MOSTLY = cydriver.CUmem_advise_enum.CU_MEM_ADVISE_SET_READ_MOSTLY{{endif}}
-    {{if 'CU_MEM_ADVISE_UNSET_READ_MOSTLY' in found_values}}
-
-    #: Undo the effect of :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY`
-    CU_MEM_ADVISE_UNSET_READ_MOSTLY = cydriver.CUmem_advise_enum.CU_MEM_ADVISE_UNSET_READ_MOSTLY{{endif}}
-    {{if 'CU_MEM_ADVISE_SET_PREFERRED_LOCATION' in found_values}}
-
-    #: Set the preferred location for the data as the specified device
-    CU_MEM_ADVISE_SET_PREFERRED_LOCATION = cydriver.CUmem_advise_enum.CU_MEM_ADVISE_SET_PREFERRED_LOCATION{{endif}}
-    {{if 'CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION' in found_values}}
-
-    #: Clear the preferred location for the data
-    CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = cydriver.CUmem_advise_enum.CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION{{endif}}
-    {{if 'CU_MEM_ADVISE_SET_ACCESSED_BY' in found_values}}
-
-    #: Data will be accessed by the specified device, so prevent page
-    #: faults as much as possible
-    CU_MEM_ADVISE_SET_ACCESSED_BY = cydriver.CUmem_advise_enum.CU_MEM_ADVISE_SET_ACCESSED_BY{{endif}}
-    {{if 'CU_MEM_ADVISE_UNSET_ACCESSED_BY' in found_values}}
-
-    #: Let the Unified Memory subsystem decide on the page faulting policy
-    #: for the specified device
-    CU_MEM_ADVISE_UNSET_ACCESSED_BY = cydriver.CUmem_advise_enum.CU_MEM_ADVISE_UNSET_ACCESSED_BY{{endif}}
-{{endif}}
-{{if 'CUmem_range_attribute_enum' in found_types}}
-
-class CUmem_range_attribute(IntEnum):
-    """
-
-    """
-    {{if 'CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY' in found_values}}
-
-    #: Whether the range will mostly be read and only occasionally be
-    #: written to
-    CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY{{endif}}
-    {{if 'CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION' in found_values}}
-
-    #: The preferred location of the range
-    CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION{{endif}}
-    {{if 'CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY' in found_values}}
-
-    #: Memory range has :py:obj:`~.CU_MEM_ADVISE_SET_ACCESSED_BY` set for
-    #: specified device
-    CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY{{endif}}
-    {{if 'CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION' in found_values}}
-
-    #: The last location to which the range was prefetched
-    CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION{{endif}}
-    {{if 'CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE' in found_values}}
-
-    #: The preferred location type of the range
-    CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE = cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE{{endif}}
-    {{if 'CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID' in found_values}}
-
-    #: The preferred location id of the range
-    CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID = cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID{{endif}}
-    {{if 'CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE' in found_values}}
-
-    #: The last location type to which the range was prefetched
-    CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE = cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE{{endif}}
-    {{if 'CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID' in found_values}}
-
-    #: The last location id to which the range was prefetched
-    CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID = cydriver.CUmem_range_attribute_enum.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID{{endif}}
-{{endif}}
-{{if 'CUjit_option_enum' in found_types}}
-
-class CUjit_option(IntEnum):
-    """
-    Online compiler and linker options
-    """
-    {{if 'CU_JIT_MAX_REGISTERS' in found_values}}
-
-    #: Max number of registers that a thread may use.
-    #: Option type: unsigned int
-    #: Applies to: compiler only
-    CU_JIT_MAX_REGISTERS = cydriver.CUjit_option_enum.CU_JIT_MAX_REGISTERS{{endif}}
-    {{if 'CU_JIT_THREADS_PER_BLOCK' in found_values}}
-
-    #: IN: Specifies minimum number of threads per block to target
-    #: compilation for
-    #: OUT: Returns the number of threads the compiler actually targeted.
-    #: This restricts the resource utilization of the compiler (e.g. max
-    #: registers) such that a block with the given number of threads should
-    #: be able to launch based on register limitations. Note, this option
-    #: does not currently take into account any other resource limitations,
-    #: such as shared memory utilization.
-    #: Cannot be combined with :py:obj:`~.CU_JIT_TARGET`.
-    #: Option type: unsigned int
-    #: Applies to: compiler only
-    CU_JIT_THREADS_PER_BLOCK = cydriver.CUjit_option_enum.CU_JIT_THREADS_PER_BLOCK{{endif}}
-    {{if 'CU_JIT_WALL_TIME' in found_values}}
-
-    #: Overwrites the option value with the total wall clock time, in
-    #: milliseconds, spent in the compiler and linker
-    #: Option type: float
-    #: Applies to: compiler and linker
-    CU_JIT_WALL_TIME = cydriver.CUjit_option_enum.CU_JIT_WALL_TIME{{endif}}
-    {{if 'CU_JIT_INFO_LOG_BUFFER' in found_values}}
-
-    #: Pointer to a buffer in which to print any log messages that are
-    #: informational in nature (the buffer size is specified via option
-    #: :py:obj:`~.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`)
-    #: Option type: char *
-    #: Applies to: compiler and linker
-    CU_JIT_INFO_LOG_BUFFER = cydriver.CUjit_option_enum.CU_JIT_INFO_LOG_BUFFER{{endif}}
-    {{if 'CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES' in found_values}}
-
-    #: IN: Log buffer size in bytes. Log messages will be capped at this
-    #: size (including null terminator)
-    #: OUT: Amount of log buffer filled with messages
-    #: Option type: unsigned int
-    #: Applies to: compiler and linker
-    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = cydriver.CUjit_option_enum.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES{{endif}}
-    {{if 'CU_JIT_ERROR_LOG_BUFFER' in found_values}}
-
-    #: Pointer to a buffer in which to print any log messages that reflect
-    #: errors (the buffer size is specified via option
-    #: :py:obj:`~.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES`)
-    #: Option type: char *
-    #: Applies to: compiler and linker
-    CU_JIT_ERROR_LOG_BUFFER = cydriver.CUjit_option_enum.CU_JIT_ERROR_LOG_BUFFER{{endif}}
-    {{if 'CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES' in found_values}}
-
-    #: IN: Log buffer size in bytes. Log messages will be capped at this
-    #: size (including null terminator)
-    #: OUT: Amount of log buffer filled with messages
-    #: Option type: unsigned int
-    #: Applies to: compiler and linker
-    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = cydriver.CUjit_option_enum.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES{{endif}}
-    {{if 'CU_JIT_OPTIMIZATION_LEVEL' in found_values}}
-
-    #: Level of optimizations to apply to generated code (0 - 4), with 4
-    #: being the default and highest level of optimizations.
-    #: Option type: unsigned int
-    #: Applies to: compiler only
-    CU_JIT_OPTIMIZATION_LEVEL = cydriver.CUjit_option_enum.CU_JIT_OPTIMIZATION_LEVEL{{endif}}
-    {{if 'CU_JIT_TARGET_FROM_CUCONTEXT' in found_values}}
-
-    #: No option value required. Determines the target based on the current
-    #: attached context (default)
-    #: Option type: No option value needed
-    #: Applies to: compiler and linker
-    CU_JIT_TARGET_FROM_CUCONTEXT = cydriver.CUjit_option_enum.CU_JIT_TARGET_FROM_CUCONTEXT{{endif}}
-    {{if 'CU_JIT_TARGET' in found_values}}
-
-    #: Target is chosen based on supplied :py:obj:`~.CUjit_target`. Cannot
-    #: be combined with :py:obj:`~.CU_JIT_THREADS_PER_BLOCK`.
-    #: Option type: unsigned int for enumerated type
-    #: :py:obj:`~.CUjit_target`
-    #: Applies to: compiler and linker
-    CU_JIT_TARGET = cydriver.CUjit_option_enum.CU_JIT_TARGET{{endif}}
-    {{if 'CU_JIT_FALLBACK_STRATEGY' in found_values}}
-
-    #: Specifies choice of fallback strategy if matching cubin is not
-    #: found. Choice is based on supplied :py:obj:`~.CUjit_fallback`. This
-    #: option cannot be used with cuLink* APIs as the linker requires exact
-    #: matches.
-    #: Option type: unsigned int for enumerated type
-    #: :py:obj:`~.CUjit_fallback`
-    #: Applies to: compiler only
-    CU_JIT_FALLBACK_STRATEGY = cydriver.CUjit_option_enum.CU_JIT_FALLBACK_STRATEGY{{endif}}
-    {{if 'CU_JIT_GENERATE_DEBUG_INFO' in found_values}}
-
-    #: Specifies whether to create debug information in output (-g) (0:
-    #: false, default)
-    #: Option type: int
-    #: Applies to: compiler and linker
-    CU_JIT_GENERATE_DEBUG_INFO = cydriver.CUjit_option_enum.CU_JIT_GENERATE_DEBUG_INFO{{endif}}
-    {{if 'CU_JIT_LOG_VERBOSE' in found_values}}
-
-    #: Generate verbose log messages (0: false, default)
-    #: Option type: int
-    #: Applies to: compiler and linker
-    CU_JIT_LOG_VERBOSE = cydriver.CUjit_option_enum.CU_JIT_LOG_VERBOSE{{endif}}
-    {{if 'CU_JIT_GENERATE_LINE_INFO' in found_values}}
-
-    #: Generate line number information (-lineinfo) (0: false, default)
-    #: Option type: int
-    #: Applies to: compiler only
-    CU_JIT_GENERATE_LINE_INFO = cydriver.CUjit_option_enum.CU_JIT_GENERATE_LINE_INFO{{endif}}
-    {{if 'CU_JIT_CACHE_MODE' in found_values}}
-
-    #: Specifies whether to enable caching explicitly (-dlcm)
-    #: Choice is based on supplied :py:obj:`~.CUjit_cacheMode_enum`.
-    #: Option type: unsigned int for enumerated type
-    #: :py:obj:`~.CUjit_cacheMode_enum`
-    #: Applies to: compiler only
-    CU_JIT_CACHE_MODE = cydriver.CUjit_option_enum.CU_JIT_CACHE_MODE{{endif}}
-    {{if 'CU_JIT_NEW_SM3X_OPT' in found_values}}
-
-    #: [Deprecated]
-    CU_JIT_NEW_SM3X_OPT = cydriver.CUjit_option_enum.CU_JIT_NEW_SM3X_OPT{{endif}}
-    {{if 'CU_JIT_FAST_COMPILE' in found_values}}
-
-    #: This jit option is used for internal purpose only.
-    CU_JIT_FAST_COMPILE = cydriver.CUjit_option_enum.CU_JIT_FAST_COMPILE{{endif}}
-    {{if 'CU_JIT_GLOBAL_SYMBOL_NAMES' in found_values}}
-
-    #: Array of device symbol names that will be relocated to the
-    #: corresponding host addresses stored in
-    #: :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_ADDRESSES`.
-    #: Must contain :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_COUNT` entries.
-    #: When loading a device module, driver will relocate all encountered
-    #: unresolved symbols to the host addresses.
-    #: It is only allowed to register symbols that correspond to unresolved
-    #: global variables.
-    #: It is illegal to register the same device symbol at multiple
-    #: addresses.
-    #: Option type: const char **
-    #: Applies to: dynamic linker only
-    CU_JIT_GLOBAL_SYMBOL_NAMES = cydriver.CUjit_option_enum.CU_JIT_GLOBAL_SYMBOL_NAMES{{endif}}
-    {{if 'CU_JIT_GLOBAL_SYMBOL_ADDRESSES' in found_values}}
-
-    #: Array of host addresses that will be used to relocate corresponding
-    #: device symbols stored in :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_NAMES`.
-    #: Must contain :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_COUNT` entries.
-    #: Option type: void **
-    #: Applies to: dynamic linker only
-    CU_JIT_GLOBAL_SYMBOL_ADDRESSES = cydriver.CUjit_option_enum.CU_JIT_GLOBAL_SYMBOL_ADDRESSES{{endif}}
-    {{if 'CU_JIT_GLOBAL_SYMBOL_COUNT' in found_values}}
-
-    #: Number of entries in :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_NAMES` and
-    #: :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_ADDRESSES` arrays.
-    #: Option type: unsigned int
-    #: Applies to: dynamic linker only
-    CU_JIT_GLOBAL_SYMBOL_COUNT = cydriver.CUjit_option_enum.CU_JIT_GLOBAL_SYMBOL_COUNT{{endif}}
-    {{if 'CU_JIT_LTO' in found_values}}
-
-    #: [Deprecated]
-    #:
-    #: Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-    CU_JIT_LTO = cydriver.CUjit_option_enum.CU_JIT_LTO{{endif}}
-    {{if 'CU_JIT_FTZ' in found_values}}
-
-    #: [Deprecated]
-    #:
-    #: Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-    CU_JIT_FTZ = cydriver.CUjit_option_enum.CU_JIT_FTZ{{endif}}
-    {{if 'CU_JIT_PREC_DIV' in found_values}}
-
-    #: [Deprecated]
-    #:
-    #: Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-    CU_JIT_PREC_DIV = cydriver.CUjit_option_enum.CU_JIT_PREC_DIV{{endif}}
-    {{if 'CU_JIT_PREC_SQRT' in found_values}}
-
-    #: [Deprecated]
-    #:
-    #: Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-    CU_JIT_PREC_SQRT = cydriver.CUjit_option_enum.CU_JIT_PREC_SQRT{{endif}}
-    {{if 'CU_JIT_FMA' in found_values}}
-
-    #: [Deprecated]
-    #:
-    #: Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-    CU_JIT_FMA = cydriver.CUjit_option_enum.CU_JIT_FMA{{endif}}
-    {{if 'CU_JIT_REFERENCED_KERNEL_NAMES' in found_values}}
-
-    #: [Deprecated]
-    #:
-    #: Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-    CU_JIT_REFERENCED_KERNEL_NAMES = cydriver.CUjit_option_enum.CU_JIT_REFERENCED_KERNEL_NAMES{{endif}}
-    {{if 'CU_JIT_REFERENCED_KERNEL_COUNT' in found_values}}
-
-    #: [Deprecated]
-    #:
-    #: Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-    CU_JIT_REFERENCED_KERNEL_COUNT = cydriver.CUjit_option_enum.CU_JIT_REFERENCED_KERNEL_COUNT{{endif}}
-    {{if 'CU_JIT_REFERENCED_VARIABLE_NAMES' in found_values}}
-
-    #: [Deprecated]
-    #:
-    #: Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-    CU_JIT_REFERENCED_VARIABLE_NAMES = cydriver.CUjit_option_enum.CU_JIT_REFERENCED_VARIABLE_NAMES{{endif}}
-    {{if 'CU_JIT_REFERENCED_VARIABLE_COUNT' in found_values}}
-
-    #: [Deprecated]
-    #:
-    #: Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-    CU_JIT_REFERENCED_VARIABLE_COUNT = cydriver.CUjit_option_enum.CU_JIT_REFERENCED_VARIABLE_COUNT{{endif}}
-    {{if 'CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES' in found_values}}
-
-    #: [Deprecated]
-    #:
-    #: Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-    CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES = cydriver.CUjit_option_enum.CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES{{endif}}
-    {{if 'CU_JIT_POSITION_INDEPENDENT_CODE' in found_values}}
-
-    #: Generate position independent code (0: false)
-    #: Option type: int
-    #: Applies to: compiler only
-    CU_JIT_POSITION_INDEPENDENT_CODE = cydriver.CUjit_option_enum.CU_JIT_POSITION_INDEPENDENT_CODE{{endif}}
-    {{if 'CU_JIT_MIN_CTA_PER_SM' in found_values}}
-
-    #: This option hints to the JIT compiler the minimum number of CTAs
-    #: from the kernel’s grid to be mapped to a SM. This option is ignored
-    #: when used together with :py:obj:`~.CU_JIT_MAX_REGISTERS` or
-    #: :py:obj:`~.CU_JIT_THREADS_PER_BLOCK`. Optimizations based on this
-    #: option need :py:obj:`~.CU_JIT_MAX_THREADS_PER_BLOCK` to be specified
-    #: as well. For kernels already using PTX directive .minnctapersm, this
-    #: option will be ignored by default. Use
-    #: :py:obj:`~.CU_JIT_OVERRIDE_DIRECTIVE_VALUES` to let this option take
-    #: precedence over the PTX directive. Option type: unsigned int
-    #: Applies to: compiler only
-    CU_JIT_MIN_CTA_PER_SM = cydriver.CUjit_option_enum.CU_JIT_MIN_CTA_PER_SM{{endif}}
-    {{if 'CU_JIT_MAX_THREADS_PER_BLOCK' in found_values}}
-
-    #: Maximum number threads in a thread block, computed as the product of
-    #: the maximum extent specifed for each dimension of the block. This
-    #: limit is guaranteed not to be exeeded in any invocation of the
-    #: kernel. Exceeding the the maximum number of threads results in
-    #: runtime error or kernel launch failure. For kernels already using
-    #: PTX directive .maxntid, this option will be ignored by default. Use
-    #: :py:obj:`~.CU_JIT_OVERRIDE_DIRECTIVE_VALUES` to let this option take
-    #: precedence over the PTX directive. Option type: int
-    #: Applies to: compiler only
-    CU_JIT_MAX_THREADS_PER_BLOCK = cydriver.CUjit_option_enum.CU_JIT_MAX_THREADS_PER_BLOCK{{endif}}
-    {{if 'CU_JIT_OVERRIDE_DIRECTIVE_VALUES' in found_values}}
-
-    #: This option lets the values specified using
-    #: :py:obj:`~.CU_JIT_MAX_REGISTERS`,
-    #: :py:obj:`~.CU_JIT_THREADS_PER_BLOCK`,
-    #: :py:obj:`~.CU_JIT_MAX_THREADS_PER_BLOCK` and
-    #: :py:obj:`~.CU_JIT_MIN_CTA_PER_SM` take precedence over any PTX
-    #: directives. (0: Disable, default; 1: Enable) Option type: int
-    #: Applies to: compiler only
-    CU_JIT_OVERRIDE_DIRECTIVE_VALUES = cydriver.CUjit_option_enum.CU_JIT_OVERRIDE_DIRECTIVE_VALUES{{endif}}
-    {{if 'CU_JIT_NUM_OPTIONS' in found_values}}
-    CU_JIT_NUM_OPTIONS = cydriver.CUjit_option_enum.CU_JIT_NUM_OPTIONS{{endif}}
-{{endif}}
-{{if 'CUjit_target_enum' in found_types}}
-
-class CUjit_target(IntEnum):
-    """
-    Online compilation targets
-    """
-    {{if 'CU_TARGET_COMPUTE_30' in found_values}}
-
-    #: Compute device class 3.0
-    CU_TARGET_COMPUTE_30 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_30{{endif}}
-    {{if 'CU_TARGET_COMPUTE_32' in found_values}}
-
-    #: Compute device class 3.2
-    CU_TARGET_COMPUTE_32 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_32{{endif}}
-    {{if 'CU_TARGET_COMPUTE_35' in found_values}}
-
-    #: Compute device class 3.5
-    CU_TARGET_COMPUTE_35 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_35{{endif}}
-    {{if 'CU_TARGET_COMPUTE_37' in found_values}}
-
-    #: Compute device class 3.7
-    CU_TARGET_COMPUTE_37 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_37{{endif}}
-    {{if 'CU_TARGET_COMPUTE_50' in found_values}}
-
-    #: Compute device class 5.0
-    CU_TARGET_COMPUTE_50 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_50{{endif}}
-    {{if 'CU_TARGET_COMPUTE_52' in found_values}}
-
-    #: Compute device class 5.2
-    CU_TARGET_COMPUTE_52 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_52{{endif}}
-    {{if 'CU_TARGET_COMPUTE_53' in found_values}}
-
-    #: Compute device class 5.3
-    CU_TARGET_COMPUTE_53 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_53{{endif}}
-    {{if 'CU_TARGET_COMPUTE_60' in found_values}}
-
-    #: Compute device class 6.0.
-    CU_TARGET_COMPUTE_60 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_60{{endif}}
-    {{if 'CU_TARGET_COMPUTE_61' in found_values}}
-
-    #: Compute device class 6.1.
-    CU_TARGET_COMPUTE_61 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_61{{endif}}
-    {{if 'CU_TARGET_COMPUTE_62' in found_values}}
-
-    #: Compute device class 6.2.
-    CU_TARGET_COMPUTE_62 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_62{{endif}}
-    {{if 'CU_TARGET_COMPUTE_70' in found_values}}
-
-    #: Compute device class 7.0.
-    CU_TARGET_COMPUTE_70 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_70{{endif}}
-    {{if 'CU_TARGET_COMPUTE_72' in found_values}}
-
-    #: Compute device class 7.2.
-    CU_TARGET_COMPUTE_72 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_72{{endif}}
-    {{if 'CU_TARGET_COMPUTE_75' in found_values}}
-
-    #: Compute device class 7.5.
-    CU_TARGET_COMPUTE_75 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_75{{endif}}
-    {{if 'CU_TARGET_COMPUTE_80' in found_values}}
-
-    #: Compute device class 8.0.
-    CU_TARGET_COMPUTE_80 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_80{{endif}}
-    {{if 'CU_TARGET_COMPUTE_86' in found_values}}
-
-    #: Compute device class 8.6.
-    CU_TARGET_COMPUTE_86 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_86{{endif}}
-    {{if 'CU_TARGET_COMPUTE_87' in found_values}}
-
-    #: Compute device class 8.7.
-    CU_TARGET_COMPUTE_87 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_87{{endif}}
-    {{if 'CU_TARGET_COMPUTE_89' in found_values}}
-
-    #: Compute device class 8.9.
-    CU_TARGET_COMPUTE_89 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_89{{endif}}
-    {{if 'CU_TARGET_COMPUTE_90' in found_values}}
-
-    #: Compute device class 9.0. Compute device class 9.0. with accelerated
-    #: features.
-    CU_TARGET_COMPUTE_90 = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_90{{endif}}
-    {{if 'CU_TARGET_COMPUTE_90A' in found_values}}
-    CU_TARGET_COMPUTE_90A = cydriver.CUjit_target_enum.CU_TARGET_COMPUTE_90A{{endif}}
-{{endif}}
-{{if 'CUjit_fallback_enum' in found_types}}
-
-class CUjit_fallback(IntEnum):
-    """
-    Cubin matching fallback strategies
-    """
-    {{if 'CU_PREFER_PTX' in found_values}}
-
-    #: Prefer to compile ptx if exact binary match not found
-    CU_PREFER_PTX = cydriver.CUjit_fallback_enum.CU_PREFER_PTX{{endif}}
-    {{if 'CU_PREFER_BINARY' in found_values}}
-
-    #: Prefer to fall back to compatible binary code if exact match not
-    #: found
-    CU_PREFER_BINARY = cydriver.CUjit_fallback_enum.CU_PREFER_BINARY{{endif}}
-{{endif}}
-{{if 'CUjit_cacheMode_enum' in found_types}}
-
-class CUjit_cacheMode(IntEnum):
-    """
-    Caching modes for dlcm
-    """
-    {{if 'CU_JIT_CACHE_OPTION_NONE' in found_values}}
-
-    #: Compile with no -dlcm flag specified
-    CU_JIT_CACHE_OPTION_NONE = cydriver.CUjit_cacheMode_enum.CU_JIT_CACHE_OPTION_NONE{{endif}}
-    {{if 'CU_JIT_CACHE_OPTION_CG' in found_values}}
-
-    #: Compile with L1 cache disabled
-    CU_JIT_CACHE_OPTION_CG = cydriver.CUjit_cacheMode_enum.CU_JIT_CACHE_OPTION_CG{{endif}}
-    {{if 'CU_JIT_CACHE_OPTION_CA' in found_values}}
-
-    #: Compile with L1 cache enabled
-    CU_JIT_CACHE_OPTION_CA = cydriver.CUjit_cacheMode_enum.CU_JIT_CACHE_OPTION_CA{{endif}}
-{{endif}}
-{{if 'CUjitInputType_enum' in found_types}}
-
-class CUjitInputType(IntEnum):
-    """
-    Device code formats
-    """
-    {{if 'CU_JIT_INPUT_CUBIN' in found_values}}
-
-    #: Compiled device-class-specific device code
-    #: Applicable options: none
-    CU_JIT_INPUT_CUBIN = cydriver.CUjitInputType_enum.CU_JIT_INPUT_CUBIN{{endif}}
-    {{if 'CU_JIT_INPUT_PTX' in found_values}}
-
-    #: PTX source code
-    #: Applicable options: PTX compiler options
-    CU_JIT_INPUT_PTX = cydriver.CUjitInputType_enum.CU_JIT_INPUT_PTX{{endif}}
-    {{if 'CU_JIT_INPUT_FATBINARY' in found_values}}
-
-    #: Bundle of multiple cubins and/or PTX of some device code
-    #: Applicable options: PTX compiler options,
-    #: :py:obj:`~.CU_JIT_FALLBACK_STRATEGY`
-    CU_JIT_INPUT_FATBINARY = cydriver.CUjitInputType_enum.CU_JIT_INPUT_FATBINARY{{endif}}
-    {{if 'CU_JIT_INPUT_OBJECT' in found_values}}
-
-    #: Host object with embedded device code
-    #: Applicable options: PTX compiler options,
-    #: :py:obj:`~.CU_JIT_FALLBACK_STRATEGY`
-    CU_JIT_INPUT_OBJECT = cydriver.CUjitInputType_enum.CU_JIT_INPUT_OBJECT{{endif}}
-    {{if 'CU_JIT_INPUT_LIBRARY' in found_values}}
-
-    #: Archive of host objects with embedded device code
-    #: Applicable options: PTX compiler options,
-    #: :py:obj:`~.CU_JIT_FALLBACK_STRATEGY`
-    CU_JIT_INPUT_LIBRARY = cydriver.CUjitInputType_enum.CU_JIT_INPUT_LIBRARY{{endif}}
-    {{if 'CU_JIT_INPUT_NVVM' in found_values}}
-
-    #: [Deprecated]
-    #:
-    #: Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-    CU_JIT_INPUT_NVVM = cydriver.CUjitInputType_enum.CU_JIT_INPUT_NVVM{{endif}}
-    {{if 'CU_JIT_NUM_INPUT_TYPES' in found_values}}
-    CU_JIT_NUM_INPUT_TYPES = cydriver.CUjitInputType_enum.CU_JIT_NUM_INPUT_TYPES{{endif}}
-{{endif}}
-{{if 'CUgraphicsRegisterFlags_enum' in found_types}}
-
-class CUgraphicsRegisterFlags(IntEnum):
-    """
-    Flags to register a graphics resource
-    """
-    {{if 'CU_GRAPHICS_REGISTER_FLAGS_NONE' in found_values}}
-    CU_GRAPHICS_REGISTER_FLAGS_NONE = cydriver.CUgraphicsRegisterFlags_enum.CU_GRAPHICS_REGISTER_FLAGS_NONE{{endif}}
-    {{if 'CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY' in found_values}}
-    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = cydriver.CUgraphicsRegisterFlags_enum.CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY{{endif}}
-    {{if 'CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD' in found_values}}
-    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = cydriver.CUgraphicsRegisterFlags_enum.CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD{{endif}}
-    {{if 'CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST' in found_values}}
-    CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = cydriver.CUgraphicsRegisterFlags_enum.CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST{{endif}}
-    {{if 'CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER' in found_values}}
-    CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = cydriver.CUgraphicsRegisterFlags_enum.CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER{{endif}}
-{{endif}}
-{{if 'CUgraphicsMapResourceFlags_enum' in found_types}}
-
-class CUgraphicsMapResourceFlags(IntEnum):
-    """
-    Flags for mapping and unmapping interop resources
-    """
-    {{if 'CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE' in found_values}}
-    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = cydriver.CUgraphicsMapResourceFlags_enum.CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE{{endif}}
-    {{if 'CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY' in found_values}}
-    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = cydriver.CUgraphicsMapResourceFlags_enum.CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY{{endif}}
-    {{if 'CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD' in found_values}}
-    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = cydriver.CUgraphicsMapResourceFlags_enum.CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD{{endif}}
-{{endif}}
-{{if 'CUarray_cubemap_face_enum' in found_types}}
-
-class CUarray_cubemap_face(IntEnum):
-    """
-    Array indices for cube faces
-    """
-    {{if 'CU_CUBEMAP_FACE_POSITIVE_X' in found_values}}
-
-    #: Positive X face of cubemap
-    CU_CUBEMAP_FACE_POSITIVE_X = cydriver.CUarray_cubemap_face_enum.CU_CUBEMAP_FACE_POSITIVE_X{{endif}}
-    {{if 'CU_CUBEMAP_FACE_NEGATIVE_X' in found_values}}
-
-    #: Negative X face of cubemap
-    CU_CUBEMAP_FACE_NEGATIVE_X = cydriver.CUarray_cubemap_face_enum.CU_CUBEMAP_FACE_NEGATIVE_X{{endif}}
-    {{if 'CU_CUBEMAP_FACE_POSITIVE_Y' in found_values}}
-
-    #: Positive Y face of cubemap
-    CU_CUBEMAP_FACE_POSITIVE_Y = cydriver.CUarray_cubemap_face_enum.CU_CUBEMAP_FACE_POSITIVE_Y{{endif}}
-    {{if 'CU_CUBEMAP_FACE_NEGATIVE_Y' in found_values}}
-
-    #: Negative Y face of cubemap
-    CU_CUBEMAP_FACE_NEGATIVE_Y = cydriver.CUarray_cubemap_face_enum.CU_CUBEMAP_FACE_NEGATIVE_Y{{endif}}
-    {{if 'CU_CUBEMAP_FACE_POSITIVE_Z' in found_values}}
-
-    #: Positive Z face of cubemap
-    CU_CUBEMAP_FACE_POSITIVE_Z = cydriver.CUarray_cubemap_face_enum.CU_CUBEMAP_FACE_POSITIVE_Z{{endif}}
-    {{if 'CU_CUBEMAP_FACE_NEGATIVE_Z' in found_values}}
-
-    #: Negative Z face of cubemap
-    CU_CUBEMAP_FACE_NEGATIVE_Z = cydriver.CUarray_cubemap_face_enum.CU_CUBEMAP_FACE_NEGATIVE_Z{{endif}}
-{{endif}}
-{{if 'CUlimit_enum' in found_types}}
-
-class CUlimit(IntEnum):
-    """
-    Limits
-    """
-    {{if 'CU_LIMIT_STACK_SIZE' in found_values}}
-
-    #: GPU thread stack size
-    CU_LIMIT_STACK_SIZE = cydriver.CUlimit_enum.CU_LIMIT_STACK_SIZE{{endif}}
-    {{if 'CU_LIMIT_PRINTF_FIFO_SIZE' in found_values}}
-
-    #: GPU printf FIFO size
-    CU_LIMIT_PRINTF_FIFO_SIZE = cydriver.CUlimit_enum.CU_LIMIT_PRINTF_FIFO_SIZE{{endif}}
-    {{if 'CU_LIMIT_MALLOC_HEAP_SIZE' in found_values}}
-
-    #: GPU malloc heap size
-    CU_LIMIT_MALLOC_HEAP_SIZE = cydriver.CUlimit_enum.CU_LIMIT_MALLOC_HEAP_SIZE{{endif}}
-    {{if 'CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH' in found_values}}
-
-    #: GPU device runtime launch synchronize depth
-    CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = cydriver.CUlimit_enum.CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH{{endif}}
-    {{if 'CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT' in found_values}}
-
-    #: GPU device runtime pending launch count
-    CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = cydriver.CUlimit_enum.CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT{{endif}}
-    {{if 'CU_LIMIT_MAX_L2_FETCH_GRANULARITY' in found_values}}
-
-    #: A value between 0 and 128 that indicates the maximum fetch
-    #: granularity of L2 (in Bytes). This is a hint
-    CU_LIMIT_MAX_L2_FETCH_GRANULARITY = cydriver.CUlimit_enum.CU_LIMIT_MAX_L2_FETCH_GRANULARITY{{endif}}
-    {{if 'CU_LIMIT_PERSISTING_L2_CACHE_SIZE' in found_values}}
-
-    #: A size in bytes for L2 persisting lines cache size
-    CU_LIMIT_PERSISTING_L2_CACHE_SIZE = cydriver.CUlimit_enum.CU_LIMIT_PERSISTING_L2_CACHE_SIZE{{endif}}
-    {{if 'CU_LIMIT_SHMEM_SIZE' in found_values}}
-
-    #: A maximum size in bytes of shared memory available to CUDA kernels
-    #: on a CIG context. Can only be queried, cannot be set
-    CU_LIMIT_SHMEM_SIZE = cydriver.CUlimit_enum.CU_LIMIT_SHMEM_SIZE{{endif}}
-    {{if 'CU_LIMIT_CIG_ENABLED' in found_values}}
-
-    #: A non-zero value indicates this CUDA context is a CIG-enabled
-    #: context. Can only be queried, cannot be set
-    CU_LIMIT_CIG_ENABLED = cydriver.CUlimit_enum.CU_LIMIT_CIG_ENABLED{{endif}}
-    {{if 'CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED' in found_values}}
-
-    #: When set to a non-zero value, CUDA will fail to launch a kernel on a
-    #: CIG context, instead of using the fallback path, if the kernel uses
-    #: more shared memory than available
-    CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED = cydriver.CUlimit_enum.CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED{{endif}}
-    {{if 'CU_LIMIT_MAX' in found_values}}
-    CU_LIMIT_MAX = cydriver.CUlimit_enum.CU_LIMIT_MAX{{endif}}
-{{endif}}
-{{if 'CUresourcetype_enum' in found_types}}
-
-class CUresourcetype(IntEnum):
-    """
-    Resource types
-    """
-    {{if 'CU_RESOURCE_TYPE_ARRAY' in found_values}}
-
-    #: Array resource
-    CU_RESOURCE_TYPE_ARRAY = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_ARRAY{{endif}}
-    {{if 'CU_RESOURCE_TYPE_MIPMAPPED_ARRAY' in found_values}}
-
-    #: Mipmapped array resource
-    CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_MIPMAPPED_ARRAY{{endif}}
-    {{if 'CU_RESOURCE_TYPE_LINEAR' in found_values}}
-
-    #: Linear resource
-    CU_RESOURCE_TYPE_LINEAR = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_LINEAR{{endif}}
-    {{if 'CU_RESOURCE_TYPE_PITCH2D' in found_values}}
-
-    #: Pitch 2D resource
-    CU_RESOURCE_TYPE_PITCH2D = cydriver.CUresourcetype_enum.CU_RESOURCE_TYPE_PITCH2D{{endif}}
-{{endif}}
-{{if 'CUaccessProperty_enum' in found_types}}
-
-class CUaccessProperty(IntEnum):
-    """
-    Specifies performance hint with :py:obj:`~.CUaccessPolicyWindow`
-    for hitProp and missProp members.
-    """
-    {{if 'CU_ACCESS_PROPERTY_NORMAL' in found_values}}
-
-    #: Normal cache persistence.
-    CU_ACCESS_PROPERTY_NORMAL = cydriver.CUaccessProperty_enum.CU_ACCESS_PROPERTY_NORMAL{{endif}}
-    {{if 'CU_ACCESS_PROPERTY_STREAMING' in found_values}}
-
-    #: Streaming access is less likely to persit from cache.
-    CU_ACCESS_PROPERTY_STREAMING = cydriver.CUaccessProperty_enum.CU_ACCESS_PROPERTY_STREAMING{{endif}}
-    {{if 'CU_ACCESS_PROPERTY_PERSISTING' in found_values}}
-
-    #: Persisting access is more likely to persist in cache.
-    CU_ACCESS_PROPERTY_PERSISTING = cydriver.CUaccessProperty_enum.CU_ACCESS_PROPERTY_PERSISTING{{endif}}
-{{endif}}
-{{if 'CUgraphConditionalNodeType_enum' in found_types}}
-
-class CUgraphConditionalNodeType(IntEnum):
-    """
-    Conditional node types
-    """
-    {{if 'CU_GRAPH_COND_TYPE_IF' in found_values}}
-
-    #: Conditional 'if' Node. Body executed once if condition value is non-
-    #: zero.
-    CU_GRAPH_COND_TYPE_IF = cydriver.CUgraphConditionalNodeType_enum.CU_GRAPH_COND_TYPE_IF{{endif}}
-    {{if 'CU_GRAPH_COND_TYPE_WHILE' in found_values}}
-
-    #: Conditional 'while' Node. Body executed repeatedly while condition
-    #: value is non-zero.
-    CU_GRAPH_COND_TYPE_WHILE = cydriver.CUgraphConditionalNodeType_enum.CU_GRAPH_COND_TYPE_WHILE{{endif}}
-{{endif}}
-{{if 'CUgraphNodeType_enum' in found_types}}
-
-class CUgraphNodeType(IntEnum):
-    """
-    Graph node types
-    """
-    {{if 'CU_GRAPH_NODE_TYPE_KERNEL' in found_values}}
-
-    #: GPU kernel node
-    CU_GRAPH_NODE_TYPE_KERNEL = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_KERNEL{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_MEMCPY' in found_values}}
-
-    #: Memcpy node
-    CU_GRAPH_NODE_TYPE_MEMCPY = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_MEMCPY{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_MEMSET' in found_values}}
-
-    #: Memset node
-    CU_GRAPH_NODE_TYPE_MEMSET = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_MEMSET{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_HOST' in found_values}}
-
-    #: Host (executable) node
-    CU_GRAPH_NODE_TYPE_HOST = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_HOST{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_GRAPH' in found_values}}
-
-    #: Node which executes an embedded graph
-    CU_GRAPH_NODE_TYPE_GRAPH = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_GRAPH{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_EMPTY' in found_values}}
-
-    #: Empty (no-op) node
-    CU_GRAPH_NODE_TYPE_EMPTY = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_EMPTY{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_WAIT_EVENT' in found_values}}
-
-    #: External event wait node
-    CU_GRAPH_NODE_TYPE_WAIT_EVENT = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_WAIT_EVENT{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_EVENT_RECORD' in found_values}}
-
-    #: External event record node
-    CU_GRAPH_NODE_TYPE_EVENT_RECORD = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_EVENT_RECORD{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL' in found_values}}
-
-    #: External semaphore signal node
-    CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT' in found_values}}
-
-    #: External semaphore wait node
-    CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_MEM_ALLOC' in found_values}}
-
-    #: Memory Allocation Node
-    CU_GRAPH_NODE_TYPE_MEM_ALLOC = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_MEM_ALLOC{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_MEM_FREE' in found_values}}
-
-    #: Memory Free Node
-    CU_GRAPH_NODE_TYPE_MEM_FREE = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_MEM_FREE{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_BATCH_MEM_OP' in found_values}}
-
-    #: Batch MemOp Node
-    CU_GRAPH_NODE_TYPE_BATCH_MEM_OP = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_BATCH_MEM_OP{{endif}}
-    {{if 'CU_GRAPH_NODE_TYPE_CONDITIONAL' in found_values}}
-
-    #: Conditional Node                                         May be used
-    #: to implement a conditional execution path or loop
-    #:                                         inside of a graph. The
-    #: graph(s) contained within the body of the conditional node
-    #:                                         can be selectively executed
-    #: or iterated upon based on the value of a conditional
-    #:                                         variable.
-    #:
-    #:                                         Handles must be created in
-    #: advance of creating the node
-    #:                                         using
-    #: :py:obj:`~.cuGraphConditionalHandleCreate`.
-    #:
-    #:                                         The following restrictions
-    #: apply to graphs which contain conditional nodes:
-    #:                                          The graph cannot be used in
-    #: a child node.
-    #:                                          Only one instantiation of
-    #: the graph may exist at any point in time.
-    #:                                          The graph cannot be cloned.
-    #:
-    #:                                         To set the control value,
-    #: supply a default value when creating the handle and/or
-    #:                                         call
-    #: :py:obj:`~.cudaGraphSetConditional` from device code.
-    CU_GRAPH_NODE_TYPE_CONDITIONAL = cydriver.CUgraphNodeType_enum.CU_GRAPH_NODE_TYPE_CONDITIONAL{{endif}}
-{{endif}}
-{{if 'CUgraphDependencyType_enum' in found_types}}
-
-class CUgraphDependencyType(IntEnum):
-    """
-    Type annotations that can be applied to graph edges as part of
-    :py:obj:`~.CUgraphEdgeData`.
-    """
-    {{if 'CU_GRAPH_DEPENDENCY_TYPE_DEFAULT' in found_values}}
-
-    #: This is an ordinary dependency.
-    CU_GRAPH_DEPENDENCY_TYPE_DEFAULT = cydriver.CUgraphDependencyType_enum.CU_GRAPH_DEPENDENCY_TYPE_DEFAULT{{endif}}
-    {{if 'CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC' in found_values}}
-
-    #: This dependency type allows the downstream node to use
-    #: `cudaGridDependencySynchronize()`. It may only be used between
-    #: kernel nodes, and must be used with either the
-    #: :py:obj:`~.CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC` or
-    #: :py:obj:`~.CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER` outgoing port.
-    CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC = cydriver.CUgraphDependencyType_enum.CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC{{endif}}
-{{endif}}
-{{if 'CUgraphInstantiateResult_enum' in found_types}}
-
-class CUgraphInstantiateResult(IntEnum):
-    """
-    Graph instantiation results
-    """
-    {{if 'CUDA_GRAPH_INSTANTIATE_SUCCESS' in found_values}}
-
-    #: Instantiation succeeded
-    CUDA_GRAPH_INSTANTIATE_SUCCESS = cydriver.CUgraphInstantiateResult_enum.CUDA_GRAPH_INSTANTIATE_SUCCESS{{endif}}
-    {{if 'CUDA_GRAPH_INSTANTIATE_ERROR' in found_values}}
-
-    #: Instantiation failed for an unexpected reason which is described in
-    #: the return value of the function
-    CUDA_GRAPH_INSTANTIATE_ERROR = cydriver.CUgraphInstantiateResult_enum.CUDA_GRAPH_INSTANTIATE_ERROR{{endif}}
-    {{if 'CUDA_GRAPH_INSTANTIATE_INVALID_STRUCTURE' in found_values}}
-
-    #: Instantiation failed due to invalid structure, such as cycles
-    CUDA_GRAPH_INSTANTIATE_INVALID_STRUCTURE = cydriver.CUgraphInstantiateResult_enum.CUDA_GRAPH_INSTANTIATE_INVALID_STRUCTURE{{endif}}
-    {{if 'CUDA_GRAPH_INSTANTIATE_NODE_OPERATION_NOT_SUPPORTED' in found_values}}
-
-    #: Instantiation for device launch failed because the graph contained
-    #: an unsupported operation
-    CUDA_GRAPH_INSTANTIATE_NODE_OPERATION_NOT_SUPPORTED = cydriver.CUgraphInstantiateResult_enum.CUDA_GRAPH_INSTANTIATE_NODE_OPERATION_NOT_SUPPORTED{{endif}}
-    {{if 'CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED' in found_values}}
-
-    #: Instantiation for device launch failed due to the nodes belonging to
-    #: different contexts
-    CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED = cydriver.CUgraphInstantiateResult_enum.CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED{{endif}}
-{{endif}}
-{{if 'CUsynchronizationPolicy_enum' in found_types}}
-
-class CUsynchronizationPolicy(IntEnum):
-    """
-
-    """
-    {{if 'CU_SYNC_POLICY_AUTO' in found_values}}
-    CU_SYNC_POLICY_AUTO = cydriver.CUsynchronizationPolicy_enum.CU_SYNC_POLICY_AUTO{{endif}}
-    {{if 'CU_SYNC_POLICY_SPIN' in found_values}}
-    CU_SYNC_POLICY_SPIN = cydriver.CUsynchronizationPolicy_enum.CU_SYNC_POLICY_SPIN{{endif}}
-    {{if 'CU_SYNC_POLICY_YIELD' in found_values}}
-    CU_SYNC_POLICY_YIELD = cydriver.CUsynchronizationPolicy_enum.CU_SYNC_POLICY_YIELD{{endif}}
-    {{if 'CU_SYNC_POLICY_BLOCKING_SYNC' in found_values}}
-    CU_SYNC_POLICY_BLOCKING_SYNC = cydriver.CUsynchronizationPolicy_enum.CU_SYNC_POLICY_BLOCKING_SYNC{{endif}}
-{{endif}}
-{{if 'CUclusterSchedulingPolicy_enum' in found_types}}
-
-class CUclusterSchedulingPolicy(IntEnum):
-    """
-    Cluster scheduling policies. These may be passed to
-    :py:obj:`~.cuFuncSetAttribute` or :py:obj:`~.cuKernelSetAttribute`
-    """
-    {{if 'CU_CLUSTER_SCHEDULING_POLICY_DEFAULT' in found_values}}
-
-    #: the default policy
-    CU_CLUSTER_SCHEDULING_POLICY_DEFAULT = cydriver.CUclusterSchedulingPolicy_enum.CU_CLUSTER_SCHEDULING_POLICY_DEFAULT{{endif}}
-    {{if 'CU_CLUSTER_SCHEDULING_POLICY_SPREAD' in found_values}}
-
-    #: spread the blocks within a cluster to the SMs
-    CU_CLUSTER_SCHEDULING_POLICY_SPREAD = cydriver.CUclusterSchedulingPolicy_enum.CU_CLUSTER_SCHEDULING_POLICY_SPREAD{{endif}}
-    {{if 'CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING' in found_values}}
-
-    #: allow the hardware to load-balance the blocks in a cluster to the
-    #: SMs
-    CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING = cydriver.CUclusterSchedulingPolicy_enum.CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING{{endif}}
-{{endif}}
-{{if 'CUlaunchMemSyncDomain_enum' in found_types}}
-
-class CUlaunchMemSyncDomain(IntEnum):
-    """
-    Memory Synchronization Domain  A kernel can be launched in a
-    specified memory synchronization domain that affects all memory
-    operations issued by that kernel. A memory barrier issued in one
-    domain will only order memory operations in that domain, thus
-    eliminating latency increase from memory barriers ordering
-    unrelated traffic.  By default, kernels are launched in domain 0.
-    Kernel launched with :py:obj:`~.CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE`
-    will have a different domain ID. User may also alter the domain ID
-    with :py:obj:`~.CUlaunchMemSyncDomainMap` for a specific stream /
-    graph node / kernel launch. See
-    :py:obj:`~.CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN`,
-    :py:obj:`~.cuStreamSetAttribute`, :py:obj:`~.cuLaunchKernelEx`,
-    :py:obj:`~.cuGraphKernelNodeSetAttribute`.  Memory operations done
-    in kernels launched in different domains are considered system-
-    scope distanced. In other words, a GPU scoped memory
-    synchronization is not sufficient for memory order to be observed
-    by kernels in another memory synchronization domain even if they
-    are on the same GPU.
-    """
-    {{if 'CU_LAUNCH_MEM_SYNC_DOMAIN_DEFAULT' in found_values}}
-
-    #: Launch kernels in the default domain
-    CU_LAUNCH_MEM_SYNC_DOMAIN_DEFAULT = cydriver.CUlaunchMemSyncDomain_enum.CU_LAUNCH_MEM_SYNC_DOMAIN_DEFAULT{{endif}}
-    {{if 'CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE' in found_values}}
-
-    #: Launch kernels in the remote domain
-    CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE = cydriver.CUlaunchMemSyncDomain_enum.CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE{{endif}}
-{{endif}}
-{{if 'CUlaunchAttributeID_enum' in found_types}}
-
-class CUlaunchAttributeID(IntEnum):
-    """
-    Launch attributes enum; used as id field of
-    :py:obj:`~.CUlaunchAttribute`
-    """
-    {{if 'CU_LAUNCH_ATTRIBUTE_IGNORE' in found_values}}
-
-    #: Ignored entry, for convenient composition
-    CU_LAUNCH_ATTRIBUTE_IGNORE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_IGNORE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.accessPolicyWindow`.
-    CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_COOPERATIVE' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.cooperative`.
-    CU_LAUNCH_ATTRIBUTE_COOPERATIVE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_COOPERATIVE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY' in found_values}}
-
-    #: Valid for streams. See
-    #: :py:obj:`~.CUlaunchAttributeValue.syncPolicy`.
-    CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.clusterDim`.
-    CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.clusterSchedulingPolicyPreference`.
-    CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION' in found_values}}
-
-    #: Valid for launches. Setting
-    #: :py:obj:`~.CUlaunchAttributeValue.programmaticStreamSerializationAllowed`
-    #: to non-0 signals that the kernel will use programmatic means to
-    #: resolve its stream dependency, so that the CUDA runtime should
-    #: opportunistically allow the grid's execution to overlap with the
-    #: previous kernel in the stream, if that kernel requests the overlap.
-    #: The dependent launches can choose to wait on the dependency using
-    #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
-    #: PTX instructions).
-    CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.CUlaunchAttributeValue.programmaticEvent` to record the
-    #: event. Event recorded through this launch attribute is guaranteed to
-    #: only trigger after all block in the associated kernel trigger the
-    #: event. A block can trigger the event through PTX launchdep.release
-    #: or CUDA builtin function cudaTriggerProgrammaticLaunchCompletion().
-    #: A trigger can also be inserted at the beginning of each block's
-    #: execution if triggerAtBlockStart is set to non-0. The dependent
-    #: launches can choose to wait on the dependency using the programmatic
-    #: sync (cudaGridDependencySynchronize() or equivalent PTX
-    #: instructions). Note that dependents (including the CPU thread
-    #: calling :py:obj:`~.cuEventSynchronize()`) are not guaranteed to
-    #: observe the release precisely when it is released. For example,
-    #: :py:obj:`~.cuEventSynchronize()` may only observe the event trigger
-    #: long after the associated kernel has completed. This recording type
-    #: is primarily meant for establishing programmatic dependency between
-    #: device tasks. Note also this type of dependency allows, but does not
-    #: guarantee, concurrent execution of tasks.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag set).
-    CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PRIORITY' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.priority`.
-    CU_LAUNCH_ATTRIBUTE_PRIORITY = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PRIORITY{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.memSyncDomainMap`.
-    CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.memSyncDomain`.
-    CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.CUlaunchAttributeValue.launchCompletionEvent` to record
-    #: the event.
-    #:  Nominally, the event is triggered once all blocks of the kernel
-    #: have begun execution. Currently this is a best effort. If a kernel B
-    #: has a launch completion dependency on a kernel A, B may wait until A
-    #: is complete. Alternatively, blocks of B may begin before all blocks
-    #: of A have begun, for example if B can claim execution resources
-    #: unavailable to A (e.g. they run on different GPUs) or if B is a
-    #: higher priority than A. Exercise caution if such an ordering
-    #: inversion could lead to deadlock.
-    #:  A launch completion event is nominally similar to a programmatic
-    #: event with `triggerAtBlockStart` set except that it is not visible
-    #: to `cudaGridDependencySynchronize()` and can be used with compute
-    #: capability less than 9.0.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag set).
-    CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE' in found_values}}
-
-    #: Valid for graph nodes, launches. This attribute is graphs-only, and
-    #: passing it to a launch in a non-capturing stream will result in an
-    #: error.
-    #: :py:obj:`~.CUlaunchAttributeValue`::deviceUpdatableKernelNode::deviceUpdatable
-    #: can only be set to 0 or 1. Setting the field to 1 indicates that the
-    #: corresponding kernel node should be device-updatable. On success, a
-    #: handle will be returned via
-    #: :py:obj:`~.CUlaunchAttributeValue`::deviceUpdatableKernelNode::devNode
-    #: which can be passed to the various device-side update functions to
-    #: update the node's kernel parameters from within another kernel. For
-    #: more information on the types of device updates that can be made, as
-    #: well as the relevant limitations thereof, see
-    #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
-    #:  Nodes which are device-updatable have additional restrictions
-    #: compared to regular kernel nodes. Firstly, device-updatable nodes
-    #: cannot be removed from their graph via
-    #: :py:obj:`~.cuGraphDestroyNode`. Additionally, once opted-in to this
-    #: functionality, a node cannot opt out, and any attempt to set the
-    #: deviceUpdatable attribute to 0 will result in an error. Device-
-    #: updatable kernel nodes also cannot have their attributes copied
-    #: to/from another kernel node via
-    #: :py:obj:`~.cuGraphKernelNodeCopyAttributes`. Graphs containing one
-    #: or more device-updatable nodes also do not allow multiple
-    #: instantiation, and neither the graph nor its instantiated version
-    #: can be passed to :py:obj:`~.cuGraphExecUpdate`.
-    #:  If a graph contains device-updatable nodes and updates those nodes
-    #: from the device from within the graph, the graph must be uploaded
-    #: with :py:obj:`~.cuGraphUpload` before it is launched. For such a
-    #: graph, if host-side executable graph updates are made to the device-
-    #: updatable nodes, the graph must be uploaded before it is launched
-    #: again.
-    CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT' in found_values}}
-
-    #: Valid for launches. On devices where the L1 cache and shared memory
-    #: use the same hardware resources, setting
-    #: :py:obj:`~.CUlaunchAttributeValue.sharedMemCarveout` to a percentage
-    #: between 0-100 signals the CUDA driver to set the shared memory
-    #: carveout preference, in percent of the total shared memory for that
-    #: kernel launch. This attribute takes precedence over
-    #: :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`. This
-    #: is only a hint, and the CUDA driver can choose a different
-    #: configuration if required for the launch.
-    CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT{{endif}}
-{{endif}}
-{{if 'CUstreamCaptureStatus_enum' in found_types}}
-
-class CUstreamCaptureStatus(IntEnum):
-    """
-    Possible stream capture statuses returned by
-    :py:obj:`~.cuStreamIsCapturing`
-    """
-    {{if 'CU_STREAM_CAPTURE_STATUS_NONE' in found_values}}
-
-    #: Stream is not capturing
-    CU_STREAM_CAPTURE_STATUS_NONE = cydriver.CUstreamCaptureStatus_enum.CU_STREAM_CAPTURE_STATUS_NONE{{endif}}
-    {{if 'CU_STREAM_CAPTURE_STATUS_ACTIVE' in found_values}}
-
-    #: Stream is actively capturing
-    CU_STREAM_CAPTURE_STATUS_ACTIVE = cydriver.CUstreamCaptureStatus_enum.CU_STREAM_CAPTURE_STATUS_ACTIVE{{endif}}
-    {{if 'CU_STREAM_CAPTURE_STATUS_INVALIDATED' in found_values}}
-
-    #: Stream is part of a capture sequence that has been invalidated, but
-    #: not terminated
-    CU_STREAM_CAPTURE_STATUS_INVALIDATED = cydriver.CUstreamCaptureStatus_enum.CU_STREAM_CAPTURE_STATUS_INVALIDATED{{endif}}
-{{endif}}
-{{if 'CUstreamCaptureMode_enum' in found_types}}
-
-class CUstreamCaptureMode(IntEnum):
-    """
-    Possible modes for stream capture thread interactions. For more
-    details see :py:obj:`~.cuStreamBeginCapture` and
-    :py:obj:`~.cuThreadExchangeStreamCaptureMode`
-    """
-    {{if 'CU_STREAM_CAPTURE_MODE_GLOBAL' in found_values}}
-    CU_STREAM_CAPTURE_MODE_GLOBAL = cydriver.CUstreamCaptureMode_enum.CU_STREAM_CAPTURE_MODE_GLOBAL{{endif}}
-    {{if 'CU_STREAM_CAPTURE_MODE_THREAD_LOCAL' in found_values}}
-    CU_STREAM_CAPTURE_MODE_THREAD_LOCAL = cydriver.CUstreamCaptureMode_enum.CU_STREAM_CAPTURE_MODE_THREAD_LOCAL{{endif}}
-    {{if 'CU_STREAM_CAPTURE_MODE_RELAXED' in found_values}}
-    CU_STREAM_CAPTURE_MODE_RELAXED = cydriver.CUstreamCaptureMode_enum.CU_STREAM_CAPTURE_MODE_RELAXED{{endif}}
-{{endif}}
-{{if 'CUdriverProcAddress_flags_enum' in found_types}}
-
-class CUdriverProcAddress_flags(IntEnum):
-    """
-    Flags to specify search options. For more details see
-    :py:obj:`~.cuGetProcAddress`
-    """
-    {{if 'CU_GET_PROC_ADDRESS_DEFAULT' in found_values}}
-
-    #: Default search mode for driver symbols.
-    CU_GET_PROC_ADDRESS_DEFAULT = cydriver.CUdriverProcAddress_flags_enum.CU_GET_PROC_ADDRESS_DEFAULT{{endif}}
-    {{if 'CU_GET_PROC_ADDRESS_LEGACY_STREAM' in found_values}}
-
-    #: Search for legacy versions of driver symbols.
-    CU_GET_PROC_ADDRESS_LEGACY_STREAM = cydriver.CUdriverProcAddress_flags_enum.CU_GET_PROC_ADDRESS_LEGACY_STREAM{{endif}}
-    {{if 'CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM' in found_values}}
-
-    #: Search for per-thread versions of driver symbols.
-    CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM = cydriver.CUdriverProcAddress_flags_enum.CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM{{endif}}
-{{endif}}
-{{if 'CUdriverProcAddressQueryResult_enum' in found_types}}
-
-class CUdriverProcAddressQueryResult(IntEnum):
-    """
-    Flags to indicate search status. For more details see
-    :py:obj:`~.cuGetProcAddress`
-    """
-    {{if 'CU_GET_PROC_ADDRESS_SUCCESS' in found_values}}
-
-    #: Symbol was succesfully found
-    CU_GET_PROC_ADDRESS_SUCCESS = cydriver.CUdriverProcAddressQueryResult_enum.CU_GET_PROC_ADDRESS_SUCCESS{{endif}}
-    {{if 'CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND' in found_values}}
-
-    #: Symbol was not found in search
-    CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND = cydriver.CUdriverProcAddressQueryResult_enum.CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND{{endif}}
-    {{if 'CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT' in found_values}}
-
-    #: Symbol was found but version supplied was not sufficient
-    CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT = cydriver.CUdriverProcAddressQueryResult_enum.CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT{{endif}}
-{{endif}}
-{{if 'CUexecAffinityType_enum' in found_types}}
-
-class CUexecAffinityType(IntEnum):
-    """
-    Execution Affinity Types
-    """
-    {{if 'CU_EXEC_AFFINITY_TYPE_SM_COUNT' in found_values}}
-
-    #: Create a context with limited SMs.
-    CU_EXEC_AFFINITY_TYPE_SM_COUNT = cydriver.CUexecAffinityType_enum.CU_EXEC_AFFINITY_TYPE_SM_COUNT{{endif}}
-    {{if 'CU_EXEC_AFFINITY_TYPE_MAX' in found_values}}
-    CU_EXEC_AFFINITY_TYPE_MAX = cydriver.CUexecAffinityType_enum.CU_EXEC_AFFINITY_TYPE_MAX{{endif}}
-{{endif}}
-{{if 'CUcigDataType_enum' in found_types}}
-
-class CUcigDataType(IntEnum):
-    """
-
-    """
-    {{if 'CIG_DATA_TYPE_D3D12_COMMAND_QUEUE' in found_values}}
-    CIG_DATA_TYPE_D3D12_COMMAND_QUEUE = cydriver.CUcigDataType_enum.CIG_DATA_TYPE_D3D12_COMMAND_QUEUE{{endif}}
-{{endif}}
-{{if 'CUlibraryOption_enum' in found_types}}
-
-class CUlibraryOption(IntEnum):
-    """
-    Library options to be specified with
-    :py:obj:`~.cuLibraryLoadData()` or
-    :py:obj:`~.cuLibraryLoadFromFile()`
-    """
-    {{if 'CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE' in found_values}}
-    CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE = cydriver.CUlibraryOption_enum.CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE{{endif}}
-    {{if 'CU_LIBRARY_BINARY_IS_PRESERVED' in found_values}}
-
-    #: Specifes that the argument `code` passed to
-    #: :py:obj:`~.cuLibraryLoadData()` will be preserved. Specifying this
-    #: option will let the driver know that `code` can be accessed at any
-    #: point until :py:obj:`~.cuLibraryUnload()`. The default behavior is
-    #: for the driver to allocate and maintain its own copy of `code`. Note
-    #: that this is only a memory usage optimization hint and the driver
-    #: can choose to ignore it if required. Specifying this option with
-    #: :py:obj:`~.cuLibraryLoadFromFile()` is invalid and will return
-    #: :py:obj:`~.CUDA_ERROR_INVALID_VALUE`.
-    CU_LIBRARY_BINARY_IS_PRESERVED = cydriver.CUlibraryOption_enum.CU_LIBRARY_BINARY_IS_PRESERVED{{endif}}
-    {{if 'CU_LIBRARY_NUM_OPTIONS' in found_values}}
-    CU_LIBRARY_NUM_OPTIONS = cydriver.CUlibraryOption_enum.CU_LIBRARY_NUM_OPTIONS{{endif}}
-{{endif}}
-{{if 'cudaError_enum' in found_types}}
-
-class CUresult(IntEnum):
-    """
-    Error codes
-    """
-    {{if 'CUDA_SUCCESS' in found_values}}
-
-    #: The API call returned with no errors. In the case of query calls,
-    #: this also means that the operation being queried is complete (see
-    #: :py:obj:`~.cuEventQuery()` and :py:obj:`~.cuStreamQuery()`).
-    CUDA_SUCCESS = cydriver.cudaError_enum.CUDA_SUCCESS{{endif}}
-    {{if 'CUDA_ERROR_INVALID_VALUE' in found_values}}
-
-    #: This indicates that one or more of the parameters passed to the API
-    #: call is not within an acceptable range of values.
-    CUDA_ERROR_INVALID_VALUE = cydriver.cudaError_enum.CUDA_ERROR_INVALID_VALUE{{endif}}
-    {{if 'CUDA_ERROR_OUT_OF_MEMORY' in found_values}}
-
-    #: The API call failed because it was unable to allocate enough memory
-    #: or other resources to perform the requested operation.
-    CUDA_ERROR_OUT_OF_MEMORY = cydriver.cudaError_enum.CUDA_ERROR_OUT_OF_MEMORY{{endif}}
-    {{if 'CUDA_ERROR_NOT_INITIALIZED' in found_values}}
-
-    #: This indicates that the CUDA driver has not been initialized with
-    #: :py:obj:`~.cuInit()` or that initialization has failed.
-    CUDA_ERROR_NOT_INITIALIZED = cydriver.cudaError_enum.CUDA_ERROR_NOT_INITIALIZED{{endif}}
-    {{if 'CUDA_ERROR_DEINITIALIZED' in found_values}}
-
-    #: This indicates that the CUDA driver is in the process of shutting
-    #: down.
-    CUDA_ERROR_DEINITIALIZED = cydriver.cudaError_enum.CUDA_ERROR_DEINITIALIZED{{endif}}
-    {{if 'CUDA_ERROR_PROFILER_DISABLED' in found_values}}
-
-    #: This indicates profiler is not initialized for this run. This can
-    #: happen when the application is running with external profiling tools
-    #: like visual profiler.
-    CUDA_ERROR_PROFILER_DISABLED = cydriver.cudaError_enum.CUDA_ERROR_PROFILER_DISABLED{{endif}}
-    {{if 'CUDA_ERROR_PROFILER_NOT_INITIALIZED' in found_values}}
-
-    #: [Deprecated]
-    CUDA_ERROR_PROFILER_NOT_INITIALIZED = cydriver.cudaError_enum.CUDA_ERROR_PROFILER_NOT_INITIALIZED{{endif}}
-    {{if 'CUDA_ERROR_PROFILER_ALREADY_STARTED' in found_values}}
-
-    #: [Deprecated]
-    CUDA_ERROR_PROFILER_ALREADY_STARTED = cydriver.cudaError_enum.CUDA_ERROR_PROFILER_ALREADY_STARTED{{endif}}
-    {{if 'CUDA_ERROR_PROFILER_ALREADY_STOPPED' in found_values}}
-
-    #: [Deprecated]
-    CUDA_ERROR_PROFILER_ALREADY_STOPPED = cydriver.cudaError_enum.CUDA_ERROR_PROFILER_ALREADY_STOPPED{{endif}}
-    {{if 'CUDA_ERROR_STUB_LIBRARY' in found_values}}
-
-    #: This indicates that the CUDA driver that the application has loaded
-    #: is a stub library. Applications that run with the stub rather than a
-    #: real driver loaded will result in CUDA API returning this error.
-    CUDA_ERROR_STUB_LIBRARY = cydriver.cudaError_enum.CUDA_ERROR_STUB_LIBRARY{{endif}}
-    {{if 'CUDA_ERROR_DEVICE_UNAVAILABLE' in found_values}}
-
-    #: This indicates that requested CUDA device is unavailable at the
-    #: current time. Devices are often unavailable due to use of
-    #: :py:obj:`~.CU_COMPUTEMODE_EXCLUSIVE_PROCESS` or
-    #: :py:obj:`~.CU_COMPUTEMODE_PROHIBITED`.
-    CUDA_ERROR_DEVICE_UNAVAILABLE = cydriver.cudaError_enum.CUDA_ERROR_DEVICE_UNAVAILABLE{{endif}}
-    {{if 'CUDA_ERROR_NO_DEVICE' in found_values}}
-
-    #: This indicates that no CUDA-capable devices were detected by the
-    #: installed CUDA driver.
-    CUDA_ERROR_NO_DEVICE = cydriver.cudaError_enum.CUDA_ERROR_NO_DEVICE{{endif}}
-    {{if 'CUDA_ERROR_INVALID_DEVICE' in found_values}}
-
-    #: This indicates that the device ordinal supplied by the user does not
-    #: correspond to a valid CUDA device or that the action requested is
-    #: invalid for the specified device.
-    CUDA_ERROR_INVALID_DEVICE = cydriver.cudaError_enum.CUDA_ERROR_INVALID_DEVICE{{endif}}
-    {{if 'CUDA_ERROR_DEVICE_NOT_LICENSED' in found_values}}
-
-    #: This error indicates that the Grid license is not applied.
-    CUDA_ERROR_DEVICE_NOT_LICENSED = cydriver.cudaError_enum.CUDA_ERROR_DEVICE_NOT_LICENSED{{endif}}
-    {{if 'CUDA_ERROR_INVALID_IMAGE' in found_values}}
-
-    #: This indicates that the device kernel image is invalid. This can
-    #: also indicate an invalid CUDA module.
-    CUDA_ERROR_INVALID_IMAGE = cydriver.cudaError_enum.CUDA_ERROR_INVALID_IMAGE{{endif}}
-    {{if 'CUDA_ERROR_INVALID_CONTEXT' in found_values}}
-
-    #: This most frequently indicates that there is no context bound to the
-    #: current thread. This can also be returned if the context passed to
-    #: an API call is not a valid handle (such as a context that has had
-    #: :py:obj:`~.cuCtxDestroy()` invoked on it). This can also be returned
-    #: if a user mixes different API versions (i.e. 3010 context with 3020
-    #: API calls). See :py:obj:`~.cuCtxGetApiVersion()` for more details.
-    #: This can also be returned if the green context passed to an API call
-    #: was not converted to a :py:obj:`~.CUcontext` using
-    #: :py:obj:`~.cuCtxFromGreenCtx` API.
-    CUDA_ERROR_INVALID_CONTEXT = cydriver.cudaError_enum.CUDA_ERROR_INVALID_CONTEXT{{endif}}
-    {{if 'CUDA_ERROR_CONTEXT_ALREADY_CURRENT' in found_values}}
-
-    #: This indicated that the context being supplied as a parameter to the
-    #: API call was already the active context. [Deprecated]
-    CUDA_ERROR_CONTEXT_ALREADY_CURRENT = cydriver.cudaError_enum.CUDA_ERROR_CONTEXT_ALREADY_CURRENT{{endif}}
-    {{if 'CUDA_ERROR_MAP_FAILED' in found_values}}
-
-    #: This indicates that a map or register operation has failed.
-    CUDA_ERROR_MAP_FAILED = cydriver.cudaError_enum.CUDA_ERROR_MAP_FAILED{{endif}}
-    {{if 'CUDA_ERROR_UNMAP_FAILED' in found_values}}
-
-    #: This indicates that an unmap or unregister operation has failed.
-    CUDA_ERROR_UNMAP_FAILED = cydriver.cudaError_enum.CUDA_ERROR_UNMAP_FAILED{{endif}}
-    {{if 'CUDA_ERROR_ARRAY_IS_MAPPED' in found_values}}
-
-    #: This indicates that the specified array is currently mapped and thus
-    #: cannot be destroyed.
-    CUDA_ERROR_ARRAY_IS_MAPPED = cydriver.cudaError_enum.CUDA_ERROR_ARRAY_IS_MAPPED{{endif}}
-    {{if 'CUDA_ERROR_ALREADY_MAPPED' in found_values}}
-
-    #: This indicates that the resource is already mapped.
-    CUDA_ERROR_ALREADY_MAPPED = cydriver.cudaError_enum.CUDA_ERROR_ALREADY_MAPPED{{endif}}
-    {{if 'CUDA_ERROR_NO_BINARY_FOR_GPU' in found_values}}
-
-    #: This indicates that there is no kernel image available that is
-    #: suitable for the device. This can occur when a user specifies code
-    #: generation options for a particular CUDA source file that do not
-    #: include the corresponding device configuration.
-    CUDA_ERROR_NO_BINARY_FOR_GPU = cydriver.cudaError_enum.CUDA_ERROR_NO_BINARY_FOR_GPU{{endif}}
-    {{if 'CUDA_ERROR_ALREADY_ACQUIRED' in found_values}}
-
-    #: This indicates that a resource has already been acquired.
-    CUDA_ERROR_ALREADY_ACQUIRED = cydriver.cudaError_enum.CUDA_ERROR_ALREADY_ACQUIRED{{endif}}
-    {{if 'CUDA_ERROR_NOT_MAPPED' in found_values}}
-
-    #: This indicates that a resource is not mapped.
-    CUDA_ERROR_NOT_MAPPED = cydriver.cudaError_enum.CUDA_ERROR_NOT_MAPPED{{endif}}
-    {{if 'CUDA_ERROR_NOT_MAPPED_AS_ARRAY' in found_values}}
-
-    #: This indicates that a mapped resource is not available for access as
-    #: an array.
-    CUDA_ERROR_NOT_MAPPED_AS_ARRAY = cydriver.cudaError_enum.CUDA_ERROR_NOT_MAPPED_AS_ARRAY{{endif}}
-    {{if 'CUDA_ERROR_NOT_MAPPED_AS_POINTER' in found_values}}
-
-    #: This indicates that a mapped resource is not available for access as
-    #: a pointer.
-    CUDA_ERROR_NOT_MAPPED_AS_POINTER = cydriver.cudaError_enum.CUDA_ERROR_NOT_MAPPED_AS_POINTER{{endif}}
-    {{if 'CUDA_ERROR_ECC_UNCORRECTABLE' in found_values}}
-
-    #: This indicates that an uncorrectable ECC error was detected during
-    #: execution.
-    CUDA_ERROR_ECC_UNCORRECTABLE = cydriver.cudaError_enum.CUDA_ERROR_ECC_UNCORRECTABLE{{endif}}
-    {{if 'CUDA_ERROR_UNSUPPORTED_LIMIT' in found_values}}
-
-    #: This indicates that the :py:obj:`~.CUlimit` passed to the API call
-    #: is not supported by the active device.
-    CUDA_ERROR_UNSUPPORTED_LIMIT = cydriver.cudaError_enum.CUDA_ERROR_UNSUPPORTED_LIMIT{{endif}}
-    {{if 'CUDA_ERROR_CONTEXT_ALREADY_IN_USE' in found_values}}
-
-    #: This indicates that the :py:obj:`~.CUcontext` passed to the API call
-    #: can only be bound to a single CPU thread at a time but is already
-    #: bound to a CPU thread.
-    CUDA_ERROR_CONTEXT_ALREADY_IN_USE = cydriver.cudaError_enum.CUDA_ERROR_CONTEXT_ALREADY_IN_USE{{endif}}
-    {{if 'CUDA_ERROR_PEER_ACCESS_UNSUPPORTED' in found_values}}
-
-    #: This indicates that peer access is not supported across the given
-    #: devices.
-    CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = cydriver.cudaError_enum.CUDA_ERROR_PEER_ACCESS_UNSUPPORTED{{endif}}
-    {{if 'CUDA_ERROR_INVALID_PTX' in found_values}}
-
-    #: This indicates that a PTX JIT compilation failed.
-    CUDA_ERROR_INVALID_PTX = cydriver.cudaError_enum.CUDA_ERROR_INVALID_PTX{{endif}}
-    {{if 'CUDA_ERROR_INVALID_GRAPHICS_CONTEXT' in found_values}}
-
-    #: This indicates an error with OpenGL or DirectX context.
-    CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = cydriver.cudaError_enum.CUDA_ERROR_INVALID_GRAPHICS_CONTEXT{{endif}}
-    {{if 'CUDA_ERROR_NVLINK_UNCORRECTABLE' in found_values}}
-
-    #: This indicates that an uncorrectable NVLink error was detected
-    #: during the execution.
-    CUDA_ERROR_NVLINK_UNCORRECTABLE = cydriver.cudaError_enum.CUDA_ERROR_NVLINK_UNCORRECTABLE{{endif}}
-    {{if 'CUDA_ERROR_JIT_COMPILER_NOT_FOUND' in found_values}}
-
-    #: This indicates that the PTX JIT compiler library was not found.
-    CUDA_ERROR_JIT_COMPILER_NOT_FOUND = cydriver.cudaError_enum.CUDA_ERROR_JIT_COMPILER_NOT_FOUND{{endif}}
-    {{if 'CUDA_ERROR_UNSUPPORTED_PTX_VERSION' in found_values}}
-
-    #: This indicates that the provided PTX was compiled with an
-    #: unsupported toolchain.
-    CUDA_ERROR_UNSUPPORTED_PTX_VERSION = cydriver.cudaError_enum.CUDA_ERROR_UNSUPPORTED_PTX_VERSION{{endif}}
-    {{if 'CUDA_ERROR_JIT_COMPILATION_DISABLED' in found_values}}
-
-    #: This indicates that the PTX JIT compilation was disabled.
-    CUDA_ERROR_JIT_COMPILATION_DISABLED = cydriver.cudaError_enum.CUDA_ERROR_JIT_COMPILATION_DISABLED{{endif}}
-    {{if 'CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY' in found_values}}
-
-    #: This indicates that the :py:obj:`~.CUexecAffinityType` passed to the
-    #: API call is not supported by the active device.
-    CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = cydriver.cudaError_enum.CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY{{endif}}
-    {{if 'CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC' in found_values}}
-
-    #: This indicates that the code to be compiled by the PTX JIT contains
-    #: unsupported call to cudaDeviceSynchronize.
-    CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC = cydriver.cudaError_enum.CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC{{endif}}
-    {{if 'CUDA_ERROR_INVALID_SOURCE' in found_values}}
-
-    #: This indicates that the device kernel source is invalid. This
-    #: includes compilation/linker errors encountered in device code or
-    #: user error.
-    CUDA_ERROR_INVALID_SOURCE = cydriver.cudaError_enum.CUDA_ERROR_INVALID_SOURCE{{endif}}
-    {{if 'CUDA_ERROR_FILE_NOT_FOUND' in found_values}}
-
-    #: This indicates that the file specified was not found.
-    CUDA_ERROR_FILE_NOT_FOUND = cydriver.cudaError_enum.CUDA_ERROR_FILE_NOT_FOUND{{endif}}
-    {{if 'CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND' in found_values}}
-
-    #: This indicates that a link to a shared object failed to resolve.
-    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = cydriver.cudaError_enum.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND{{endif}}
-    {{if 'CUDA_ERROR_SHARED_OBJECT_INIT_FAILED' in found_values}}
-
-    #: This indicates that initialization of a shared object failed.
-    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = cydriver.cudaError_enum.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED{{endif}}
-    {{if 'CUDA_ERROR_OPERATING_SYSTEM' in found_values}}
-
-    #: This indicates that an OS call failed.
-    CUDA_ERROR_OPERATING_SYSTEM = cydriver.cudaError_enum.CUDA_ERROR_OPERATING_SYSTEM{{endif}}
-    {{if 'CUDA_ERROR_INVALID_HANDLE' in found_values}}
-
-    #: This indicates that a resource handle passed to the API call was not
-    #: valid. Resource handles are opaque types like :py:obj:`~.CUstream`
-    #: and :py:obj:`~.CUevent`.
-    CUDA_ERROR_INVALID_HANDLE = cydriver.cudaError_enum.CUDA_ERROR_INVALID_HANDLE{{endif}}
-    {{if 'CUDA_ERROR_ILLEGAL_STATE' in found_values}}
-
-    #: This indicates that a resource required by the API call is not in a
-    #: valid state to perform the requested operation.
-    CUDA_ERROR_ILLEGAL_STATE = cydriver.cudaError_enum.CUDA_ERROR_ILLEGAL_STATE{{endif}}
-    {{if 'CUDA_ERROR_LOSSY_QUERY' in found_values}}
-
-    #: This indicates an attempt was made to introspect an object in a way
-    #: that would discard semantically important information. This is
-    #: either due to the object using funtionality newer than the API
-    #: version used to introspect it or omission of optional return
-    #: arguments.
-    CUDA_ERROR_LOSSY_QUERY = cydriver.cudaError_enum.CUDA_ERROR_LOSSY_QUERY{{endif}}
-    {{if 'CUDA_ERROR_NOT_FOUND' in found_values}}
-
-    #: This indicates that a named symbol was not found. Examples of
-    #: symbols are global/constant variable names, driver function names,
-    #: texture names, and surface names.
-    CUDA_ERROR_NOT_FOUND = cydriver.cudaError_enum.CUDA_ERROR_NOT_FOUND{{endif}}
-    {{if 'CUDA_ERROR_NOT_READY' in found_values}}
-
-    #: This indicates that asynchronous operations issued previously have
-    #: not completed yet. This result is not actually an error, but must be
-    #: indicated differently than :py:obj:`~.CUDA_SUCCESS` (which indicates
-    #: completion). Calls that may return this value include
-    #: :py:obj:`~.cuEventQuery()` and :py:obj:`~.cuStreamQuery()`.
-    CUDA_ERROR_NOT_READY = cydriver.cudaError_enum.CUDA_ERROR_NOT_READY{{endif}}
-    {{if 'CUDA_ERROR_ILLEGAL_ADDRESS' in found_values}}
-
-    #: While executing a kernel, the device encountered a load or store
-    #: instruction on an invalid memory address. This leaves the process in
-    #: an inconsistent state and any further CUDA work will return the same
-    #: error. To continue using CUDA, the process must be terminated and
-    #: relaunched.
-    CUDA_ERROR_ILLEGAL_ADDRESS = cydriver.cudaError_enum.CUDA_ERROR_ILLEGAL_ADDRESS{{endif}}
-    {{if 'CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES' in found_values}}
-
-    #: This indicates that a launch did not occur because it did not have
-    #: appropriate resources. This error usually indicates that the user
-    #: has attempted to pass too many arguments to the device kernel, or
-    #: the kernel launch specifies too many threads for the kernel's
-    #: register count. Passing arguments of the wrong size (i.e. a 64-bit
-    #: pointer when a 32-bit int is expected) is equivalent to passing too
-    #: many arguments and can also result in this error.
-    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = cydriver.cudaError_enum.CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES{{endif}}
-    {{if 'CUDA_ERROR_LAUNCH_TIMEOUT' in found_values}}
-
-    #: This indicates that the device kernel took too long to execute. This
-    #: can only occur if timeouts are enabled - see the device attribute
-    #: :py:obj:`~.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT` for more
-    #: information. This leaves the process in an inconsistent state and
-    #: any further CUDA work will return the same error. To continue using
-    #: CUDA, the process must be terminated and relaunched.
-    CUDA_ERROR_LAUNCH_TIMEOUT = cydriver.cudaError_enum.CUDA_ERROR_LAUNCH_TIMEOUT{{endif}}
-    {{if 'CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING' in found_values}}
-
-    #: This error indicates a kernel launch that uses an incompatible
-    #: texturing mode.
-    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = cydriver.cudaError_enum.CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING{{endif}}
-    {{if 'CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED' in found_values}}
-
-    #: This error indicates that a call to
-    #: :py:obj:`~.cuCtxEnablePeerAccess()` is trying to re-enable peer
-    #: access to a context which has already had peer access to it enabled.
-    CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = cydriver.cudaError_enum.CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED{{endif}}
-    {{if 'CUDA_ERROR_PEER_ACCESS_NOT_ENABLED' in found_values}}
-
-    #: This error indicates that :py:obj:`~.cuCtxDisablePeerAccess()` is
-    #: trying to disable peer access which has not been enabled yet via
-    #: :py:obj:`~.cuCtxEnablePeerAccess()`.
-    CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = cydriver.cudaError_enum.CUDA_ERROR_PEER_ACCESS_NOT_ENABLED{{endif}}
-    {{if 'CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE' in found_values}}
-
-    #: This error indicates that the primary context for the specified
-    #: device has already been initialized.
-    CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = cydriver.cudaError_enum.CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE{{endif}}
-    {{if 'CUDA_ERROR_CONTEXT_IS_DESTROYED' in found_values}}
-
-    #: This error indicates that the context current to the calling thread
-    #: has been destroyed using :py:obj:`~.cuCtxDestroy`, or is a primary
-    #: context which has not yet been initialized.
-    CUDA_ERROR_CONTEXT_IS_DESTROYED = cydriver.cudaError_enum.CUDA_ERROR_CONTEXT_IS_DESTROYED{{endif}}
-    {{if 'CUDA_ERROR_ASSERT' in found_values}}
-
-    #: A device-side assert triggered during kernel execution. The context
-    #: cannot be used anymore, and must be destroyed. All existing device
-    #: memory allocations from this context are invalid and must be
-    #: reconstructed if the program is to continue using CUDA.
-    CUDA_ERROR_ASSERT = cydriver.cudaError_enum.CUDA_ERROR_ASSERT{{endif}}
-    {{if 'CUDA_ERROR_TOO_MANY_PEERS' in found_values}}
-
-    #: This error indicates that the hardware resources required to enable
-    #: peer access have been exhausted for one or more of the devices
-    #: passed to :py:obj:`~.cuCtxEnablePeerAccess()`.
-    CUDA_ERROR_TOO_MANY_PEERS = cydriver.cudaError_enum.CUDA_ERROR_TOO_MANY_PEERS{{endif}}
-    {{if 'CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED' in found_values}}
-
-    #: This error indicates that the memory range passed to
-    #: :py:obj:`~.cuMemHostRegister()` has already been registered.
-    CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = cydriver.cudaError_enum.CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED{{endif}}
-    {{if 'CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED' in found_values}}
-
-    #: This error indicates that the pointer passed to
-    #: :py:obj:`~.cuMemHostUnregister()` does not correspond to any
-    #: currently registered memory region.
-    CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = cydriver.cudaError_enum.CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED{{endif}}
-    {{if 'CUDA_ERROR_HARDWARE_STACK_ERROR' in found_values}}
-
-    #: While executing a kernel, the device encountered a stack error. This
-    #: can be due to stack corruption or exceeding the stack size limit.
-    #: This leaves the process in an inconsistent state and any further
-    #: CUDA work will return the same error. To continue using CUDA, the
-    #: process must be terminated and relaunched.
-    CUDA_ERROR_HARDWARE_STACK_ERROR = cydriver.cudaError_enum.CUDA_ERROR_HARDWARE_STACK_ERROR{{endif}}
-    {{if 'CUDA_ERROR_ILLEGAL_INSTRUCTION' in found_values}}
-
-    #: While executing a kernel, the device encountered an illegal
-    #: instruction. This leaves the process in an inconsistent state and
-    #: any further CUDA work will return the same error. To continue using
-    #: CUDA, the process must be terminated and relaunched.
-    CUDA_ERROR_ILLEGAL_INSTRUCTION = cydriver.cudaError_enum.CUDA_ERROR_ILLEGAL_INSTRUCTION{{endif}}
-    {{if 'CUDA_ERROR_MISALIGNED_ADDRESS' in found_values}}
-
-    #: While executing a kernel, the device encountered a load or store
-    #: instruction on a memory address which is not aligned. This leaves
-    #: the process in an inconsistent state and any further CUDA work will
-    #: return the same error. To continue using CUDA, the process must be
-    #: terminated and relaunched.
-    CUDA_ERROR_MISALIGNED_ADDRESS = cydriver.cudaError_enum.CUDA_ERROR_MISALIGNED_ADDRESS{{endif}}
-    {{if 'CUDA_ERROR_INVALID_ADDRESS_SPACE' in found_values}}
-
-    #: While executing a kernel, the device encountered an instruction
-    #: which can only operate on memory locations in certain address spaces
-    #: (global, shared, or local), but was supplied a memory address not
-    #: belonging to an allowed address space. This leaves the process in an
-    #: inconsistent state and any further CUDA work will return the same
-    #: error. To continue using CUDA, the process must be terminated and
-    #: relaunched.
-    CUDA_ERROR_INVALID_ADDRESS_SPACE = cydriver.cudaError_enum.CUDA_ERROR_INVALID_ADDRESS_SPACE{{endif}}
-    {{if 'CUDA_ERROR_INVALID_PC' in found_values}}
-
-    #: While executing a kernel, the device program counter wrapped its
-    #: address space. This leaves the process in an inconsistent state and
-    #: any further CUDA work will return the same error. To continue using
-    #: CUDA, the process must be terminated and relaunched.
-    CUDA_ERROR_INVALID_PC = cydriver.cudaError_enum.CUDA_ERROR_INVALID_PC{{endif}}
-    {{if 'CUDA_ERROR_LAUNCH_FAILED' in found_values}}
-
-    #: An exception occurred on the device while executing a kernel. Common
-    #: causes include dereferencing an invalid device pointer and accessing
-    #: out of bounds shared memory. Less common cases can be system
-    #: specific - more information about these cases can be found in the
-    #: system specific user guide. This leaves the process in an
-    #: inconsistent state and any further CUDA work will return the same
-    #: error. To continue using CUDA, the process must be terminated and
-    #: relaunched.
-    CUDA_ERROR_LAUNCH_FAILED = cydriver.cudaError_enum.CUDA_ERROR_LAUNCH_FAILED{{endif}}
-    {{if 'CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE' in found_values}}
-
-    #: This error indicates that the number of blocks launched per grid for
-    #: a kernel that was launched via either
-    #: :py:obj:`~.cuLaunchCooperativeKernel` or
-    #: :py:obj:`~.cuLaunchCooperativeKernelMultiDevice` exceeds the maximum
-    #: number of blocks as allowed by
-    #: :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessor` or
-    #: :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`
-    #: times the number of multiprocessors as specified by the device
-    #: attribute :py:obj:`~.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT`.
-    CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = cydriver.cudaError_enum.CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE{{endif}}
-    {{if 'CUDA_ERROR_NOT_PERMITTED' in found_values}}
-
-    #: This error indicates that the attempted operation is not permitted.
-    CUDA_ERROR_NOT_PERMITTED = cydriver.cudaError_enum.CUDA_ERROR_NOT_PERMITTED{{endif}}
-    {{if 'CUDA_ERROR_NOT_SUPPORTED' in found_values}}
-
-    #: This error indicates that the attempted operation is not supported
-    #: on the current system or device.
-    CUDA_ERROR_NOT_SUPPORTED = cydriver.cudaError_enum.CUDA_ERROR_NOT_SUPPORTED{{endif}}
-    {{if 'CUDA_ERROR_SYSTEM_NOT_READY' in found_values}}
-
-    #: This error indicates that the system is not yet ready to start any
-    #: CUDA work. To continue using CUDA, verify the system configuration
-    #: is in a valid state and all required driver daemons are actively
-    #: running. More information about this error can be found in the
-    #: system specific user guide.
-    CUDA_ERROR_SYSTEM_NOT_READY = cydriver.cudaError_enum.CUDA_ERROR_SYSTEM_NOT_READY{{endif}}
-    {{if 'CUDA_ERROR_SYSTEM_DRIVER_MISMATCH' in found_values}}
-
-    #: This error indicates that there is a mismatch between the versions
-    #: of the display driver and the CUDA driver. Refer to the
-    #: compatibility documentation for supported versions.
-    CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = cydriver.cudaError_enum.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH{{endif}}
-    {{if 'CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE' in found_values}}
-
-    #: This error indicates that the system was upgraded to run with
-    #: forward compatibility but the visible hardware detected by CUDA does
-    #: not support this configuration. Refer to the compatibility
-    #: documentation for the supported hardware matrix or ensure that only
-    #: supported hardware is visible during initialization via the
-    #: CUDA_VISIBLE_DEVICES environment variable.
-    CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = cydriver.cudaError_enum.CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE{{endif}}
-    {{if 'CUDA_ERROR_MPS_CONNECTION_FAILED' in found_values}}
-
-    #: This error indicates that the MPS client failed to connect to the
-    #: MPS control daemon or the MPS server.
-    CUDA_ERROR_MPS_CONNECTION_FAILED = cydriver.cudaError_enum.CUDA_ERROR_MPS_CONNECTION_FAILED{{endif}}
-    {{if 'CUDA_ERROR_MPS_RPC_FAILURE' in found_values}}
-
-    #: This error indicates that the remote procedural call between the MPS
-    #: server and the MPS client failed.
-    CUDA_ERROR_MPS_RPC_FAILURE = cydriver.cudaError_enum.CUDA_ERROR_MPS_RPC_FAILURE{{endif}}
-    {{if 'CUDA_ERROR_MPS_SERVER_NOT_READY' in found_values}}
-
-    #: This error indicates that the MPS server is not ready to accept new
-    #: MPS client requests. This error can be returned when the MPS server
-    #: is in the process of recovering from a fatal failure.
-    CUDA_ERROR_MPS_SERVER_NOT_READY = cydriver.cudaError_enum.CUDA_ERROR_MPS_SERVER_NOT_READY{{endif}}
-    {{if 'CUDA_ERROR_MPS_MAX_CLIENTS_REACHED' in found_values}}
-
-    #: This error indicates that the hardware resources required to create
-    #: MPS client have been exhausted.
-    CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = cydriver.cudaError_enum.CUDA_ERROR_MPS_MAX_CLIENTS_REACHED{{endif}}
-    {{if 'CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED' in found_values}}
-
-    #: This error indicates the the hardware resources required to support
-    #: device connections have been exhausted.
-    CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = cydriver.cudaError_enum.CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED{{endif}}
-    {{if 'CUDA_ERROR_MPS_CLIENT_TERMINATED' in found_values}}
-
-    #: This error indicates that the MPS client has been terminated by the
-    #: server. To continue using CUDA, the process must be terminated and
-    #: relaunched.
-    CUDA_ERROR_MPS_CLIENT_TERMINATED = cydriver.cudaError_enum.CUDA_ERROR_MPS_CLIENT_TERMINATED{{endif}}
-    {{if 'CUDA_ERROR_CDP_NOT_SUPPORTED' in found_values}}
-
-    #: This error indicates that the module is using CUDA Dynamic
-    #: Parallelism, but the current configuration, like MPS, does not
-    #: support it.
-    CUDA_ERROR_CDP_NOT_SUPPORTED = cydriver.cudaError_enum.CUDA_ERROR_CDP_NOT_SUPPORTED{{endif}}
-    {{if 'CUDA_ERROR_CDP_VERSION_MISMATCH' in found_values}}
-
-    #: This error indicates that a module contains an unsupported
-    #: interaction between different versions of CUDA Dynamic Parallelism.
-    CUDA_ERROR_CDP_VERSION_MISMATCH = cydriver.cudaError_enum.CUDA_ERROR_CDP_VERSION_MISMATCH{{endif}}
-    {{if 'CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED' in found_values}}
-
-    #: This error indicates that the operation is not permitted when the
-    #: stream is capturing.
-    CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = cydriver.cudaError_enum.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED{{endif}}
-    {{if 'CUDA_ERROR_STREAM_CAPTURE_INVALIDATED' in found_values}}
-
-    #: This error indicates that the current capture sequence on the stream
-    #: has been invalidated due to a previous error.
-    CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = cydriver.cudaError_enum.CUDA_ERROR_STREAM_CAPTURE_INVALIDATED{{endif}}
-    {{if 'CUDA_ERROR_STREAM_CAPTURE_MERGE' in found_values}}
-
-    #: This error indicates that the operation would have resulted in a
-    #: merge of two independent capture sequences.
-    CUDA_ERROR_STREAM_CAPTURE_MERGE = cydriver.cudaError_enum.CUDA_ERROR_STREAM_CAPTURE_MERGE{{endif}}
-    {{if 'CUDA_ERROR_STREAM_CAPTURE_UNMATCHED' in found_values}}
-
-    #: This error indicates that the capture was not initiated in this
-    #: stream.
-    CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = cydriver.cudaError_enum.CUDA_ERROR_STREAM_CAPTURE_UNMATCHED{{endif}}
-    {{if 'CUDA_ERROR_STREAM_CAPTURE_UNJOINED' in found_values}}
-
-    #: This error indicates that the capture sequence contains a fork that
-    #: was not joined to the primary stream.
-    CUDA_ERROR_STREAM_CAPTURE_UNJOINED = cydriver.cudaError_enum.CUDA_ERROR_STREAM_CAPTURE_UNJOINED{{endif}}
-    {{if 'CUDA_ERROR_STREAM_CAPTURE_ISOLATION' in found_values}}
-
-    #: This error indicates that a dependency would have been created which
-    #: crosses the capture sequence boundary. Only implicit in-stream
-    #: ordering dependencies are allowed to cross the boundary.
-    CUDA_ERROR_STREAM_CAPTURE_ISOLATION = cydriver.cudaError_enum.CUDA_ERROR_STREAM_CAPTURE_ISOLATION{{endif}}
-    {{if 'CUDA_ERROR_STREAM_CAPTURE_IMPLICIT' in found_values}}
-
-    #: This error indicates a disallowed implicit dependency on a current
-    #: capture sequence from cudaStreamLegacy.
-    CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = cydriver.cudaError_enum.CUDA_ERROR_STREAM_CAPTURE_IMPLICIT{{endif}}
-    {{if 'CUDA_ERROR_CAPTURED_EVENT' in found_values}}
-
-    #: This error indicates that the operation is not permitted on an event
-    #: which was last recorded in a capturing stream.
-    CUDA_ERROR_CAPTURED_EVENT = cydriver.cudaError_enum.CUDA_ERROR_CAPTURED_EVENT{{endif}}
-    {{if 'CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD' in found_values}}
-
-    #: A stream capture sequence not initiated with the
-    #: :py:obj:`~.CU_STREAM_CAPTURE_MODE_RELAXED` argument to
-    #: :py:obj:`~.cuStreamBeginCapture` was passed to
-    #: :py:obj:`~.cuStreamEndCapture` in a different thread.
-    CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = cydriver.cudaError_enum.CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD{{endif}}
-    {{if 'CUDA_ERROR_TIMEOUT' in found_values}}
-
-    #: This error indicates that the timeout specified for the wait
-    #: operation has lapsed.
-    CUDA_ERROR_TIMEOUT = cydriver.cudaError_enum.CUDA_ERROR_TIMEOUT{{endif}}
-    {{if 'CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE' in found_values}}
-
-    #: This error indicates that the graph update was not performed because
-    #: it included changes which violated constraints specific to
-    #: instantiated graph update.
-    CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = cydriver.cudaError_enum.CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE{{endif}}
-    {{if 'CUDA_ERROR_EXTERNAL_DEVICE' in found_values}}
-
-    #: This indicates that an async error has occurred in a device outside
-    #: of CUDA. If CUDA was waiting for an external device's signal before
-    #: consuming shared data, the external device signaled an error
-    #: indicating that the data is not valid for consumption. This leaves
-    #: the process in an inconsistent state and any further CUDA work will
-    #: return the same error. To continue using CUDA, the process must be
-    #: terminated and relaunched.
-    CUDA_ERROR_EXTERNAL_DEVICE = cydriver.cudaError_enum.CUDA_ERROR_EXTERNAL_DEVICE{{endif}}
-    {{if 'CUDA_ERROR_INVALID_CLUSTER_SIZE' in found_values}}
-
-    #: Indicates a kernel launch error due to cluster misconfiguration.
-    CUDA_ERROR_INVALID_CLUSTER_SIZE = cydriver.cudaError_enum.CUDA_ERROR_INVALID_CLUSTER_SIZE{{endif}}
-    {{if 'CUDA_ERROR_FUNCTION_NOT_LOADED' in found_values}}
-
-    #: Indicates a function handle is not loaded when calling an API that
-    #: requires a loaded function.
-    CUDA_ERROR_FUNCTION_NOT_LOADED = cydriver.cudaError_enum.CUDA_ERROR_FUNCTION_NOT_LOADED{{endif}}
-    {{if 'CUDA_ERROR_INVALID_RESOURCE_TYPE' in found_values}}
-
-    #: This error indicates one or more resources passed in are not valid
-    #: resource types for the operation.
-    CUDA_ERROR_INVALID_RESOURCE_TYPE = cydriver.cudaError_enum.CUDA_ERROR_INVALID_RESOURCE_TYPE{{endif}}
-    {{if 'CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION' in found_values}}
-
-    #: This error indicates one or more resources are insufficient or non-
-    #: applicable for the operation.
-    CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION = cydriver.cudaError_enum.CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION{{endif}}
-    {{if 'CUDA_ERROR_UNKNOWN' in found_values}}
-
-    #: This indicates that an unknown internal error has occurred.
-    CUDA_ERROR_UNKNOWN = cydriver.cudaError_enum.CUDA_ERROR_UNKNOWN{{endif}}
-{{endif}}
-{{if 'CUdevice_P2PAttribute_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUdevice_P2PAttribute_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUdevice_P2PAttribute(IntEnum):
-    """
-    P2P Attributes
-    """
-    {{if 'CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK' in found_values}}
-
-    #: A relative value indicating the performance of the link between two
-    #: devices
-    CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = cydriver.CUdevice_P2PAttribute_enum.CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK{{endif}}
-    {{if 'CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED' in found_values}}
-
-    #: P2P access is enabled
-    CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = cydriver.CUdevice_P2PAttribute_enum.CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED' in found_values}}
-
-    #: Atomic operations over the link are supported
-    CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = cydriver.CUdevice_P2PAttribute_enum.CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED' in found_values}}
-
-    #: [Deprecated]
-    CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED = cydriver.CUdevice_P2PAttribute_enum.CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED{{endif}}
-    {{if 'CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED' in found_values}}
-
-    #: Accessing CUDA arrays over the link is supported
-    CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = cydriver.CUdevice_P2PAttribute_enum.CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED{{endif}}
-{{endif}}
-{{if 'CUresourceViewFormat_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUresourceViewFormat_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUresourceViewFormat(IntEnum):
-    """
-    Resource view format
-    """
-    {{if 'CU_RES_VIEW_FORMAT_NONE' in found_values}}
-
-    #: No resource view format (use underlying resource format)
-    CU_RES_VIEW_FORMAT_NONE = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_NONE{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UINT_1X8' in found_values}}
-
-    #: 1 channel unsigned 8-bit integers
-    CU_RES_VIEW_FORMAT_UINT_1X8 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UINT_1X8{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UINT_2X8' in found_values}}
-
-    #: 2 channel unsigned 8-bit integers
-    CU_RES_VIEW_FORMAT_UINT_2X8 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UINT_2X8{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UINT_4X8' in found_values}}
-
-    #: 4 channel unsigned 8-bit integers
-    CU_RES_VIEW_FORMAT_UINT_4X8 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UINT_4X8{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SINT_1X8' in found_values}}
-
-    #: 1 channel signed 8-bit integers
-    CU_RES_VIEW_FORMAT_SINT_1X8 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SINT_1X8{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SINT_2X8' in found_values}}
-
-    #: 2 channel signed 8-bit integers
-    CU_RES_VIEW_FORMAT_SINT_2X8 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SINT_2X8{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SINT_4X8' in found_values}}
-
-    #: 4 channel signed 8-bit integers
-    CU_RES_VIEW_FORMAT_SINT_4X8 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SINT_4X8{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UINT_1X16' in found_values}}
-
-    #: 1 channel unsigned 16-bit integers
-    CU_RES_VIEW_FORMAT_UINT_1X16 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UINT_1X16{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UINT_2X16' in found_values}}
-
-    #: 2 channel unsigned 16-bit integers
-    CU_RES_VIEW_FORMAT_UINT_2X16 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UINT_2X16{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UINT_4X16' in found_values}}
-
-    #: 4 channel unsigned 16-bit integers
-    CU_RES_VIEW_FORMAT_UINT_4X16 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UINT_4X16{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SINT_1X16' in found_values}}
-
-    #: 1 channel signed 16-bit integers
-    CU_RES_VIEW_FORMAT_SINT_1X16 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SINT_1X16{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SINT_2X16' in found_values}}
-
-    #: 2 channel signed 16-bit integers
-    CU_RES_VIEW_FORMAT_SINT_2X16 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SINT_2X16{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SINT_4X16' in found_values}}
-
-    #: 4 channel signed 16-bit integers
-    CU_RES_VIEW_FORMAT_SINT_4X16 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SINT_4X16{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UINT_1X32' in found_values}}
-
-    #: 1 channel unsigned 32-bit integers
-    CU_RES_VIEW_FORMAT_UINT_1X32 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UINT_1X32{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UINT_2X32' in found_values}}
-
-    #: 2 channel unsigned 32-bit integers
-    CU_RES_VIEW_FORMAT_UINT_2X32 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UINT_2X32{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UINT_4X32' in found_values}}
-
-    #: 4 channel unsigned 32-bit integers
-    CU_RES_VIEW_FORMAT_UINT_4X32 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UINT_4X32{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SINT_1X32' in found_values}}
-
-    #: 1 channel signed 32-bit integers
-    CU_RES_VIEW_FORMAT_SINT_1X32 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SINT_1X32{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SINT_2X32' in found_values}}
-
-    #: 2 channel signed 32-bit integers
-    CU_RES_VIEW_FORMAT_SINT_2X32 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SINT_2X32{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SINT_4X32' in found_values}}
-
-    #: 4 channel signed 32-bit integers
-    CU_RES_VIEW_FORMAT_SINT_4X32 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SINT_4X32{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_FLOAT_1X16' in found_values}}
-
-    #: 1 channel 16-bit floating point
-    CU_RES_VIEW_FORMAT_FLOAT_1X16 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_FLOAT_1X16{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_FLOAT_2X16' in found_values}}
-
-    #: 2 channel 16-bit floating point
-    CU_RES_VIEW_FORMAT_FLOAT_2X16 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_FLOAT_2X16{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_FLOAT_4X16' in found_values}}
-
-    #: 4 channel 16-bit floating point
-    CU_RES_VIEW_FORMAT_FLOAT_4X16 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_FLOAT_4X16{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_FLOAT_1X32' in found_values}}
-
-    #: 1 channel 32-bit floating point
-    CU_RES_VIEW_FORMAT_FLOAT_1X32 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_FLOAT_1X32{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_FLOAT_2X32' in found_values}}
-
-    #: 2 channel 32-bit floating point
-    CU_RES_VIEW_FORMAT_FLOAT_2X32 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_FLOAT_2X32{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_FLOAT_4X32' in found_values}}
-
-    #: 4 channel 32-bit floating point
-    CU_RES_VIEW_FORMAT_FLOAT_4X32 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_FLOAT_4X32{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UNSIGNED_BC1' in found_values}}
-
-    #: Block compressed 1
-    CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UNSIGNED_BC1{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UNSIGNED_BC2' in found_values}}
-
-    #: Block compressed 2
-    CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UNSIGNED_BC2{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UNSIGNED_BC3' in found_values}}
-
-    #: Block compressed 3
-    CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UNSIGNED_BC3{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UNSIGNED_BC4' in found_values}}
-
-    #: Block compressed 4 unsigned
-    CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UNSIGNED_BC4{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SIGNED_BC4' in found_values}}
-
-    #: Block compressed 4 signed
-    CU_RES_VIEW_FORMAT_SIGNED_BC4 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SIGNED_BC4{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UNSIGNED_BC5' in found_values}}
-
-    #: Block compressed 5 unsigned
-    CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UNSIGNED_BC5{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SIGNED_BC5' in found_values}}
-
-    #: Block compressed 5 signed
-    CU_RES_VIEW_FORMAT_SIGNED_BC5 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SIGNED_BC5{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UNSIGNED_BC6H' in found_values}}
-
-    #: Block compressed 6 unsigned half-float
-    CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UNSIGNED_BC6H{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_SIGNED_BC6H' in found_values}}
-
-    #: Block compressed 6 signed half-float
-    CU_RES_VIEW_FORMAT_SIGNED_BC6H = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_SIGNED_BC6H{{endif}}
-    {{if 'CU_RES_VIEW_FORMAT_UNSIGNED_BC7' in found_values}}
-
-    #: Block compressed 7
-    CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = cydriver.CUresourceViewFormat_enum.CU_RES_VIEW_FORMAT_UNSIGNED_BC7{{endif}}
-{{endif}}
-{{if 'CUtensorMapDataType_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUtensorMapDataType_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUtensorMapDataType(IntEnum):
-    """
-    Tensor map data type
-    """
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_UINT8' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_UINT8 = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_UINT8{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_UINT16' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_UINT16 = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_UINT16{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_UINT32' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_UINT32 = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_UINT32{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_INT32' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_INT32 = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_INT32{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_UINT64' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_UINT64 = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_UINT64{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_INT64' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_INT64 = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_INT64{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_FLOAT16' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_FLOAT16 = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_FLOAT16{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_FLOAT32' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_FLOAT32 = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_FLOAT32{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_FLOAT64' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_FLOAT64 = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_FLOAT64{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_BFLOAT16' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_BFLOAT16 = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_BFLOAT16{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_TFLOAT32' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_TFLOAT32 = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_TFLOAT32{{endif}}
-    {{if 'CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ' in found_values}}
-    CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ = cydriver.CUtensorMapDataType_enum.CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ{{endif}}
-{{endif}}
-{{if 'CUtensorMapInterleave_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUtensorMapInterleave_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUtensorMapInterleave(IntEnum):
-    """
-    Tensor map interleave layout type
-    """
-    {{if 'CU_TENSOR_MAP_INTERLEAVE_NONE' in found_values}}
-    CU_TENSOR_MAP_INTERLEAVE_NONE = cydriver.CUtensorMapInterleave_enum.CU_TENSOR_MAP_INTERLEAVE_NONE{{endif}}
-    {{if 'CU_TENSOR_MAP_INTERLEAVE_16B' in found_values}}
-    CU_TENSOR_MAP_INTERLEAVE_16B = cydriver.CUtensorMapInterleave_enum.CU_TENSOR_MAP_INTERLEAVE_16B{{endif}}
-    {{if 'CU_TENSOR_MAP_INTERLEAVE_32B' in found_values}}
-    CU_TENSOR_MAP_INTERLEAVE_32B = cydriver.CUtensorMapInterleave_enum.CU_TENSOR_MAP_INTERLEAVE_32B{{endif}}
-{{endif}}
-{{if 'CUtensorMapSwizzle_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUtensorMapSwizzle_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUtensorMapSwizzle(IntEnum):
-    """
-    Tensor map swizzling mode of shared memory banks
-    """
-    {{if 'CU_TENSOR_MAP_SWIZZLE_NONE' in found_values}}
-    CU_TENSOR_MAP_SWIZZLE_NONE = cydriver.CUtensorMapSwizzle_enum.CU_TENSOR_MAP_SWIZZLE_NONE{{endif}}
-    {{if 'CU_TENSOR_MAP_SWIZZLE_32B' in found_values}}
-    CU_TENSOR_MAP_SWIZZLE_32B = cydriver.CUtensorMapSwizzle_enum.CU_TENSOR_MAP_SWIZZLE_32B{{endif}}
-    {{if 'CU_TENSOR_MAP_SWIZZLE_64B' in found_values}}
-    CU_TENSOR_MAP_SWIZZLE_64B = cydriver.CUtensorMapSwizzle_enum.CU_TENSOR_MAP_SWIZZLE_64B{{endif}}
-    {{if 'CU_TENSOR_MAP_SWIZZLE_128B' in found_values}}
-    CU_TENSOR_MAP_SWIZZLE_128B = cydriver.CUtensorMapSwizzle_enum.CU_TENSOR_MAP_SWIZZLE_128B{{endif}}
-{{endif}}
-{{if 'CUtensorMapL2promotion_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUtensorMapL2promotion_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUtensorMapL2promotion(IntEnum):
-    """
-    Tensor map L2 promotion type
-    """
-    {{if 'CU_TENSOR_MAP_L2_PROMOTION_NONE' in found_values}}
-    CU_TENSOR_MAP_L2_PROMOTION_NONE = cydriver.CUtensorMapL2promotion_enum.CU_TENSOR_MAP_L2_PROMOTION_NONE{{endif}}
-    {{if 'CU_TENSOR_MAP_L2_PROMOTION_L2_64B' in found_values}}
-    CU_TENSOR_MAP_L2_PROMOTION_L2_64B = cydriver.CUtensorMapL2promotion_enum.CU_TENSOR_MAP_L2_PROMOTION_L2_64B{{endif}}
-    {{if 'CU_TENSOR_MAP_L2_PROMOTION_L2_128B' in found_values}}
-    CU_TENSOR_MAP_L2_PROMOTION_L2_128B = cydriver.CUtensorMapL2promotion_enum.CU_TENSOR_MAP_L2_PROMOTION_L2_128B{{endif}}
-    {{if 'CU_TENSOR_MAP_L2_PROMOTION_L2_256B' in found_values}}
-    CU_TENSOR_MAP_L2_PROMOTION_L2_256B = cydriver.CUtensorMapL2promotion_enum.CU_TENSOR_MAP_L2_PROMOTION_L2_256B{{endif}}
-{{endif}}
-{{if 'CUtensorMapFloatOOBfill_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUtensorMapFloatOOBfill_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUtensorMapFloatOOBfill(IntEnum):
-    """
-    Tensor map out-of-bounds fill type
-    """
-    {{if 'CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE' in found_values}}
-    CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = cydriver.CUtensorMapFloatOOBfill_enum.CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE{{endif}}
-    {{if 'CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA' in found_values}}
-    CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA = cydriver.CUtensorMapFloatOOBfill_enum.CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA{{endif}}
-{{endif}}
-{{if 'CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum``.
-# The class is generated only when that enum is listed in ``found_types``;
-# each member is generated only when its name is listed in ``found_values``.
-class CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS(IntEnum):
-    """
-    Access flags that specify the level of access the current context's
-    device has on the memory referenced.
-    """
-    {{if 'CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE' in found_values}}
-
-    #: No access, meaning the device cannot access this memory at all, thus
-    #: must be staged through accessible memory in order to complete
-    #: certain operations
-    CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE = cydriver.CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum.CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ' in found_values}}
-
-    #: Read-only access, meaning writes to this memory are considered
-    #: invalid accesses and thus return error in that case.
-    CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ = cydriver.CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum.CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ{{endif}}
-    {{if 'CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE' in found_values}}
-
-    #: Read-write access, the device has full read-write access to the
-    #: memory
-    CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE = cydriver.CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum.CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE{{endif}}
-{{endif}}
-{{if 'CUexternalMemoryHandleType_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUexternalMemoryHandleType_enum``. The class
-# is generated only when that enum is listed in ``found_types``; each member
-# is generated only when its name is listed in ``found_values``.
-class CUexternalMemoryHandleType(IntEnum):
-    """
-    External memory handle types
-    """
-    {{if 'CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD' in found_values}}
-
-    #: Handle is an opaque file descriptor
-    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD{{endif}}
-    {{if 'CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32' in found_values}}
-
-    #: Handle is an opaque shared NT handle
-    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32{{endif}}
-    {{if 'CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT' in found_values}}
-
-    #: Handle is an opaque, globally shared handle
-    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT{{endif}}
-    {{if 'CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP' in found_values}}
-
-    #: Handle is a D3D12 heap object
-    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP{{endif}}
-    {{if 'CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE' in found_values}}
-
-    #: Handle is a D3D12 committed resource
-    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE{{endif}}
-    {{if 'CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE' in found_values}}
-
-    #: Handle is a shared NT handle to a D3D11 resource
-    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE{{endif}}
-    {{if 'CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT' in found_values}}
-
-    #: Handle is a globally shared handle to a D3D11 resource
-    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT{{endif}}
-    {{if 'CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF' in found_values}}
-
-    #: Handle is an NvSciBuf object
-    CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF{{endif}}
-{{endif}}
-{{if 'CUexternalSemaphoreHandleType_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUexternalSemaphoreHandleType_enum``. The
-# class is generated only when that enum is listed in ``found_types``; each
-# member is generated only when its name is listed in ``found_values``.
-class CUexternalSemaphoreHandleType(IntEnum):
-    """
-    External semaphore handle types
-    """
-    {{if 'CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD' in found_values}}
-
-    #: Handle is an opaque file descriptor
-    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD{{endif}}
-    {{if 'CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32' in found_values}}
-
-    #: Handle is an opaque shared NT handle
-    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32{{endif}}
-    {{if 'CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT' in found_values}}
-
-    #: Handle is an opaque, globally shared handle
-    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT{{endif}}
-    {{if 'CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE' in found_values}}
-
-    #: Handle is a shared NT handle referencing a D3D12 fence object
-    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE{{endif}}
-    {{if 'CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE' in found_values}}
-
-    #: Handle is a shared NT handle referencing a D3D11 fence object
-    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE{{endif}}
-    {{if 'CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC' in found_values}}
-
-    #: Opaque handle to NvSciSync Object
-    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC{{endif}}
-    {{if 'CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX' in found_values}}
-
-    #: Handle is a shared NT handle referencing a D3D11 keyed mutex object
-    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX{{endif}}
-    {{if 'CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT' in found_values}}
-
-    #: Handle is a globally shared handle referencing a D3D11 keyed mutex
-    #: object
-    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT{{endif}}
-    {{if 'CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD' in found_values}}
-
-    #: Handle is an opaque file descriptor referencing a timeline semaphore
-    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD{{endif}}
-    {{if 'CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32' in found_values}}
-
-    #: Handle is an opaque shared NT handle referencing a timeline
-    #: semaphore
-    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32{{endif}}
-{{endif}}
-{{if 'CUmemAllocationHandleType_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUmemAllocationHandleType_enum``. The class
-# is generated only when that enum is listed in ``found_types``; each member
-# is generated only when its name is listed in ``found_values``.
-class CUmemAllocationHandleType(IntEnum):
-    """
-    Flags for specifying particular handle types
-    """
-    {{if 'CU_MEM_HANDLE_TYPE_NONE' in found_values}}
-
-    #: Does not allow any export mechanism.
-    CU_MEM_HANDLE_TYPE_NONE = cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_NONE{{endif}}
-    {{if 'CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR' in found_values}}
-
-    #: Allows a file descriptor to be used for exporting. Permitted only on
-    #: POSIX systems. (int)
-    CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR{{endif}}
-    {{if 'CU_MEM_HANDLE_TYPE_WIN32' in found_values}}
-
-    #: Allows a Win32 NT handle to be used for exporting. (HANDLE)
-    CU_MEM_HANDLE_TYPE_WIN32 = cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_WIN32{{endif}}
-    {{if 'CU_MEM_HANDLE_TYPE_WIN32_KMT' in found_values}}
-
-    #: Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE)
-    CU_MEM_HANDLE_TYPE_WIN32_KMT = cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_WIN32_KMT{{endif}}
-    {{if 'CU_MEM_HANDLE_TYPE_FABRIC' in found_values}}
-
-    #: Allows a fabric handle to be used for exporting. (CUmemFabricHandle)
-    CU_MEM_HANDLE_TYPE_FABRIC = cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_FABRIC{{endif}}
-    {{if 'CU_MEM_HANDLE_TYPE_MAX' in found_values}}
-    CU_MEM_HANDLE_TYPE_MAX = cydriver.CUmemAllocationHandleType_enum.CU_MEM_HANDLE_TYPE_MAX{{endif}}
-{{endif}}
-{{if 'CUmemAccess_flags_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUmemAccess_flags_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUmemAccess_flags(IntEnum):
-    """
-    Specifies the memory protection flags for mapping.
-    """
-    {{if 'CU_MEM_ACCESS_FLAGS_PROT_NONE' in found_values}}
-
-    #: Default, make the address range not accessible
-    CU_MEM_ACCESS_FLAGS_PROT_NONE = cydriver.CUmemAccess_flags_enum.CU_MEM_ACCESS_FLAGS_PROT_NONE{{endif}}
-    {{if 'CU_MEM_ACCESS_FLAGS_PROT_READ' in found_values}}
-
-    #: Make the address range read accessible
-    CU_MEM_ACCESS_FLAGS_PROT_READ = cydriver.CUmemAccess_flags_enum.CU_MEM_ACCESS_FLAGS_PROT_READ{{endif}}
-    {{if 'CU_MEM_ACCESS_FLAGS_PROT_READWRITE' in found_values}}
-
-    #: Make the address range read-write accessible
-    CU_MEM_ACCESS_FLAGS_PROT_READWRITE = cydriver.CUmemAccess_flags_enum.CU_MEM_ACCESS_FLAGS_PROT_READWRITE{{endif}}
-    {{if 'CU_MEM_ACCESS_FLAGS_PROT_MAX' in found_values}}
-    CU_MEM_ACCESS_FLAGS_PROT_MAX = cydriver.CUmemAccess_flags_enum.CU_MEM_ACCESS_FLAGS_PROT_MAX{{endif}}
-{{endif}}
-{{if 'CUmemLocationType_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUmemLocationType_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUmemLocationType(IntEnum):
-    """
-    Specifies the type of location
-    """
-    {{if 'CU_MEM_LOCATION_TYPE_INVALID' in found_values}}
-    CU_MEM_LOCATION_TYPE_INVALID = cydriver.CUmemLocationType_enum.CU_MEM_LOCATION_TYPE_INVALID{{endif}}
-    {{if 'CU_MEM_LOCATION_TYPE_DEVICE' in found_values}}
-
-    #: Location is a device location, thus id is a device ordinal
-    CU_MEM_LOCATION_TYPE_DEVICE = cydriver.CUmemLocationType_enum.CU_MEM_LOCATION_TYPE_DEVICE{{endif}}
-    {{if 'CU_MEM_LOCATION_TYPE_HOST' in found_values}}
-
-    #: Location is host, id is ignored
-    CU_MEM_LOCATION_TYPE_HOST = cydriver.CUmemLocationType_enum.CU_MEM_LOCATION_TYPE_HOST{{endif}}
-    {{if 'CU_MEM_LOCATION_TYPE_HOST_NUMA' in found_values}}
-
-    #: Location is a host NUMA node, thus id is a host NUMA node id
-    CU_MEM_LOCATION_TYPE_HOST_NUMA = cydriver.CUmemLocationType_enum.CU_MEM_LOCATION_TYPE_HOST_NUMA{{endif}}
-    {{if 'CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT' in found_values}}
-
-    #: Location is a host NUMA node of the current thread, id is ignored
-    CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT = cydriver.CUmemLocationType_enum.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT{{endif}}
-    {{if 'CU_MEM_LOCATION_TYPE_MAX' in found_values}}
-    CU_MEM_LOCATION_TYPE_MAX = cydriver.CUmemLocationType_enum.CU_MEM_LOCATION_TYPE_MAX{{endif}}
-{{endif}}
-{{if 'CUmemAllocationType_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUmemAllocationType_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUmemAllocationType(IntEnum):
-    """
-    Defines the allocation types available
-    """
-    {{if 'CU_MEM_ALLOCATION_TYPE_INVALID' in found_values}}
-    CU_MEM_ALLOCATION_TYPE_INVALID = cydriver.CUmemAllocationType_enum.CU_MEM_ALLOCATION_TYPE_INVALID{{endif}}
-    {{if 'CU_MEM_ALLOCATION_TYPE_PINNED' in found_values}}
-
-    #: This allocation type is 'pinned', i.e. cannot migrate from its
-    #: current location while the application is actively using it
-    CU_MEM_ALLOCATION_TYPE_PINNED = cydriver.CUmemAllocationType_enum.CU_MEM_ALLOCATION_TYPE_PINNED{{endif}}
-    {{if 'CU_MEM_ALLOCATION_TYPE_MAX' in found_values}}
-    CU_MEM_ALLOCATION_TYPE_MAX = cydriver.CUmemAllocationType_enum.CU_MEM_ALLOCATION_TYPE_MAX{{endif}}
-{{endif}}
-{{if 'CUmemAllocationGranularity_flags_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUmemAllocationGranularity_flags_enum``. The
-# class is generated only when that enum is listed in ``found_types``; each
-# member is generated only when its name is listed in ``found_values``.
-class CUmemAllocationGranularity_flags(IntEnum):
-    """
-    Flag for requesting different optimal and required granularities
-    for an allocation.
-    """
-    {{if 'CU_MEM_ALLOC_GRANULARITY_MINIMUM' in found_values}}
-
-    #: Minimum required granularity for allocation
-    CU_MEM_ALLOC_GRANULARITY_MINIMUM = cydriver.CUmemAllocationGranularity_flags_enum.CU_MEM_ALLOC_GRANULARITY_MINIMUM{{endif}}
-    {{if 'CU_MEM_ALLOC_GRANULARITY_RECOMMENDED' in found_values}}
-
-    #: Recommended granularity for allocation for best performance
-    CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = cydriver.CUmemAllocationGranularity_flags_enum.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED{{endif}}
-{{endif}}
-{{if 'CUmemRangeHandleType_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUmemRangeHandleType_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUmemRangeHandleType(IntEnum):
-    """
-    Specifies the handle type for address range
-    """
-    {{if 'CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD' in found_values}}
-    CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD = cydriver.CUmemRangeHandleType_enum.CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD{{endif}}
-    {{if 'CU_MEM_RANGE_HANDLE_TYPE_MAX' in found_values}}
-    CU_MEM_RANGE_HANDLE_TYPE_MAX = cydriver.CUmemRangeHandleType_enum.CU_MEM_RANGE_HANDLE_TYPE_MAX{{endif}}
-{{endif}}
-{{if 'CUarraySparseSubresourceType_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUarraySparseSubresourceType_enum``. The
-# class is generated only when that enum is listed in ``found_types``; each
-# member is generated only when its name is listed in ``found_values``.
-class CUarraySparseSubresourceType(IntEnum):
-    """
-    Sparse subresource types
-    """
-    {{if 'CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL' in found_values}}
-    CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = cydriver.CUarraySparseSubresourceType_enum.CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL{{endif}}
-    {{if 'CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL' in found_values}}
-    CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL = cydriver.CUarraySparseSubresourceType_enum.CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL{{endif}}
-{{endif}}
-{{if 'CUmemOperationType_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUmemOperationType_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUmemOperationType(IntEnum):
-    """
-    Memory operation types
-    """
-    {{if 'CU_MEM_OPERATION_TYPE_MAP' in found_values}}
-    CU_MEM_OPERATION_TYPE_MAP = cydriver.CUmemOperationType_enum.CU_MEM_OPERATION_TYPE_MAP{{endif}}
-    {{if 'CU_MEM_OPERATION_TYPE_UNMAP' in found_values}}
-    CU_MEM_OPERATION_TYPE_UNMAP = cydriver.CUmemOperationType_enum.CU_MEM_OPERATION_TYPE_UNMAP{{endif}}
-{{endif}}
-{{if 'CUmemHandleType_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUmemHandleType_enum``. The class is generated
-# only when that enum is listed in ``found_types``; its single member is
-# generated only when its name is listed in ``found_values``.
-# Note: the member shares the ``CU_MEM_HANDLE_TYPE_`` prefix with the members
-# of ``CUmemAllocationHandleType`` but belongs to a different enum.
-class CUmemHandleType(IntEnum):
-    """
-    Memory handle types
-    """
-    {{if 'CU_MEM_HANDLE_TYPE_GENERIC' in found_values}}
-    CU_MEM_HANDLE_TYPE_GENERIC = cydriver.CUmemHandleType_enum.CU_MEM_HANDLE_TYPE_GENERIC{{endif}}
-{{endif}}
-{{if 'CUmemAllocationCompType_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUmemAllocationCompType_enum``. The class is
-# generated only when that enum is listed in ``found_types``; each member is
-# generated only when its name is listed in ``found_values``.
-class CUmemAllocationCompType(IntEnum):
-    """
-    Specifies compression attribute for an allocation.
-    """
-    {{if 'CU_MEM_ALLOCATION_COMP_NONE' in found_values}}
-
-    #: Allocating non-compressible memory
-    CU_MEM_ALLOCATION_COMP_NONE = cydriver.CUmemAllocationCompType_enum.CU_MEM_ALLOCATION_COMP_NONE{{endif}}
-    {{if 'CU_MEM_ALLOCATION_COMP_GENERIC' in found_values}}
-
-    #: Allocating compressible memory
-    CU_MEM_ALLOCATION_COMP_GENERIC = cydriver.CUmemAllocationCompType_enum.CU_MEM_ALLOCATION_COMP_GENERIC{{endif}}
-{{endif}}
-{{if 'CUmulticastGranularity_flags_enum' in found_types}}
-
-# IntEnum mirror of ``cydriver.CUmulticastGranularity_flags_enum``. The class
-# is generated only when that enum is listed in ``found_types``; each member
-# is generated only when its name is listed in ``found_values``.
-class CUmulticastGranularity_flags(IntEnum):
-    """
-    Flags for querying different granularities for a multicast object
-    """
-    {{if 'CU_MULTICAST_GRANULARITY_MINIMUM' in found_values}}
-
-    #: Minimum required granularity
-    CU_MULTICAST_GRANULARITY_MINIMUM = cydriver.CUmulticastGranularity_flags_enum.CU_MULTICAST_GRANULARITY_MINIMUM{{endif}}
-    {{if 'CU_MULTICAST_GRANULARITY_RECOMMENDED' in found_values}}
-
-    #: Recommended granularity for best performance
-    CU_MULTICAST_GRANULARITY_RECOMMENDED = cydriver.CUmulticastGranularity_flags_enum.CU_MULTICAST_GRANULARITY_RECOMMENDED{{endif}}
-{{endif}}
-{{if 'CUgraphExecUpdateResult_enum' in found_types}}
-
-class CUgraphExecUpdateResult(IntEnum):
-    """
-    CUDA Graph Update error types
-    """
-    {{if 'CU_GRAPH_EXEC_UPDATE_SUCCESS' in found_values}}
-
-    #: The update succeeded
-    CU_GRAPH_EXEC_UPDATE_SUCCESS = cydriver.CUgraphExecUpdateResult_enum.CU_GRAPH_EXEC_UPDATE_SUCCESS{{endif}}
-    {{if 'CU_GRAPH_EXEC_UPDATE_ERROR' in found_values}}
-
-    #: The update failed for an unexpected reason which is described in the
-    #: return value of the function
-    CU_GRAPH_EXEC_UPDATE_ERROR = cydriver.CUgraphExecUpdateResult_enum.CU_GRAPH_EXEC_UPDATE_ERROR{{endif}}
-    {{if 'CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED' in found_values}}
-
-    #: The update failed because the topology changed
-    CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED = cydriver.CUgraphExecUpdateResult_enum.CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED{{endif}}
-    {{if 'CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED' in found_values}}
-
-    #: The update failed because a node type changed
-    CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED = cydriver.CUgraphExecUpdateResult_enum.CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED{{endif}}
-    {{if 'CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED' in found_values}}
-
-    #: The update failed because the function of a kernel node changed
-    #: (CUDA driver < 11.2)
-    CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED = cydriver.CUgraphExecUpdateResult_enum.CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED{{endif}}
-    {{if 'CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED' in found_values}}
-
-    #: The update failed because the parameters changed in a way that is
-    #: not supported
-    CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED = cydriver.CUgraphExecUpdateResult_enum.CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED{{endif}}
-    {{if 'CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED' in found_values}}
-
-    #: The update failed because something about the node is not supported
-    CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED = cydriver.CUgraphExecUpdateResult_enum.CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED{{endif}}
-    {{if 'CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE' in found_values}}
-
-    #: The update failed because the function of a kernel node changed in
-    #: an unsupported way
-    CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE = cydriver.CUgraphExecUpdateResult_enum.CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE{{endif}}
-    {{if 'CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED' in found_values}}
-
-    #: The update failed because the node attributes changed in a way that
-    #: is not supported
-    CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED = cydriver.CUgraphExecUpdateResult_enum.CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED{{endif}}
-{{endif}}
-{{if 'CUmemPool_attribute_enum' in found_types}}
-
-class CUmemPool_attribute(IntEnum):
-    """
-    CUDA memory pool attributes
-    """
-    {{if 'CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES' in found_values}}
-
-    #: (value type = int) Allow cuMemAllocAsync to use memory
-    #: asynchronously freed in another streams as long as a stream ordering
-    #: dependency of the allocating stream on the free action exists. Cuda
-    #: events and null stream interactions can create the required stream
-    #: ordered dependencies. (default enabled)
-    CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES{{endif}}
-    {{if 'CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC' in found_values}}
-
-    #: (value type = int) Allow reuse of already completed frees when there
-    #: is no dependency between the free and allocation. (default enabled)
-    CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC = cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC{{endif}}
-    {{if 'CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES' in found_values}}
-
-    #: (value type = int) Allow cuMemAllocAsync to insert new stream
-    #: dependencies in order to establish the stream ordering required to
-    #: reuse a piece of memory released by cuFreeAsync (default enabled).
-    CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES = cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES{{endif}}
-    {{if 'CU_MEMPOOL_ATTR_RELEASE_THRESHOLD' in found_values}}
-
-    #: (value type = cuuint64_t) Amount of reserved memory in bytes to hold
-    #: onto before trying to release memory back to the OS. When more than
-    #: the release threshold bytes of memory are held by the memory pool,
-    #: the allocator will try to release memory back to the OS on the next
-    #: call to stream, event or context synchronize. (default 0)
-    CU_MEMPOOL_ATTR_RELEASE_THRESHOLD = cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD{{endif}}
-    {{if 'CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT' in found_values}}
-
-    #: (value type = cuuint64_t) Amount of backing memory currently
-    #: allocated for the mempool.
-    CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT = cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT{{endif}}
-    {{if 'CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH' in found_values}}
-
-    #: (value type = cuuint64_t) High watermark of backing memory allocated
-    #: for the mempool since the last time it was reset. High watermark can
-    #: only be reset to zero.
-    CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH = cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH{{endif}}
-    {{if 'CU_MEMPOOL_ATTR_USED_MEM_CURRENT' in found_values}}
-
-    #: (value type = cuuint64_t) Amount of memory from the pool that is
-    #: currently in use by the application.
-    CU_MEMPOOL_ATTR_USED_MEM_CURRENT = cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_USED_MEM_CURRENT{{endif}}
-    {{if 'CU_MEMPOOL_ATTR_USED_MEM_HIGH' in found_values}}
-
-    #: (value type = cuuint64_t) High watermark of the amount of memory
-    #: from the pool that was in use by the application since the last time
-    #: it was reset. High watermark can only be reset to zero.
-    CU_MEMPOOL_ATTR_USED_MEM_HIGH = cydriver.CUmemPool_attribute_enum.CU_MEMPOOL_ATTR_USED_MEM_HIGH{{endif}}
-{{endif}}
-{{if 'CUgraphMem_attribute_enum' in found_types}}
-
-class CUgraphMem_attribute(IntEnum):
-    """
-
-    """
-    {{if 'CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT' in found_values}}
-
-    #: (value type = cuuint64_t) Amount of memory, in bytes, currently
-    #: associated with graphs
-    CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT = cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT{{endif}}
-    {{if 'CU_GRAPH_MEM_ATTR_USED_MEM_HIGH' in found_values}}
-
-    #: (value type = cuuint64_t) High watermark of memory, in bytes,
-    #: associated with graphs since the last time it was reset. High
-    #: watermark can only be reset to zero.
-    CU_GRAPH_MEM_ATTR_USED_MEM_HIGH = cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_USED_MEM_HIGH{{endif}}
-    {{if 'CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT' in found_values}}
-
-    #: (value type = cuuint64_t) Amount of memory, in bytes, currently
-    #: allocated for use by the CUDA graphs asynchronous allocator.
-    CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT = cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT{{endif}}
-    {{if 'CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH' in found_values}}
-
-    #: (value type = cuuint64_t) High watermark of memory, in bytes,
-    #: currently allocated for use by the CUDA graphs asynchronous
-    #: allocator.
-    CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH = cydriver.CUgraphMem_attribute_enum.CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH{{endif}}
-{{endif}}
-{{if 'CUflushGPUDirectRDMAWritesOptions_enum' in found_types}}
-
-class CUflushGPUDirectRDMAWritesOptions(IntEnum):
-    """
-    Bitmasks for
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS`
-    """
-    {{if 'CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST' in found_values}}
-
-    #: :py:obj:`~.cuFlushGPUDirectRDMAWrites()` and its CUDA Runtime API
-    #: counterpart are supported on the device.
-    CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST = cydriver.CUflushGPUDirectRDMAWritesOptions_enum.CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST{{endif}}
-    {{if 'CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS' in found_values}}
-
-    #: The :py:obj:`~.CU_STREAM_WAIT_VALUE_FLUSH` flag and the
-    #: :py:obj:`~.CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES` MemOp are supported
-    #: on the device.
-    CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS = cydriver.CUflushGPUDirectRDMAWritesOptions_enum.CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS{{endif}}
-{{endif}}
-{{if 'CUGPUDirectRDMAWritesOrdering_enum' in found_types}}
-
-class CUGPUDirectRDMAWritesOrdering(IntEnum):
-    """
-    Platform native ordering for GPUDirect RDMA writes
-    """
-    {{if 'CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE' in found_values}}
-
-    #: The device does not natively support ordering of remote writes.
-    #: :py:obj:`~.cuFlushGPUDirectRDMAWrites()` can be leveraged if
-    #: supported.
-    CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE = cydriver.CUGPUDirectRDMAWritesOrdering_enum.CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE{{endif}}
-    {{if 'CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER' in found_values}}
-
-    #: Natively, the device can consistently consume remote writes,
-    #: although other CUDA devices may not.
-    CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER = cydriver.CUGPUDirectRDMAWritesOrdering_enum.CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER{{endif}}
-    {{if 'CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES' in found_values}}
-
-    #: Any CUDA device in the system can consistently consume remote writes
-    #: to this device.
-    CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES = cydriver.CUGPUDirectRDMAWritesOrdering_enum.CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES{{endif}}
-{{endif}}
-{{if 'CUflushGPUDirectRDMAWritesScope_enum' in found_types}}
-
-class CUflushGPUDirectRDMAWritesScope(IntEnum):
-    """
-    The scopes for :py:obj:`~.cuFlushGPUDirectRDMAWrites`
-    """
-    {{if 'CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER' in found_values}}
-
-    #: Blocks until remote writes are visible to the CUDA device context
-    #: owning the data.
-    CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER = cydriver.CUflushGPUDirectRDMAWritesScope_enum.CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER{{endif}}
-    {{if 'CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES' in found_values}}
-
-    #: Blocks until remote writes are visible to all CUDA device contexts.
-    CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES = cydriver.CUflushGPUDirectRDMAWritesScope_enum.CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES{{endif}}
-{{endif}}
-{{if 'CUflushGPUDirectRDMAWritesTarget_enum' in found_types}}
-
-class CUflushGPUDirectRDMAWritesTarget(IntEnum):
-    """
-    The targets for :py:obj:`~.cuFlushGPUDirectRDMAWrites`
-    """
-    {{if 'CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX' in found_values}}
-
-    #: Sets the target for :py:obj:`~.cuFlushGPUDirectRDMAWrites()` to the
-    #: currently active CUDA device context.
-    CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX = cydriver.CUflushGPUDirectRDMAWritesTarget_enum.CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX{{endif}}
-{{endif}}
-{{if 'CUgraphDebugDot_flags_enum' in found_types}}
-
-class CUgraphDebugDot_flags(IntEnum):
-    """
-    The additional write options for :py:obj:`~.cuGraphDebugDotPrint`
-    """
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE' in found_values}}
-
-    #: Output all debug data as if every debug flag is enabled
-    CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES' in found_values}}
-
-    #: Use CUDA Runtime structures for output
-    CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS' in found_values}}
-
-    #: Adds CUDA_KERNEL_NODE_PARAMS values to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS' in found_values}}
-
-    #: Adds CUDA_MEMCPY3D values to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS' in found_values}}
-
-    #: Adds CUDA_MEMSET_NODE_PARAMS values to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS' in found_values}}
-
-    #: Adds CUDA_HOST_NODE_PARAMS values to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS' in found_values}}
-
-    #: Adds CUevent handle from record and wait nodes to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS' in found_values}}
-
-    #: Adds CUDA_EXT_SEM_SIGNAL_NODE_PARAMS values to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS' in found_values}}
-
-    #: Adds CUDA_EXT_SEM_WAIT_NODE_PARAMS values to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES' in found_values}}
-
-    #: Adds CUkernelNodeAttrValue values to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES' in found_values}}
-
-    #: Adds node handles and every kernel function handle to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS' in found_values}}
-
-    #: Adds memory alloc node parameters to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS' in found_values}}
-
-    #: Adds memory free node parameters to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS' in found_values}}
-
-    #: Adds batch mem op node parameters to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_EXTRA_TOPO_INFO' in found_values}}
-
-    #: Adds edge numbering information
-    CU_GRAPH_DEBUG_DOT_FLAGS_EXTRA_TOPO_INFO = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_EXTRA_TOPO_INFO{{endif}}
-    {{if 'CU_GRAPH_DEBUG_DOT_FLAGS_CONDITIONAL_NODE_PARAMS' in found_values}}
-
-    #: Adds conditional node parameters to output
-    CU_GRAPH_DEBUG_DOT_FLAGS_CONDITIONAL_NODE_PARAMS = cydriver.CUgraphDebugDot_flags_enum.CU_GRAPH_DEBUG_DOT_FLAGS_CONDITIONAL_NODE_PARAMS{{endif}}
-{{endif}}
-{{if 'CUuserObject_flags_enum' in found_types}}
-
-class CUuserObject_flags(IntEnum):
-    """
-    Flags for user objects for graphs
-    """
-    {{if 'CU_USER_OBJECT_NO_DESTRUCTOR_SYNC' in found_values}}
-
-    #: Indicates the destructor execution is not synchronized by any CUDA
-    #: handle.
-    CU_USER_OBJECT_NO_DESTRUCTOR_SYNC = cydriver.CUuserObject_flags_enum.CU_USER_OBJECT_NO_DESTRUCTOR_SYNC{{endif}}
-{{endif}}
-{{if 'CUuserObjectRetain_flags_enum' in found_types}}
-
-class CUuserObjectRetain_flags(IntEnum):
-    """
-    Flags for retaining user object references for graphs
-    """
-    {{if 'CU_GRAPH_USER_OBJECT_MOVE' in found_values}}
-
-    #: Transfer references from the caller rather than creating new
-    #: references.
-    CU_GRAPH_USER_OBJECT_MOVE = cydriver.CUuserObjectRetain_flags_enum.CU_GRAPH_USER_OBJECT_MOVE{{endif}}
-{{endif}}
-{{if 'CUgraphInstantiate_flags_enum' in found_types}}
-
-class CUgraphInstantiate_flags(IntEnum):
-    """
-    Flags for instantiating a graph
-    """
-    {{if 'CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH' in found_values}}
-
-    #: Automatically free memory allocated in a graph before relaunching.
-    CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH = cydriver.CUgraphInstantiate_flags_enum.CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH{{endif}}
-    {{if 'CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD' in found_values}}
-
-    #: Automatically upload the graph after instantiation. Only supported
-    #: by :py:obj:`~.cuGraphInstantiateWithParams`. The upload will be
-    #: performed using the stream provided in `instantiateParams`.
-    CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD = cydriver.CUgraphInstantiate_flags_enum.CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD{{endif}}
-    {{if 'CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH' in found_values}}
-
-    #: Instantiate the graph to be launchable from the device. This flag
-    #: can only be used on platforms which support unified addressing. This
-    #: flag cannot be used in conjunction with
-    #: CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH.
-    CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH = cydriver.CUgraphInstantiate_flags_enum.CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH{{endif}}
-    {{if 'CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY' in found_values}}
-
-    #: Run the graph using the per-node priority attributes rather than the
-    #: priority of the stream it is launched into.
-    CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY = cydriver.CUgraphInstantiate_flags_enum.CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY{{endif}}
-{{endif}}
-{{if 'CUdeviceNumaConfig_enum' in found_types}}
-
-class CUdeviceNumaConfig(IntEnum):
-    """
-    CUDA device NUMA configuration
-    """
-    {{if 'CU_DEVICE_NUMA_CONFIG_NONE' in found_values}}
-
-    #: The GPU is not a NUMA node
-    CU_DEVICE_NUMA_CONFIG_NONE = cydriver.CUdeviceNumaConfig_enum.CU_DEVICE_NUMA_CONFIG_NONE{{endif}}
-    {{if 'CU_DEVICE_NUMA_CONFIG_NUMA_NODE' in found_values}}
-
-    #: The GPU is a NUMA node, CU_DEVICE_ATTRIBUTE_NUMA_ID contains its
-    #: NUMA ID
-    CU_DEVICE_NUMA_CONFIG_NUMA_NODE = cydriver.CUdeviceNumaConfig_enum.CU_DEVICE_NUMA_CONFIG_NUMA_NODE{{endif}}
-{{endif}}
-{{if 'CUmoduleLoadingMode_enum' in found_types}}
-
-class CUmoduleLoadingMode(IntEnum):
-    """
-    CUDA Lazy Loading status
-    """
-    {{if 'CU_MODULE_EAGER_LOADING' in found_values}}
-
-    #: Lazy Kernel Loading is not enabled
-    CU_MODULE_EAGER_LOADING = cydriver.CUmoduleLoadingMode_enum.CU_MODULE_EAGER_LOADING{{endif}}
-    {{if 'CU_MODULE_LAZY_LOADING' in found_values}}
-
-    #: Lazy Kernel Loading is enabled
-    CU_MODULE_LAZY_LOADING = cydriver.CUmoduleLoadingMode_enum.CU_MODULE_LAZY_LOADING{{endif}}
-{{endif}}
-{{if 'CUfunctionLoadingState_enum' in found_types}}
-
-class CUfunctionLoadingState(IntEnum):
-    """
-
-    """
-    {{if 'CU_FUNCTION_LOADING_STATE_UNLOADED' in found_values}}
-    CU_FUNCTION_LOADING_STATE_UNLOADED = cydriver.CUfunctionLoadingState_enum.CU_FUNCTION_LOADING_STATE_UNLOADED{{endif}}
-    {{if 'CU_FUNCTION_LOADING_STATE_LOADED' in found_values}}
-    CU_FUNCTION_LOADING_STATE_LOADED = cydriver.CUfunctionLoadingState_enum.CU_FUNCTION_LOADING_STATE_LOADED{{endif}}
-    {{if 'CU_FUNCTION_LOADING_STATE_MAX' in found_values}}
-    CU_FUNCTION_LOADING_STATE_MAX = cydriver.CUfunctionLoadingState_enum.CU_FUNCTION_LOADING_STATE_MAX{{endif}}
-{{endif}}
-{{if 'CUcoredumpSettings_enum' in found_types}}
-
-class CUcoredumpSettings(IntEnum):
-    """
-    Flags for choosing a coredump attribute to get/set
-    """
-    {{if 'CU_COREDUMP_ENABLE_ON_EXCEPTION' in found_values}}
-    CU_COREDUMP_ENABLE_ON_EXCEPTION = cydriver.CUcoredumpSettings_enum.CU_COREDUMP_ENABLE_ON_EXCEPTION{{endif}}
-    {{if 'CU_COREDUMP_TRIGGER_HOST' in found_values}}
-    CU_COREDUMP_TRIGGER_HOST = cydriver.CUcoredumpSettings_enum.CU_COREDUMP_TRIGGER_HOST{{endif}}
-    {{if 'CU_COREDUMP_LIGHTWEIGHT' in found_values}}
-    CU_COREDUMP_LIGHTWEIGHT = cydriver.CUcoredumpSettings_enum.CU_COREDUMP_LIGHTWEIGHT{{endif}}
-    {{if 'CU_COREDUMP_ENABLE_USER_TRIGGER' in found_values}}
-    CU_COREDUMP_ENABLE_USER_TRIGGER = cydriver.CUcoredumpSettings_enum.CU_COREDUMP_ENABLE_USER_TRIGGER{{endif}}
-    {{if 'CU_COREDUMP_FILE' in found_values}}
-    CU_COREDUMP_FILE = cydriver.CUcoredumpSettings_enum.CU_COREDUMP_FILE{{endif}}
-    {{if 'CU_COREDUMP_PIPE' in found_values}}
-    CU_COREDUMP_PIPE = cydriver.CUcoredumpSettings_enum.CU_COREDUMP_PIPE{{endif}}
-    {{if 'CU_COREDUMP_GENERATION_FLAGS' in found_values}}
-    CU_COREDUMP_GENERATION_FLAGS = cydriver.CUcoredumpSettings_enum.CU_COREDUMP_GENERATION_FLAGS{{endif}}
-    {{if 'CU_COREDUMP_MAX' in found_values}}
-    CU_COREDUMP_MAX = cydriver.CUcoredumpSettings_enum.CU_COREDUMP_MAX{{endif}}
-{{endif}}
-{{if 'CUCoredumpGenerationFlags' in found_types}}
-
-class CUCoredumpGenerationFlags(IntEnum):
-    """
-    Flags for controlling coredump contents
-    """
-    {{if 'CU_COREDUMP_DEFAULT_FLAGS' in found_values}}
-    CU_COREDUMP_DEFAULT_FLAGS = cydriver.CUCoredumpGenerationFlags.CU_COREDUMP_DEFAULT_FLAGS{{endif}}
-    {{if 'CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES' in found_values}}
-    CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES = cydriver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES{{endif}}
-    {{if 'CU_COREDUMP_SKIP_GLOBAL_MEMORY' in found_values}}
-    CU_COREDUMP_SKIP_GLOBAL_MEMORY = cydriver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_GLOBAL_MEMORY{{endif}}
-    {{if 'CU_COREDUMP_SKIP_SHARED_MEMORY' in found_values}}
-    CU_COREDUMP_SKIP_SHARED_MEMORY = cydriver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_SHARED_MEMORY{{endif}}
-    {{if 'CU_COREDUMP_SKIP_LOCAL_MEMORY' in found_values}}
-    CU_COREDUMP_SKIP_LOCAL_MEMORY = cydriver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_LOCAL_MEMORY{{endif}}
-    {{if 'CU_COREDUMP_SKIP_ABORT' in found_values}}
-    CU_COREDUMP_SKIP_ABORT = cydriver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_ABORT{{endif}}
-    {{if 'CU_COREDUMP_SKIP_CONSTBANK_MEMORY' in found_values}}
-    CU_COREDUMP_SKIP_CONSTBANK_MEMORY = cydriver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_CONSTBANK_MEMORY{{endif}}
-    {{if 'CU_COREDUMP_LIGHTWEIGHT_FLAGS' in found_values}}
-    CU_COREDUMP_LIGHTWEIGHT_FLAGS = cydriver.CUCoredumpGenerationFlags.CU_COREDUMP_LIGHTWEIGHT_FLAGS{{endif}}
-{{endif}}
-{{if 'CUgreenCtxCreate_flags' in found_types}}
-
-class CUgreenCtxCreate_flags(IntEnum):
-    """
-
-    """
-    {{if 'CU_GREEN_CTX_DEFAULT_STREAM' in found_values}}
-
-    #: Required. Creates a default stream to use inside the green context
-    CU_GREEN_CTX_DEFAULT_STREAM = cydriver.CUgreenCtxCreate_flags.CU_GREEN_CTX_DEFAULT_STREAM{{endif}}
-{{endif}}
-{{if 'CUdevSmResourceSplit_flags' in found_types}}
-
-class CUdevSmResourceSplit_flags(IntEnum):
-    """
-
-    """
-    {{if 'CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING' in found_values}}
-    CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING = cydriver.CUdevSmResourceSplit_flags.CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING{{endif}}
-    {{if 'CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE' in found_values}}
-    CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE = cydriver.CUdevSmResourceSplit_flags.CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE{{endif}}
-{{endif}}
-{{if 'CUdevResourceType' in found_types}}
-
-class CUdevResourceType(IntEnum):
-    """
-    Type of resource
-    """
-    {{if 'CU_DEV_RESOURCE_TYPE_INVALID' in found_values}}
-    CU_DEV_RESOURCE_TYPE_INVALID = cydriver.CUdevResourceType.CU_DEV_RESOURCE_TYPE_INVALID{{endif}}
-    {{if 'CU_DEV_RESOURCE_TYPE_SM' in found_values}}
-
-    #: Streaming multiprocessors related information
-    CU_DEV_RESOURCE_TYPE_SM = cydriver.CUdevResourceType.CU_DEV_RESOURCE_TYPE_SM{{endif}}
-{{endif}}
-{{if 'CUoutput_mode_enum' in found_types}}
-
-class CUoutput_mode(IntEnum):
-    """
-    Profiler Output Modes
-    """
-    {{if 'CU_OUT_KEY_VALUE_PAIR' in found_values}}
-
-    #: Output mode Key-Value pair format.
-    CU_OUT_KEY_VALUE_PAIR = cydriver.CUoutput_mode_enum.CU_OUT_KEY_VALUE_PAIR{{endif}}
-    {{if 'CU_OUT_CSV' in found_values}}
-
-    #: Output mode Comma separated values format.
-    CU_OUT_CSV = cydriver.CUoutput_mode_enum.CU_OUT_CSV{{endif}}
-{{endif}}
-{{if True}}
-
-class CUeglFrameType(IntEnum):
-    """
-    CUDA EglFrame type - array or pointer
-    """
-    {{if True}}
-
-    #: Frame type CUDA array
-    CU_EGL_FRAME_TYPE_ARRAY = cydriver.CUeglFrameType_enum.CU_EGL_FRAME_TYPE_ARRAY{{endif}}
-    {{if True}}
-
-    #: Frame type pointer
-    CU_EGL_FRAME_TYPE_PITCH = cydriver.CUeglFrameType_enum.CU_EGL_FRAME_TYPE_PITCH{{endif}}
-{{endif}}
-{{if True}}
-
-class CUeglResourceLocationFlags(IntEnum):
-    """
-    Resource location flags- sysmem or vidmem  For CUDA context on
-    iGPU, since video and system memory are equivalent - these flags
-    will not have an effect on the execution.  For CUDA context on
-    dGPU, applications can use the flag
-    :py:obj:`~.CUeglResourceLocationFlags` to give a hint about the
-    desired location.  :py:obj:`~.CU_EGL_RESOURCE_LOCATION_SYSMEM` -
-    the frame data is made resident on the system memory to be accessed
-    by CUDA.  :py:obj:`~.CU_EGL_RESOURCE_LOCATION_VIDMEM` - the frame
-    data is made resident on the dedicated video memory to be accessed
-    by CUDA.  There may be an additional latency due to new allocation
-    and data migration, if the frame is produced on a different memory.
-    """
-    {{if True}}
-
-    #: Resource location sysmem
-    CU_EGL_RESOURCE_LOCATION_SYSMEM = cydriver.CUeglResourceLocationFlags_enum.CU_EGL_RESOURCE_LOCATION_SYSMEM{{endif}}
-    {{if True}}
-
-    #: Resource location vidmem
-    CU_EGL_RESOURCE_LOCATION_VIDMEM = cydriver.CUeglResourceLocationFlags_enum.CU_EGL_RESOURCE_LOCATION_VIDMEM{{endif}}
-{{endif}}
-{{if True}}
-
-class CUeglColorFormat(IntEnum):
-    """
-    CUDA EGL Color Format - The different planar and multiplanar
-    formats currently supported for CUDA_EGL interops. Three channel
-    formats are currently not supported for
-    :py:obj:`~.CU_EGL_FRAME_TYPE_ARRAY`
-    """
-    {{if True}}
-
-    #: Y, U, V in three surfaces, each in a separate surface, U/V width =
-    #: 1/2 Y width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YUV420_PLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR{{endif}}
-    {{if True}}
-
-    #: Y, UV in two surfaces (UV as one surface) with VU byte ordering,
-    #: width, height ratio same as YUV420Planar.
-    CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR{{endif}}
-    {{if True}}
-
-    #: Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V
-    #: height = Y height.
-    CU_EGL_COLOR_FORMAT_YUV422_PLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_PLANAR{{endif}}
-    {{if True}}
-
-    #: Y, UV in two surfaces with VU byte ordering, width, height ratio
-    #: same as YUV422Planar.
-    CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR{{endif}}
-    {{if True}}
-
-    #: R/G/B three channels in one surface with BGR byte ordering. Only
-    #: pitch linear format supported.
-    CU_EGL_COLOR_FORMAT_RGB = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_RGB{{endif}}
-    {{if True}}
-
-    #: R/G/B three channels in one surface with RGB byte ordering. Only
-    #: pitch linear format supported.
-    CU_EGL_COLOR_FORMAT_BGR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BGR{{endif}}
-    {{if True}}
-
-    #: R/G/B/A four channels in one surface with BGRA byte ordering.
-    CU_EGL_COLOR_FORMAT_ARGB = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_ARGB{{endif}}
-    {{if True}}
-
-    #: R/G/B/A four channels in one surface with ABGR byte ordering.
-    CU_EGL_COLOR_FORMAT_RGBA = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_RGBA{{endif}}
-    {{if True}}
-
-    #: single luminance channel in one surface.
-    CU_EGL_COLOR_FORMAT_L = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_L{{endif}}
-    {{if True}}
-
-    #: single color channel in one surface.
-    CU_EGL_COLOR_FORMAT_R = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_R{{endif}}
-    {{if True}}
-
-    #: Y, U, V in three surfaces, each in a separate surface, U/V width = Y
-    #: width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_YUV444_PLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_PLANAR{{endif}}
-    {{if True}}
-
-    #: Y, UV in two surfaces (UV as one surface) with VU byte ordering,
-    #: width, height ratio same as YUV444Planar.
-    CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR{{endif}}
-    {{if True}}
-
-    #: Y, U, V in one surface, interleaved as UYVY in one channel.
-    CU_EGL_COLOR_FORMAT_YUYV_422 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUYV_422{{endif}}
-    {{if True}}
-
-    #: Y, U, V in one surface, interleaved as YUYV in one channel.
-    CU_EGL_COLOR_FORMAT_UYVY_422 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_UYVY_422{{endif}}
-    {{if True}}
-
-    #: R/G/B/A four channels in one surface with RGBA byte ordering.
-    CU_EGL_COLOR_FORMAT_ABGR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_ABGR{{endif}}
-    {{if True}}
-
-    #: R/G/B/A four channels in one surface with ARGB byte ordering.
-    CU_EGL_COLOR_FORMAT_BGRA = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BGRA{{endif}}
-    {{if True}}
-
-    #: Alpha color format - one channel in one surface.
-    CU_EGL_COLOR_FORMAT_A = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_A{{endif}}
-    {{if True}}
-
-    #: R/G color format - two channels in one surface with GR byte ordering
-    CU_EGL_COLOR_FORMAT_RG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_RG{{endif}}
-    {{if True}}
-
-    #: Y, U, V, A four channels in one surface, interleaved as VUYA.
-    CU_EGL_COLOR_FORMAT_AYUV = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_AYUV{{endif}}
-    {{if True}}
-
-    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
-    #: width = Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR{{endif}}
-    {{if True}}
-
-    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
-    #: width = 1/2 Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR{{endif}}
-    {{if True}}
-
-    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
-    #: width = 1/2 Y width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces (VU as one surface) with UV byte
-    #: ordering, U/V width = Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces (VU as one surface) with UV byte
-    #: ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR{{endif}}
-    {{if True}}
-
-    #: Y12, V12U12 in two surfaces (VU as one surface) with UV byte
-    #: ordering, U/V width = Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR{{endif}}
-    {{if True}}
-
-    #: Y12, V12U12 in two surfaces (VU as one surface) with UV byte
-    #: ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in one surface, interleaved as YVYU in one
-    #: channel.
-    CU_EGL_COLOR_FORMAT_VYUY_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_VYUY_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in one surface, interleaved as YUYV in one
-    #: channel.
-    CU_EGL_COLOR_FORMAT_UYVY_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_UYVY_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in one surface, interleaved as UYVY in one
-    #: channel.
-    CU_EGL_COLOR_FORMAT_YUYV_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUYV_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in one surface, interleaved as VYUY in one
-    #: channel.
-    CU_EGL_COLOR_FORMAT_YVYU_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVYU_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V three channels in one surface, interleaved as
-    #: VUY. Only pitch linear format supported.
-    CU_EGL_COLOR_FORMAT_YUV_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V, A four channels in one surface, interleaved
-    #: as AVUY.
-    CU_EGL_COLOR_FORMAT_YUVA_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUVA_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V, A four channels in one surface, interleaved
-    #: as VUYA.
-    CU_EGL_COLOR_FORMAT_AYUV_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_AYUV_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in three surfaces, U/V width = Y width, U/V
-    #: height = Y height.
-    CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width,
-    #: U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
-    #: byte ordering, U/V width = Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
-    #: byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
-    #: byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, V, U in three surfaces, U/V width = Y width, U/V
-    #: height = Y height.
-    CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width,
-    #: U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
-    #: byte ordering, U/V width = Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
-    #: byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
-    #: byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved RGGB
-    #: ordering.
-    CU_EGL_COLOR_FORMAT_BAYER_RGGB = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_RGGB{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved BGGR
-    #: ordering.
-    CU_EGL_COLOR_FORMAT_BAYER_BGGR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_BGGR{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved GRBG
-    #: ordering.
-    CU_EGL_COLOR_FORMAT_BAYER_GRBG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_GRBG{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved GBRG
-    #: ordering.
-    CU_EGL_COLOR_FORMAT_BAYER_GBRG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_GBRG{{endif}}
-    {{if True}}
-
-    #: Bayer10 format - one channel in one surface with interleaved RGGB
-    #: ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER10_RGGB = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_RGGB{{endif}}
-    {{if True}}
-
-    #: Bayer10 format - one channel in one surface with interleaved BGGR
-    #: ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER10_BGGR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_BGGR{{endif}}
-    {{if True}}
-
-    #: Bayer10 format - one channel in one surface with interleaved GRBG
-    #: ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER10_GRBG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_GRBG{{endif}}
-    {{if True}}
-
-    #: Bayer10 format - one channel in one surface with interleaved GBRG
-    #: ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER10_GBRG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_GBRG{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved RGGB
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER12_RGGB = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_RGGB{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved BGGR
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER12_BGGR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_BGGR{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved GRBG
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER12_GRBG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_GRBG{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved GBRG
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER12_GBRG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_GBRG{{endif}}
-    {{if True}}
-
-    #: Bayer14 format - one channel in one surface with interleaved RGGB
-    #: ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER14_RGGB = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_RGGB{{endif}}
-    {{if True}}
-
-    #: Bayer14 format - one channel in one surface with interleaved BGGR
-    #: ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER14_BGGR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_BGGR{{endif}}
-    {{if True}}
-
-    #: Bayer14 format - one channel in one surface with interleaved GRBG
-    #: ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER14_GRBG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_GRBG{{endif}}
-    {{if True}}
-
-    #: Bayer14 format - one channel in one surface with interleaved GBRG
-    #: ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER14_GBRG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER14_GBRG{{endif}}
-    {{if True}}
-
-    #: Bayer20 format - one channel in one surface with interleaved RGGB
-    #: ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER20_RGGB = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_RGGB{{endif}}
-    {{if True}}
-
-    #: Bayer20 format - one channel in one surface with interleaved BGGR
-    #: ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER20_BGGR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_BGGR{{endif}}
-    {{if True}}
-
-    #: Bayer20 format - one channel in one surface with interleaved GRBG
-    #: ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER20_GRBG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_GRBG{{endif}}
-    {{if True}}
-
-    #: Bayer20 format - one channel in one surface with interleaved GBRG
-    #: ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER20_GBRG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER20_GBRG{{endif}}
-    {{if True}}
-
-    #: Y, V, U in three surfaces, each in a separate surface, U/V width = Y
-    #: width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_YVU444_PLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU444_PLANAR{{endif}}
-    {{if True}}
-
-    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
-    #: 1/2 Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_YVU422_PLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU422_PLANAR{{endif}}
-    {{if True}}
-
-    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
-    #: 1/2 Y width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YVU420_PLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR{{endif}}
-    {{if True}}
-
-    #: Nvidia proprietary Bayer ISP format - one channel in one surface
-    #: with interleaved RGGB ordering and mapped to opaque integer
-    #: datatype.
-    CU_EGL_COLOR_FORMAT_BAYER_ISP_RGGB = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_RGGB{{endif}}
-    {{if True}}
-
-    #: Nvidia proprietary Bayer ISP format - one channel in one surface
-    #: with interleaved BGGR ordering and mapped to opaque integer
-    #: datatype.
-    CU_EGL_COLOR_FORMAT_BAYER_ISP_BGGR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_BGGR{{endif}}
-    {{if True}}
-
-    #: Nvidia proprietary Bayer ISP format - one channel in one surface
-    #: with interleaved GRBG ordering and mapped to opaque integer
-    #: datatype.
-    CU_EGL_COLOR_FORMAT_BAYER_ISP_GRBG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_GRBG{{endif}}
-    {{if True}}
-
-    #: Nvidia proprietary Bayer ISP format - one channel in one surface
-    #: with interleaved GBRG ordering and mapped to opaque integer
-    #: datatype.
-    CU_EGL_COLOR_FORMAT_BAYER_ISP_GBRG = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_ISP_GBRG{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved BCCR
-    #: ordering.
-    CU_EGL_COLOR_FORMAT_BAYER_BCCR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_BCCR{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved RCCB
-    #: ordering.
-    CU_EGL_COLOR_FORMAT_BAYER_RCCB = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_RCCB{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved CRBC
-    #: ordering.
-    CU_EGL_COLOR_FORMAT_BAYER_CRBC = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_CRBC{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved CBRC
-    #: ordering.
-    CU_EGL_COLOR_FORMAT_BAYER_CBRC = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER_CBRC{{endif}}
-    {{if True}}
-
-    #: Bayer10 format - one channel in one surface with interleaved CCCC
-    #: ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER10_CCCC = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER10_CCCC{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved BCCR
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER12_BCCR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_BCCR{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved RCCB
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER12_RCCB = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_RCCB{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved CRBC
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER12_CRBC = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_CRBC{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved CBRC
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER12_CBRC = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_CBRC{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved CCCC
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    CU_EGL_COLOR_FORMAT_BAYER12_CCCC = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_BAYER12_CCCC{{endif}}
-    {{if True}}
-
-    #: Color format for single Y plane.
-    CU_EGL_COLOR_FORMAT_Y = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y{{endif}}
-    {{if True}}
-
-    #: Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_2020 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_2020{{endif}}
-    {{if True}}
-
-    #: Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_2020 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_2020{{endif}}
-    {{if True}}
-
-    #: Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V
-    #: height= 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YUV420_PLANAR_2020 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_2020{{endif}}
-    {{if True}}
-
-    #: Y, V, U each in a separate surface, U/V width = 1/2 Y width, U/V
-    #: height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YVU420_PLANAR_2020 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_2020{{endif}}
-    {{if True}}
-
-    #: Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_709 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_709{{endif}}
-    {{if True}}
-
-    #: Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_709 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_709{{endif}}
-    {{if True}}
-
-    #: Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V
-    #: height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YUV420_PLANAR_709 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_709{{endif}}
-    {{if True}}
-
-    #: Y, V, U each in a separate surface, U/V width = 1/2 Y width, U/V
-    #: height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_YVU420_PLANAR_709 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_709{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces (VU as one surface), U/V width = 1/2 Y
-    #: width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces (VU as one surface), U/V width = 1/2 Y
-    #: width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_2020 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_2020{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces(VU as one surface) U/V width = 1/2 Y
-    #: width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_2020 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_2020{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces(VU as one surface) U/V width = 1/2 Y
-    #: width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces(VU as one surface) U/V width = 1/2 Y
-    #: width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_709 = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_709{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y plane.
-    CU_EGL_COLOR_FORMAT_Y_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y plane.
-    CU_EGL_COLOR_FORMAT_Y_709_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y_709_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y10 plane.
-    CU_EGL_COLOR_FORMAT_Y10_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y10 plane.
-    CU_EGL_COLOR_FORMAT_Y10_709_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10_709_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y12 plane.
-    CU_EGL_COLOR_FORMAT_Y12_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y12 plane.
-    CU_EGL_COLOR_FORMAT_Y12_709_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12_709_ER{{endif}}
-    {{if True}}
-
-    #: Y, U, V, A four channels in one surface, interleaved as AVUY.
-    CU_EGL_COLOR_FORMAT_YUVA = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUVA{{endif}}
-    {{if True}}
-
-    #: Y, U, V three channels in one surface, interleaved as VUY. Only
-    #: pitch linear format supported.
-    CU_EGL_COLOR_FORMAT_YUV = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YUV{{endif}}
-    {{if True}}
-
-    #: Y, U, V in one surface, interleaved as YVYU in one channel.
-    CU_EGL_COLOR_FORMAT_YVYU = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_YVYU{{endif}}
-    {{if True}}
-
-    #: Y, U, V in one surface, interleaved as VYUY in one channel.
-    CU_EGL_COLOR_FORMAT_VYUY = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_VYUY{{endif}}
-    {{if True}}
-
-    #: Extended Range Y10, V10U10 in two surfaces(VU as one surface) U/V
-    #: width = 1/2 Y width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y10, V10U10 in two surfaces(VU as one surface) U/V
-    #: width = 1/2 Y width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
-    #: width = Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
-    #: width = Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_709_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_709_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
-    #: width = 1/2 Y width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
-    #: width = 1/2 Y width, U/V height = 1/2 Y height.
-    CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_709_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_709_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
-    #: width = Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
-    #: width = Y width, U/V height = Y height.
-    CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_709_ER = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_709_ER{{endif}}
-    {{if True}}
-    CU_EGL_COLOR_FORMAT_MAX = cydriver.CUeglColorFormat_enum.CU_EGL_COLOR_FORMAT_MAX{{endif}}
-{{endif}}
-{{if True}}
-
-class CUGLDeviceList(IntEnum):
-    """
-    CUDA devices corresponding to an OpenGL device
-    """
-    {{if True}}
-
-    #: The CUDA devices for all GPUs used by the current OpenGL context
-    CU_GL_DEVICE_LIST_ALL = cydriver.CUGLDeviceList_enum.CU_GL_DEVICE_LIST_ALL{{endif}}
-    {{if True}}
-
-    #: The CUDA devices for the GPUs used by the current OpenGL context in
-    #: its currently rendering frame
-    CU_GL_DEVICE_LIST_CURRENT_FRAME = cydriver.CUGLDeviceList_enum.CU_GL_DEVICE_LIST_CURRENT_FRAME{{endif}}
-    {{if True}}
-
-    #: The CUDA devices for the GPUs to be used by the current OpenGL
-    #: context in the next frame
-    CU_GL_DEVICE_LIST_NEXT_FRAME = cydriver.CUGLDeviceList_enum.CU_GL_DEVICE_LIST_NEXT_FRAME{{endif}}
-{{endif}}
-{{if True}}
-
-class CUGLmap_flags(IntEnum):
-    """
-    Flags to map or unmap a resource
-    """
-    {{if True}}
-    CU_GL_MAP_RESOURCE_FLAGS_NONE = cydriver.CUGLmap_flags_enum.CU_GL_MAP_RESOURCE_FLAGS_NONE{{endif}}
-    {{if True}}
-    CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = cydriver.CUGLmap_flags_enum.CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY{{endif}}
-    {{if True}}
-    CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = cydriver.CUGLmap_flags_enum.CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD{{endif}}
-{{endif}}
-{{if 'CUdeviceptr' in found_types}}
-
-cdef class CUdeviceptr:
-    """
-
-    CUDA device pointer CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUdeviceptr *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<CUdeviceptr ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUdevice' in found_types}}
-
-cdef class CUdevice:
-    """
-
-    CUDA device
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, int init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUdevice *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<CUdevice ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <int>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUtexObject' in found_types}}
-
-cdef class CUtexObject:
-    """
-
-    An opaque value that represents a CUDA texture object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUtexObject *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<CUtexObject ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUsurfObject' in found_types}}
-
-cdef class CUsurfObject:
-    """
-
-    An opaque value that represents a CUDA surface object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUsurfObject *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<CUsurfObject ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUgraphConditionalHandle' in found_types}}
-
-cdef class CUgraphConditionalHandle:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, uint64_t init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUgraphConditionalHandle *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<CUgraphConditionalHandle ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <uint64_t>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUlaunchAttributeID_enum' in found_types}}
-
-class CUkernelNodeAttrID(IntEnum):
-    """
-    Launch attributes enum; used as id field of
-    :py:obj:`~.CUlaunchAttribute`
-    """
-    {{if 'CU_LAUNCH_ATTRIBUTE_IGNORE' in found_values}}
-
-    #: Ignored entry, for convenient composition
-    CU_LAUNCH_ATTRIBUTE_IGNORE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_IGNORE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.accessPolicyWindow`.
-    CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_COOPERATIVE' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.cooperative`.
-    CU_LAUNCH_ATTRIBUTE_COOPERATIVE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_COOPERATIVE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY' in found_values}}
-
-    #: Valid for streams. See
-    #: :py:obj:`~.CUlaunchAttributeValue.syncPolicy`.
-    CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.clusterDim`.
-    CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.clusterSchedulingPolicyPreference`.
-    CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION' in found_values}}
-
-    #: Valid for launches. Setting
-    #: :py:obj:`~.CUlaunchAttributeValue.programmaticStreamSerializationAllowed`
-    #: to non-0 signals that the kernel will use programmatic means to
-    #: resolve its stream dependency, so that the CUDA runtime should
-    #: opportunistically allow the grid's execution to overlap with the
-    #: previous kernel in the stream, if that kernel requests the overlap.
-    #: The dependent launches can choose to wait on the dependency using
-    #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
-    #: PTX instructions).
-    CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.CUlaunchAttributeValue.programmaticEvent` to record the
-    #: event. Event recorded through this launch attribute is guaranteed to
-    #: only trigger after all block in the associated kernel trigger the
-    #: event. A block can trigger the event through PTX launchdep.release
-    #: or CUDA builtin function cudaTriggerProgrammaticLaunchCompletion().
-    #: A trigger can also be inserted at the beginning of each block's
-    #: execution if triggerAtBlockStart is set to non-0. The dependent
-    #: launches can choose to wait on the dependency using the programmatic
-    #: sync (cudaGridDependencySynchronize() or equivalent PTX
-    #: instructions). Note that dependents (including the CPU thread
-    #: calling :py:obj:`~.cuEventSynchronize()`) are not guaranteed to
-    #: observe the release precisely when it is released. For example,
-    #: :py:obj:`~.cuEventSynchronize()` may only observe the event trigger
-    #: long after the associated kernel has completed. This recording type
-    #: is primarily meant for establishing programmatic dependency between
-    #: device tasks. Note also this type of dependency allows, but does not
-    #: guarantee, concurrent execution of tasks.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag set).
-    CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PRIORITY' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.priority`.
-    CU_LAUNCH_ATTRIBUTE_PRIORITY = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PRIORITY{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.memSyncDomainMap`.
-    CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.memSyncDomain`.
-    CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.CUlaunchAttributeValue.launchCompletionEvent` to record
-    #: the event.
-    #:  Nominally, the event is triggered once all blocks of the kernel
-    #: have begun execution. Currently this is a best effort. If a kernel B
-    #: has a launch completion dependency on a kernel A, B may wait until A
-    #: is complete. Alternatively, blocks of B may begin before all blocks
-    #: of A have begun, for example if B can claim execution resources
-    #: unavailable to A (e.g. they run on different GPUs) or if B is a
-    #: higher priority than A. Exercise caution if such an ordering
-    #: inversion could lead to deadlock.
-    #:  A launch completion event is nominally similar to a programmatic
-    #: event with `triggerAtBlockStart` set except that it is not visible
-    #: to `cudaGridDependencySynchronize()` and can be used with compute
-    #: capability less than 9.0.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag set).
-    CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE' in found_values}}
-
-    #: Valid for graph nodes, launches. This attribute is graphs-only, and
-    #: passing it to a launch in a non-capturing stream will result in an
-    #: error.
-    #: :py:obj:`~.CUlaunchAttributeValue`::deviceUpdatableKernelNode::deviceUpdatable
-    #: can only be set to 0 or 1. Setting the field to 1 indicates that the
-    #: corresponding kernel node should be device-updatable. On success, a
-    #: handle will be returned via
-    #: :py:obj:`~.CUlaunchAttributeValue`::deviceUpdatableKernelNode::devNode
-    #: which can be passed to the various device-side update functions to
-    #: update the node's kernel parameters from within another kernel. For
-    #: more information on the types of device updates that can be made, as
-    #: well as the relevant limitations thereof, see
-    #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
-    #:  Nodes which are device-updatable have additional restrictions
-    #: compared to regular kernel nodes. Firstly, device-updatable nodes
-    #: cannot be removed from their graph via
-    #: :py:obj:`~.cuGraphDestroyNode`. Additionally, once opted-in to this
-    #: functionality, a node cannot opt out, and any attempt to set the
-    #: deviceUpdatable attribute to 0 will result in an error. Device-
-    #: updatable kernel nodes also cannot have their attributes copied
-    #: to/from another kernel node via
-    #: :py:obj:`~.cuGraphKernelNodeCopyAttributes`. Graphs containing one
-    #: or more device-updatable nodes also do not allow multiple
-    #: instantiation, and neither the graph nor its instantiated version
-    #: can be passed to :py:obj:`~.cuGraphExecUpdate`.
-    #:  If a graph contains device-updatable nodes and updates those nodes
-    #: from the device from within the graph, the graph must be uploaded
-    #: with :py:obj:`~.cuGraphUpload` before it is launched. For such a
-    #: graph, if host-side executable graph updates are made to the device-
-    #: updatable nodes, the graph must be uploaded before it is launched
-    #: again.
-    CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT' in found_values}}
-
-    #: Valid for launches. On devices where the L1 cache and shared memory
-    #: use the same hardware resources, setting
-    #: :py:obj:`~.CUlaunchAttributeValue.sharedMemCarveout` to a percentage
-    #: between 0-100 signals the CUDA driver to set the shared memory
-    #: carveout preference, in percent of the total shared memory for that
-    #: kernel launch. This attribute takes precedence over
-    #: :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`. This
-    #: is only a hint, and the CUDA driver can choose a different
-    #: configuration if required for the launch.
-    CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT{{endif}}
-{{endif}}
-{{if 'CUlaunchAttributeID_enum' in found_types}}
-
-class CUstreamAttrID(IntEnum):
-    """
-    Launch attributes enum; used as id field of
-    :py:obj:`~.CUlaunchAttribute`
-    """
-    {{if 'CU_LAUNCH_ATTRIBUTE_IGNORE' in found_values}}
-
-    #: Ignored entry, for convenient composition
-    CU_LAUNCH_ATTRIBUTE_IGNORE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_IGNORE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.accessPolicyWindow`.
-    CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_COOPERATIVE' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.cooperative`.
-    CU_LAUNCH_ATTRIBUTE_COOPERATIVE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_COOPERATIVE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY' in found_values}}
-
-    #: Valid for streams. See
-    #: :py:obj:`~.CUlaunchAttributeValue.syncPolicy`.
-    CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.clusterDim`.
-    CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.clusterSchedulingPolicyPreference`.
-    CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION' in found_values}}
-
-    #: Valid for launches. Setting
-    #: :py:obj:`~.CUlaunchAttributeValue.programmaticStreamSerializationAllowed`
-    #: to non-0 signals that the kernel will use programmatic means to
-    #: resolve its stream dependency, so that the CUDA runtime should
-    #: opportunistically allow the grid's execution to overlap with the
-    #: previous kernel in the stream, if that kernel requests the overlap.
-    #: The dependent launches can choose to wait on the dependency using
-    #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
-    #: PTX instructions).
-    CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.CUlaunchAttributeValue.programmaticEvent` to record the
-    #: event. Event recorded through this launch attribute is guaranteed to
-    #: only trigger after all block in the associated kernel trigger the
-    #: event. A block can trigger the event through PTX launchdep.release
-    #: or CUDA builtin function cudaTriggerProgrammaticLaunchCompletion().
-    #: A trigger can also be inserted at the beginning of each block's
-    #: execution if triggerAtBlockStart is set to non-0. The dependent
-    #: launches can choose to wait on the dependency using the programmatic
-    #: sync (cudaGridDependencySynchronize() or equivalent PTX
-    #: instructions). Note that dependents (including the CPU thread
-    #: calling :py:obj:`~.cuEventSynchronize()`) are not guaranteed to
-    #: observe the release precisely when it is released. For example,
-    #: :py:obj:`~.cuEventSynchronize()` may only observe the event trigger
-    #: long after the associated kernel has completed. This recording type
-    #: is primarily meant for establishing programmatic dependency between
-    #: device tasks. Note also this type of dependency allows, but does not
-    #: guarantee, concurrent execution of tasks.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag set).
-    CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PRIORITY' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.priority`.
-    CU_LAUNCH_ATTRIBUTE_PRIORITY = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PRIORITY{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.memSyncDomainMap`.
-    CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.CUlaunchAttributeValue.memSyncDomain`.
-    CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.CUlaunchAttributeValue.launchCompletionEvent` to record
-    #: the event.
-    #:  Nominally, the event is triggered once all blocks of the kernel
-    #: have begun execution. Currently this is a best effort. If a kernel B
-    #: has a launch completion dependency on a kernel A, B may wait until A
-    #: is complete. Alternatively, blocks of B may begin before all blocks
-    #: of A have begun, for example if B can claim execution resources
-    #: unavailable to A (e.g. they run on different GPUs) or if B is a
-    #: higher priority than A. Exercise caution if such an ordering
-    #: inversion could lead to deadlock.
-    #:  A launch completion event is nominally similar to a programmatic
-    #: event with `triggerAtBlockStart` set except that it is not visible
-    #: to `cudaGridDependencySynchronize()` and can be used with compute
-    #: capability less than 9.0.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag set).
-    CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE' in found_values}}
-
-    #: Valid for graph nodes, launches. This attribute is graphs-only, and
-    #: passing it to a launch in a non-capturing stream will result in an
-    #: error.
-    #: :py:obj:`~.CUlaunchAttributeValue`::deviceUpdatableKernelNode::deviceUpdatable
-    #: can only be set to 0 or 1. Setting the field to 1 indicates that the
-    #: corresponding kernel node should be device-updatable. On success, a
-    #: handle will be returned via
-    #: :py:obj:`~.CUlaunchAttributeValue`::deviceUpdatableKernelNode::devNode
-    #: which can be passed to the various device-side update functions to
-    #: update the node's kernel parameters from within another kernel. For
-    #: more information on the types of device updates that can be made, as
-    #: well as the relevant limitations thereof, see
-    #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
-    #:  Nodes which are device-updatable have additional restrictions
-    #: compared to regular kernel nodes. Firstly, device-updatable nodes
-    #: cannot be removed from their graph via
-    #: :py:obj:`~.cuGraphDestroyNode`. Additionally, once opted-in to this
-    #: functionality, a node cannot opt out, and any attempt to set the
-    #: deviceUpdatable attribute to 0 will result in an error. Device-
-    #: updatable kernel nodes also cannot have their attributes copied
-    #: to/from another kernel node via
-    #: :py:obj:`~.cuGraphKernelNodeCopyAttributes`. Graphs containing one
-    #: or more device-updatable nodes also do not allow multiple
-    #: instantiation, and neither the graph nor its instantiated version
-    #: can be passed to :py:obj:`~.cuGraphExecUpdate`.
-    #:  If a graph contains device-updatable nodes and updates those nodes
-    #: from the device from within the graph, the graph must be uploaded
-    #: with :py:obj:`~.cuGraphUpload` before it is launched. For such a
-    #: graph, if host-side executable graph updates are made to the device-
-    #: updatable nodes, the graph must be uploaded before it is launched
-    #: again.
-    CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE{{endif}}
-    {{if 'CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT' in found_values}}
-
-    #: Valid for launches. On devices where the L1 cache and shared memory
-    #: use the same hardware resources, setting
-    #: :py:obj:`~.CUlaunchAttributeValue.sharedMemCarveout` to a percentage
-    #: between 0-100 signals the CUDA driver to set the shared memory
-    #: carveout preference, in percent of the total shared memory for that
-    #: kernel launch. This attribute takes precedence over
-    #: :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`. This
-    #: is only a hint, and the CUDA driver can choose a different
-    #: configuration if required for the launch.
-    CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = cydriver.CUlaunchAttributeID_enum.CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT{{endif}}
-{{endif}}
-{{if 'CUmemGenericAllocationHandle' in found_types}}
-
-cdef class CUmemGenericAllocationHandle:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUmemGenericAllocationHandle *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<CUmemGenericAllocationHandle ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUcontext' in found_types}}
-
-cdef class CUcontext:
-    """
-
-    A regular context handle
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUcontext>init_value
-        else:
-            self._ptr = <cydriver.CUcontext *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUcontext ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUmodule' in found_types}}
-
-cdef class CUmodule:
-    """
-
-    CUDA module
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUmodule>init_value
-        else:
-            self._ptr = <cydriver.CUmodule *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUmodule ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUfunction' in found_types}}
-
-cdef class CUfunction:
-    """
-
-    CUDA function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUfunction>init_value
-        else:
-            self._ptr = <cydriver.CUfunction *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUfunction ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUlibrary' in found_types}}
-
-cdef class CUlibrary:
-    """
-
-    CUDA library
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUlibrary>init_value
-        else:
-            self._ptr = <cydriver.CUlibrary *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUlibrary ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUkernel' in found_types}}
-
-cdef class CUkernel:
-    """
-
-    CUDA kernel
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUkernel>init_value
-        else:
-            self._ptr = <cydriver.CUkernel *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUkernel ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUarray' in found_types}}
-
-cdef class CUarray:
-    """
-
-    CUDA array
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUarray>init_value
-        else:
-            self._ptr = <cydriver.CUarray *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUarray ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUmipmappedArray' in found_types}}
-
-cdef class CUmipmappedArray:
-    """
-
-    CUDA mipmapped array
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUmipmappedArray>init_value
-        else:
-            self._ptr = <cydriver.CUmipmappedArray *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUmipmappedArray ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUtexref' in found_types}}
-
-cdef class CUtexref:
-    """
-
-    CUDA texture reference
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUtexref>init_value
-        else:
-            self._ptr = <cydriver.CUtexref *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUtexref ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUsurfref' in found_types}}
-
-cdef class CUsurfref:
-    """
-
-    CUDA surface reference
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUsurfref>init_value
-        else:
-            self._ptr = <cydriver.CUsurfref *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUsurfref ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUevent' in found_types}}
-
-cdef class CUevent:
-    """
-
-    CUDA event
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUevent>init_value
-        else:
-            self._ptr = <cydriver.CUevent *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUevent ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUstream' in found_types}}
-
-cdef class CUstream:
-    """
-
-    CUDA stream
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUstream>init_value
-        else:
-            self._ptr = <cydriver.CUstream *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUstream ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUgraphicsResource' in found_types}}
-
-cdef class CUgraphicsResource:
-    """
-
-    CUDA graphics interop resource
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUgraphicsResource>init_value
-        else:
-            self._ptr = <cydriver.CUgraphicsResource *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUgraphicsResource ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUexternalMemory' in found_types}}
-
-cdef class CUexternalMemory:
-    """
-
-    CUDA external memory
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUexternalMemory>init_value
-        else:
-            self._ptr = <cydriver.CUexternalMemory *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUexternalMemory ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUexternalSemaphore' in found_types}}
-
-cdef class CUexternalSemaphore:
-    """
-
-    CUDA external semaphore
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUexternalSemaphore>init_value
-        else:
-            self._ptr = <cydriver.CUexternalSemaphore *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUexternalSemaphore ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUgraph' in found_types}}
-
-cdef class CUgraph:
-    """
-
-    CUDA graph
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUgraph>init_value
-        else:
-            self._ptr = <cydriver.CUgraph *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUgraph ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUgraphNode' in found_types}}
-
-cdef class CUgraphNode:
-    """
-
-    CUDA graph node
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUgraphNode>init_value
-        else:
-            self._ptr = <cydriver.CUgraphNode *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUgraphNode ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUgraphExec' in found_types}}
-
-cdef class CUgraphExec:
-    """
-
-    CUDA executable graph
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUgraphExec>init_value
-        else:
-            self._ptr = <cydriver.CUgraphExec *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUgraphExec ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUmemoryPool' in found_types}}
-
-cdef class CUmemoryPool:
-    """
-
-    CUDA memory pool
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUmemoryPool>init_value
-        else:
-            self._ptr = <cydriver.CUmemoryPool *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUmemoryPool ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUuserObject' in found_types}}
-
-cdef class CUuserObject:
-    """
-
-    CUDA user object for graphs
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUuserObject>init_value
-        else:
-            self._ptr = <cydriver.CUuserObject *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUuserObject ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUgraphDeviceNode' in found_types}}
-
-cdef class CUgraphDeviceNode:
-    """
-
-    CUDA graph device node handle
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUgraphDeviceNode>init_value
-        else:
-            self._ptr = <cydriver.CUgraphDeviceNode *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUgraphDeviceNode ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUasyncCallbackHandle' in found_types}}
-
-cdef class CUasyncCallbackHandle:
-    """
-
-    CUDA async notification callback handle
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUasyncCallbackHandle>init_value
-        else:
-            self._ptr = <cydriver.CUasyncCallbackHandle *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUasyncCallbackHandle ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUgreenCtx' in found_types}}
-
-cdef class CUgreenCtx:
-    """
-
-    A green context handle. This handle can be used safely from only one CPU thread at a time. Created via cuGreenCtxCreate
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUgreenCtx>init_value
-        else:
-            self._ptr = <cydriver.CUgreenCtx *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUgreenCtx ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUlinkState' in found_types}}
-
-cdef class CUlinkState:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUlinkState>init_value
-        else:
-            self._ptr = <cydriver.CUlinkState *>_ptr
-    def __init__(self, *args, **kwargs):
-        self._keepalive = []
-    def __repr__(self):
-        return '<CUlinkState ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUdevResourceDesc' in found_types}}
-
-cdef class CUdevResourceDesc:
-    """
-
-    An opaque descriptor handle. The descriptor encapsulates multiple created and configured resources. Created via cuDevResourceGenerateDesc
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUdevResourceDesc>init_value
-        else:
-            self._ptr = <cydriver.CUdevResourceDesc *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUdevResourceDesc ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class CUeglStreamConnection:
-    """
-
-    CUDA EGLSream Connection
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUeglStreamConnection>init_value
-        else:
-            self._ptr = <cydriver.CUeglStreamConnection *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUeglStreamConnection ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLImageKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.EGLImageKHR>init_value
-        else:
-            self._ptr = <cydriver.EGLImageKHR *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<EGLImageKHR ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLStreamKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.EGLStreamKHR>init_value
-        else:
-            self._ptr = <cydriver.EGLStreamKHR *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<EGLStreamKHR ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLSyncKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.EGLSyncKHR>init_value
-        else:
-            self._ptr = <cydriver.EGLSyncKHR *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<EGLSyncKHR ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUasyncCallback' in found_types}}
-
-cdef class CUasyncCallback:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUasyncCallback>init_value
-        else:
-            self._ptr = <cydriver.CUasyncCallback *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUasyncCallback ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUhostFn' in found_types}}
-
-cdef class CUhostFn:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUhostFn>init_value
-        else:
-            self._ptr = <cydriver.CUhostFn *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUhostFn ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUstreamCallback' in found_types}}
-
-cdef class CUstreamCallback:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUstreamCallback>init_value
-        else:
-            self._ptr = <cydriver.CUstreamCallback *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUstreamCallback ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUoccupancyB2DSize' in found_types}}
-
-cdef class CUoccupancyB2DSize:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cydriver.CUoccupancyB2DSize>init_value
-        else:
-            self._ptr = <cydriver.CUoccupancyB2DSize *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<CUoccupancyB2DSize ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'struct CUuuid_st' in found_types}}
-
-cdef class CUuuid_st:
-    """
-    Attributes
-    ----------
-    bytes : bytes
-        < CUDA definition of UUID
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUuuid_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['bytes : ' + str(self.bytes.hex())]
-            except ValueError:
-                str_list += ['bytes : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def bytes(self):
-        return PyBytes_FromStringAndSize(self._ptr[0].bytes, 16)
-
-{{endif}}
-{{if 'struct CUmemFabricHandle_st' in found_types}}
-
-cdef class CUmemFabricHandle_st:
-    """
-    Fabric handle - An opaque handle representing a memory allocation
-    that can be exported to processes in same or different nodes. For
-    IPC between processes on different nodes they must be connected via
-    the NVSwitch fabric.
-
-    Attributes
-    ----------
-    data : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUmemFabricHandle_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['data : ' + str(self.data)]
-            except ValueError:
-                str_list += ['data : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def data(self):
-        return PyBytes_FromStringAndSize(<char*>self._ptr[0].data, 64)
-    @data.setter
-    def data(self, data):
-        if len(data) != 64:
-            raise ValueError("data length must be 64, is " + str(len(data)))
-        for i, b in enumerate(data):
-            self._ptr[0].data[i] = b
-{{endif}}
-{{if 'struct CUipcEventHandle_st' in found_types}}
-
-cdef class CUipcEventHandle_st:
-    """
-    CUDA IPC event handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUipcEventHandle_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(self._ptr[0].reserved, 64)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 64:
-            raise ValueError("reserved length must be 64, is " + str(len(reserved)))
-        if CHAR_MIN == 0:
-            for i, b in enumerate(reserved):
-                if b < 0 and b > -129:
-                    b = b + 256
-                self._ptr[0].reserved[i] = b
-        else:
-            for i, b in enumerate(reserved):
-                if b > 127 and b < 256:
-                    b = b - 256
-                self._ptr[0].reserved[i] = b
-{{endif}}
-{{if 'struct CUipcMemHandle_st' in found_types}}
-
-cdef class CUipcMemHandle_st:
-    """
-    CUDA IPC mem handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUipcMemHandle_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(self._ptr[0].reserved, 64)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 64:
-            raise ValueError("reserved length must be 64, is " + str(len(reserved)))
-        if CHAR_MIN == 0:
-            for i, b in enumerate(reserved):
-                if b < 0 and b > -129:
-                    b = b + 256
-                self._ptr[0].reserved[i] = b
-        else:
-            for i, b in enumerate(reserved):
-                if b > 127 and b < 256:
-                    b = b - 256
-                self._ptr[0].reserved[i] = b
-{{endif}}
-{{if 'union CUstreamBatchMemOpParams_union' in found_types}}
-
-cdef class CUstreamMemOpWaitValueParams_st:
-    """
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-
-    address : CUdeviceptr
-
-    value : cuuint32_t
-
-    value64 : cuuint64_t
-
-    flags : unsigned int
-
-    alias : CUdeviceptr
-        For driver internal use. Initial value is unimportant.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUstreamBatchMemOpParams_union *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._address = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].waitValue.address)
-        self._value = cuuint32_t(_ptr=<void_ptr>&self._ptr[0].waitValue.value)
-        self._value64 = cuuint64_t(_ptr=<void_ptr>&self._ptr[0].waitValue.value64)
-        self._alias = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].waitValue.alias)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].waitValue
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['operation : ' + str(self.operation)]
-            except ValueError:
-                str_list += ['operation : <ValueError>']
-            try:
-                str_list += ['address : ' + str(self.address)]
-            except ValueError:
-                str_list += ['address : <ValueError>']
-            try:
-                str_list += ['value : ' + str(self.value)]
-            except ValueError:
-                str_list += ['value : <ValueError>']
-            try:
-                str_list += ['value64 : ' + str(self.value64)]
-            except ValueError:
-                str_list += ['value64 : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['alias : ' + str(self.alias)]
-            except ValueError:
-                str_list += ['alias : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def operation(self):
-        return CUstreamBatchMemOpType(self._ptr[0].waitValue.operation)
-    @operation.setter
-    def operation(self, operation not None : CUstreamBatchMemOpType):
-        self._ptr[0].waitValue.operation = operation.value
-    @property
-    def address(self):
-        return self._address
-    @address.setter
-    def address(self, address):
-        cdef cydriver.CUdeviceptr cyaddress
-        if address is None:
-            cyaddress = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(address, (CUdeviceptr)):
-            paddress = int(address)
-            cyaddress = <cydriver.CUdeviceptr><void_ptr>paddress
-        else:
-            paddress = int(CUdeviceptr(address))
-            cyaddress = <cydriver.CUdeviceptr><void_ptr>paddress
-        self._address._ptr[0] = cyaddress
-
-    @property
-    def value(self):
-        return self._value
-    @value.setter
-    def value(self, value):
-        cdef cydriver.cuuint32_t cyvalue
-        if value is None:
-            cyvalue = <cydriver.cuuint32_t><void_ptr>0
-        elif isinstance(value, (cuuint32_t)):
-            pvalue = int(value)
-            cyvalue = <cydriver.cuuint32_t><void_ptr>pvalue
-        else:
-            pvalue = int(cuuint32_t(value))
-            cyvalue = <cydriver.cuuint32_t><void_ptr>pvalue
-        self._value._ptr[0] = cyvalue
-
-    @property
-    def value64(self):
-        return self._value64
-    @value64.setter
-    def value64(self, value64):
-        cdef cydriver.cuuint64_t cyvalue64
-        if value64 is None:
-            cyvalue64 = <cydriver.cuuint64_t><void_ptr>0
-        elif isinstance(value64, (cuuint64_t)):
-            pvalue64 = int(value64)
-            cyvalue64 = <cydriver.cuuint64_t><void_ptr>pvalue64
-        else:
-            pvalue64 = int(cuuint64_t(value64))
-            cyvalue64 = <cydriver.cuuint64_t><void_ptr>pvalue64
-        self._value64._ptr[0] = cyvalue64
-
-    @property
-    def flags(self):
-        return self._ptr[0].waitValue.flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].waitValue.flags = flags
-    @property
-    def alias(self):
-        return self._alias
-    @alias.setter
-    def alias(self, alias):
-        cdef cydriver.CUdeviceptr cyalias
-        if alias is None:
-            cyalias = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(alias, (CUdeviceptr)):
-            palias = int(alias)
-            cyalias = <cydriver.CUdeviceptr><void_ptr>palias
-        else:
-            palias = int(CUdeviceptr(alias))
-            cyalias = <cydriver.CUdeviceptr><void_ptr>palias
-        self._alias._ptr[0] = cyalias
-
-{{endif}}
-{{if 'union CUstreamBatchMemOpParams_union' in found_types}}
-
-cdef class CUstreamMemOpWriteValueParams_st:
-    """
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-
-    address : CUdeviceptr
-
-    value : cuuint32_t
-
-    value64 : cuuint64_t
-
-    flags : unsigned int
-
-    alias : CUdeviceptr
-        For driver internal use. Initial value is unimportant.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUstreamBatchMemOpParams_union *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._address = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].writeValue.address)
-        self._value = cuuint32_t(_ptr=<void_ptr>&self._ptr[0].writeValue.value)
-        self._value64 = cuuint64_t(_ptr=<void_ptr>&self._ptr[0].writeValue.value64)
-        self._alias = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].writeValue.alias)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].writeValue
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['operation : ' + str(self.operation)]
-            except ValueError:
-                str_list += ['operation : <ValueError>']
-            try:
-                str_list += ['address : ' + str(self.address)]
-            except ValueError:
-                str_list += ['address : <ValueError>']
-            try:
-                str_list += ['value : ' + str(self.value)]
-            except ValueError:
-                str_list += ['value : <ValueError>']
-            try:
-                str_list += ['value64 : ' + str(self.value64)]
-            except ValueError:
-                str_list += ['value64 : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['alias : ' + str(self.alias)]
-            except ValueError:
-                str_list += ['alias : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def operation(self):
-        return CUstreamBatchMemOpType(self._ptr[0].writeValue.operation)
-    @operation.setter
-    def operation(self, operation not None : CUstreamBatchMemOpType):
-        self._ptr[0].writeValue.operation = operation.value
-    @property
-    def address(self):
-        return self._address
-    @address.setter
-    def address(self, address):
-        cdef cydriver.CUdeviceptr cyaddress
-        if address is None:
-            cyaddress = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(address, (CUdeviceptr)):
-            paddress = int(address)
-            cyaddress = <cydriver.CUdeviceptr><void_ptr>paddress
-        else:
-            paddress = int(CUdeviceptr(address))
-            cyaddress = <cydriver.CUdeviceptr><void_ptr>paddress
-        self._address._ptr[0] = cyaddress
-
-    @property
-    def value(self):
-        return self._value
-    @value.setter
-    def value(self, value):
-        cdef cydriver.cuuint32_t cyvalue
-        if value is None:
-            cyvalue = <cydriver.cuuint32_t><void_ptr>0
-        elif isinstance(value, (cuuint32_t)):
-            pvalue = int(value)
-            cyvalue = <cydriver.cuuint32_t><void_ptr>pvalue
-        else:
-            pvalue = int(cuuint32_t(value))
-            cyvalue = <cydriver.cuuint32_t><void_ptr>pvalue
-        self._value._ptr[0] = cyvalue
-
-    @property
-    def value64(self):
-        return self._value64
-    @value64.setter
-    def value64(self, value64):
-        cdef cydriver.cuuint64_t cyvalue64
-        if value64 is None:
-            cyvalue64 = <cydriver.cuuint64_t><void_ptr>0
-        elif isinstance(value64, (cuuint64_t)):
-            pvalue64 = int(value64)
-            cyvalue64 = <cydriver.cuuint64_t><void_ptr>pvalue64
-        else:
-            pvalue64 = int(cuuint64_t(value64))
-            cyvalue64 = <cydriver.cuuint64_t><void_ptr>pvalue64
-        self._value64._ptr[0] = cyvalue64
-
-    @property
-    def flags(self):
-        return self._ptr[0].writeValue.flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].writeValue.flags = flags
-    @property
-    def alias(self):
-        return self._alias
-    @alias.setter
-    def alias(self, alias):
-        cdef cydriver.CUdeviceptr cyalias
-        if alias is None:
-            cyalias = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(alias, (CUdeviceptr)):
-            palias = int(alias)
-            cyalias = <cydriver.CUdeviceptr><void_ptr>palias
-        else:
-            palias = int(CUdeviceptr(alias))
-            cyalias = <cydriver.CUdeviceptr><void_ptr>palias
-        self._alias._ptr[0] = cyalias
-
-{{endif}}
-{{if 'union CUstreamBatchMemOpParams_union' in found_types}}
-
-cdef class CUstreamMemOpFlushRemoteWritesParams_st:
-    """
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-
-    flags : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUstreamBatchMemOpParams_union *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].flushRemoteWrites
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['operation : ' + str(self.operation)]
-            except ValueError:
-                str_list += ['operation : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def operation(self):
-        return CUstreamBatchMemOpType(self._ptr[0].flushRemoteWrites.operation)
-    @operation.setter
-    def operation(self, operation not None : CUstreamBatchMemOpType):
-        self._ptr[0].flushRemoteWrites.operation = operation.value
-    @property
-    def flags(self):
-        return self._ptr[0].flushRemoteWrites.flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flushRemoteWrites.flags = flags
-{{endif}}
-{{if 'union CUstreamBatchMemOpParams_union' in found_types}}
-
-cdef class CUstreamMemOpMemoryBarrierParams_st:
-    """
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-        < Only supported in the _v2 API
-    flags : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUstreamBatchMemOpParams_union *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].memoryBarrier
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['operation : ' + str(self.operation)]
-            except ValueError:
-                str_list += ['operation : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def operation(self):
-        return CUstreamBatchMemOpType(self._ptr[0].memoryBarrier.operation)
-    @operation.setter
-    def operation(self, operation not None : CUstreamBatchMemOpType):
-        self._ptr[0].memoryBarrier.operation = operation.value
-    @property
-    def flags(self):
-        return self._ptr[0].memoryBarrier.flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].memoryBarrier.flags = flags
-{{endif}}
-{{if 'union CUstreamBatchMemOpParams_union' in found_types}}
-
-cdef class CUstreamBatchMemOpParams_union:
-    """
-    Per-operation parameters for cuStreamBatchMemOp
-
-    Attributes
-    ----------
-    operation : CUstreamBatchMemOpType
-
-    waitValue : CUstreamMemOpWaitValueParams_st
-
-    writeValue : CUstreamMemOpWriteValueParams_st
-
-    flushRemoteWrites : CUstreamMemOpFlushRemoteWritesParams_st
-
-    memoryBarrier : CUstreamMemOpMemoryBarrierParams_st
-
-    pad : List[cuuint64_t]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUstreamBatchMemOpParams_union *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._waitValue = CUstreamMemOpWaitValueParams_st(_ptr=<void_ptr>self._ptr)
-        self._writeValue = CUstreamMemOpWriteValueParams_st(_ptr=<void_ptr>self._ptr)
-        self._flushRemoteWrites = CUstreamMemOpFlushRemoteWritesParams_st(_ptr=<void_ptr>self._ptr)
-        self._memoryBarrier = CUstreamMemOpMemoryBarrierParams_st(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['operation : ' + str(self.operation)]
-            except ValueError:
-                str_list += ['operation : <ValueError>']
-            try:
-                str_list += ['waitValue :\n' + '\n'.join(['    ' + line for line in str(self.waitValue).splitlines()])]
-            except ValueError:
-                str_list += ['waitValue : <ValueError>']
-            try:
-                str_list += ['writeValue :\n' + '\n'.join(['    ' + line for line in str(self.writeValue).splitlines()])]
-            except ValueError:
-                str_list += ['writeValue : <ValueError>']
-            try:
-                str_list += ['flushRemoteWrites :\n' + '\n'.join(['    ' + line for line in str(self.flushRemoteWrites).splitlines()])]
-            except ValueError:
-                str_list += ['flushRemoteWrites : <ValueError>']
-            try:
-                str_list += ['memoryBarrier :\n' + '\n'.join(['    ' + line for line in str(self.memoryBarrier).splitlines()])]
-            except ValueError:
-                str_list += ['memoryBarrier : <ValueError>']
-            try:
-                str_list += ['pad : ' + str(self.pad)]
-            except ValueError:
-                str_list += ['pad : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def operation(self):
-        return CUstreamBatchMemOpType(self._ptr[0].operation)
-    @operation.setter
-    def operation(self, operation not None : CUstreamBatchMemOpType):
-        self._ptr[0].operation = operation.value
-    @property
-    def waitValue(self):
-        return self._waitValue
-    @waitValue.setter
-    def waitValue(self, waitValue not None : CUstreamMemOpWaitValueParams_st):
-        string.memcpy(&self._ptr[0].waitValue, <cydriver.CUstreamMemOpWaitValueParams_st*><void_ptr>waitValue.getPtr(), sizeof(self._ptr[0].waitValue))
-    @property
-    def writeValue(self):
-        return self._writeValue
-    @writeValue.setter
-    def writeValue(self, writeValue not None : CUstreamMemOpWriteValueParams_st):
-        string.memcpy(&self._ptr[0].writeValue, <cydriver.CUstreamMemOpWriteValueParams_st*><void_ptr>writeValue.getPtr(), sizeof(self._ptr[0].writeValue))
-    @property
-    def flushRemoteWrites(self):
-        return self._flushRemoteWrites
-    @flushRemoteWrites.setter
-    def flushRemoteWrites(self, flushRemoteWrites not None : CUstreamMemOpFlushRemoteWritesParams_st):
-        string.memcpy(&self._ptr[0].flushRemoteWrites, <cydriver.CUstreamMemOpFlushRemoteWritesParams_st*><void_ptr>flushRemoteWrites.getPtr(), sizeof(self._ptr[0].flushRemoteWrites))
-    @property
-    def memoryBarrier(self):
-        return self._memoryBarrier
-    @memoryBarrier.setter
-    def memoryBarrier(self, memoryBarrier not None : CUstreamMemOpMemoryBarrierParams_st):
-        string.memcpy(&self._ptr[0].memoryBarrier, <cydriver.CUstreamMemOpMemoryBarrierParams_st*><void_ptr>memoryBarrier.getPtr(), sizeof(self._ptr[0].memoryBarrier))
-    @property
-    def pad(self):
-        return [cuuint64_t(init_value=_pad) for _pad in self._ptr[0].pad]
-    @pad.setter
-    def pad(self, pad):
-        self._ptr[0].pad = pad
-
-{{endif}}
-{{if 'struct CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st' in found_types}}
-
-cdef class CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st:
-    """
-    Attributes
-    ----------
-    ctx : CUcontext
-
-    count : unsigned int
-
-    paramArray : CUstreamBatchMemOpParams
-
-    flags : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._ctx = CUcontext(_ptr=<void_ptr>&self._ptr[0].ctx)
-    def __dealloc__(self):
-        if self._paramArray is not NULL:
-            free(self._paramArray)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['ctx : ' + str(self.ctx)]
-            except ValueError:
-                str_list += ['ctx : <ValueError>']
-            try:
-                str_list += ['count : ' + str(self.count)]
-            except ValueError:
-                str_list += ['count : <ValueError>']
-            try:
-                str_list += ['paramArray : ' + str(self.paramArray)]
-            except ValueError:
-                str_list += ['paramArray : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def ctx(self):
-        return self._ctx
-    @ctx.setter
-    def ctx(self, ctx):
-        cdef cydriver.CUcontext cyctx
-        if ctx is None:
-            cyctx = <cydriver.CUcontext><void_ptr>0
-        elif isinstance(ctx, (CUcontext,)):
-            pctx = int(ctx)
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        else:
-            pctx = int(CUcontext(ctx))
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        self._ctx._ptr[0] = cyctx
-    @property
-    def count(self):
-        return self._ptr[0].count
-    @count.setter
-    def count(self, unsigned int count):
-        self._ptr[0].count = count
-    @property
-    def paramArray(self):
-        arrs = [<void_ptr>self._ptr[0].paramArray + x*sizeof(cydriver.CUstreamBatchMemOpParams) for x in range(self._paramArray_length)]
-        return [CUstreamBatchMemOpParams(_ptr=arr) for arr in arrs]
-    @paramArray.setter
-    def paramArray(self, val):
-        if len(val) == 0:
-            free(self._paramArray)
-            self._paramArray_length = 0
-            self._ptr[0].paramArray = NULL
-        else:
-            if self._paramArray_length != <size_t>len(val):
-                free(self._paramArray)
-                self._paramArray = <cydriver.CUstreamBatchMemOpParams*> calloc(len(val), sizeof(cydriver.CUstreamBatchMemOpParams))
-                if self._paramArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUstreamBatchMemOpParams)))
-                self._paramArray_length = <size_t>len(val)
-                self._ptr[0].paramArray = self._paramArray
-            for idx in range(len(val)):
-                string.memcpy(&self._paramArray[idx], (<CUstreamBatchMemOpParams>val[idx])._ptr, sizeof(cydriver.CUstreamBatchMemOpParams))
-
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-{{endif}}
-{{if 'struct CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st:
-    """
-    Batch memory operation node parameters
-
-    Attributes
-    ----------
-    ctx : CUcontext
-        Context to use for the operations.
-    count : unsigned int
-        Number of operations in paramArray.
-    paramArray : CUstreamBatchMemOpParams
-        Array of batch memory operations.
-    flags : unsigned int
-        Flags to control the node.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._ctx = CUcontext(_ptr=<void_ptr>&self._ptr[0].ctx)
-    def __dealloc__(self):
-        if self._paramArray is not NULL:
-            free(self._paramArray)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['ctx : ' + str(self.ctx)]
-            except ValueError:
-                str_list += ['ctx : <ValueError>']
-            try:
-                str_list += ['count : ' + str(self.count)]
-            except ValueError:
-                str_list += ['count : <ValueError>']
-            try:
-                str_list += ['paramArray : ' + str(self.paramArray)]
-            except ValueError:
-                str_list += ['paramArray : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def ctx(self):
-        return self._ctx
-    @ctx.setter
-    def ctx(self, ctx):
-        cdef cydriver.CUcontext cyctx
-        if ctx is None:
-            cyctx = <cydriver.CUcontext><void_ptr>0
-        elif isinstance(ctx, (CUcontext,)):
-            pctx = int(ctx)
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        else:
-            pctx = int(CUcontext(ctx))
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        self._ctx._ptr[0] = cyctx
-    @property
-    def count(self):
-        return self._ptr[0].count
-    @count.setter
-    def count(self, unsigned int count):
-        self._ptr[0].count = count
-    @property
-    def paramArray(self):
-        arrs = [<void_ptr>self._ptr[0].paramArray + x*sizeof(cydriver.CUstreamBatchMemOpParams) for x in range(self._paramArray_length)]
-        return [CUstreamBatchMemOpParams(_ptr=arr) for arr in arrs]
-    @paramArray.setter
-    def paramArray(self, val):
-        if len(val) == 0:
-            free(self._paramArray)
-            self._paramArray_length = 0
-            self._ptr[0].paramArray = NULL
-        else:
-            if self._paramArray_length != <size_t>len(val):
-                free(self._paramArray)
-                self._paramArray = <cydriver.CUstreamBatchMemOpParams*> calloc(len(val), sizeof(cydriver.CUstreamBatchMemOpParams))
-                if self._paramArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUstreamBatchMemOpParams)))
-                self._paramArray_length = <size_t>len(val)
-                self._ptr[0].paramArray = self._paramArray
-            for idx in range(len(val)):
-                string.memcpy(&self._paramArray[idx], (<CUstreamBatchMemOpParams>val[idx])._ptr, sizeof(cydriver.CUstreamBatchMemOpParams))
-
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-{{endif}}
-{{if 'struct CUasyncNotificationInfo_st' in found_types}}
-
-cdef class anon_struct0:
-    """
-    Attributes
-    ----------
-    bytesOverBudget : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUasyncNotificationInfo_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].info.overBudget
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['bytesOverBudget : ' + str(self.bytesOverBudget)]
-            except ValueError:
-                str_list += ['bytesOverBudget : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def bytesOverBudget(self):
-        return self._ptr[0].info.overBudget.bytesOverBudget
-    @bytesOverBudget.setter
-    def bytesOverBudget(self, unsigned long long bytesOverBudget):
-        self._ptr[0].info.overBudget.bytesOverBudget = bytesOverBudget
-{{endif}}
-{{if 'struct CUasyncNotificationInfo_st' in found_types}}
-
-cdef class anon_union2:
-    """
-    Attributes
-    ----------
-    overBudget : anon_struct0
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUasyncNotificationInfo_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._overBudget = anon_struct0(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].info
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['overBudget :\n' + '\n'.join(['    ' + line for line in str(self.overBudget).splitlines()])]
-            except ValueError:
-                str_list += ['overBudget : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def overBudget(self):
-        return self._overBudget
-    @overBudget.setter
-    def overBudget(self, overBudget not None : anon_struct0):
-        string.memcpy(&self._ptr[0].info.overBudget, <cydriver.anon_struct0*><void_ptr>overBudget.getPtr(), sizeof(self._ptr[0].info.overBudget))
-{{endif}}
-{{if 'struct CUasyncNotificationInfo_st' in found_types}}
-
-cdef class CUasyncNotificationInfo_st:
-    """
-    Information passed to the user via the async notification callback
-
-    Attributes
-    ----------
-    type : CUasyncNotificationType
-
-    info : anon_union2
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cydriver.CUasyncNotificationInfo_st *>calloc(1, sizeof(cydriver.CUasyncNotificationInfo_st))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cydriver.CUasyncNotificationInfo_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._info = anon_union2(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['info :\n' + '\n'.join(['    ' + line for line in str(self.info).splitlines()])]
-            except ValueError:
-                str_list += ['info : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return CUasyncNotificationType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : CUasyncNotificationType):
-        self._ptr[0].type = type.value
-    @property
-    def info(self):
-        return self._info
-    @info.setter
-    def info(self, info not None : anon_union2):
-        string.memcpy(&self._ptr[0].info, <cydriver.anon_union2*><void_ptr>info.getPtr(), sizeof(self._ptr[0].info))
-{{endif}}
-{{if 'struct CUdevprop_st' in found_types}}
-
-cdef class CUdevprop_st:
-    """
-    Legacy device properties
-
-    Attributes
-    ----------
-    maxThreadsPerBlock : int
-        Maximum number of threads per block
-    maxThreadsDim : List[int]
-        Maximum size of each dimension of a block
-    maxGridSize : List[int]
-        Maximum size of each dimension of a grid
-    sharedMemPerBlock : int
-        Shared memory available per block in bytes
-    totalConstantMemory : int
-        Constant memory available on device in bytes
-    SIMDWidth : int
-        Warp size in threads
-    memPitch : int
-        Maximum pitch in bytes allowed by memory copies
-    regsPerBlock : int
-        32-bit registers available per block
-    clockRate : int
-        Clock frequency in kilohertz
-    textureAlign : int
-        Alignment requirement for textures
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUdevprop_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['maxThreadsPerBlock : ' + str(self.maxThreadsPerBlock)]
-            except ValueError:
-                str_list += ['maxThreadsPerBlock : <ValueError>']
-            try:
-                str_list += ['maxThreadsDim : ' + str(self.maxThreadsDim)]
-            except ValueError:
-                str_list += ['maxThreadsDim : <ValueError>']
-            try:
-                str_list += ['maxGridSize : ' + str(self.maxGridSize)]
-            except ValueError:
-                str_list += ['maxGridSize : <ValueError>']
-            try:
-                str_list += ['sharedMemPerBlock : ' + str(self.sharedMemPerBlock)]
-            except ValueError:
-                str_list += ['sharedMemPerBlock : <ValueError>']
-            try:
-                str_list += ['totalConstantMemory : ' + str(self.totalConstantMemory)]
-            except ValueError:
-                str_list += ['totalConstantMemory : <ValueError>']
-            try:
-                str_list += ['SIMDWidth : ' + str(self.SIMDWidth)]
-            except ValueError:
-                str_list += ['SIMDWidth : <ValueError>']
-            try:
-                str_list += ['memPitch : ' + str(self.memPitch)]
-            except ValueError:
-                str_list += ['memPitch : <ValueError>']
-            try:
-                str_list += ['regsPerBlock : ' + str(self.regsPerBlock)]
-            except ValueError:
-                str_list += ['regsPerBlock : <ValueError>']
-            try:
-                str_list += ['clockRate : ' + str(self.clockRate)]
-            except ValueError:
-                str_list += ['clockRate : <ValueError>']
-            try:
-                str_list += ['textureAlign : ' + str(self.textureAlign)]
-            except ValueError:
-                str_list += ['textureAlign : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def maxThreadsPerBlock(self):
-        return self._ptr[0].maxThreadsPerBlock
-    @maxThreadsPerBlock.setter
-    def maxThreadsPerBlock(self, int maxThreadsPerBlock):
-        self._ptr[0].maxThreadsPerBlock = maxThreadsPerBlock
-    @property
-    def maxThreadsDim(self):
-        return self._ptr[0].maxThreadsDim
-    @maxThreadsDim.setter
-    def maxThreadsDim(self, maxThreadsDim):
-        self._ptr[0].maxThreadsDim = maxThreadsDim
-    @property
-    def maxGridSize(self):
-        return self._ptr[0].maxGridSize
-    @maxGridSize.setter
-    def maxGridSize(self, maxGridSize):
-        self._ptr[0].maxGridSize = maxGridSize
-    @property
-    def sharedMemPerBlock(self):
-        return self._ptr[0].sharedMemPerBlock
-    @sharedMemPerBlock.setter
-    def sharedMemPerBlock(self, int sharedMemPerBlock):
-        self._ptr[0].sharedMemPerBlock = sharedMemPerBlock
-    @property
-    def totalConstantMemory(self):
-        return self._ptr[0].totalConstantMemory
-    @totalConstantMemory.setter
-    def totalConstantMemory(self, int totalConstantMemory):
-        self._ptr[0].totalConstantMemory = totalConstantMemory
-    @property
-    def SIMDWidth(self):
-        return self._ptr[0].SIMDWidth
-    @SIMDWidth.setter
-    def SIMDWidth(self, int SIMDWidth):
-        self._ptr[0].SIMDWidth = SIMDWidth
-    @property
-    def memPitch(self):
-        return self._ptr[0].memPitch
-    @memPitch.setter
-    def memPitch(self, int memPitch):
-        self._ptr[0].memPitch = memPitch
-    @property
-    def regsPerBlock(self):
-        return self._ptr[0].regsPerBlock
-    @regsPerBlock.setter
-    def regsPerBlock(self, int regsPerBlock):
-        self._ptr[0].regsPerBlock = regsPerBlock
-    @property
-    def clockRate(self):
-        return self._ptr[0].clockRate
-    @clockRate.setter
-    def clockRate(self, int clockRate):
-        self._ptr[0].clockRate = clockRate
-    @property
-    def textureAlign(self):
-        return self._ptr[0].textureAlign
-    @textureAlign.setter
-    def textureAlign(self, int textureAlign):
-        self._ptr[0].textureAlign = textureAlign
-{{endif}}
-{{if 'struct CUaccessPolicyWindow_st' in found_types}}
-
-cdef class CUaccessPolicyWindow_st:
-    """
-    Specifies an access policy for a window, a contiguous extent of
-    memory beginning at base_ptr and ending at base_ptr + num_bytes.
-    num_bytes is limited by
-    CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE. Partition into
-    many segments and assign segments such that: sum of "hit segments"
-    / window == approx. ratio. sum of "miss segments" / window ==
-    approx 1-ratio. Segments and ratio specifications are fitted to the
-    capabilities of the architecture. Accesses in a hit segment apply
-    the hitProp access policy. Accesses in a miss segment apply the
-    missProp access policy.
-
-    Attributes
-    ----------
-    base_ptr : Any
-        Starting address of the access policy window. CUDA driver may align
-        it.
-    num_bytes : size_t
-        Size in bytes of the window policy. CUDA driver may restrict the
-        maximum size and alignment.
-    hitRatio : float
-        hitRatio specifies percentage of lines assigned hitProp, rest are
-        assigned missProp.
-    hitProp : CUaccessProperty
-        CUaccessProperty set for hit.
-    missProp : CUaccessProperty
-        CUaccessProperty set for miss. Must be either NORMAL or STREAMING
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUaccessPolicyWindow_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['base_ptr : ' + hex(self.base_ptr)]
-            except ValueError:
-                str_list += ['base_ptr : <ValueError>']
-            try:
-                str_list += ['num_bytes : ' + str(self.num_bytes)]
-            except ValueError:
-                str_list += ['num_bytes : <ValueError>']
-            try:
-                str_list += ['hitRatio : ' + str(self.hitRatio)]
-            except ValueError:
-                str_list += ['hitRatio : <ValueError>']
-            try:
-                str_list += ['hitProp : ' + str(self.hitProp)]
-            except ValueError:
-                str_list += ['hitProp : <ValueError>']
-            try:
-                str_list += ['missProp : ' + str(self.missProp)]
-            except ValueError:
-                str_list += ['missProp : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def base_ptr(self):
-        return <void_ptr>self._ptr[0].base_ptr
-    @base_ptr.setter
-    def base_ptr(self, base_ptr):
-        _cybase_ptr = utils.HelperInputVoidPtr(base_ptr)
-        self._ptr[0].base_ptr = <void*><void_ptr>_cybase_ptr.cptr
-    @property
-    def num_bytes(self):
-        return self._ptr[0].num_bytes
-    @num_bytes.setter
-    def num_bytes(self, size_t num_bytes):
-        self._ptr[0].num_bytes = num_bytes
-    @property
-    def hitRatio(self):
-        return self._ptr[0].hitRatio
-    @hitRatio.setter
-    def hitRatio(self, float hitRatio):
-        self._ptr[0].hitRatio = hitRatio
-    @property
-    def hitProp(self):
-        return CUaccessProperty(self._ptr[0].hitProp)
-    @hitProp.setter
-    def hitProp(self, hitProp not None : CUaccessProperty):
-        self._ptr[0].hitProp = hitProp.value
-    @property
-    def missProp(self):
-        return CUaccessProperty(self._ptr[0].missProp)
-    @missProp.setter
-    def missProp(self, missProp not None : CUaccessProperty):
-        self._ptr[0].missProp = missProp.value
-{{endif}}
-{{if 'struct CUDA_KERNEL_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_KERNEL_NODE_PARAMS_st:
-    """
-    GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : Any
-        Extra options
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_KERNEL_NODE_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._func = CUfunction(_ptr=<void_ptr>&self._ptr[0].func)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['func : ' + str(self.func)]
-            except ValueError:
-                str_list += ['func : <ValueError>']
-            try:
-                str_list += ['gridDimX : ' + str(self.gridDimX)]
-            except ValueError:
-                str_list += ['gridDimX : <ValueError>']
-            try:
-                str_list += ['gridDimY : ' + str(self.gridDimY)]
-            except ValueError:
-                str_list += ['gridDimY : <ValueError>']
-            try:
-                str_list += ['gridDimZ : ' + str(self.gridDimZ)]
-            except ValueError:
-                str_list += ['gridDimZ : <ValueError>']
-            try:
-                str_list += ['blockDimX : ' + str(self.blockDimX)]
-            except ValueError:
-                str_list += ['blockDimX : <ValueError>']
-            try:
-                str_list += ['blockDimY : ' + str(self.blockDimY)]
-            except ValueError:
-                str_list += ['blockDimY : <ValueError>']
-            try:
-                str_list += ['blockDimZ : ' + str(self.blockDimZ)]
-            except ValueError:
-                str_list += ['blockDimZ : <ValueError>']
-            try:
-                str_list += ['sharedMemBytes : ' + str(self.sharedMemBytes)]
-            except ValueError:
-                str_list += ['sharedMemBytes : <ValueError>']
-            try:
-                str_list += ['kernelParams : ' + str(self.kernelParams)]
-            except ValueError:
-                str_list += ['kernelParams : <ValueError>']
-            try:
-                str_list += ['extra : ' + str(self.extra)]
-            except ValueError:
-                str_list += ['extra : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def func(self):
-        return self._func
-    @func.setter
-    def func(self, func):
-        cdef cydriver.CUfunction cyfunc
-        if func is None:
-            cyfunc = <cydriver.CUfunction><void_ptr>0
-        elif isinstance(func, (CUfunction,)):
-            pfunc = int(func)
-            cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-        else:
-            pfunc = int(CUfunction(func))
-            cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-        self._func._ptr[0] = cyfunc
-    @property
-    def gridDimX(self):
-        return self._ptr[0].gridDimX
-    @gridDimX.setter
-    def gridDimX(self, unsigned int gridDimX):
-        self._ptr[0].gridDimX = gridDimX
-    @property
-    def gridDimY(self):
-        return self._ptr[0].gridDimY
-    @gridDimY.setter
-    def gridDimY(self, unsigned int gridDimY):
-        self._ptr[0].gridDimY = gridDimY
-    @property
-    def gridDimZ(self):
-        return self._ptr[0].gridDimZ
-    @gridDimZ.setter
-    def gridDimZ(self, unsigned int gridDimZ):
-        self._ptr[0].gridDimZ = gridDimZ
-    @property
-    def blockDimX(self):
-        return self._ptr[0].blockDimX
-    @blockDimX.setter
-    def blockDimX(self, unsigned int blockDimX):
-        self._ptr[0].blockDimX = blockDimX
-    @property
-    def blockDimY(self):
-        return self._ptr[0].blockDimY
-    @blockDimY.setter
-    def blockDimY(self, unsigned int blockDimY):
-        self._ptr[0].blockDimY = blockDimY
-    @property
-    def blockDimZ(self):
-        return self._ptr[0].blockDimZ
-    @blockDimZ.setter
-    def blockDimZ(self, unsigned int blockDimZ):
-        self._ptr[0].blockDimZ = blockDimZ
-    @property
-    def sharedMemBytes(self):
-        return self._ptr[0].sharedMemBytes
-    @sharedMemBytes.setter
-    def sharedMemBytes(self, unsigned int sharedMemBytes):
-        self._ptr[0].sharedMemBytes = sharedMemBytes
-    @property
-    def kernelParams(self):
-        return <void_ptr>self._ptr[0].kernelParams
-    @kernelParams.setter
-    def kernelParams(self, kernelParams):
-        self._cykernelParams = utils.HelperKernelParams(kernelParams)
-        self._ptr[0].kernelParams = <void**><void_ptr>self._cykernelParams.ckernelParams
-    @property
-    def extra(self):
-        return <void_ptr>self._ptr[0].extra
-    @extra.setter
-    def extra(self, void_ptr extra):
-        self._ptr[0].extra = <void**>extra
-{{endif}}
-{{if 'struct CUDA_KERNEL_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_KERNEL_NODE_PARAMS_v2_st:
-    """
-    GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : Any
-        Extra options
-    kern : CUkernel
-        Kernel to launch, will only be referenced if func is NULL
-    ctx : CUcontext
-        Context for the kernel task to run in. The value NULL will indicate
-        the current context should be used by the api. This field is
-        ignored if func is set.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_KERNEL_NODE_PARAMS_v2_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._func = CUfunction(_ptr=<void_ptr>&self._ptr[0].func)
-        self._kern = CUkernel(_ptr=<void_ptr>&self._ptr[0].kern)
-        self._ctx = CUcontext(_ptr=<void_ptr>&self._ptr[0].ctx)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['func : ' + str(self.func)]
-            except ValueError:
-                str_list += ['func : <ValueError>']
-            try:
-                str_list += ['gridDimX : ' + str(self.gridDimX)]
-            except ValueError:
-                str_list += ['gridDimX : <ValueError>']
-            try:
-                str_list += ['gridDimY : ' + str(self.gridDimY)]
-            except ValueError:
-                str_list += ['gridDimY : <ValueError>']
-            try:
-                str_list += ['gridDimZ : ' + str(self.gridDimZ)]
-            except ValueError:
-                str_list += ['gridDimZ : <ValueError>']
-            try:
-                str_list += ['blockDimX : ' + str(self.blockDimX)]
-            except ValueError:
-                str_list += ['blockDimX : <ValueError>']
-            try:
-                str_list += ['blockDimY : ' + str(self.blockDimY)]
-            except ValueError:
-                str_list += ['blockDimY : <ValueError>']
-            try:
-                str_list += ['blockDimZ : ' + str(self.blockDimZ)]
-            except ValueError:
-                str_list += ['blockDimZ : <ValueError>']
-            try:
-                str_list += ['sharedMemBytes : ' + str(self.sharedMemBytes)]
-            except ValueError:
-                str_list += ['sharedMemBytes : <ValueError>']
-            try:
-                str_list += ['kernelParams : ' + str(self.kernelParams)]
-            except ValueError:
-                str_list += ['kernelParams : <ValueError>']
-            try:
-                str_list += ['extra : ' + str(self.extra)]
-            except ValueError:
-                str_list += ['extra : <ValueError>']
-            try:
-                str_list += ['kern : ' + str(self.kern)]
-            except ValueError:
-                str_list += ['kern : <ValueError>']
-            try:
-                str_list += ['ctx : ' + str(self.ctx)]
-            except ValueError:
-                str_list += ['ctx : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def func(self):
-        return self._func
-    @func.setter
-    def func(self, func):
-        cdef cydriver.CUfunction cyfunc
-        if func is None:
-            cyfunc = <cydriver.CUfunction><void_ptr>0
-        elif isinstance(func, (CUfunction,)):
-            pfunc = int(func)
-            cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-        else:
-            pfunc = int(CUfunction(func))
-            cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-        self._func._ptr[0] = cyfunc
-    @property
-    def gridDimX(self):
-        return self._ptr[0].gridDimX
-    @gridDimX.setter
-    def gridDimX(self, unsigned int gridDimX):
-        self._ptr[0].gridDimX = gridDimX
-    @property
-    def gridDimY(self):
-        return self._ptr[0].gridDimY
-    @gridDimY.setter
-    def gridDimY(self, unsigned int gridDimY):
-        self._ptr[0].gridDimY = gridDimY
-    @property
-    def gridDimZ(self):
-        return self._ptr[0].gridDimZ
-    @gridDimZ.setter
-    def gridDimZ(self, unsigned int gridDimZ):
-        self._ptr[0].gridDimZ = gridDimZ
-    @property
-    def blockDimX(self):
-        return self._ptr[0].blockDimX
-    @blockDimX.setter
-    def blockDimX(self, unsigned int blockDimX):
-        self._ptr[0].blockDimX = blockDimX
-    @property
-    def blockDimY(self):
-        return self._ptr[0].blockDimY
-    @blockDimY.setter
-    def blockDimY(self, unsigned int blockDimY):
-        self._ptr[0].blockDimY = blockDimY
-    @property
-    def blockDimZ(self):
-        return self._ptr[0].blockDimZ
-    @blockDimZ.setter
-    def blockDimZ(self, unsigned int blockDimZ):
-        self._ptr[0].blockDimZ = blockDimZ
-    @property
-    def sharedMemBytes(self):
-        return self._ptr[0].sharedMemBytes
-    @sharedMemBytes.setter
-    def sharedMemBytes(self, unsigned int sharedMemBytes):
-        self._ptr[0].sharedMemBytes = sharedMemBytes
-    @property
-    def kernelParams(self):
-        return <void_ptr>self._ptr[0].kernelParams
-    @kernelParams.setter
-    def kernelParams(self, kernelParams):
-        self._cykernelParams = utils.HelperKernelParams(kernelParams)
-        self._ptr[0].kernelParams = <void**><void_ptr>self._cykernelParams.ckernelParams
-    @property
-    def extra(self):
-        return <void_ptr>self._ptr[0].extra
-    @extra.setter
-    def extra(self, void_ptr extra):
-        self._ptr[0].extra = <void**>extra
-    @property
-    def kern(self):
-        return self._kern
-    @kern.setter
-    def kern(self, kern):
-        cdef cydriver.CUkernel cykern
-        if kern is None:
-            cykern = <cydriver.CUkernel><void_ptr>0
-        elif isinstance(kern, (CUkernel,)):
-            pkern = int(kern)
-            cykern = <cydriver.CUkernel><void_ptr>pkern
-        else:
-            pkern = int(CUkernel(kern))
-            cykern = <cydriver.CUkernel><void_ptr>pkern
-        self._kern._ptr[0] = cykern
-    @property
-    def ctx(self):
-        return self._ctx
-    @ctx.setter
-    def ctx(self, ctx):
-        cdef cydriver.CUcontext cyctx
-        if ctx is None:
-            cyctx = <cydriver.CUcontext><void_ptr>0
-        elif isinstance(ctx, (CUcontext,)):
-            pctx = int(ctx)
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        else:
-            pctx = int(CUcontext(ctx))
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        self._ctx._ptr[0] = cyctx
-{{endif}}
-{{if 'struct CUDA_KERNEL_NODE_PARAMS_v3_st' in found_types}}
-
-cdef class CUDA_KERNEL_NODE_PARAMS_v3_st:
-    """
-    GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : Any
-        Extra options
-    kern : CUkernel
-        Kernel to launch, will only be referenced if func is NULL
-    ctx : CUcontext
-        Context for the kernel task to run in. The value NULL will indicate
-        the current context should be used by the api. This field is
-        ignored if func is set.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_KERNEL_NODE_PARAMS_v3_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._func = CUfunction(_ptr=<void_ptr>&self._ptr[0].func)
-        self._kern = CUkernel(_ptr=<void_ptr>&self._ptr[0].kern)
-        self._ctx = CUcontext(_ptr=<void_ptr>&self._ptr[0].ctx)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['func : ' + str(self.func)]
-            except ValueError:
-                str_list += ['func : <ValueError>']
-            try:
-                str_list += ['gridDimX : ' + str(self.gridDimX)]
-            except ValueError:
-                str_list += ['gridDimX : <ValueError>']
-            try:
-                str_list += ['gridDimY : ' + str(self.gridDimY)]
-            except ValueError:
-                str_list += ['gridDimY : <ValueError>']
-            try:
-                str_list += ['gridDimZ : ' + str(self.gridDimZ)]
-            except ValueError:
-                str_list += ['gridDimZ : <ValueError>']
-            try:
-                str_list += ['blockDimX : ' + str(self.blockDimX)]
-            except ValueError:
-                str_list += ['blockDimX : <ValueError>']
-            try:
-                str_list += ['blockDimY : ' + str(self.blockDimY)]
-            except ValueError:
-                str_list += ['blockDimY : <ValueError>']
-            try:
-                str_list += ['blockDimZ : ' + str(self.blockDimZ)]
-            except ValueError:
-                str_list += ['blockDimZ : <ValueError>']
-            try:
-                str_list += ['sharedMemBytes : ' + str(self.sharedMemBytes)]
-            except ValueError:
-                str_list += ['sharedMemBytes : <ValueError>']
-            try:
-                str_list += ['kernelParams : ' + str(self.kernelParams)]
-            except ValueError:
-                str_list += ['kernelParams : <ValueError>']
-            try:
-                str_list += ['extra : ' + str(self.extra)]
-            except ValueError:
-                str_list += ['extra : <ValueError>']
-            try:
-                str_list += ['kern : ' + str(self.kern)]
-            except ValueError:
-                str_list += ['kern : <ValueError>']
-            try:
-                str_list += ['ctx : ' + str(self.ctx)]
-            except ValueError:
-                str_list += ['ctx : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def func(self):
-        return self._func
-    @func.setter
-    def func(self, func):
-        cdef cydriver.CUfunction cyfunc
-        if func is None:
-            cyfunc = <cydriver.CUfunction><void_ptr>0
-        elif isinstance(func, (CUfunction,)):
-            pfunc = int(func)
-            cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-        else:
-            pfunc = int(CUfunction(func))
-            cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-        self._func._ptr[0] = cyfunc
-    @property
-    def gridDimX(self):
-        return self._ptr[0].gridDimX
-    @gridDimX.setter
-    def gridDimX(self, unsigned int gridDimX):
-        self._ptr[0].gridDimX = gridDimX
-    @property
-    def gridDimY(self):
-        return self._ptr[0].gridDimY
-    @gridDimY.setter
-    def gridDimY(self, unsigned int gridDimY):
-        self._ptr[0].gridDimY = gridDimY
-    @property
-    def gridDimZ(self):
-        return self._ptr[0].gridDimZ
-    @gridDimZ.setter
-    def gridDimZ(self, unsigned int gridDimZ):
-        self._ptr[0].gridDimZ = gridDimZ
-    @property
-    def blockDimX(self):
-        return self._ptr[0].blockDimX
-    @blockDimX.setter
-    def blockDimX(self, unsigned int blockDimX):
-        self._ptr[0].blockDimX = blockDimX
-    @property
-    def blockDimY(self):
-        return self._ptr[0].blockDimY
-    @blockDimY.setter
-    def blockDimY(self, unsigned int blockDimY):
-        self._ptr[0].blockDimY = blockDimY
-    @property
-    def blockDimZ(self):
-        return self._ptr[0].blockDimZ
-    @blockDimZ.setter
-    def blockDimZ(self, unsigned int blockDimZ):
-        self._ptr[0].blockDimZ = blockDimZ
-    @property
-    def sharedMemBytes(self):
-        return self._ptr[0].sharedMemBytes
-    @sharedMemBytes.setter
-    def sharedMemBytes(self, unsigned int sharedMemBytes):
-        self._ptr[0].sharedMemBytes = sharedMemBytes
-    @property
-    def kernelParams(self):
-        return <void_ptr>self._ptr[0].kernelParams
-    @kernelParams.setter
-    def kernelParams(self, kernelParams):
-        self._cykernelParams = utils.HelperKernelParams(kernelParams)
-        self._ptr[0].kernelParams = <void**><void_ptr>self._cykernelParams.ckernelParams
-    @property
-    def extra(self):
-        return <void_ptr>self._ptr[0].extra
-    @extra.setter
-    def extra(self, void_ptr extra):
-        self._ptr[0].extra = <void**>extra
-    @property
-    def kern(self):
-        return self._kern
-    @kern.setter
-    def kern(self, kern):
-        cdef cydriver.CUkernel cykern
-        if kern is None:
-            cykern = <cydriver.CUkernel><void_ptr>0
-        elif isinstance(kern, (CUkernel,)):
-            pkern = int(kern)
-            cykern = <cydriver.CUkernel><void_ptr>pkern
-        else:
-            pkern = int(CUkernel(kern))
-            cykern = <cydriver.CUkernel><void_ptr>pkern
-        self._kern._ptr[0] = cykern
-    @property
-    def ctx(self):
-        return self._ctx
-    @ctx.setter
-    def ctx(self, ctx):
-        cdef cydriver.CUcontext cyctx
-        if ctx is None:
-            cyctx = <cydriver.CUcontext><void_ptr>0
-        elif isinstance(ctx, (CUcontext,)):
-            pctx = int(ctx)
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        else:
-            pctx = int(CUcontext(ctx))
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        self._ctx._ptr[0] = cyctx
-{{endif}}
-{{if 'struct CUDA_MEMSET_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_MEMSET_NODE_PARAMS_st:
-    """
-    Memset node parameters
-
-    Attributes
-    ----------
-    dst : CUdeviceptr
-        Destination device pointer
-    pitch : size_t
-        Pitch of destination device pointer. Unused if height is 1
-    value : unsigned int
-        Value to be set
-    elementSize : unsigned int
-        Size of each element in bytes. Must be 1, 2, or 4.
-    width : size_t
-        Width of the row in elements
-    height : size_t
-        Number of rows
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_MEMSET_NODE_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._dst = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].dst)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['dst : ' + str(self.dst)]
-            except ValueError:
-                str_list += ['dst : <ValueError>']
-            try:
-                str_list += ['pitch : ' + str(self.pitch)]
-            except ValueError:
-                str_list += ['pitch : <ValueError>']
-            try:
-                str_list += ['value : ' + str(self.value)]
-            except ValueError:
-                str_list += ['value : <ValueError>']
-            try:
-                str_list += ['elementSize : ' + str(self.elementSize)]
-            except ValueError:
-                str_list += ['elementSize : <ValueError>']
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def dst(self):
-        return self._dst
-    @dst.setter
-    def dst(self, dst):
-        cdef cydriver.CUdeviceptr cydst
-        if dst is None:
-            cydst = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(dst, (CUdeviceptr)):
-            pdst = int(dst)
-            cydst = <cydriver.CUdeviceptr><void_ptr>pdst
-        else:
-            pdst = int(CUdeviceptr(dst))
-            cydst = <cydriver.CUdeviceptr><void_ptr>pdst
-        self._dst._ptr[0] = cydst
-
-    @property
-    def pitch(self):
-        return self._ptr[0].pitch
-    @pitch.setter
-    def pitch(self, size_t pitch):
-        self._ptr[0].pitch = pitch
-    @property
-    def value(self):
-        return self._ptr[0].value
-    @value.setter
-    def value(self, unsigned int value):
-        self._ptr[0].value = value
-    @property
-    def elementSize(self):
-        return self._ptr[0].elementSize
-    @elementSize.setter
-    def elementSize(self, unsigned int elementSize):
-        self._ptr[0].elementSize = elementSize
-    @property
-    def width(self):
-        return self._ptr[0].width
-    @width.setter
-    def width(self, size_t width):
-        self._ptr[0].width = width
-    @property
-    def height(self):
-        return self._ptr[0].height
-    @height.setter
-    def height(self, size_t height):
-        self._ptr[0].height = height
-{{endif}}
-{{if 'struct CUDA_MEMSET_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_MEMSET_NODE_PARAMS_v2_st:
-    """
-    Memset node parameters
-
-    Attributes
-    ----------
-    dst : CUdeviceptr
-        Destination device pointer
-    pitch : size_t
-        Pitch of destination device pointer. Unused if height is 1
-    value : unsigned int
-        Value to be set
-    elementSize : unsigned int
-        Size of each element in bytes. Must be 1, 2, or 4.
-    width : size_t
-        Width of the row in elements
-    height : size_t
-        Number of rows
-    ctx : CUcontext
-        Context on which to run the node
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_MEMSET_NODE_PARAMS_v2_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._dst = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].dst)
-        self._ctx = CUcontext(_ptr=<void_ptr>&self._ptr[0].ctx)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['dst : ' + str(self.dst)]
-            except ValueError:
-                str_list += ['dst : <ValueError>']
-            try:
-                str_list += ['pitch : ' + str(self.pitch)]
-            except ValueError:
-                str_list += ['pitch : <ValueError>']
-            try:
-                str_list += ['value : ' + str(self.value)]
-            except ValueError:
-                str_list += ['value : <ValueError>']
-            try:
-                str_list += ['elementSize : ' + str(self.elementSize)]
-            except ValueError:
-                str_list += ['elementSize : <ValueError>']
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            try:
-                str_list += ['ctx : ' + str(self.ctx)]
-            except ValueError:
-                str_list += ['ctx : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def dst(self):
-        return self._dst
-    @dst.setter
-    def dst(self, dst):
-        cdef cydriver.CUdeviceptr cydst
-        if dst is None:
-            cydst = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(dst, (CUdeviceptr)):
-            pdst = int(dst)
-            cydst = <cydriver.CUdeviceptr><void_ptr>pdst
-        else:
-            pdst = int(CUdeviceptr(dst))
-            cydst = <cydriver.CUdeviceptr><void_ptr>pdst
-        self._dst._ptr[0] = cydst
-
-    @property
-    def pitch(self):
-        return self._ptr[0].pitch
-    @pitch.setter
-    def pitch(self, size_t pitch):
-        self._ptr[0].pitch = pitch
-    @property
-    def value(self):
-        return self._ptr[0].value
-    @value.setter
-    def value(self, unsigned int value):
-        self._ptr[0].value = value
-    @property
-    def elementSize(self):
-        return self._ptr[0].elementSize
-    @elementSize.setter
-    def elementSize(self, unsigned int elementSize):
-        self._ptr[0].elementSize = elementSize
-    @property
-    def width(self):
-        return self._ptr[0].width
-    @width.setter
-    def width(self, size_t width):
-        self._ptr[0].width = width
-    @property
-    def height(self):
-        return self._ptr[0].height
-    @height.setter
-    def height(self, size_t height):
-        self._ptr[0].height = height
-    @property
-    def ctx(self):
-        return self._ctx
-    @ctx.setter
-    def ctx(self, ctx):
-        cdef cydriver.CUcontext cyctx
-        if ctx is None:
-            cyctx = <cydriver.CUcontext><void_ptr>0
-        elif isinstance(ctx, (CUcontext,)):
-            pctx = int(ctx)
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        else:
-            pctx = int(CUcontext(ctx))
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        self._ctx._ptr[0] = cyctx
-{{endif}}
-{{if 'struct CUDA_HOST_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_HOST_NODE_PARAMS_st:
-    """
-    Host node parameters
-
-    Attributes
-    ----------
-    fn : CUhostFn
-        The function to call when the node executes
-    userData : Any
-        Argument to pass to the function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_HOST_NODE_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._fn = CUhostFn(_ptr=<void_ptr>&self._ptr[0].fn)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fn : ' + str(self.fn)]
-            except ValueError:
-                str_list += ['fn : <ValueError>']
-            try:
-                str_list += ['userData : ' + hex(self.userData)]
-            except ValueError:
-                str_list += ['userData : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fn(self):
-        return self._fn
-    @fn.setter
-    def fn(self, fn):
-        cdef cydriver.CUhostFn cyfn
-        if fn is None:
-            cyfn = <cydriver.CUhostFn><void_ptr>0
-        elif isinstance(fn, (CUhostFn)):
-            pfn = int(fn)
-            cyfn = <cydriver.CUhostFn><void_ptr>pfn
-        else:
-            pfn = int(CUhostFn(fn))
-            cyfn = <cydriver.CUhostFn><void_ptr>pfn
-        self._fn._ptr[0] = cyfn
-    @property
-    def userData(self):
-        return <void_ptr>self._ptr[0].userData
-    @userData.setter
-    def userData(self, userData):
-        _cyuserData = utils.HelperInputVoidPtr(userData)
-        self._ptr[0].userData = <void*><void_ptr>_cyuserData.cptr
-{{endif}}
-{{if 'struct CUDA_HOST_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_HOST_NODE_PARAMS_v2_st:
-    """
-    Host node parameters
-
-    Attributes
-    ----------
-    fn : CUhostFn
-        The function to call when the node executes
-    userData : Any
-        Argument to pass to the function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_HOST_NODE_PARAMS_v2_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._fn = CUhostFn(_ptr=<void_ptr>&self._ptr[0].fn)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fn : ' + str(self.fn)]
-            except ValueError:
-                str_list += ['fn : <ValueError>']
-            try:
-                str_list += ['userData : ' + hex(self.userData)]
-            except ValueError:
-                str_list += ['userData : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fn(self):
-        return self._fn
-    @fn.setter
-    def fn(self, fn):
-        cdef cydriver.CUhostFn cyfn
-        if fn is None:
-            cyfn = <cydriver.CUhostFn><void_ptr>0
-        elif isinstance(fn, (CUhostFn)):
-            pfn = int(fn)
-            cyfn = <cydriver.CUhostFn><void_ptr>pfn
-        else:
-            pfn = int(CUhostFn(fn))
-            cyfn = <cydriver.CUhostFn><void_ptr>pfn
-        self._fn._ptr[0] = cyfn
-    @property
-    def userData(self):
-        return <void_ptr>self._ptr[0].userData
-    @userData.setter
-    def userData(self, userData):
-        _cyuserData = utils.HelperInputVoidPtr(userData)
-        self._ptr[0].userData = <void*><void_ptr>_cyuserData.cptr
-{{endif}}
-{{if 'struct CUDA_CONDITIONAL_NODE_PARAMS' in found_types}}
-
-cdef class CUDA_CONDITIONAL_NODE_PARAMS:
-    """
-    Conditional node parameters
-
-    Attributes
-    ----------
-    handle : CUgraphConditionalHandle
-        Conditional node handle. Handles must be created in advance of
-        creating the node using cuGraphConditionalHandleCreate.
-    type : CUgraphConditionalNodeType
-        Type of conditional node.
-    size : unsigned int
-        Size of graph output array. Must be 1.
-    phGraph_out : CUgraph
-        CUDA-owned array populated with conditional node child graphs
-        during creation of the node. Valid for the lifetime of the
-        conditional node. The contents of the graph(s) are subject to the
-        following constraints:   - Allowed node types are kernel nodes,
-        empty nodes, child graphs, memsets, memcopies, and conditionals.
-        This applies recursively to child graphs and conditional bodies.
-        - All kernels, including kernels in nested conditionals or child
-        graphs at any level, must belong to the same CUDA context.
-        These graphs may be populated using graph node creation APIs or
-        cuStreamBeginCaptureToGraph.
-    ctx : CUcontext
-        Context on which to run the node. Must match context used to create
-        the handle and all body nodes.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_CONDITIONAL_NODE_PARAMS *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._handle = CUgraphConditionalHandle(_ptr=<void_ptr>&self._ptr[0].handle)
-        self._ctx = CUcontext(_ptr=<void_ptr>&self._ptr[0].ctx)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['handle : ' + str(self.handle)]
-            except ValueError:
-                str_list += ['handle : <ValueError>']
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['size : ' + str(self.size)]
-            except ValueError:
-                str_list += ['size : <ValueError>']
-            try:
-                str_list += ['phGraph_out : ' + str(self.phGraph_out)]
-            except ValueError:
-                str_list += ['phGraph_out : <ValueError>']
-            try:
-                str_list += ['ctx : ' + str(self.ctx)]
-            except ValueError:
-                str_list += ['ctx : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def handle(self):
-        return self._handle
-    @handle.setter
-    def handle(self, handle):
-        cdef cydriver.CUgraphConditionalHandle cyhandle
-        if handle is None:
-            cyhandle = <cydriver.CUgraphConditionalHandle><void_ptr>0
-        elif isinstance(handle, (CUgraphConditionalHandle)):
-            phandle = int(handle)
-            cyhandle = <cydriver.CUgraphConditionalHandle><void_ptr>phandle
-        else:
-            phandle = int(CUgraphConditionalHandle(handle))
-            cyhandle = <cydriver.CUgraphConditionalHandle><void_ptr>phandle
-        self._handle._ptr[0] = cyhandle
-
-    @property
-    def type(self):
-        return CUgraphConditionalNodeType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : CUgraphConditionalNodeType):
-        self._ptr[0].type = type.value
-    @property
-    def size(self):
-        return self._ptr[0].size
-    @size.setter
-    def size(self, unsigned int size):
-        self._ptr[0].size = size
-    @property
-    def phGraph_out(self):
-        arrs = [<void_ptr>self._ptr[0].phGraph_out + x*sizeof(cydriver.CUgraph) for x in range(self.size)]
-        return [CUgraph(_ptr=arr) for arr in arrs]
-    @property
-    def ctx(self):
-        return self._ctx
-    @ctx.setter
-    def ctx(self, ctx):
-        cdef cydriver.CUcontext cyctx
-        if ctx is None:
-            cyctx = <cydriver.CUcontext><void_ptr>0
-        elif isinstance(ctx, (CUcontext,)):
-            pctx = int(ctx)
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        else:
-            pctx = int(CUcontext(ctx))
-            cyctx = <cydriver.CUcontext><void_ptr>pctx
-        self._ctx._ptr[0] = cyctx
-{{endif}}
-{{if 'struct CUgraphEdgeData_st' in found_types}}
-
-cdef class CUgraphEdgeData_st:
-    """
-    Optional annotation for edges in a CUDA graph. Note, all edges
-    implicitly have annotations and default to a zero-initialized value
-    if not specified. A zero-initialized struct indicates a standard
-    full serialization of two nodes with memory visibility.
-
-    Attributes
-    ----------
-    from_port : bytes
-        This indicates when the dependency is triggered from the upstream
-        node on the edge. The meaning is specfic to the node type. A value
-        of 0 in all cases means full completion of the upstream node, with
-        memory visibility to the downstream node or portion thereof
-        (indicated by `to_port`).   Only kernel nodes define non-zero
-        ports. A kernel node can use the following output port types:
-        CU_GRAPH_KERNEL_NODE_PORT_DEFAULT,
-        CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC, or
-        CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER.
-    to_port : bytes
-        This indicates what portion of the downstream node is dependent on
-        the upstream node or portion thereof (indicated by `from_port`).
-        The meaning is specific to the node type. A value of 0 in all cases
-        means the entirety of the downstream node is dependent on the
-        upstream work.   Currently no node types define non-zero ports.
-        Accordingly, this field must be set to zero.
-    type : bytes
-        This should be populated with a value from CUgraphDependencyType.
-        (It is typed as char due to compiler-specific layout of bitfields.)
-        See CUgraphDependencyType.
-    reserved : bytes
-        These bytes are unused and must be zeroed. This ensures
-        compatibility if additional fields are added in the future.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUgraphEdgeData_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['from_port : ' + str(self.from_port)]
-            except ValueError:
-                str_list += ['from_port : <ValueError>']
-            try:
-                str_list += ['to_port : ' + str(self.to_port)]
-            except ValueError:
-                str_list += ['to_port : <ValueError>']
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def from_port(self):
-        return self._ptr[0].from_port
-    @from_port.setter
-    def from_port(self, unsigned char from_port):
-        self._ptr[0].from_port = from_port
-    @property
-    def to_port(self):
-        return self._ptr[0].to_port
-    @to_port.setter
-    def to_port(self, unsigned char to_port):
-        self._ptr[0].to_port = to_port
-    @property
-    def type(self):
-        return self._ptr[0].type
-    @type.setter
-    def type(self, unsigned char type):
-        self._ptr[0].type = type
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(<char*>self._ptr[0].reserved, 5)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 5:
-            raise ValueError("reserved length must be 5, is " + str(len(reserved)))
-        for i, b in enumerate(reserved):
-            self._ptr[0].reserved[i] = b
-{{endif}}
-{{if 'struct CUDA_GRAPH_INSTANTIATE_PARAMS_st' in found_types}}
-
-cdef class CUDA_GRAPH_INSTANTIATE_PARAMS_st:
-    """
-    Graph instantiation parameters
-
-    Attributes
-    ----------
-    flags : cuuint64_t
-        Instantiation flags
-    hUploadStream : CUstream
-        Upload stream
-    hErrNode_out : CUgraphNode
-        The node which caused instantiation to fail, if any
-    result_out : CUgraphInstantiateResult
-        Whether instantiation was successful. If it failed, the reason why
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_GRAPH_INSTANTIATE_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._flags = cuuint64_t(_ptr=<void_ptr>&self._ptr[0].flags)
-        self._hUploadStream = CUstream(_ptr=<void_ptr>&self._ptr[0].hUploadStream)
-        self._hErrNode_out = CUgraphNode(_ptr=<void_ptr>&self._ptr[0].hErrNode_out)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['hUploadStream : ' + str(self.hUploadStream)]
-            except ValueError:
-                str_list += ['hUploadStream : <ValueError>']
-            try:
-                str_list += ['hErrNode_out : ' + str(self.hErrNode_out)]
-            except ValueError:
-                str_list += ['hErrNode_out : <ValueError>']
-            try:
-                str_list += ['result_out : ' + str(self.result_out)]
-            except ValueError:
-                str_list += ['result_out : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def flags(self):
-        return self._flags
-    @flags.setter
-    def flags(self, flags):
-        cdef cydriver.cuuint64_t cyflags
-        if flags is None:
-            cyflags = <cydriver.cuuint64_t><void_ptr>0
-        elif isinstance(flags, (cuuint64_t)):
-            pflags = int(flags)
-            cyflags = <cydriver.cuuint64_t><void_ptr>pflags
-        else:
-            pflags = int(cuuint64_t(flags))
-            cyflags = <cydriver.cuuint64_t><void_ptr>pflags
-        self._flags._ptr[0] = cyflags
-
-    @property
-    def hUploadStream(self):
-        return self._hUploadStream
-    @hUploadStream.setter
-    def hUploadStream(self, hUploadStream):
-        cdef cydriver.CUstream cyhUploadStream
-        if hUploadStream is None:
-            cyhUploadStream = <cydriver.CUstream><void_ptr>0
-        elif isinstance(hUploadStream, (CUstream,)):
-            phUploadStream = int(hUploadStream)
-            cyhUploadStream = <cydriver.CUstream><void_ptr>phUploadStream
-        else:
-            phUploadStream = int(CUstream(hUploadStream))
-            cyhUploadStream = <cydriver.CUstream><void_ptr>phUploadStream
-        self._hUploadStream._ptr[0] = cyhUploadStream
-    @property
-    def hErrNode_out(self):
-        return self._hErrNode_out
-    @hErrNode_out.setter
-    def hErrNode_out(self, hErrNode_out):
-        cdef cydriver.CUgraphNode cyhErrNode_out
-        if hErrNode_out is None:
-            cyhErrNode_out = <cydriver.CUgraphNode><void_ptr>0
-        elif isinstance(hErrNode_out, (CUgraphNode,)):
-            phErrNode_out = int(hErrNode_out)
-            cyhErrNode_out = <cydriver.CUgraphNode><void_ptr>phErrNode_out
-        else:
-            phErrNode_out = int(CUgraphNode(hErrNode_out))
-            cyhErrNode_out = <cydriver.CUgraphNode><void_ptr>phErrNode_out
-        self._hErrNode_out._ptr[0] = cyhErrNode_out
-    @property
-    def result_out(self):
-        return CUgraphInstantiateResult(self._ptr[0].result_out)
-    @result_out.setter
-    def result_out(self, result_out not None : CUgraphInstantiateResult):
-        self._ptr[0].result_out = result_out.value
-{{endif}}
-{{if 'struct CUlaunchMemSyncDomainMap_st' in found_types}}
-
-cdef class CUlaunchMemSyncDomainMap_st:
-    """
-    Memory Synchronization Domain map  See ::cudaLaunchMemSyncDomain.
-    By default, kernels are launched in domain 0. Kernel launched with
-    CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE will have a different domain ID.
-    User may also alter the domain ID with CUlaunchMemSyncDomainMap for
-    a specific stream / graph node / kernel launch. See
-    CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP.  Domain ID range is
-    available through CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT.
-
-    Attributes
-    ----------
-    default_ : bytes
-        The default domain ID to use for designated kernels
-    remote : bytes
-        The remote domain ID to use for designated kernels
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUlaunchMemSyncDomainMap_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['default_ : ' + str(self.default_)]
-            except ValueError:
-                str_list += ['default_ : <ValueError>']
-            try:
-                str_list += ['remote : ' + str(self.remote)]
-            except ValueError:
-                str_list += ['remote : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def default_(self):
-        return self._ptr[0].default_
-    @default_.setter
-    def default_(self, unsigned char default_):
-        self._ptr[0].default_ = default_
-    @property
-    def remote(self):
-        return self._ptr[0].remote
-    @remote.setter
-    def remote(self, unsigned char remote):
-        self._ptr[0].remote = remote
-{{endif}}
-{{if 'union CUlaunchAttributeValue_union' in found_types}}
-
-cdef class anon_struct1:
-    """
-    Attributes
-    ----------
-    x : unsigned int
-
-    y : unsigned int
-
-    z : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUlaunchAttributeValue_union *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].clusterDim
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['x : ' + str(self.x)]
-            except ValueError:
-                str_list += ['x : <ValueError>']
-            try:
-                str_list += ['y : ' + str(self.y)]
-            except ValueError:
-                str_list += ['y : <ValueError>']
-            try:
-                str_list += ['z : ' + str(self.z)]
-            except ValueError:
-                str_list += ['z : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def x(self):
-        return self._ptr[0].clusterDim.x
-    @x.setter
-    def x(self, unsigned int x):
-        self._ptr[0].clusterDim.x = x
-    @property
-    def y(self):
-        return self._ptr[0].clusterDim.y
-    @y.setter
-    def y(self, unsigned int y):
-        self._ptr[0].clusterDim.y = y
-    @property
-    def z(self):
-        return self._ptr[0].clusterDim.z
-    @z.setter
-    def z(self, unsigned int z):
-        self._ptr[0].clusterDim.z = z
-{{endif}}
-{{if 'union CUlaunchAttributeValue_union' in found_types}}
-
-cdef class anon_struct2:
-    """
-    Attributes
-    ----------
-    event : CUevent
-
-    flags : int
-
-    triggerAtBlockStart : int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUlaunchAttributeValue_union *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._event = CUevent(_ptr=<void_ptr>&self._ptr[0].programmaticEvent.event)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].programmaticEvent
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['event : ' + str(self.event)]
-            except ValueError:
-                str_list += ['event : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['triggerAtBlockStart : ' + str(self.triggerAtBlockStart)]
-            except ValueError:
-                str_list += ['triggerAtBlockStart : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def event(self):
-        return self._event
-    @event.setter
-    def event(self, event):
-        cdef cydriver.CUevent cyevent
-        if event is None:
-            cyevent = <cydriver.CUevent><void_ptr>0
-        elif isinstance(event, (CUevent,)):
-            pevent = int(event)
-            cyevent = <cydriver.CUevent><void_ptr>pevent
-        else:
-            pevent = int(CUevent(event))
-            cyevent = <cydriver.CUevent><void_ptr>pevent
-        self._event._ptr[0] = cyevent
-    @property
-    def flags(self):
-        return self._ptr[0].programmaticEvent.flags
-    @flags.setter
-    def flags(self, int flags):
-        self._ptr[0].programmaticEvent.flags = flags
-    @property
-    def triggerAtBlockStart(self):
-        return self._ptr[0].programmaticEvent.triggerAtBlockStart
-    @triggerAtBlockStart.setter
-    def triggerAtBlockStart(self, int triggerAtBlockStart):
-        self._ptr[0].programmaticEvent.triggerAtBlockStart = triggerAtBlockStart
-{{endif}}
-{{if 'union CUlaunchAttributeValue_union' in found_types}}
-
-cdef class anon_struct3:
-    """
-    Attributes
-    ----------
-    event : CUevent
-
-    flags : int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUlaunchAttributeValue_union *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._event = CUevent(_ptr=<void_ptr>&self._ptr[0].launchCompletionEvent.event)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].launchCompletionEvent
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['event : ' + str(self.event)]
-            except ValueError:
-                str_list += ['event : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def event(self):
-        return self._event
-    @event.setter
-    def event(self, event):
-        cdef cydriver.CUevent cyevent
-        if event is None:
-            cyevent = <cydriver.CUevent><void_ptr>0
-        elif isinstance(event, (CUevent,)):
-            pevent = int(event)
-            cyevent = <cydriver.CUevent><void_ptr>pevent
-        else:
-            pevent = int(CUevent(event))
-            cyevent = <cydriver.CUevent><void_ptr>pevent
-        self._event._ptr[0] = cyevent
-    @property
-    def flags(self):
-        return self._ptr[0].launchCompletionEvent.flags
-    @flags.setter
-    def flags(self, int flags):
-        self._ptr[0].launchCompletionEvent.flags = flags
-{{endif}}
-{{if 'union CUlaunchAttributeValue_union' in found_types}}
-
-cdef class anon_struct4:
-    """
-    Attributes
-    ----------
-    deviceUpdatable : int
-
-    devNode : CUgraphDeviceNode
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUlaunchAttributeValue_union *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._devNode = CUgraphDeviceNode(_ptr=<void_ptr>&self._ptr[0].deviceUpdatableKernelNode.devNode)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].deviceUpdatableKernelNode
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['deviceUpdatable : ' + str(self.deviceUpdatable)]
-            except ValueError:
-                str_list += ['deviceUpdatable : <ValueError>']
-            try:
-                str_list += ['devNode : ' + str(self.devNode)]
-            except ValueError:
-                str_list += ['devNode : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def deviceUpdatable(self):
-        return self._ptr[0].deviceUpdatableKernelNode.deviceUpdatable
-    @deviceUpdatable.setter
-    def deviceUpdatable(self, int deviceUpdatable):
-        self._ptr[0].deviceUpdatableKernelNode.deviceUpdatable = deviceUpdatable
-    @property
-    def devNode(self):
-        return self._devNode
-    @devNode.setter
-    def devNode(self, devNode):
-        cdef cydriver.CUgraphDeviceNode cydevNode
-        if devNode is None:
-            cydevNode = <cydriver.CUgraphDeviceNode><void_ptr>0
-        elif isinstance(devNode, (CUgraphDeviceNode,)):
-            pdevNode = int(devNode)
-            cydevNode = <cydriver.CUgraphDeviceNode><void_ptr>pdevNode
-        else:
-            pdevNode = int(CUgraphDeviceNode(devNode))
-            cydevNode = <cydriver.CUgraphDeviceNode><void_ptr>pdevNode
-        self._devNode._ptr[0] = cydevNode
-{{endif}}
-{{if 'union CUlaunchAttributeValue_union' in found_types}}
-
-cdef class CUlaunchAttributeValue_union:
-    """
-    Launch attributes union; used as value field of CUlaunchAttribute
-
-    Attributes
-    ----------
-    pad : bytes
-
-    accessPolicyWindow : CUaccessPolicyWindow
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW.
-    cooperative : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_COOPERATIVE. Nonzero
-        indicates a cooperative kernel (see cuLaunchCooperativeKernel).
-    syncPolicy : CUsynchronizationPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY.
-        ::CUsynchronizationPolicy for work queued up in this stream
-    clusterDim : anon_struct1
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
-        that represents the desired cluster dimensions for the kernel.
-        Opaque type with the following fields: - `x` - The X dimension of
-        the cluster, in blocks. Must be a divisor of the grid X dimension.
-        - `y` - The Y dimension of the cluster, in blocks. Must be a
-        divisor of the grid Y dimension.    - `z` - The Z dimension of the
-        cluster, in blocks. Must be a divisor of the grid Z dimension.
-    clusterSchedulingPolicyPreference : CUclusterSchedulingPolicy
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE. Cluster
-        scheduling policy preference for the kernel.
-    programmaticStreamSerializationAllowed : int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION.
-    programmaticEvent : anon_struct2
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT
-        with the following fields: - `CUevent` event - Event to fire when
-        all blocks trigger it.    - `Event` record flags, see
-        cuEventRecordWithFlags. Does not accept :CU_EVENT_RECORD_EXTERNAL.
-        - `triggerAtBlockStart` - If this is set to non-0, each block
-        launch will automatically trigger the event.
-    launchCompletionEvent : anon_struct3
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT with the following
-        fields: - `CUevent` event - Event to fire when the last block
-        launches    - `int` flags; - Event record flags, see
-        cuEventRecordWithFlags. Does not accept CU_EVENT_RECORD_EXTERNAL.
-    priority : int
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_PRIORITY. Execution
-        priority of the kernel.
-    memSyncDomainMap : CUlaunchMemSyncDomainMap
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP.
-        See CUlaunchMemSyncDomainMap.
-    memSyncDomain : CUlaunchMemSyncDomain
-        Value of launch attribute CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN.
-        See::CUlaunchMemSyncDomain
-    deviceUpdatableKernelNode : anon_struct4
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE. with the
-        following fields: - `int` deviceUpdatable - Whether or not the
-        resulting kernel node should be device-updatable.    -
-        `CUgraphDeviceNode` devNode - Returns a handle to pass to the
-        various device-side update functions.
-    sharedMemCarveout : unsigned int
-        Value of launch attribute
-        CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUlaunchAttributeValue_union *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._accessPolicyWindow = CUaccessPolicyWindow(_ptr=<void_ptr>&self._ptr[0].accessPolicyWindow)
-        self._clusterDim = anon_struct1(_ptr=<void_ptr>self._ptr)
-        self._programmaticEvent = anon_struct2(_ptr=<void_ptr>self._ptr)
-        self._launchCompletionEvent = anon_struct3(_ptr=<void_ptr>self._ptr)
-        self._memSyncDomainMap = CUlaunchMemSyncDomainMap(_ptr=<void_ptr>&self._ptr[0].memSyncDomainMap)
-        self._deviceUpdatableKernelNode = anon_struct4(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['pad : ' + str(self.pad)]
-            except ValueError:
-                str_list += ['pad : <ValueError>']
-            try:
-                str_list += ['accessPolicyWindow :\n' + '\n'.join(['    ' + line for line in str(self.accessPolicyWindow).splitlines()])]
-            except ValueError:
-                str_list += ['accessPolicyWindow : <ValueError>']
-            try:
-                str_list += ['cooperative : ' + str(self.cooperative)]
-            except ValueError:
-                str_list += ['cooperative : <ValueError>']
-            try:
-                str_list += ['syncPolicy : ' + str(self.syncPolicy)]
-            except ValueError:
-                str_list += ['syncPolicy : <ValueError>']
-            try:
-                str_list += ['clusterDim :\n' + '\n'.join(['    ' + line for line in str(self.clusterDim).splitlines()])]
-            except ValueError:
-                str_list += ['clusterDim : <ValueError>']
-            try:
-                str_list += ['clusterSchedulingPolicyPreference : ' + str(self.clusterSchedulingPolicyPreference)]
-            except ValueError:
-                str_list += ['clusterSchedulingPolicyPreference : <ValueError>']
-            try:
-                str_list += ['programmaticStreamSerializationAllowed : ' + str(self.programmaticStreamSerializationAllowed)]
-            except ValueError:
-                str_list += ['programmaticStreamSerializationAllowed : <ValueError>']
-            try:
-                str_list += ['programmaticEvent :\n' + '\n'.join(['    ' + line for line in str(self.programmaticEvent).splitlines()])]
-            except ValueError:
-                str_list += ['programmaticEvent : <ValueError>']
-            try:
-                str_list += ['launchCompletionEvent :\n' + '\n'.join(['    ' + line for line in str(self.launchCompletionEvent).splitlines()])]
-            except ValueError:
-                str_list += ['launchCompletionEvent : <ValueError>']
-            try:
-                str_list += ['priority : ' + str(self.priority)]
-            except ValueError:
-                str_list += ['priority : <ValueError>']
-            try:
-                str_list += ['memSyncDomainMap :\n' + '\n'.join(['    ' + line for line in str(self.memSyncDomainMap).splitlines()])]
-            except ValueError:
-                str_list += ['memSyncDomainMap : <ValueError>']
-            try:
-                str_list += ['memSyncDomain : ' + str(self.memSyncDomain)]
-            except ValueError:
-                str_list += ['memSyncDomain : <ValueError>']
-            try:
-                str_list += ['deviceUpdatableKernelNode :\n' + '\n'.join(['    ' + line for line in str(self.deviceUpdatableKernelNode).splitlines()])]
-            except ValueError:
-                str_list += ['deviceUpdatableKernelNode : <ValueError>']
-            try:
-                str_list += ['sharedMemCarveout : ' + str(self.sharedMemCarveout)]
-            except ValueError:
-                str_list += ['sharedMemCarveout : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def pad(self):
-        return PyBytes_FromStringAndSize(self._ptr[0].pad, 64)
-    @pad.setter
-    def pad(self, pad):
-        if len(pad) != 64:
-            raise ValueError("pad length must be 64, is " + str(len(pad)))
-        if CHAR_MIN == 0:
-            for i, b in enumerate(pad):
-                if b < 0 and b > -129:
-                    b = b + 256
-                self._ptr[0].pad[i] = b
-        else:
-            for i, b in enumerate(pad):
-                if b > 127 and b < 256:
-                    b = b - 256
-                self._ptr[0].pad[i] = b
-    @property
-    def accessPolicyWindow(self):
-        return self._accessPolicyWindow
-    @accessPolicyWindow.setter
-    def accessPolicyWindow(self, accessPolicyWindow not None : CUaccessPolicyWindow):
-        string.memcpy(&self._ptr[0].accessPolicyWindow, <cydriver.CUaccessPolicyWindow*><void_ptr>accessPolicyWindow.getPtr(), sizeof(self._ptr[0].accessPolicyWindow))
-    @property
-    def cooperative(self):
-        return self._ptr[0].cooperative
-    @cooperative.setter
-    def cooperative(self, int cooperative):
-        self._ptr[0].cooperative = cooperative
-    @property
-    def syncPolicy(self):
-        return CUsynchronizationPolicy(self._ptr[0].syncPolicy)
-    @syncPolicy.setter
-    def syncPolicy(self, syncPolicy not None : CUsynchronizationPolicy):
-        self._ptr[0].syncPolicy = syncPolicy.value
-    @property
-    def clusterDim(self):
-        return self._clusterDim
-    @clusterDim.setter
-    def clusterDim(self, clusterDim not None : anon_struct1):
-        string.memcpy(&self._ptr[0].clusterDim, <cydriver.anon_struct1*><void_ptr>clusterDim.getPtr(), sizeof(self._ptr[0].clusterDim))
-    @property
-    def clusterSchedulingPolicyPreference(self):
-        return CUclusterSchedulingPolicy(self._ptr[0].clusterSchedulingPolicyPreference)
-    @clusterSchedulingPolicyPreference.setter
-    def clusterSchedulingPolicyPreference(self, clusterSchedulingPolicyPreference not None : CUclusterSchedulingPolicy):
-        self._ptr[0].clusterSchedulingPolicyPreference = clusterSchedulingPolicyPreference.value
-    @property
-    def programmaticStreamSerializationAllowed(self):
-        return self._ptr[0].programmaticStreamSerializationAllowed
-    @programmaticStreamSerializationAllowed.setter
-    def programmaticStreamSerializationAllowed(self, int programmaticStreamSerializationAllowed):
-        self._ptr[0].programmaticStreamSerializationAllowed = programmaticStreamSerializationAllowed
-    @property
-    def programmaticEvent(self):
-        return self._programmaticEvent
-    @programmaticEvent.setter
-    def programmaticEvent(self, programmaticEvent not None : anon_struct2):
-        string.memcpy(&self._ptr[0].programmaticEvent, <cydriver.anon_struct2*><void_ptr>programmaticEvent.getPtr(), sizeof(self._ptr[0].programmaticEvent))
-    @property
-    def launchCompletionEvent(self):
-        return self._launchCompletionEvent
-    @launchCompletionEvent.setter
-    def launchCompletionEvent(self, launchCompletionEvent not None : anon_struct3):
-        string.memcpy(&self._ptr[0].launchCompletionEvent, <cydriver.anon_struct3*><void_ptr>launchCompletionEvent.getPtr(), sizeof(self._ptr[0].launchCompletionEvent))
-    @property
-    def priority(self):
-        return self._ptr[0].priority
-    @priority.setter
-    def priority(self, int priority):
-        self._ptr[0].priority = priority
-    @property
-    def memSyncDomainMap(self):
-        return self._memSyncDomainMap
-    @memSyncDomainMap.setter
-    def memSyncDomainMap(self, memSyncDomainMap not None : CUlaunchMemSyncDomainMap):
-        string.memcpy(&self._ptr[0].memSyncDomainMap, <cydriver.CUlaunchMemSyncDomainMap*><void_ptr>memSyncDomainMap.getPtr(), sizeof(self._ptr[0].memSyncDomainMap))
-    @property
-    def memSyncDomain(self):
-        return CUlaunchMemSyncDomain(self._ptr[0].memSyncDomain)
-    @memSyncDomain.setter
-    def memSyncDomain(self, memSyncDomain not None : CUlaunchMemSyncDomain):
-        self._ptr[0].memSyncDomain = memSyncDomain.value
-    @property
-    def deviceUpdatableKernelNode(self):
-        return self._deviceUpdatableKernelNode
-    @deviceUpdatableKernelNode.setter
-    def deviceUpdatableKernelNode(self, deviceUpdatableKernelNode not None : anon_struct4):
-        string.memcpy(&self._ptr[0].deviceUpdatableKernelNode, <cydriver.anon_struct4*><void_ptr>deviceUpdatableKernelNode.getPtr(), sizeof(self._ptr[0].deviceUpdatableKernelNode))
-    @property
-    def sharedMemCarveout(self):
-        return self._ptr[0].sharedMemCarveout
-    @sharedMemCarveout.setter
-    def sharedMemCarveout(self, unsigned int sharedMemCarveout):
-        self._ptr[0].sharedMemCarveout = sharedMemCarveout
-{{endif}}
-{{if 'struct CUlaunchAttribute_st' in found_types}}
-
-cdef class CUlaunchAttribute_st:
-    """
-    Launch attribute
-
-    Attributes
-    ----------
-    id : CUlaunchAttributeID
-        Attribute to set
-    value : CUlaunchAttributeValue
-        Value of the attribute
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUlaunchAttribute_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._value = CUlaunchAttributeValue(_ptr=<void_ptr>&self._ptr[0].value)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['id : ' + str(self.id)]
-            except ValueError:
-                str_list += ['id : <ValueError>']
-            try:
-                str_list += ['value :\n' + '\n'.join(['    ' + line for line in str(self.value).splitlines()])]
-            except ValueError:
-                str_list += ['value : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def id(self):
-        return CUlaunchAttributeID(self._ptr[0].id)
-    @id.setter
-    def id(self, id not None : CUlaunchAttributeID):
-        self._ptr[0].id = id.value
-    @property
-    def value(self):
-        return self._value
-    @value.setter
-    def value(self, value not None : CUlaunchAttributeValue):
-        string.memcpy(&self._ptr[0].value, <cydriver.CUlaunchAttributeValue*><void_ptr>value.getPtr(), sizeof(self._ptr[0].value))
-{{endif}}
-{{if 'struct CUlaunchConfig_st' in found_types}}
-
-cdef class CUlaunchConfig_st:
-    """
-    CUDA extensible launch configuration
-
-    Attributes
-    ----------
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    hStream : CUstream
-        Stream identifier
-    attrs : CUlaunchAttribute
-        List of attributes; nullable if CUlaunchConfig::numAttrs == 0
-    numAttrs : unsigned int
-        Number of attributes populated in CUlaunchConfig::attrs
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUlaunchConfig_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._hStream = CUstream(_ptr=<void_ptr>&self._ptr[0].hStream)
-    def __dealloc__(self):
-        if self._attrs is not NULL:
-            free(self._attrs)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['gridDimX : ' + str(self.gridDimX)]
-            except ValueError:
-                str_list += ['gridDimX : <ValueError>']
-            try:
-                str_list += ['gridDimY : ' + str(self.gridDimY)]
-            except ValueError:
-                str_list += ['gridDimY : <ValueError>']
-            try:
-                str_list += ['gridDimZ : ' + str(self.gridDimZ)]
-            except ValueError:
-                str_list += ['gridDimZ : <ValueError>']
-            try:
-                str_list += ['blockDimX : ' + str(self.blockDimX)]
-            except ValueError:
-                str_list += ['blockDimX : <ValueError>']
-            try:
-                str_list += ['blockDimY : ' + str(self.blockDimY)]
-            except ValueError:
-                str_list += ['blockDimY : <ValueError>']
-            try:
-                str_list += ['blockDimZ : ' + str(self.blockDimZ)]
-            except ValueError:
-                str_list += ['blockDimZ : <ValueError>']
-            try:
-                str_list += ['sharedMemBytes : ' + str(self.sharedMemBytes)]
-            except ValueError:
-                str_list += ['sharedMemBytes : <ValueError>']
-            try:
-                str_list += ['hStream : ' + str(self.hStream)]
-            except ValueError:
-                str_list += ['hStream : <ValueError>']
-            try:
-                str_list += ['attrs : ' + str(self.attrs)]
-            except ValueError:
-                str_list += ['attrs : <ValueError>']
-            try:
-                str_list += ['numAttrs : ' + str(self.numAttrs)]
-            except ValueError:
-                str_list += ['numAttrs : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def gridDimX(self):
-        return self._ptr[0].gridDimX
-    @gridDimX.setter
-    def gridDimX(self, unsigned int gridDimX):
-        self._ptr[0].gridDimX = gridDimX
-    @property
-    def gridDimY(self):
-        return self._ptr[0].gridDimY
-    @gridDimY.setter
-    def gridDimY(self, unsigned int gridDimY):
-        self._ptr[0].gridDimY = gridDimY
-    @property
-    def gridDimZ(self):
-        return self._ptr[0].gridDimZ
-    @gridDimZ.setter
-    def gridDimZ(self, unsigned int gridDimZ):
-        self._ptr[0].gridDimZ = gridDimZ
-    @property
-    def blockDimX(self):
-        return self._ptr[0].blockDimX
-    @blockDimX.setter
-    def blockDimX(self, unsigned int blockDimX):
-        self._ptr[0].blockDimX = blockDimX
-    @property
-    def blockDimY(self):
-        return self._ptr[0].blockDimY
-    @blockDimY.setter
-    def blockDimY(self, unsigned int blockDimY):
-        self._ptr[0].blockDimY = blockDimY
-    @property
-    def blockDimZ(self):
-        return self._ptr[0].blockDimZ
-    @blockDimZ.setter
-    def blockDimZ(self, unsigned int blockDimZ):
-        self._ptr[0].blockDimZ = blockDimZ
-    @property
-    def sharedMemBytes(self):
-        return self._ptr[0].sharedMemBytes
-    @sharedMemBytes.setter
-    def sharedMemBytes(self, unsigned int sharedMemBytes):
-        self._ptr[0].sharedMemBytes = sharedMemBytes
-    @property
-    def hStream(self):
-        return self._hStream
-    @hStream.setter
-    def hStream(self, hStream):
-        cdef cydriver.CUstream cyhStream
-        if hStream is None:
-            cyhStream = <cydriver.CUstream><void_ptr>0
-        elif isinstance(hStream, (CUstream,)):
-            phStream = int(hStream)
-            cyhStream = <cydriver.CUstream><void_ptr>phStream
-        else:
-            phStream = int(CUstream(hStream))
-            cyhStream = <cydriver.CUstream><void_ptr>phStream
-        self._hStream._ptr[0] = cyhStream
-    @property
-    def attrs(self):
-        arrs = [<void_ptr>self._ptr[0].attrs + x*sizeof(cydriver.CUlaunchAttribute) for x in range(self._attrs_length)]
-        return [CUlaunchAttribute(_ptr=arr) for arr in arrs]
-    @attrs.setter
-    def attrs(self, val):
-        if len(val) == 0:
-            free(self._attrs)
-            self._attrs_length = 0
-            self._ptr[0].attrs = NULL
-        else:
-            if self._attrs_length != <size_t>len(val):
-                free(self._attrs)
-                self._attrs = <cydriver.CUlaunchAttribute*> calloc(len(val), sizeof(cydriver.CUlaunchAttribute))
-                if self._attrs is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUlaunchAttribute)))
-                self._attrs_length = <size_t>len(val)
-                self._ptr[0].attrs = self._attrs
-            for idx in range(len(val)):
-                string.memcpy(&self._attrs[idx], (<CUlaunchAttribute>val[idx])._ptr, sizeof(cydriver.CUlaunchAttribute))
-
-    @property
-    def numAttrs(self):
-        return self._ptr[0].numAttrs
-    @numAttrs.setter
-    def numAttrs(self, unsigned int numAttrs):
-        self._ptr[0].numAttrs = numAttrs
-{{endif}}
-{{if 'struct CUexecAffinitySmCount_st' in found_types}}
-
-cdef class CUexecAffinitySmCount_st:
-    """
-    Value for CU_EXEC_AFFINITY_TYPE_SM_COUNT
-
-    Attributes
-    ----------
-    val : unsigned int
-        The number of SMs the context is limited to use.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUexecAffinitySmCount_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['val : ' + str(self.val)]
-            except ValueError:
-                str_list += ['val : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def val(self):
-        return self._ptr[0].val
-    @val.setter
-    def val(self, unsigned int val):
-        self._ptr[0].val = val
-{{endif}}
-{{if 'struct CUexecAffinityParam_st' in found_types}}
-
-cdef class anon_union3:
-    """
-    Attributes
-    ----------
-    smCount : CUexecAffinitySmCount
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUexecAffinityParam_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._smCount = CUexecAffinitySmCount(_ptr=<void_ptr>&self._ptr[0].param.smCount)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].param
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['smCount :\n' + '\n'.join(['    ' + line for line in str(self.smCount).splitlines()])]
-            except ValueError:
-                str_list += ['smCount : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def smCount(self):
-        return self._smCount
-    @smCount.setter
-    def smCount(self, smCount not None : CUexecAffinitySmCount):
-        string.memcpy(&self._ptr[0].param.smCount, <cydriver.CUexecAffinitySmCount*><void_ptr>smCount.getPtr(), sizeof(self._ptr[0].param.smCount))
-{{endif}}
-{{if 'struct CUexecAffinityParam_st' in found_types}}
-
-cdef class CUexecAffinityParam_st:
-    """
-    Execution Affinity Parameters
-
-    Attributes
-    ----------
-    type : CUexecAffinityType
-
-    param : anon_union3
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cydriver.CUexecAffinityParam_st *>calloc(1, sizeof(cydriver.CUexecAffinityParam_st))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cydriver.CUexecAffinityParam_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._param = anon_union3(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['param :\n' + '\n'.join(['    ' + line for line in str(self.param).splitlines()])]
-            except ValueError:
-                str_list += ['param : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return CUexecAffinityType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : CUexecAffinityType):
-        self._ptr[0].type = type.value
-    @property
-    def param(self):
-        return self._param
-    @param.setter
-    def param(self, param not None : anon_union3):
-        string.memcpy(&self._ptr[0].param, <cydriver.anon_union3*><void_ptr>param.getPtr(), sizeof(self._ptr[0].param))
-{{endif}}
-{{if 'struct CUctxCigParam_st' in found_types}}
-
-cdef class CUctxCigParam_st:
-    """
-    CIG Context Create Params
-
-    Attributes
-    ----------
-    sharedDataType : CUcigDataType
-
-    sharedData : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUctxCigParam_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['sharedDataType : ' + str(self.sharedDataType)]
-            except ValueError:
-                str_list += ['sharedDataType : <ValueError>']
-            try:
-                str_list += ['sharedData : ' + hex(self.sharedData)]
-            except ValueError:
-                str_list += ['sharedData : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def sharedDataType(self):
-        return CUcigDataType(self._ptr[0].sharedDataType)
-    @sharedDataType.setter
-    def sharedDataType(self, sharedDataType not None : CUcigDataType):
-        self._ptr[0].sharedDataType = sharedDataType.value
-    @property
-    def sharedData(self):
-        return <void_ptr>self._ptr[0].sharedData
-    @sharedData.setter
-    def sharedData(self, sharedData):
-        _cysharedData = utils.HelperInputVoidPtr(sharedData)
-        self._ptr[0].sharedData = <void*><void_ptr>_cysharedData.cptr
-{{endif}}
-{{if 'struct CUctxCreateParams_st' in found_types}}
-
-cdef class CUctxCreateParams_st:
-    """
-    Params for creating CUDA context Exactly one of execAffinityParams
-    and cigParams must be non-NULL.
-
-    Attributes
-    ----------
-    execAffinityParams : CUexecAffinityParam
-
-    numExecAffinityParams : int
-
-    cigParams : CUctxCigParam
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUctxCreateParams_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        if self._execAffinityParams is not NULL:
-            free(self._execAffinityParams)
-        if self._cigParams is not NULL:
-            free(self._cigParams)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['execAffinityParams : ' + str(self.execAffinityParams)]
-            except ValueError:
-                str_list += ['execAffinityParams : <ValueError>']
-            try:
-                str_list += ['numExecAffinityParams : ' + str(self.numExecAffinityParams)]
-            except ValueError:
-                str_list += ['numExecAffinityParams : <ValueError>']
-            try:
-                str_list += ['cigParams : ' + str(self.cigParams)]
-            except ValueError:
-                str_list += ['cigParams : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def execAffinityParams(self):
-        arrs = [<void_ptr>self._ptr[0].execAffinityParams + x*sizeof(cydriver.CUexecAffinityParam) for x in range(self._execAffinityParams_length)]
-        return [CUexecAffinityParam(_ptr=arr) for arr in arrs]
-    @execAffinityParams.setter
-    def execAffinityParams(self, val):
-        if len(val) == 0:
-            free(self._execAffinityParams)
-            self._execAffinityParams_length = 0
-            self._ptr[0].execAffinityParams = NULL
-        else:
-            if self._execAffinityParams_length != <size_t>len(val):
-                free(self._execAffinityParams)
-                self._execAffinityParams = <cydriver.CUexecAffinityParam*> calloc(len(val), sizeof(cydriver.CUexecAffinityParam))
-                if self._execAffinityParams is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUexecAffinityParam)))
-                self._execAffinityParams_length = <size_t>len(val)
-                self._ptr[0].execAffinityParams = self._execAffinityParams
-            for idx in range(len(val)):
-                string.memcpy(&self._execAffinityParams[idx], (<CUexecAffinityParam>val[idx])._ptr, sizeof(cydriver.CUexecAffinityParam))
-
-    @property
-    def numExecAffinityParams(self):
-        return self._ptr[0].numExecAffinityParams
-    @numExecAffinityParams.setter
-    def numExecAffinityParams(self, int numExecAffinityParams):
-        self._ptr[0].numExecAffinityParams = numExecAffinityParams
-    @property
-    def cigParams(self):
-        arrs = [<void_ptr>self._ptr[0].cigParams + x*sizeof(cydriver.CUctxCigParam) for x in range(self._cigParams_length)]
-        return [CUctxCigParam(_ptr=arr) for arr in arrs]
-    @cigParams.setter
-    def cigParams(self, val):
-        if len(val) == 0:
-            free(self._cigParams)
-            self._cigParams_length = 0
-            self._ptr[0].cigParams = NULL
-        else:
-            if self._cigParams_length != <size_t>len(val):
-                free(self._cigParams)
-                self._cigParams = <cydriver.CUctxCigParam*> calloc(len(val), sizeof(cydriver.CUctxCigParam))
-                if self._cigParams is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUctxCigParam)))
-                self._cigParams_length = <size_t>len(val)
-                self._ptr[0].cigParams = self._cigParams
-            for idx in range(len(val)):
-                string.memcpy(&self._cigParams[idx], (<CUctxCigParam>val[idx])._ptr, sizeof(cydriver.CUctxCigParam))
-
-{{endif}}
-{{if 'struct CUlibraryHostUniversalFunctionAndDataTable_st' in found_types}}
-
-cdef class CUlibraryHostUniversalFunctionAndDataTable_st:
-    """
-    Attributes
-    ----------
-    functionTable : Any
-
-    functionWindowSize : size_t
-
-    dataTable : Any
-
-    dataWindowSize : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUlibraryHostUniversalFunctionAndDataTable_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['functionTable : ' + hex(self.functionTable)]
-            except ValueError:
-                str_list += ['functionTable : <ValueError>']
-            try:
-                str_list += ['functionWindowSize : ' + str(self.functionWindowSize)]
-            except ValueError:
-                str_list += ['functionWindowSize : <ValueError>']
-            try:
-                str_list += ['dataTable : ' + hex(self.dataTable)]
-            except ValueError:
-                str_list += ['dataTable : <ValueError>']
-            try:
-                str_list += ['dataWindowSize : ' + str(self.dataWindowSize)]
-            except ValueError:
-                str_list += ['dataWindowSize : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def functionTable(self):
-        return <void_ptr>self._ptr[0].functionTable
-    @functionTable.setter
-    def functionTable(self, functionTable):
-        _cyfunctionTable = utils.HelperInputVoidPtr(functionTable)
-        self._ptr[0].functionTable = <void*><void_ptr>_cyfunctionTable.cptr
-    @property
-    def functionWindowSize(self):
-        return self._ptr[0].functionWindowSize
-    @functionWindowSize.setter
-    def functionWindowSize(self, size_t functionWindowSize):
-        self._ptr[0].functionWindowSize = functionWindowSize
-    @property
-    def dataTable(self):
-        return <void_ptr>self._ptr[0].dataTable
-    @dataTable.setter
-    def dataTable(self, dataTable):
-        _cydataTable = utils.HelperInputVoidPtr(dataTable)
-        self._ptr[0].dataTable = <void*><void_ptr>_cydataTable.cptr
-    @property
-    def dataWindowSize(self):
-        return self._ptr[0].dataWindowSize
-    @dataWindowSize.setter
-    def dataWindowSize(self, size_t dataWindowSize):
-        self._ptr[0].dataWindowSize = dataWindowSize
-{{endif}}
-{{if 'struct CUDA_MEMCPY2D_st' in found_types}}
-
-cdef class CUDA_MEMCPY2D_st:
-    """
-    2D memory copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    WidthInBytes : size_t
-        Width of 2D memory copy in bytes
-    Height : size_t
-        Height of 2D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_MEMCPY2D_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._srcDevice = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].srcDevice)
-        self._srcArray = CUarray(_ptr=<void_ptr>&self._ptr[0].srcArray)
-        self._dstDevice = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].dstDevice)
-        self._dstArray = CUarray(_ptr=<void_ptr>&self._ptr[0].dstArray)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['srcXInBytes : ' + str(self.srcXInBytes)]
-            except ValueError:
-                str_list += ['srcXInBytes : <ValueError>']
-            try:
-                str_list += ['srcY : ' + str(self.srcY)]
-            except ValueError:
-                str_list += ['srcY : <ValueError>']
-            try:
-                str_list += ['srcMemoryType : ' + str(self.srcMemoryType)]
-            except ValueError:
-                str_list += ['srcMemoryType : <ValueError>']
-            try:
-                str_list += ['srcHost : ' + hex(self.srcHost)]
-            except ValueError:
-                str_list += ['srcHost : <ValueError>']
-            try:
-                str_list += ['srcDevice : ' + str(self.srcDevice)]
-            except ValueError:
-                str_list += ['srcDevice : <ValueError>']
-            try:
-                str_list += ['srcArray : ' + str(self.srcArray)]
-            except ValueError:
-                str_list += ['srcArray : <ValueError>']
-            try:
-                str_list += ['srcPitch : ' + str(self.srcPitch)]
-            except ValueError:
-                str_list += ['srcPitch : <ValueError>']
-            try:
-                str_list += ['dstXInBytes : ' + str(self.dstXInBytes)]
-            except ValueError:
-                str_list += ['dstXInBytes : <ValueError>']
-            try:
-                str_list += ['dstY : ' + str(self.dstY)]
-            except ValueError:
-                str_list += ['dstY : <ValueError>']
-            try:
-                str_list += ['dstMemoryType : ' + str(self.dstMemoryType)]
-            except ValueError:
-                str_list += ['dstMemoryType : <ValueError>']
-            try:
-                str_list += ['dstHost : ' + hex(self.dstHost)]
-            except ValueError:
-                str_list += ['dstHost : <ValueError>']
-            try:
-                str_list += ['dstDevice : ' + str(self.dstDevice)]
-            except ValueError:
-                str_list += ['dstDevice : <ValueError>']
-            try:
-                str_list += ['dstArray : ' + str(self.dstArray)]
-            except ValueError:
-                str_list += ['dstArray : <ValueError>']
-            try:
-                str_list += ['dstPitch : ' + str(self.dstPitch)]
-            except ValueError:
-                str_list += ['dstPitch : <ValueError>']
-            try:
-                str_list += ['WidthInBytes : ' + str(self.WidthInBytes)]
-            except ValueError:
-                str_list += ['WidthInBytes : <ValueError>']
-            try:
-                str_list += ['Height : ' + str(self.Height)]
-            except ValueError:
-                str_list += ['Height : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def srcXInBytes(self):
-        return self._ptr[0].srcXInBytes
-    @srcXInBytes.setter
-    def srcXInBytes(self, size_t srcXInBytes):
-        self._ptr[0].srcXInBytes = srcXInBytes
-    @property
-    def srcY(self):
-        return self._ptr[0].srcY
-    @srcY.setter
-    def srcY(self, size_t srcY):
-        self._ptr[0].srcY = srcY
-    @property
-    def srcMemoryType(self):
-        return CUmemorytype(self._ptr[0].srcMemoryType)
-    @srcMemoryType.setter
-    def srcMemoryType(self, srcMemoryType not None : CUmemorytype):
-        self._ptr[0].srcMemoryType = srcMemoryType.value
-    @property
-    def srcHost(self):
-        return <void_ptr>self._ptr[0].srcHost
-    @srcHost.setter
-    def srcHost(self, srcHost):
-        _cysrcHost = utils.HelperInputVoidPtr(srcHost)
-        self._ptr[0].srcHost = <void*><void_ptr>_cysrcHost.cptr
-    @property
-    def srcDevice(self):
-        return self._srcDevice
-    @srcDevice.setter
-    def srcDevice(self, srcDevice):
-        cdef cydriver.CUdeviceptr cysrcDevice
-        if srcDevice is None:
-            cysrcDevice = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(srcDevice, (CUdeviceptr)):
-            psrcDevice = int(srcDevice)
-            cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-        else:
-            psrcDevice = int(CUdeviceptr(srcDevice))
-            cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-        self._srcDevice._ptr[0] = cysrcDevice
-
-    @property
-    def srcArray(self):
-        return self._srcArray
-    @srcArray.setter
-    def srcArray(self, srcArray):
-        cdef cydriver.CUarray cysrcArray
-        if srcArray is None:
-            cysrcArray = <cydriver.CUarray><void_ptr>0
-        elif isinstance(srcArray, (CUarray,)):
-            psrcArray = int(srcArray)
-            cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-        else:
-            psrcArray = int(CUarray(srcArray))
-            cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-        self._srcArray._ptr[0] = cysrcArray
-    @property
-    def srcPitch(self):
-        return self._ptr[0].srcPitch
-    @srcPitch.setter
-    def srcPitch(self, size_t srcPitch):
-        self._ptr[0].srcPitch = srcPitch
-    @property
-    def dstXInBytes(self):
-        return self._ptr[0].dstXInBytes
-    @dstXInBytes.setter
-    def dstXInBytes(self, size_t dstXInBytes):
-        self._ptr[0].dstXInBytes = dstXInBytes
-    @property
-    def dstY(self):
-        return self._ptr[0].dstY
-    @dstY.setter
-    def dstY(self, size_t dstY):
-        self._ptr[0].dstY = dstY
-    @property
-    def dstMemoryType(self):
-        return CUmemorytype(self._ptr[0].dstMemoryType)
-    @dstMemoryType.setter
-    def dstMemoryType(self, dstMemoryType not None : CUmemorytype):
-        self._ptr[0].dstMemoryType = dstMemoryType.value
-    @property
-    def dstHost(self):
-        return <void_ptr>self._ptr[0].dstHost
-    @dstHost.setter
-    def dstHost(self, dstHost):
-        _cydstHost = utils.HelperInputVoidPtr(dstHost)
-        self._ptr[0].dstHost = <void*><void_ptr>_cydstHost.cptr
-    @property
-    def dstDevice(self):
-        return self._dstDevice
-    @dstDevice.setter
-    def dstDevice(self, dstDevice):
-        cdef cydriver.CUdeviceptr cydstDevice
-        if dstDevice is None:
-            cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(dstDevice, (CUdeviceptr)):
-            pdstDevice = int(dstDevice)
-            cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-        else:
-            pdstDevice = int(CUdeviceptr(dstDevice))
-            cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-        self._dstDevice._ptr[0] = cydstDevice
-
-    @property
-    def dstArray(self):
-        return self._dstArray
-    @dstArray.setter
-    def dstArray(self, dstArray):
-        cdef cydriver.CUarray cydstArray
-        if dstArray is None:
-            cydstArray = <cydriver.CUarray><void_ptr>0
-        elif isinstance(dstArray, (CUarray,)):
-            pdstArray = int(dstArray)
-            cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-        else:
-            pdstArray = int(CUarray(dstArray))
-            cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-        self._dstArray._ptr[0] = cydstArray
-    @property
-    def dstPitch(self):
-        return self._ptr[0].dstPitch
-    @dstPitch.setter
-    def dstPitch(self, size_t dstPitch):
-        self._ptr[0].dstPitch = dstPitch
-    @property
-    def WidthInBytes(self):
-        return self._ptr[0].WidthInBytes
-    @WidthInBytes.setter
-    def WidthInBytes(self, size_t WidthInBytes):
-        self._ptr[0].WidthInBytes = WidthInBytes
-    @property
-    def Height(self):
-        return self._ptr[0].Height
-    @Height.setter
-    def Height(self, size_t Height):
-        self._ptr[0].Height = Height
-{{endif}}
-{{if 'struct CUDA_MEMCPY3D_st' in found_types}}
-
-cdef class CUDA_MEMCPY3D_st:
-    """
-    3D memory copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcZ : size_t
-        Source Z
-    srcLOD : size_t
-        Source LOD
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    reserved0 : Any
-        Must be NULL
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    srcHeight : size_t
-        Source height (ignored when src is array; may be 0 if Depth==1)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstZ : size_t
-        Destination Z
-    dstLOD : size_t
-        Destination LOD
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    reserved1 : Any
-        Must be NULL
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    dstHeight : size_t
-        Destination height (ignored when dst is array; may be 0 if
-        Depth==1)
-    WidthInBytes : size_t
-        Width of 3D memory copy in bytes
-    Height : size_t
-        Height of 3D memory copy
-    Depth : size_t
-        Depth of 3D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_MEMCPY3D_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._srcDevice = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].srcDevice)
-        self._srcArray = CUarray(_ptr=<void_ptr>&self._ptr[0].srcArray)
-        self._dstDevice = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].dstDevice)
-        self._dstArray = CUarray(_ptr=<void_ptr>&self._ptr[0].dstArray)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['srcXInBytes : ' + str(self.srcXInBytes)]
-            except ValueError:
-                str_list += ['srcXInBytes : <ValueError>']
-            try:
-                str_list += ['srcY : ' + str(self.srcY)]
-            except ValueError:
-                str_list += ['srcY : <ValueError>']
-            try:
-                str_list += ['srcZ : ' + str(self.srcZ)]
-            except ValueError:
-                str_list += ['srcZ : <ValueError>']
-            try:
-                str_list += ['srcLOD : ' + str(self.srcLOD)]
-            except ValueError:
-                str_list += ['srcLOD : <ValueError>']
-            try:
-                str_list += ['srcMemoryType : ' + str(self.srcMemoryType)]
-            except ValueError:
-                str_list += ['srcMemoryType : <ValueError>']
-            try:
-                str_list += ['srcHost : ' + hex(self.srcHost)]
-            except ValueError:
-                str_list += ['srcHost : <ValueError>']
-            try:
-                str_list += ['srcDevice : ' + str(self.srcDevice)]
-            except ValueError:
-                str_list += ['srcDevice : <ValueError>']
-            try:
-                str_list += ['srcArray : ' + str(self.srcArray)]
-            except ValueError:
-                str_list += ['srcArray : <ValueError>']
-            try:
-                str_list += ['reserved0 : ' + hex(self.reserved0)]
-            except ValueError:
-                str_list += ['reserved0 : <ValueError>']
-            try:
-                str_list += ['srcPitch : ' + str(self.srcPitch)]
-            except ValueError:
-                str_list += ['srcPitch : <ValueError>']
-            try:
-                str_list += ['srcHeight : ' + str(self.srcHeight)]
-            except ValueError:
-                str_list += ['srcHeight : <ValueError>']
-            try:
-                str_list += ['dstXInBytes : ' + str(self.dstXInBytes)]
-            except ValueError:
-                str_list += ['dstXInBytes : <ValueError>']
-            try:
-                str_list += ['dstY : ' + str(self.dstY)]
-            except ValueError:
-                str_list += ['dstY : <ValueError>']
-            try:
-                str_list += ['dstZ : ' + str(self.dstZ)]
-            except ValueError:
-                str_list += ['dstZ : <ValueError>']
-            try:
-                str_list += ['dstLOD : ' + str(self.dstLOD)]
-            except ValueError:
-                str_list += ['dstLOD : <ValueError>']
-            try:
-                str_list += ['dstMemoryType : ' + str(self.dstMemoryType)]
-            except ValueError:
-                str_list += ['dstMemoryType : <ValueError>']
-            try:
-                str_list += ['dstHost : ' + hex(self.dstHost)]
-            except ValueError:
-                str_list += ['dstHost : <ValueError>']
-            try:
-                str_list += ['dstDevice : ' + str(self.dstDevice)]
-            except ValueError:
-                str_list += ['dstDevice : <ValueError>']
-            try:
-                str_list += ['dstArray : ' + str(self.dstArray)]
-            except ValueError:
-                str_list += ['dstArray : <ValueError>']
-            try:
-                str_list += ['reserved1 : ' + hex(self.reserved1)]
-            except ValueError:
-                str_list += ['reserved1 : <ValueError>']
-            try:
-                str_list += ['dstPitch : ' + str(self.dstPitch)]
-            except ValueError:
-                str_list += ['dstPitch : <ValueError>']
-            try:
-                str_list += ['dstHeight : ' + str(self.dstHeight)]
-            except ValueError:
-                str_list += ['dstHeight : <ValueError>']
-            try:
-                str_list += ['WidthInBytes : ' + str(self.WidthInBytes)]
-            except ValueError:
-                str_list += ['WidthInBytes : <ValueError>']
-            try:
-                str_list += ['Height : ' + str(self.Height)]
-            except ValueError:
-                str_list += ['Height : <ValueError>']
-            try:
-                str_list += ['Depth : ' + str(self.Depth)]
-            except ValueError:
-                str_list += ['Depth : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def srcXInBytes(self):
-        return self._ptr[0].srcXInBytes
-    @srcXInBytes.setter
-    def srcXInBytes(self, size_t srcXInBytes):
-        self._ptr[0].srcXInBytes = srcXInBytes
-    @property
-    def srcY(self):
-        return self._ptr[0].srcY
-    @srcY.setter
-    def srcY(self, size_t srcY):
-        self._ptr[0].srcY = srcY
-    @property
-    def srcZ(self):
-        return self._ptr[0].srcZ
-    @srcZ.setter
-    def srcZ(self, size_t srcZ):
-        self._ptr[0].srcZ = srcZ
-    @property
-    def srcLOD(self):
-        return self._ptr[0].srcLOD
-    @srcLOD.setter
-    def srcLOD(self, size_t srcLOD):
-        self._ptr[0].srcLOD = srcLOD
-    @property
-    def srcMemoryType(self):
-        return CUmemorytype(self._ptr[0].srcMemoryType)
-    @srcMemoryType.setter
-    def srcMemoryType(self, srcMemoryType not None : CUmemorytype):
-        self._ptr[0].srcMemoryType = srcMemoryType.value
-    @property
-    def srcHost(self):
-        return <void_ptr>self._ptr[0].srcHost
-    @srcHost.setter
-    def srcHost(self, srcHost):
-        _cysrcHost = utils.HelperInputVoidPtr(srcHost)
-        self._ptr[0].srcHost = <void*><void_ptr>_cysrcHost.cptr
-    @property
-    def srcDevice(self):
-        return self._srcDevice
-    @srcDevice.setter
-    def srcDevice(self, srcDevice):
-        cdef cydriver.CUdeviceptr cysrcDevice
-        if srcDevice is None:
-            cysrcDevice = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(srcDevice, (CUdeviceptr)):
-            psrcDevice = int(srcDevice)
-            cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-        else:
-            psrcDevice = int(CUdeviceptr(srcDevice))
-            cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-        self._srcDevice._ptr[0] = cysrcDevice
-
-    @property
-    def srcArray(self):
-        return self._srcArray
-    @srcArray.setter
-    def srcArray(self, srcArray):
-        cdef cydriver.CUarray cysrcArray
-        if srcArray is None:
-            cysrcArray = <cydriver.CUarray><void_ptr>0
-        elif isinstance(srcArray, (CUarray,)):
-            psrcArray = int(srcArray)
-            cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-        else:
-            psrcArray = int(CUarray(srcArray))
-            cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-        self._srcArray._ptr[0] = cysrcArray
-    @property
-    def reserved0(self):
-        return <void_ptr>self._ptr[0].reserved0
-    @reserved0.setter
-    def reserved0(self, reserved0):
-        _cyreserved0 = utils.HelperInputVoidPtr(reserved0)
-        self._ptr[0].reserved0 = <void*><void_ptr>_cyreserved0.cptr
-    @property
-    def srcPitch(self):
-        return self._ptr[0].srcPitch
-    @srcPitch.setter
-    def srcPitch(self, size_t srcPitch):
-        self._ptr[0].srcPitch = srcPitch
-    @property
-    def srcHeight(self):
-        return self._ptr[0].srcHeight
-    @srcHeight.setter
-    def srcHeight(self, size_t srcHeight):
-        self._ptr[0].srcHeight = srcHeight
-    @property
-    def dstXInBytes(self):
-        return self._ptr[0].dstXInBytes
-    @dstXInBytes.setter
-    def dstXInBytes(self, size_t dstXInBytes):
-        self._ptr[0].dstXInBytes = dstXInBytes
-    @property
-    def dstY(self):
-        return self._ptr[0].dstY
-    @dstY.setter
-    def dstY(self, size_t dstY):
-        self._ptr[0].dstY = dstY
-    @property
-    def dstZ(self):
-        return self._ptr[0].dstZ
-    @dstZ.setter
-    def dstZ(self, size_t dstZ):
-        self._ptr[0].dstZ = dstZ
-    @property
-    def dstLOD(self):
-        return self._ptr[0].dstLOD
-    @dstLOD.setter
-    def dstLOD(self, size_t dstLOD):
-        self._ptr[0].dstLOD = dstLOD
-    @property
-    def dstMemoryType(self):
-        return CUmemorytype(self._ptr[0].dstMemoryType)
-    @dstMemoryType.setter
-    def dstMemoryType(self, dstMemoryType not None : CUmemorytype):
-        self._ptr[0].dstMemoryType = dstMemoryType.value
-    @property
-    def dstHost(self):
-        return <void_ptr>self._ptr[0].dstHost
-    @dstHost.setter
-    def dstHost(self, dstHost):
-        _cydstHost = utils.HelperInputVoidPtr(dstHost)
-        self._ptr[0].dstHost = <void*><void_ptr>_cydstHost.cptr
-    @property
-    def dstDevice(self):
-        return self._dstDevice
-    @dstDevice.setter
-    def dstDevice(self, dstDevice):
-        cdef cydriver.CUdeviceptr cydstDevice
-        if dstDevice is None:
-            cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(dstDevice, (CUdeviceptr)):
-            pdstDevice = int(dstDevice)
-            cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-        else:
-            pdstDevice = int(CUdeviceptr(dstDevice))
-            cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-        self._dstDevice._ptr[0] = cydstDevice
-
-    @property
-    def dstArray(self):
-        return self._dstArray
-    @dstArray.setter
-    def dstArray(self, dstArray):
-        cdef cydriver.CUarray cydstArray
-        if dstArray is None:
-            cydstArray = <cydriver.CUarray><void_ptr>0
-        elif isinstance(dstArray, (CUarray,)):
-            pdstArray = int(dstArray)
-            cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-        else:
-            pdstArray = int(CUarray(dstArray))
-            cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-        self._dstArray._ptr[0] = cydstArray
-    @property
-    def reserved1(self):
-        return <void_ptr>self._ptr[0].reserved1
-    @reserved1.setter
-    def reserved1(self, reserved1):
-        _cyreserved1 = utils.HelperInputVoidPtr(reserved1)
-        self._ptr[0].reserved1 = <void*><void_ptr>_cyreserved1.cptr
-    @property
-    def dstPitch(self):
-        return self._ptr[0].dstPitch
-    @dstPitch.setter
-    def dstPitch(self, size_t dstPitch):
-        self._ptr[0].dstPitch = dstPitch
-    @property
-    def dstHeight(self):
-        return self._ptr[0].dstHeight
-    @dstHeight.setter
-    def dstHeight(self, size_t dstHeight):
-        self._ptr[0].dstHeight = dstHeight
-    @property
-    def WidthInBytes(self):
-        return self._ptr[0].WidthInBytes
-    @WidthInBytes.setter
-    def WidthInBytes(self, size_t WidthInBytes):
-        self._ptr[0].WidthInBytes = WidthInBytes
-    @property
-    def Height(self):
-        return self._ptr[0].Height
-    @Height.setter
-    def Height(self, size_t Height):
-        self._ptr[0].Height = Height
-    @property
-    def Depth(self):
-        return self._ptr[0].Depth
-    @Depth.setter
-    def Depth(self, size_t Depth):
-        self._ptr[0].Depth = Depth
-{{endif}}
-{{if 'struct CUDA_MEMCPY3D_PEER_st' in found_types}}
-
-cdef class CUDA_MEMCPY3D_PEER_st:
-    """
-    3D memory cross-context copy parameters
-
-    Attributes
-    ----------
-    srcXInBytes : size_t
-        Source X in bytes
-    srcY : size_t
-        Source Y
-    srcZ : size_t
-        Source Z
-    srcLOD : size_t
-        Source LOD
-    srcMemoryType : CUmemorytype
-        Source memory type (host, device, array)
-    srcHost : Any
-        Source host pointer
-    srcDevice : CUdeviceptr
-        Source device pointer
-    srcArray : CUarray
-        Source array reference
-    srcContext : CUcontext
-        Source context (ignored with srcMemoryType is CU_MEMORYTYPE_ARRAY)
-    srcPitch : size_t
-        Source pitch (ignored when src is array)
-    srcHeight : size_t
-        Source height (ignored when src is array; may be 0 if Depth==1)
-    dstXInBytes : size_t
-        Destination X in bytes
-    dstY : size_t
-        Destination Y
-    dstZ : size_t
-        Destination Z
-    dstLOD : size_t
-        Destination LOD
-    dstMemoryType : CUmemorytype
-        Destination memory type (host, device, array)
-    dstHost : Any
-        Destination host pointer
-    dstDevice : CUdeviceptr
-        Destination device pointer
-    dstArray : CUarray
-        Destination array reference
-    dstContext : CUcontext
-        Destination context (ignored with dstMemoryType is
-        CU_MEMORYTYPE_ARRAY)
-    dstPitch : size_t
-        Destination pitch (ignored when dst is array)
-    dstHeight : size_t
-        Destination height (ignored when dst is array; may be 0 if
-        Depth==1)
-    WidthInBytes : size_t
-        Width of 3D memory copy in bytes
-    Height : size_t
-        Height of 3D memory copy
-    Depth : size_t
-        Depth of 3D memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_MEMCPY3D_PEER_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._srcDevice = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].srcDevice)
-        self._srcArray = CUarray(_ptr=<void_ptr>&self._ptr[0].srcArray)
-        self._srcContext = CUcontext(_ptr=<void_ptr>&self._ptr[0].srcContext)
-        self._dstDevice = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].dstDevice)
-        self._dstArray = CUarray(_ptr=<void_ptr>&self._ptr[0].dstArray)
-        self._dstContext = CUcontext(_ptr=<void_ptr>&self._ptr[0].dstContext)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['srcXInBytes : ' + str(self.srcXInBytes)]
-            except ValueError:
-                str_list += ['srcXInBytes : <ValueError>']
-            try:
-                str_list += ['srcY : ' + str(self.srcY)]
-            except ValueError:
-                str_list += ['srcY : <ValueError>']
-            try:
-                str_list += ['srcZ : ' + str(self.srcZ)]
-            except ValueError:
-                str_list += ['srcZ : <ValueError>']
-            try:
-                str_list += ['srcLOD : ' + str(self.srcLOD)]
-            except ValueError:
-                str_list += ['srcLOD : <ValueError>']
-            try:
-                str_list += ['srcMemoryType : ' + str(self.srcMemoryType)]
-            except ValueError:
-                str_list += ['srcMemoryType : <ValueError>']
-            try:
-                str_list += ['srcHost : ' + hex(self.srcHost)]
-            except ValueError:
-                str_list += ['srcHost : <ValueError>']
-            try:
-                str_list += ['srcDevice : ' + str(self.srcDevice)]
-            except ValueError:
-                str_list += ['srcDevice : <ValueError>']
-            try:
-                str_list += ['srcArray : ' + str(self.srcArray)]
-            except ValueError:
-                str_list += ['srcArray : <ValueError>']
-            try:
-                str_list += ['srcContext : ' + str(self.srcContext)]
-            except ValueError:
-                str_list += ['srcContext : <ValueError>']
-            try:
-                str_list += ['srcPitch : ' + str(self.srcPitch)]
-            except ValueError:
-                str_list += ['srcPitch : <ValueError>']
-            try:
-                str_list += ['srcHeight : ' + str(self.srcHeight)]
-            except ValueError:
-                str_list += ['srcHeight : <ValueError>']
-            try:
-                str_list += ['dstXInBytes : ' + str(self.dstXInBytes)]
-            except ValueError:
-                str_list += ['dstXInBytes : <ValueError>']
-            try:
-                str_list += ['dstY : ' + str(self.dstY)]
-            except ValueError:
-                str_list += ['dstY : <ValueError>']
-            try:
-                str_list += ['dstZ : ' + str(self.dstZ)]
-            except ValueError:
-                str_list += ['dstZ : <ValueError>']
-            try:
-                str_list += ['dstLOD : ' + str(self.dstLOD)]
-            except ValueError:
-                str_list += ['dstLOD : <ValueError>']
-            try:
-                str_list += ['dstMemoryType : ' + str(self.dstMemoryType)]
-            except ValueError:
-                str_list += ['dstMemoryType : <ValueError>']
-            try:
-                str_list += ['dstHost : ' + hex(self.dstHost)]
-            except ValueError:
-                str_list += ['dstHost : <ValueError>']
-            try:
-                str_list += ['dstDevice : ' + str(self.dstDevice)]
-            except ValueError:
-                str_list += ['dstDevice : <ValueError>']
-            try:
-                str_list += ['dstArray : ' + str(self.dstArray)]
-            except ValueError:
-                str_list += ['dstArray : <ValueError>']
-            try:
-                str_list += ['dstContext : ' + str(self.dstContext)]
-            except ValueError:
-                str_list += ['dstContext : <ValueError>']
-            try:
-                str_list += ['dstPitch : ' + str(self.dstPitch)]
-            except ValueError:
-                str_list += ['dstPitch : <ValueError>']
-            try:
-                str_list += ['dstHeight : ' + str(self.dstHeight)]
-            except ValueError:
-                str_list += ['dstHeight : <ValueError>']
-            try:
-                str_list += ['WidthInBytes : ' + str(self.WidthInBytes)]
-            except ValueError:
-                str_list += ['WidthInBytes : <ValueError>']
-            try:
-                str_list += ['Height : ' + str(self.Height)]
-            except ValueError:
-                str_list += ['Height : <ValueError>']
-            try:
-                str_list += ['Depth : ' + str(self.Depth)]
-            except ValueError:
-                str_list += ['Depth : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def srcXInBytes(self):
-        return self._ptr[0].srcXInBytes
-    @srcXInBytes.setter
-    def srcXInBytes(self, size_t srcXInBytes):
-        self._ptr[0].srcXInBytes = srcXInBytes
-    @property
-    def srcY(self):
-        return self._ptr[0].srcY
-    @srcY.setter
-    def srcY(self, size_t srcY):
-        self._ptr[0].srcY = srcY
-    @property
-    def srcZ(self):
-        return self._ptr[0].srcZ
-    @srcZ.setter
-    def srcZ(self, size_t srcZ):
-        self._ptr[0].srcZ = srcZ
-    @property
-    def srcLOD(self):
-        return self._ptr[0].srcLOD
-    @srcLOD.setter
-    def srcLOD(self, size_t srcLOD):
-        self._ptr[0].srcLOD = srcLOD
-    @property
-    def srcMemoryType(self):
-        return CUmemorytype(self._ptr[0].srcMemoryType)
-    @srcMemoryType.setter
-    def srcMemoryType(self, srcMemoryType not None : CUmemorytype):
-        self._ptr[0].srcMemoryType = srcMemoryType.value
-    @property
-    def srcHost(self):
-        return <void_ptr>self._ptr[0].srcHost
-    @srcHost.setter
-    def srcHost(self, srcHost):
-        _cysrcHost = utils.HelperInputVoidPtr(srcHost)
-        self._ptr[0].srcHost = <void*><void_ptr>_cysrcHost.cptr
-    @property
-    def srcDevice(self):
-        return self._srcDevice
-    @srcDevice.setter
-    def srcDevice(self, srcDevice):
-        cdef cydriver.CUdeviceptr cysrcDevice
-        if srcDevice is None:
-            cysrcDevice = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(srcDevice, (CUdeviceptr)):
-            psrcDevice = int(srcDevice)
-            cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-        else:
-            psrcDevice = int(CUdeviceptr(srcDevice))
-            cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-        self._srcDevice._ptr[0] = cysrcDevice
-
-    @property
-    def srcArray(self):
-        return self._srcArray
-    @srcArray.setter
-    def srcArray(self, srcArray):
-        cdef cydriver.CUarray cysrcArray
-        if srcArray is None:
-            cysrcArray = <cydriver.CUarray><void_ptr>0
-        elif isinstance(srcArray, (CUarray,)):
-            psrcArray = int(srcArray)
-            cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-        else:
-            psrcArray = int(CUarray(srcArray))
-            cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-        self._srcArray._ptr[0] = cysrcArray
-    @property
-    def srcContext(self):
-        return self._srcContext
-    @srcContext.setter
-    def srcContext(self, srcContext):
-        cdef cydriver.CUcontext cysrcContext
-        if srcContext is None:
-            cysrcContext = <cydriver.CUcontext><void_ptr>0
-        elif isinstance(srcContext, (CUcontext,)):
-            psrcContext = int(srcContext)
-            cysrcContext = <cydriver.CUcontext><void_ptr>psrcContext
-        else:
-            psrcContext = int(CUcontext(srcContext))
-            cysrcContext = <cydriver.CUcontext><void_ptr>psrcContext
-        self._srcContext._ptr[0] = cysrcContext
-    @property
-    def srcPitch(self):
-        return self._ptr[0].srcPitch
-    @srcPitch.setter
-    def srcPitch(self, size_t srcPitch):
-        self._ptr[0].srcPitch = srcPitch
-    @property
-    def srcHeight(self):
-        return self._ptr[0].srcHeight
-    @srcHeight.setter
-    def srcHeight(self, size_t srcHeight):
-        self._ptr[0].srcHeight = srcHeight
-    @property
-    def dstXInBytes(self):
-        return self._ptr[0].dstXInBytes
-    @dstXInBytes.setter
-    def dstXInBytes(self, size_t dstXInBytes):
-        self._ptr[0].dstXInBytes = dstXInBytes
-    @property
-    def dstY(self):
-        return self._ptr[0].dstY
-    @dstY.setter
-    def dstY(self, size_t dstY):
-        self._ptr[0].dstY = dstY
-    @property
-    def dstZ(self):
-        return self._ptr[0].dstZ
-    @dstZ.setter
-    def dstZ(self, size_t dstZ):
-        self._ptr[0].dstZ = dstZ
-    @property
-    def dstLOD(self):
-        return self._ptr[0].dstLOD
-    @dstLOD.setter
-    def dstLOD(self, size_t dstLOD):
-        self._ptr[0].dstLOD = dstLOD
-    @property
-    def dstMemoryType(self):
-        return CUmemorytype(self._ptr[0].dstMemoryType)
-    @dstMemoryType.setter
-    def dstMemoryType(self, dstMemoryType not None : CUmemorytype):
-        self._ptr[0].dstMemoryType = dstMemoryType.value
-    @property
-    def dstHost(self):
-        return <void_ptr>self._ptr[0].dstHost
-    @dstHost.setter
-    def dstHost(self, dstHost):
-        _cydstHost = utils.HelperInputVoidPtr(dstHost)
-        self._ptr[0].dstHost = <void*><void_ptr>_cydstHost.cptr
-    @property
-    def dstDevice(self):
-        return self._dstDevice
-    @dstDevice.setter
-    def dstDevice(self, dstDevice):
-        cdef cydriver.CUdeviceptr cydstDevice
-        if dstDevice is None:
-            cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(dstDevice, (CUdeviceptr)):
-            pdstDevice = int(dstDevice)
-            cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-        else:
-            pdstDevice = int(CUdeviceptr(dstDevice))
-            cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-        self._dstDevice._ptr[0] = cydstDevice
-
-    @property
-    def dstArray(self):
-        return self._dstArray
-    @dstArray.setter
-    def dstArray(self, dstArray):
-        cdef cydriver.CUarray cydstArray
-        if dstArray is None:
-            cydstArray = <cydriver.CUarray><void_ptr>0
-        elif isinstance(dstArray, (CUarray,)):
-            pdstArray = int(dstArray)
-            cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-        else:
-            pdstArray = int(CUarray(dstArray))
-            cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-        self._dstArray._ptr[0] = cydstArray
-    @property
-    def dstContext(self):
-        return self._dstContext
-    @dstContext.setter
-    def dstContext(self, dstContext):
-        cdef cydriver.CUcontext cydstContext
-        if dstContext is None:
-            cydstContext = <cydriver.CUcontext><void_ptr>0
-        elif isinstance(dstContext, (CUcontext,)):
-            pdstContext = int(dstContext)
-            cydstContext = <cydriver.CUcontext><void_ptr>pdstContext
-        else:
-            pdstContext = int(CUcontext(dstContext))
-            cydstContext = <cydriver.CUcontext><void_ptr>pdstContext
-        self._dstContext._ptr[0] = cydstContext
-    @property
-    def dstPitch(self):
-        return self._ptr[0].dstPitch
-    @dstPitch.setter
-    def dstPitch(self, size_t dstPitch):
-        self._ptr[0].dstPitch = dstPitch
-    @property
-    def dstHeight(self):
-        return self._ptr[0].dstHeight
-    @dstHeight.setter
-    def dstHeight(self, size_t dstHeight):
-        self._ptr[0].dstHeight = dstHeight
-    @property
-    def WidthInBytes(self):
-        return self._ptr[0].WidthInBytes
-    @WidthInBytes.setter
-    def WidthInBytes(self, size_t WidthInBytes):
-        self._ptr[0].WidthInBytes = WidthInBytes
-    @property
-    def Height(self):
-        return self._ptr[0].Height
-    @Height.setter
-    def Height(self, size_t Height):
-        self._ptr[0].Height = Height
-    @property
-    def Depth(self):
-        return self._ptr[0].Depth
-    @Depth.setter
-    def Depth(self, size_t Depth):
-        self._ptr[0].Depth = Depth
-{{endif}}
-{{if 'struct CUDA_MEMCPY_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_MEMCPY_NODE_PARAMS_st:
-    """
-    Memcpy node parameters
-
-    Attributes
-    ----------
-    flags : int
-        Must be zero
-    reserved : int
-        Must be zero
-    copyCtx : CUcontext
-        Context on which to run the node
-    copyParams : CUDA_MEMCPY3D
-        Parameters for the memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_MEMCPY_NODE_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._copyCtx = CUcontext(_ptr=<void_ptr>&self._ptr[0].copyCtx)
-        self._copyParams = CUDA_MEMCPY3D(_ptr=<void_ptr>&self._ptr[0].copyParams)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            try:
-                str_list += ['copyCtx : ' + str(self.copyCtx)]
-            except ValueError:
-                str_list += ['copyCtx : <ValueError>']
-            try:
-                str_list += ['copyParams :\n' + '\n'.join(['    ' + line for line in str(self.copyParams).splitlines()])]
-            except ValueError:
-                str_list += ['copyParams : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, int reserved):
-        self._ptr[0].reserved = reserved
-    @property
-    def copyCtx(self):
-        return self._copyCtx
-    @copyCtx.setter
-    def copyCtx(self, copyCtx):
-        cdef cydriver.CUcontext cycopyCtx
-        if copyCtx is None:
-            cycopyCtx = <cydriver.CUcontext><void_ptr>0
-        elif isinstance(copyCtx, (CUcontext,)):
-            pcopyCtx = int(copyCtx)
-            cycopyCtx = <cydriver.CUcontext><void_ptr>pcopyCtx
-        else:
-            pcopyCtx = int(CUcontext(copyCtx))
-            cycopyCtx = <cydriver.CUcontext><void_ptr>pcopyCtx
-        self._copyCtx._ptr[0] = cycopyCtx
-    @property
-    def copyParams(self):
-        return self._copyParams
-    @copyParams.setter
-    def copyParams(self, copyParams not None : CUDA_MEMCPY3D):
-        string.memcpy(&self._ptr[0].copyParams, <cydriver.CUDA_MEMCPY3D*><void_ptr>copyParams.getPtr(), sizeof(self._ptr[0].copyParams))
-{{endif}}
-{{if 'struct CUDA_ARRAY_DESCRIPTOR_st' in found_types}}
-
-cdef class CUDA_ARRAY_DESCRIPTOR_st:
-    """
-    Array descriptor
-
-    Attributes
-    ----------
-    Width : size_t
-        Width of array
-    Height : size_t
-        Height of array
-    Format : CUarray_format
-        Array format
-    NumChannels : unsigned int
-        Channels per array element
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_ARRAY_DESCRIPTOR_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['Width : ' + str(self.Width)]
-            except ValueError:
-                str_list += ['Width : <ValueError>']
-            try:
-                str_list += ['Height : ' + str(self.Height)]
-            except ValueError:
-                str_list += ['Height : <ValueError>']
-            try:
-                str_list += ['Format : ' + str(self.Format)]
-            except ValueError:
-                str_list += ['Format : <ValueError>']
-            try:
-                str_list += ['NumChannels : ' + str(self.NumChannels)]
-            except ValueError:
-                str_list += ['NumChannels : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def Width(self):
-        return self._ptr[0].Width
-    @Width.setter
-    def Width(self, size_t Width):
-        self._ptr[0].Width = Width
-    @property
-    def Height(self):
-        return self._ptr[0].Height
-    @Height.setter
-    def Height(self, size_t Height):
-        self._ptr[0].Height = Height
-    @property
-    def Format(self):
-        return CUarray_format(self._ptr[0].Format)
-    @Format.setter
-    def Format(self, Format not None : CUarray_format):
-        self._ptr[0].Format = Format.value
-    @property
-    def NumChannels(self):
-        return self._ptr[0].NumChannels
-    @NumChannels.setter
-    def NumChannels(self, unsigned int NumChannels):
-        self._ptr[0].NumChannels = NumChannels
-{{endif}}
-{{if 'struct CUDA_ARRAY3D_DESCRIPTOR_st' in found_types}}
-
-cdef class CUDA_ARRAY3D_DESCRIPTOR_st:
-    """
-    3D array descriptor
-
-    Attributes
-    ----------
-    Width : size_t
-        Width of 3D array
-    Height : size_t
-        Height of 3D array
-    Depth : size_t
-        Depth of 3D array
-    Format : CUarray_format
-        Array format
-    NumChannels : unsigned int
-        Channels per array element
-    Flags : unsigned int
-        Flags
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_ARRAY3D_DESCRIPTOR_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['Width : ' + str(self.Width)]
-            except ValueError:
-                str_list += ['Width : <ValueError>']
-            try:
-                str_list += ['Height : ' + str(self.Height)]
-            except ValueError:
-                str_list += ['Height : <ValueError>']
-            try:
-                str_list += ['Depth : ' + str(self.Depth)]
-            except ValueError:
-                str_list += ['Depth : <ValueError>']
-            try:
-                str_list += ['Format : ' + str(self.Format)]
-            except ValueError:
-                str_list += ['Format : <ValueError>']
-            try:
-                str_list += ['NumChannels : ' + str(self.NumChannels)]
-            except ValueError:
-                str_list += ['NumChannels : <ValueError>']
-            try:
-                str_list += ['Flags : ' + str(self.Flags)]
-            except ValueError:
-                str_list += ['Flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def Width(self):
-        return self._ptr[0].Width
-    @Width.setter
-    def Width(self, size_t Width):
-        self._ptr[0].Width = Width
-    @property
-    def Height(self):
-        return self._ptr[0].Height
-    @Height.setter
-    def Height(self, size_t Height):
-        self._ptr[0].Height = Height
-    @property
-    def Depth(self):
-        return self._ptr[0].Depth
-    @Depth.setter
-    def Depth(self, size_t Depth):
-        self._ptr[0].Depth = Depth
-    @property
-    def Format(self):
-        return CUarray_format(self._ptr[0].Format)
-    @Format.setter
-    def Format(self, Format not None : CUarray_format):
-        self._ptr[0].Format = Format.value
-    @property
-    def NumChannels(self):
-        return self._ptr[0].NumChannels
-    @NumChannels.setter
-    def NumChannels(self, unsigned int NumChannels):
-        self._ptr[0].NumChannels = NumChannels
-    @property
-    def Flags(self):
-        return self._ptr[0].Flags
-    @Flags.setter
-    def Flags(self, unsigned int Flags):
-        self._ptr[0].Flags = Flags
-{{endif}}
-{{if 'struct CUDA_ARRAY_SPARSE_PROPERTIES_st' in found_types}}
-
-cdef class anon_struct5:
-    """
-    Attributes
-    ----------
-    width : unsigned int
-
-    height : unsigned int
-
-    depth : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_ARRAY_SPARSE_PROPERTIES_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].tileExtent
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            try:
-                str_list += ['depth : ' + str(self.depth)]
-            except ValueError:
-                str_list += ['depth : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def width(self):
-        return self._ptr[0].tileExtent.width
-    @width.setter
-    def width(self, unsigned int width):
-        self._ptr[0].tileExtent.width = width
-    @property
-    def height(self):
-        return self._ptr[0].tileExtent.height
-    @height.setter
-    def height(self, unsigned int height):
-        self._ptr[0].tileExtent.height = height
-    @property
-    def depth(self):
-        return self._ptr[0].tileExtent.depth
-    @depth.setter
-    def depth(self, unsigned int depth):
-        self._ptr[0].tileExtent.depth = depth
-{{endif}}
-{{if 'struct CUDA_ARRAY_SPARSE_PROPERTIES_st' in found_types}}
-
-cdef class CUDA_ARRAY_SPARSE_PROPERTIES_st:
-    """
-    CUDA array sparse properties
-
-    Attributes
-    ----------
-    tileExtent : anon_struct5
-
-    miptailFirstLevel : unsigned int
-        First mip level at which the mip tail begins.
-    miptailSize : unsigned long long
-        Total size of the mip tail.
-    flags : unsigned int
-        Flags will either be zero or
-        CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_ARRAY_SPARSE_PROPERTIES_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._tileExtent = anon_struct5(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['tileExtent :\n' + '\n'.join(['    ' + line for line in str(self.tileExtent).splitlines()])]
-            except ValueError:
-                str_list += ['tileExtent : <ValueError>']
-            try:
-                str_list += ['miptailFirstLevel : ' + str(self.miptailFirstLevel)]
-            except ValueError:
-                str_list += ['miptailFirstLevel : <ValueError>']
-            try:
-                str_list += ['miptailSize : ' + str(self.miptailSize)]
-            except ValueError:
-                str_list += ['miptailSize : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def tileExtent(self):
-        return self._tileExtent
-    @tileExtent.setter
-    def tileExtent(self, tileExtent not None : anon_struct5):
-        string.memcpy(&self._ptr[0].tileExtent, <cydriver.anon_struct5*><void_ptr>tileExtent.getPtr(), sizeof(self._ptr[0].tileExtent))
-    @property
-    def miptailFirstLevel(self):
-        return self._ptr[0].miptailFirstLevel
-    @miptailFirstLevel.setter
-    def miptailFirstLevel(self, unsigned int miptailFirstLevel):
-        self._ptr[0].miptailFirstLevel = miptailFirstLevel
-    @property
-    def miptailSize(self):
-        return self._ptr[0].miptailSize
-    @miptailSize.setter
-    def miptailSize(self, unsigned long long miptailSize):
-        self._ptr[0].miptailSize = miptailSize
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct CUDA_ARRAY_MEMORY_REQUIREMENTS_st' in found_types}}
-
-cdef class CUDA_ARRAY_MEMORY_REQUIREMENTS_st:
-    """
-    CUDA array memory requirements
-
-    Attributes
-    ----------
-    size : size_t
-        Total required memory size
-    alignment : size_t
-        alignment requirement
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['size : ' + str(self.size)]
-            except ValueError:
-                str_list += ['size : <ValueError>']
-            try:
-                str_list += ['alignment : ' + str(self.alignment)]
-            except ValueError:
-                str_list += ['alignment : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def size(self):
-        return self._ptr[0].size
-    @size.setter
-    def size(self, size_t size):
-        self._ptr[0].size = size
-    @property
-    def alignment(self):
-        return self._ptr[0].alignment
-    @alignment.setter
-    def alignment(self, size_t alignment):
-        self._ptr[0].alignment = alignment
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_struct6:
-    """
-    Attributes
-    ----------
-    hArray : CUarray
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_RESOURCE_DESC_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._hArray = CUarray(_ptr=<void_ptr>&self._ptr[0].res.array.hArray)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].res.array
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['hArray : ' + str(self.hArray)]
-            except ValueError:
-                str_list += ['hArray : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def hArray(self):
-        return self._hArray
-    @hArray.setter
-    def hArray(self, hArray):
-        cdef cydriver.CUarray cyhArray
-        if hArray is None:
-            cyhArray = <cydriver.CUarray><void_ptr>0
-        elif isinstance(hArray, (CUarray,)):
-            phArray = int(hArray)
-            cyhArray = <cydriver.CUarray><void_ptr>phArray
-        else:
-            phArray = int(CUarray(hArray))
-            cyhArray = <cydriver.CUarray><void_ptr>phArray
-        self._hArray._ptr[0] = cyhArray
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_struct7:
-    """
-    Attributes
-    ----------
-    hMipmappedArray : CUmipmappedArray
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_RESOURCE_DESC_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._hMipmappedArray = CUmipmappedArray(_ptr=<void_ptr>&self._ptr[0].res.mipmap.hMipmappedArray)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].res.mipmap
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['hMipmappedArray : ' + str(self.hMipmappedArray)]
-            except ValueError:
-                str_list += ['hMipmappedArray : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def hMipmappedArray(self):
-        return self._hMipmappedArray
-    @hMipmappedArray.setter
-    def hMipmappedArray(self, hMipmappedArray):
-        cdef cydriver.CUmipmappedArray cyhMipmappedArray
-        if hMipmappedArray is None:
-            cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>0
-        elif isinstance(hMipmappedArray, (CUmipmappedArray,)):
-            phMipmappedArray = int(hMipmappedArray)
-            cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>phMipmappedArray
-        else:
-            phMipmappedArray = int(CUmipmappedArray(hMipmappedArray))
-            cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>phMipmappedArray
-        self._hMipmappedArray._ptr[0] = cyhMipmappedArray
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_struct8:
-    """
-    Attributes
-    ----------
-    devPtr : CUdeviceptr
-
-    format : CUarray_format
-
-    numChannels : unsigned int
-
-    sizeInBytes : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_RESOURCE_DESC_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._devPtr = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].res.linear.devPtr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].res.linear
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['devPtr : ' + str(self.devPtr)]
-            except ValueError:
-                str_list += ['devPtr : <ValueError>']
-            try:
-                str_list += ['format : ' + str(self.format)]
-            except ValueError:
-                str_list += ['format : <ValueError>']
-            try:
-                str_list += ['numChannels : ' + str(self.numChannels)]
-            except ValueError:
-                str_list += ['numChannels : <ValueError>']
-            try:
-                str_list += ['sizeInBytes : ' + str(self.sizeInBytes)]
-            except ValueError:
-                str_list += ['sizeInBytes : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def devPtr(self):
-        return self._devPtr
-    @devPtr.setter
-    def devPtr(self, devPtr):
-        cdef cydriver.CUdeviceptr cydevPtr
-        if devPtr is None:
-            cydevPtr = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(devPtr, (CUdeviceptr)):
-            pdevPtr = int(devPtr)
-            cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-        else:
-            pdevPtr = int(CUdeviceptr(devPtr))
-            cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-        self._devPtr._ptr[0] = cydevPtr
-
-    @property
-    def format(self):
-        return CUarray_format(self._ptr[0].res.linear.format)
-    @format.setter
-    def format(self, format not None : CUarray_format):
-        self._ptr[0].res.linear.format = format.value
-    @property
-    def numChannels(self):
-        return self._ptr[0].res.linear.numChannels
-    @numChannels.setter
-    def numChannels(self, unsigned int numChannels):
-        self._ptr[0].res.linear.numChannels = numChannels
-    @property
-    def sizeInBytes(self):
-        return self._ptr[0].res.linear.sizeInBytes
-    @sizeInBytes.setter
-    def sizeInBytes(self, size_t sizeInBytes):
-        self._ptr[0].res.linear.sizeInBytes = sizeInBytes
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_struct9:
-    """
-    Attributes
-    ----------
-    devPtr : CUdeviceptr
-
-    format : CUarray_format
-
-    numChannels : unsigned int
-
-    width : size_t
-
-    height : size_t
-
-    pitchInBytes : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_RESOURCE_DESC_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._devPtr = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].res.pitch2D.devPtr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].res.pitch2D
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['devPtr : ' + str(self.devPtr)]
-            except ValueError:
-                str_list += ['devPtr : <ValueError>']
-            try:
-                str_list += ['format : ' + str(self.format)]
-            except ValueError:
-                str_list += ['format : <ValueError>']
-            try:
-                str_list += ['numChannels : ' + str(self.numChannels)]
-            except ValueError:
-                str_list += ['numChannels : <ValueError>']
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            try:
-                str_list += ['pitchInBytes : ' + str(self.pitchInBytes)]
-            except ValueError:
-                str_list += ['pitchInBytes : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def devPtr(self):
-        return self._devPtr
-    @devPtr.setter
-    def devPtr(self, devPtr):
-        cdef cydriver.CUdeviceptr cydevPtr
-        if devPtr is None:
-            cydevPtr = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(devPtr, (CUdeviceptr)):
-            pdevPtr = int(devPtr)
-            cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-        else:
-            pdevPtr = int(CUdeviceptr(devPtr))
-            cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-        self._devPtr._ptr[0] = cydevPtr
-
-    @property
-    def format(self):
-        return CUarray_format(self._ptr[0].res.pitch2D.format)
-    @format.setter
-    def format(self, format not None : CUarray_format):
-        self._ptr[0].res.pitch2D.format = format.value
-    @property
-    def numChannels(self):
-        return self._ptr[0].res.pitch2D.numChannels
-    @numChannels.setter
-    def numChannels(self, unsigned int numChannels):
-        self._ptr[0].res.pitch2D.numChannels = numChannels
-    @property
-    def width(self):
-        return self._ptr[0].res.pitch2D.width
-    @width.setter
-    def width(self, size_t width):
-        self._ptr[0].res.pitch2D.width = width
-    @property
-    def height(self):
-        return self._ptr[0].res.pitch2D.height
-    @height.setter
-    def height(self, size_t height):
-        self._ptr[0].res.pitch2D.height = height
-    @property
-    def pitchInBytes(self):
-        return self._ptr[0].res.pitch2D.pitchInBytes
-    @pitchInBytes.setter
-    def pitchInBytes(self, size_t pitchInBytes):
-        self._ptr[0].res.pitch2D.pitchInBytes = pitchInBytes
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_struct10:
-    """
-    Attributes
-    ----------
-    reserved : List[int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_RESOURCE_DESC_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].res.reserved
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def reserved(self):
-        return self._ptr[0].res.reserved.reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].res.reserved.reserved = reserved
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class anon_union4:
-    """
-    Attributes
-    ----------
-    array : anon_struct6
-
-    mipmap : anon_struct7
-
-    linear : anon_struct8
-
-    pitch2D : anon_struct9
-
-    reserved : anon_struct10
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_RESOURCE_DESC_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._array = anon_struct6(_ptr=<void_ptr>self._ptr)
-        self._mipmap = anon_struct7(_ptr=<void_ptr>self._ptr)
-        self._linear = anon_struct8(_ptr=<void_ptr>self._ptr)
-        self._pitch2D = anon_struct9(_ptr=<void_ptr>self._ptr)
-        self._reserved = anon_struct10(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].res
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['array :\n' + '\n'.join(['    ' + line for line in str(self.array).splitlines()])]
-            except ValueError:
-                str_list += ['array : <ValueError>']
-            try:
-                str_list += ['mipmap :\n' + '\n'.join(['    ' + line for line in str(self.mipmap).splitlines()])]
-            except ValueError:
-                str_list += ['mipmap : <ValueError>']
-            try:
-                str_list += ['linear :\n' + '\n'.join(['    ' + line for line in str(self.linear).splitlines()])]
-            except ValueError:
-                str_list += ['linear : <ValueError>']
-            try:
-                str_list += ['pitch2D :\n' + '\n'.join(['    ' + line for line in str(self.pitch2D).splitlines()])]
-            except ValueError:
-                str_list += ['pitch2D : <ValueError>']
-            try:
-                str_list += ['reserved :\n' + '\n'.join(['    ' + line for line in str(self.reserved).splitlines()])]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def array(self):
-        return self._array
-    @array.setter
-    def array(self, array not None : anon_struct6):
-        string.memcpy(&self._ptr[0].res.array, <cydriver.anon_struct6*><void_ptr>array.getPtr(), sizeof(self._ptr[0].res.array))
-    @property
-    def mipmap(self):
-        return self._mipmap
-    @mipmap.setter
-    def mipmap(self, mipmap not None : anon_struct7):
-        string.memcpy(&self._ptr[0].res.mipmap, <cydriver.anon_struct7*><void_ptr>mipmap.getPtr(), sizeof(self._ptr[0].res.mipmap))
-    @property
-    def linear(self):
-        return self._linear
-    @linear.setter
-    def linear(self, linear not None : anon_struct8):
-        string.memcpy(&self._ptr[0].res.linear, <cydriver.anon_struct8*><void_ptr>linear.getPtr(), sizeof(self._ptr[0].res.linear))
-    @property
-    def pitch2D(self):
-        return self._pitch2D
-    @pitch2D.setter
-    def pitch2D(self, pitch2D not None : anon_struct9):
-        string.memcpy(&self._ptr[0].res.pitch2D, <cydriver.anon_struct9*><void_ptr>pitch2D.getPtr(), sizeof(self._ptr[0].res.pitch2D))
-    @property
-    def reserved(self):
-        return self._reserved
-    @reserved.setter
-    def reserved(self, reserved not None : anon_struct10):
-        string.memcpy(&self._ptr[0].res.reserved, <cydriver.anon_struct10*><void_ptr>reserved.getPtr(), sizeof(self._ptr[0].res.reserved))
-{{endif}}
-{{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-
-cdef class CUDA_RESOURCE_DESC_st:
-    """
-    CUDA Resource descriptor
-
-    Attributes
-    ----------
-    resType : CUresourcetype
-        Resource type
-    res : anon_union4
-
-    flags : unsigned int
-        Flags (must be zero)
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cydriver.CUDA_RESOURCE_DESC_st *>calloc(1, sizeof(cydriver.CUDA_RESOURCE_DESC_st))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cydriver.CUDA_RESOURCE_DESC_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._res = anon_union4(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['resType : ' + str(self.resType)]
-            except ValueError:
-                str_list += ['resType : <ValueError>']
-            try:
-                str_list += ['res :\n' + '\n'.join(['    ' + line for line in str(self.res).splitlines()])]
-            except ValueError:
-                str_list += ['res : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def resType(self):
-        return CUresourcetype(self._ptr[0].resType)
-    @resType.setter
-    def resType(self, resType not None : CUresourcetype):
-        self._ptr[0].resType = resType.value
-    @property
-    def res(self):
-        return self._res
-    @res.setter
-    def res(self, res not None : anon_union4):
-        string.memcpy(&self._ptr[0].res, <cydriver.anon_union4*><void_ptr>res.getPtr(), sizeof(self._ptr[0].res))
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-{{endif}}
-{{if 'struct CUDA_TEXTURE_DESC_st' in found_types}}
-
-cdef class CUDA_TEXTURE_DESC_st:
-    """
-    Texture descriptor
-
-    Attributes
-    ----------
-    addressMode : List[CUaddress_mode]
-        Address modes
-    filterMode : CUfilter_mode
-        Filter mode
-    flags : unsigned int
-        Flags
-    maxAnisotropy : unsigned int
-        Maximum anisotropy ratio
-    mipmapFilterMode : CUfilter_mode
-        Mipmap filter mode
-    mipmapLevelBias : float
-        Mipmap level bias
-    minMipmapLevelClamp : float
-        Mipmap minimum level clamp
-    maxMipmapLevelClamp : float
-        Mipmap maximum level clamp
-    borderColor : List[float]
-        Border Color
-    reserved : List[int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_TEXTURE_DESC_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['addressMode : ' + str(self.addressMode)]
-            except ValueError:
-                str_list += ['addressMode : <ValueError>']
-            try:
-                str_list += ['filterMode : ' + str(self.filterMode)]
-            except ValueError:
-                str_list += ['filterMode : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['maxAnisotropy : ' + str(self.maxAnisotropy)]
-            except ValueError:
-                str_list += ['maxAnisotropy : <ValueError>']
-            try:
-                str_list += ['mipmapFilterMode : ' + str(self.mipmapFilterMode)]
-            except ValueError:
-                str_list += ['mipmapFilterMode : <ValueError>']
-            try:
-                str_list += ['mipmapLevelBias : ' + str(self.mipmapLevelBias)]
-            except ValueError:
-                str_list += ['mipmapLevelBias : <ValueError>']
-            try:
-                str_list += ['minMipmapLevelClamp : ' + str(self.minMipmapLevelClamp)]
-            except ValueError:
-                str_list += ['minMipmapLevelClamp : <ValueError>']
-            try:
-                str_list += ['maxMipmapLevelClamp : ' + str(self.maxMipmapLevelClamp)]
-            except ValueError:
-                str_list += ['maxMipmapLevelClamp : <ValueError>']
-            try:
-                str_list += ['borderColor : ' + str(self.borderColor)]
-            except ValueError:
-                str_list += ['borderColor : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def addressMode(self):
-        return [CUaddress_mode(_x) for _x in list(self._ptr[0].addressMode)]
-    @addressMode.setter
-    def addressMode(self, addressMode):
-        self._ptr[0].addressMode = [_x.value for _x in addressMode]
-    @property
-    def filterMode(self):
-        return CUfilter_mode(self._ptr[0].filterMode)
-    @filterMode.setter
-    def filterMode(self, filterMode not None : CUfilter_mode):
-        self._ptr[0].filterMode = filterMode.value
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def maxAnisotropy(self):
-        return self._ptr[0].maxAnisotropy
-    @maxAnisotropy.setter
-    def maxAnisotropy(self, unsigned int maxAnisotropy):
-        self._ptr[0].maxAnisotropy = maxAnisotropy
-    @property
-    def mipmapFilterMode(self):
-        return CUfilter_mode(self._ptr[0].mipmapFilterMode)
-    @mipmapFilterMode.setter
-    def mipmapFilterMode(self, mipmapFilterMode not None : CUfilter_mode):
-        self._ptr[0].mipmapFilterMode = mipmapFilterMode.value
-    @property
-    def mipmapLevelBias(self):
-        return self._ptr[0].mipmapLevelBias
-    @mipmapLevelBias.setter
-    def mipmapLevelBias(self, float mipmapLevelBias):
-        self._ptr[0].mipmapLevelBias = mipmapLevelBias
-    @property
-    def minMipmapLevelClamp(self):
-        return self._ptr[0].minMipmapLevelClamp
-    @minMipmapLevelClamp.setter
-    def minMipmapLevelClamp(self, float minMipmapLevelClamp):
-        self._ptr[0].minMipmapLevelClamp = minMipmapLevelClamp
-    @property
-    def maxMipmapLevelClamp(self):
-        return self._ptr[0].maxMipmapLevelClamp
-    @maxMipmapLevelClamp.setter
-    def maxMipmapLevelClamp(self, float maxMipmapLevelClamp):
-        self._ptr[0].maxMipmapLevelClamp = maxMipmapLevelClamp
-    @property
-    def borderColor(self):
-        return self._ptr[0].borderColor
-    @borderColor.setter
-    def borderColor(self, borderColor):
-        self._ptr[0].borderColor = borderColor
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct CUDA_RESOURCE_VIEW_DESC_st' in found_types}}
-
-cdef class CUDA_RESOURCE_VIEW_DESC_st:
-    """
-    Resource view descriptor
-
-    Attributes
-    ----------
-    format : CUresourceViewFormat
-        Resource view format
-    width : size_t
-        Width of the resource view
-    height : size_t
-        Height of the resource view
-    depth : size_t
-        Depth of the resource view
-    firstMipmapLevel : unsigned int
-        First defined mipmap level
-    lastMipmapLevel : unsigned int
-        Last defined mipmap level
-    firstLayer : unsigned int
-        First layer index
-    lastLayer : unsigned int
-        Last layer index
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_RESOURCE_VIEW_DESC_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['format : ' + str(self.format)]
-            except ValueError:
-                str_list += ['format : <ValueError>']
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            try:
-                str_list += ['depth : ' + str(self.depth)]
-            except ValueError:
-                str_list += ['depth : <ValueError>']
-            try:
-                str_list += ['firstMipmapLevel : ' + str(self.firstMipmapLevel)]
-            except ValueError:
-                str_list += ['firstMipmapLevel : <ValueError>']
-            try:
-                str_list += ['lastMipmapLevel : ' + str(self.lastMipmapLevel)]
-            except ValueError:
-                str_list += ['lastMipmapLevel : <ValueError>']
-            try:
-                str_list += ['firstLayer : ' + str(self.firstLayer)]
-            except ValueError:
-                str_list += ['firstLayer : <ValueError>']
-            try:
-                str_list += ['lastLayer : ' + str(self.lastLayer)]
-            except ValueError:
-                str_list += ['lastLayer : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def format(self):
-        return CUresourceViewFormat(self._ptr[0].format)
-    @format.setter
-    def format(self, format not None : CUresourceViewFormat):
-        self._ptr[0].format = format.value
-    @property
-    def width(self):
-        return self._ptr[0].width
-    @width.setter
-    def width(self, size_t width):
-        self._ptr[0].width = width
-    @property
-    def height(self):
-        return self._ptr[0].height
-    @height.setter
-    def height(self, size_t height):
-        self._ptr[0].height = height
-    @property
-    def depth(self):
-        return self._ptr[0].depth
-    @depth.setter
-    def depth(self, size_t depth):
-        self._ptr[0].depth = depth
-    @property
-    def firstMipmapLevel(self):
-        return self._ptr[0].firstMipmapLevel
-    @firstMipmapLevel.setter
-    def firstMipmapLevel(self, unsigned int firstMipmapLevel):
-        self._ptr[0].firstMipmapLevel = firstMipmapLevel
-    @property
-    def lastMipmapLevel(self):
-        return self._ptr[0].lastMipmapLevel
-    @lastMipmapLevel.setter
-    def lastMipmapLevel(self, unsigned int lastMipmapLevel):
-        self._ptr[0].lastMipmapLevel = lastMipmapLevel
-    @property
-    def firstLayer(self):
-        return self._ptr[0].firstLayer
-    @firstLayer.setter
-    def firstLayer(self, unsigned int firstLayer):
-        self._ptr[0].firstLayer = firstLayer
-    @property
-    def lastLayer(self):
-        return self._ptr[0].lastLayer
-    @lastLayer.setter
-    def lastLayer(self, unsigned int lastLayer):
-        self._ptr[0].lastLayer = lastLayer
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct CUtensorMap_st' in found_types}}
-
-cdef class CUtensorMap_st:
-    """
-    Tensor map descriptor. Requires compiler support for aligning to 64
-    bytes.
-
-    Attributes
-    ----------
-    opaque : List[cuuint64_t]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUtensorMap_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['opaque : ' + str(self.opaque)]
-            except ValueError:
-                str_list += ['opaque : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def opaque(self):
-        return [cuuint64_t(init_value=_opaque) for _opaque in self._ptr[0].opaque]
-    @opaque.setter
-    def opaque(self, opaque):
-        self._ptr[0].opaque = opaque
-
-{{endif}}
-{{if 'struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st' in found_types}}
-
-cdef class CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st:
-    """
-    GPU Direct v3 tokens
-
-    Attributes
-    ----------
-    p2pToken : unsigned long long
-
-    vaSpaceToken : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['p2pToken : ' + str(self.p2pToken)]
-            except ValueError:
-                str_list += ['p2pToken : <ValueError>']
-            try:
-                str_list += ['vaSpaceToken : ' + str(self.vaSpaceToken)]
-            except ValueError:
-                str_list += ['vaSpaceToken : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def p2pToken(self):
-        return self._ptr[0].p2pToken
-    @p2pToken.setter
-    def p2pToken(self, unsigned long long p2pToken):
-        self._ptr[0].p2pToken = p2pToken
-    @property
-    def vaSpaceToken(self):
-        return self._ptr[0].vaSpaceToken
-    @vaSpaceToken.setter
-    def vaSpaceToken(self, unsigned int vaSpaceToken):
-        self._ptr[0].vaSpaceToken = vaSpaceToken
-{{endif}}
-{{if 'struct CUDA_LAUNCH_PARAMS_st' in found_types}}
-
-cdef class CUDA_LAUNCH_PARAMS_st:
-    """
-    Kernel launch parameters
-
-    Attributes
-    ----------
-    function : CUfunction
-        Kernel to launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    hStream : CUstream
-        Stream identifier
-    kernelParams : Any
-        Array of pointers to kernel parameters
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_LAUNCH_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._function = CUfunction(_ptr=<void_ptr>&self._ptr[0].function)
-        self._hStream = CUstream(_ptr=<void_ptr>&self._ptr[0].hStream)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['function : ' + str(self.function)]
-            except ValueError:
-                str_list += ['function : <ValueError>']
-            try:
-                str_list += ['gridDimX : ' + str(self.gridDimX)]
-            except ValueError:
-                str_list += ['gridDimX : <ValueError>']
-            try:
-                str_list += ['gridDimY : ' + str(self.gridDimY)]
-            except ValueError:
-                str_list += ['gridDimY : <ValueError>']
-            try:
-                str_list += ['gridDimZ : ' + str(self.gridDimZ)]
-            except ValueError:
-                str_list += ['gridDimZ : <ValueError>']
-            try:
-                str_list += ['blockDimX : ' + str(self.blockDimX)]
-            except ValueError:
-                str_list += ['blockDimX : <ValueError>']
-            try:
-                str_list += ['blockDimY : ' + str(self.blockDimY)]
-            except ValueError:
-                str_list += ['blockDimY : <ValueError>']
-            try:
-                str_list += ['blockDimZ : ' + str(self.blockDimZ)]
-            except ValueError:
-                str_list += ['blockDimZ : <ValueError>']
-            try:
-                str_list += ['sharedMemBytes : ' + str(self.sharedMemBytes)]
-            except ValueError:
-                str_list += ['sharedMemBytes : <ValueError>']
-            try:
-                str_list += ['hStream : ' + str(self.hStream)]
-            except ValueError:
-                str_list += ['hStream : <ValueError>']
-            try:
-                str_list += ['kernelParams : ' + str(self.kernelParams)]
-            except ValueError:
-                str_list += ['kernelParams : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def function(self):
-        return self._function
-    @function.setter
-    def function(self, function):
-        cdef cydriver.CUfunction cyfunction
-        if function is None:
-            cyfunction = <cydriver.CUfunction><void_ptr>0
-        elif isinstance(function, (CUfunction,)):
-            pfunction = int(function)
-            cyfunction = <cydriver.CUfunction><void_ptr>pfunction
-        else:
-            pfunction = int(CUfunction(function))
-            cyfunction = <cydriver.CUfunction><void_ptr>pfunction
-        self._function._ptr[0] = cyfunction
-    @property
-    def gridDimX(self):
-        return self._ptr[0].gridDimX
-    @gridDimX.setter
-    def gridDimX(self, unsigned int gridDimX):
-        self._ptr[0].gridDimX = gridDimX
-    @property
-    def gridDimY(self):
-        return self._ptr[0].gridDimY
-    @gridDimY.setter
-    def gridDimY(self, unsigned int gridDimY):
-        self._ptr[0].gridDimY = gridDimY
-    @property
-    def gridDimZ(self):
-        return self._ptr[0].gridDimZ
-    @gridDimZ.setter
-    def gridDimZ(self, unsigned int gridDimZ):
-        self._ptr[0].gridDimZ = gridDimZ
-    @property
-    def blockDimX(self):
-        return self._ptr[0].blockDimX
-    @blockDimX.setter
-    def blockDimX(self, unsigned int blockDimX):
-        self._ptr[0].blockDimX = blockDimX
-    @property
-    def blockDimY(self):
-        return self._ptr[0].blockDimY
-    @blockDimY.setter
-    def blockDimY(self, unsigned int blockDimY):
-        self._ptr[0].blockDimY = blockDimY
-    @property
-    def blockDimZ(self):
-        return self._ptr[0].blockDimZ
-    @blockDimZ.setter
-    def blockDimZ(self, unsigned int blockDimZ):
-        self._ptr[0].blockDimZ = blockDimZ
-    @property
-    def sharedMemBytes(self):
-        return self._ptr[0].sharedMemBytes
-    @sharedMemBytes.setter
-    def sharedMemBytes(self, unsigned int sharedMemBytes):
-        self._ptr[0].sharedMemBytes = sharedMemBytes
-    @property
-    def hStream(self):
-        return self._hStream
-    @hStream.setter
-    def hStream(self, hStream):
-        cdef cydriver.CUstream cyhStream
-        if hStream is None:
-            cyhStream = <cydriver.CUstream><void_ptr>0
-        elif isinstance(hStream, (CUstream,)):
-            phStream = int(hStream)
-            cyhStream = <cydriver.CUstream><void_ptr>phStream
-        else:
-            phStream = int(CUstream(hStream))
-            cyhStream = <cydriver.CUstream><void_ptr>phStream
-        self._hStream._ptr[0] = cyhStream
-    @property
-    def kernelParams(self):
-        return <void_ptr>self._ptr[0].kernelParams
-    @kernelParams.setter
-    def kernelParams(self, kernelParams):
-        self._cykernelParams = utils.HelperKernelParams(kernelParams)
-        self._ptr[0].kernelParams = <void**><void_ptr>self._cykernelParams.ckernelParams
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st' in found_types}}
-
-cdef class anon_struct11:
-    """
-    Attributes
-    ----------
-    handle : Any
-
-    name : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].handle.win32
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['handle : ' + hex(self.handle)]
-            except ValueError:
-                str_list += ['handle : <ValueError>']
-            try:
-                str_list += ['name : ' + hex(self.name)]
-            except ValueError:
-                str_list += ['name : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def handle(self):
-        return <void_ptr>self._ptr[0].handle.win32.handle
-    @handle.setter
-    def handle(self, handle):
-        _cyhandle = utils.HelperInputVoidPtr(handle)
-        self._ptr[0].handle.win32.handle = <void*><void_ptr>_cyhandle.cptr
-    @property
-    def name(self):
-        return <void_ptr>self._ptr[0].handle.win32.name
-    @name.setter
-    def name(self, name):
-        _cyname = utils.HelperInputVoidPtr(name)
-        self._ptr[0].handle.win32.name = <void*><void_ptr>_cyname.cptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st' in found_types}}
-
-cdef class anon_union5:
-    """
-    Attributes
-    ----------
-    fd : int
-
-    win32 : anon_struct11
-
-    nvSciBufObject : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._win32 = anon_struct11(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].handle
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fd : ' + str(self.fd)]
-            except ValueError:
-                str_list += ['fd : <ValueError>']
-            try:
-                str_list += ['win32 :\n' + '\n'.join(['    ' + line for line in str(self.win32).splitlines()])]
-            except ValueError:
-                str_list += ['win32 : <ValueError>']
-            try:
-                str_list += ['nvSciBufObject : ' + hex(self.nvSciBufObject)]
-            except ValueError:
-                str_list += ['nvSciBufObject : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fd(self):
-        return self._ptr[0].handle.fd
-    @fd.setter
-    def fd(self, int fd):
-        self._ptr[0].handle.fd = fd
-    @property
-    def win32(self):
-        return self._win32
-    @win32.setter
-    def win32(self, win32 not None : anon_struct11):
-        string.memcpy(&self._ptr[0].handle.win32, <cydriver.anon_struct11*><void_ptr>win32.getPtr(), sizeof(self._ptr[0].handle.win32))
-    @property
-    def nvSciBufObject(self):
-        return <void_ptr>self._ptr[0].handle.nvSciBufObject
-    @nvSciBufObject.setter
-    def nvSciBufObject(self, nvSciBufObject):
-        _cynvSciBufObject = utils.HelperInputVoidPtr(nvSciBufObject)
-        self._ptr[0].handle.nvSciBufObject = <void*><void_ptr>_cynvSciBufObject.cptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st:
-    """
-    External memory handle descriptor
-
-    Attributes
-    ----------
-    type : CUexternalMemoryHandleType
-        Type of the handle
-    handle : anon_union5
-
-    size : unsigned long long
-        Size of the memory allocation
-    flags : unsigned int
-        Flags must either be zero or CUDA_EXTERNAL_MEMORY_DEDICATED
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st *>calloc(1, sizeof(cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._handle = anon_union5(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['handle :\n' + '\n'.join(['    ' + line for line in str(self.handle).splitlines()])]
-            except ValueError:
-                str_list += ['handle : <ValueError>']
-            try:
-                str_list += ['size : ' + str(self.size)]
-            except ValueError:
-                str_list += ['size : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return CUexternalMemoryHandleType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : CUexternalMemoryHandleType):
-        self._ptr[0].type = type.value
-    @property
-    def handle(self):
-        return self._handle
-    @handle.setter
-    def handle(self, handle not None : anon_union5):
-        string.memcpy(&self._ptr[0].handle, <cydriver.anon_union5*><void_ptr>handle.getPtr(), sizeof(self._ptr[0].handle))
-    @property
-    def size(self):
-        return self._ptr[0].size
-    @size.setter
-    def size(self, unsigned long long size):
-        self._ptr[0].size = size
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st:
-    """
-    External memory buffer descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the buffer's base is
-    size : unsigned long long
-        Size of the buffer
-    flags : unsigned int
-        Flags reserved for future use. Must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['offset : ' + str(self.offset)]
-            except ValueError:
-                str_list += ['offset : <ValueError>']
-            try:
-                str_list += ['size : ' + str(self.size)]
-            except ValueError:
-                str_list += ['size : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def offset(self):
-        return self._ptr[0].offset
-    @offset.setter
-    def offset(self, unsigned long long offset):
-        self._ptr[0].offset = offset
-    @property
-    def size(self):
-        return self._ptr[0].size
-    @size.setter
-    def size(self, unsigned long long size):
-        self._ptr[0].size = size
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st:
-    """
-    External memory mipmap descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the base level of the mipmap
-        chain is.
-    arrayDesc : CUDA_ARRAY3D_DESCRIPTOR
-        Format, dimension and type of base level of the mipmap chain
-    numLevels : unsigned int
-        Total number of levels in the mipmap chain
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._arrayDesc = CUDA_ARRAY3D_DESCRIPTOR(_ptr=<void_ptr>&self._ptr[0].arrayDesc)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['offset : ' + str(self.offset)]
-            except ValueError:
-                str_list += ['offset : <ValueError>']
-            try:
-                str_list += ['arrayDesc :\n' + '\n'.join(['    ' + line for line in str(self.arrayDesc).splitlines()])]
-            except ValueError:
-                str_list += ['arrayDesc : <ValueError>']
-            try:
-                str_list += ['numLevels : ' + str(self.numLevels)]
-            except ValueError:
-                str_list += ['numLevels : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def offset(self):
-        return self._ptr[0].offset
-    @offset.setter
-    def offset(self, unsigned long long offset):
-        self._ptr[0].offset = offset
-    @property
-    def arrayDesc(self):
-        return self._arrayDesc
-    @arrayDesc.setter
-    def arrayDesc(self, arrayDesc not None : CUDA_ARRAY3D_DESCRIPTOR):
-        string.memcpy(&self._ptr[0].arrayDesc, <cydriver.CUDA_ARRAY3D_DESCRIPTOR*><void_ptr>arrayDesc.getPtr(), sizeof(self._ptr[0].arrayDesc))
-    @property
-    def numLevels(self):
-        return self._ptr[0].numLevels
-    @numLevels.setter
-    def numLevels(self, unsigned int numLevels):
-        self._ptr[0].numLevels = numLevels
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st' in found_types}}
-
-cdef class anon_struct12:
-    """
-    Attributes
-    ----------
-    handle : Any
-
-    name : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].handle.win32
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['handle : ' + hex(self.handle)]
-            except ValueError:
-                str_list += ['handle : <ValueError>']
-            try:
-                str_list += ['name : ' + hex(self.name)]
-            except ValueError:
-                str_list += ['name : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def handle(self):
-        return <void_ptr>self._ptr[0].handle.win32.handle
-    @handle.setter
-    def handle(self, handle):
-        _cyhandle = utils.HelperInputVoidPtr(handle)
-        self._ptr[0].handle.win32.handle = <void*><void_ptr>_cyhandle.cptr
-    @property
-    def name(self):
-        return <void_ptr>self._ptr[0].handle.win32.name
-    @name.setter
-    def name(self, name):
-        _cyname = utils.HelperInputVoidPtr(name)
-        self._ptr[0].handle.win32.name = <void*><void_ptr>_cyname.cptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st' in found_types}}
-
-cdef class anon_union6:
-    """
-    Attributes
-    ----------
-    fd : int
-
-    win32 : anon_struct12
-
-    nvSciSyncObj : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._win32 = anon_struct12(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].handle
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fd : ' + str(self.fd)]
-            except ValueError:
-                str_list += ['fd : <ValueError>']
-            try:
-                str_list += ['win32 :\n' + '\n'.join(['    ' + line for line in str(self.win32).splitlines()])]
-            except ValueError:
-                str_list += ['win32 : <ValueError>']
-            try:
-                str_list += ['nvSciSyncObj : ' + hex(self.nvSciSyncObj)]
-            except ValueError:
-                str_list += ['nvSciSyncObj : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fd(self):
-        return self._ptr[0].handle.fd
-    @fd.setter
-    def fd(self, int fd):
-        self._ptr[0].handle.fd = fd
-    @property
-    def win32(self):
-        return self._win32
-    @win32.setter
-    def win32(self, win32 not None : anon_struct12):
-        string.memcpy(&self._ptr[0].handle.win32, <cydriver.anon_struct12*><void_ptr>win32.getPtr(), sizeof(self._ptr[0].handle.win32))
-    @property
-    def nvSciSyncObj(self):
-        return <void_ptr>self._ptr[0].handle.nvSciSyncObj
-    @nvSciSyncObj.setter
-    def nvSciSyncObj(self, nvSciSyncObj):
-        _cynvSciSyncObj = utils.HelperInputVoidPtr(nvSciSyncObj)
-        self._ptr[0].handle.nvSciSyncObj = <void*><void_ptr>_cynvSciSyncObj.cptr
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st:
-    """
-    External semaphore handle descriptor
-
-    Attributes
-    ----------
-    type : CUexternalSemaphoreHandleType
-        Type of the handle
-    handle : anon_union6
-
-    flags : unsigned int
-        Flags reserved for the future. Must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st *>calloc(1, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._handle = anon_union6(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['handle :\n' + '\n'.join(['    ' + line for line in str(self.handle).splitlines()])]
-            except ValueError:
-                str_list += ['handle : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return CUexternalSemaphoreHandleType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : CUexternalSemaphoreHandleType):
-        self._ptr[0].type = type.value
-    @property
-    def handle(self):
-        return self._handle
-    @handle.setter
-    def handle(self, handle not None : anon_union6):
-        string.memcpy(&self._ptr[0].handle, <cydriver.anon_union6*><void_ptr>handle.getPtr(), sizeof(self._ptr[0].handle))
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st' in found_types}}
-
-cdef class anon_struct13:
-    """
-    Attributes
-    ----------
-    value : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.fence
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['value : ' + str(self.value)]
-            except ValueError:
-                str_list += ['value : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def value(self):
-        return self._ptr[0].params.fence.value
-    @value.setter
-    def value(self, unsigned long long value):
-        self._ptr[0].params.fence.value = value
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st' in found_types}}
-
-cdef class anon_union7:
-    """
-    Attributes
-    ----------
-    fence : Any
-
-    reserved : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.nvSciSync
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fence : ' + hex(self.fence)]
-            except ValueError:
-                str_list += ['fence : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fence(self):
-        return <void_ptr>self._ptr[0].params.nvSciSync.fence
-    @fence.setter
-    def fence(self, fence):
-        _cyfence = utils.HelperInputVoidPtr(fence)
-        self._ptr[0].params.nvSciSync.fence = <void*><void_ptr>_cyfence.cptr
-    @property
-    def reserved(self):
-        return self._ptr[0].params.nvSciSync.reserved
-    @reserved.setter
-    def reserved(self, unsigned long long reserved):
-        self._ptr[0].params.nvSciSync.reserved = reserved
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st' in found_types}}
-
-cdef class anon_struct14:
-    """
-    Attributes
-    ----------
-    key : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.keyedMutex
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['key : ' + str(self.key)]
-            except ValueError:
-                str_list += ['key : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def key(self):
-        return self._ptr[0].params.keyedMutex.key
-    @key.setter
-    def key(self, unsigned long long key):
-        self._ptr[0].params.keyedMutex.key = key
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st' in found_types}}
-
-cdef class anon_struct15:
-    """
-    Attributes
-    ----------
-    fence : anon_struct13
-
-    nvSciSync : anon_union7
-
-    keyedMutex : anon_struct14
-
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._fence = anon_struct13(_ptr=<void_ptr>self._ptr)
-        self._nvSciSync = anon_union7(_ptr=<void_ptr>self._ptr)
-        self._keyedMutex = anon_struct14(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fence :\n' + '\n'.join(['    ' + line for line in str(self.fence).splitlines()])]
-            except ValueError:
-                str_list += ['fence : <ValueError>']
-            try:
-                str_list += ['nvSciSync :\n' + '\n'.join(['    ' + line for line in str(self.nvSciSync).splitlines()])]
-            except ValueError:
-                str_list += ['nvSciSync : <ValueError>']
-            try:
-                str_list += ['keyedMutex :\n' + '\n'.join(['    ' + line for line in str(self.keyedMutex).splitlines()])]
-            except ValueError:
-                str_list += ['keyedMutex : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fence(self):
-        return self._fence
-    @fence.setter
-    def fence(self, fence not None : anon_struct13):
-        string.memcpy(&self._ptr[0].params.fence, <cydriver.anon_struct13*><void_ptr>fence.getPtr(), sizeof(self._ptr[0].params.fence))
-    @property
-    def nvSciSync(self):
-        return self._nvSciSync
-    @nvSciSync.setter
-    def nvSciSync(self, nvSciSync not None : anon_union7):
-        string.memcpy(&self._ptr[0].params.nvSciSync, <cydriver.anon_union7*><void_ptr>nvSciSync.getPtr(), sizeof(self._ptr[0].params.nvSciSync))
-    @property
-    def keyedMutex(self):
-        return self._keyedMutex
-    @keyedMutex.setter
-    def keyedMutex(self, keyedMutex not None : anon_struct14):
-        string.memcpy(&self._ptr[0].params.keyedMutex, <cydriver.anon_struct14*><void_ptr>keyedMutex.getPtr(), sizeof(self._ptr[0].params.keyedMutex))
-    @property
-    def reserved(self):
-        return self._ptr[0].params.reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].params.reserved = reserved
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st:
-    """
-    External semaphore signal parameters
-
-    Attributes
-    ----------
-    params : anon_struct15
-
-    flags : unsigned int
-        Only when ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS is used to signal
-        a CUexternalSemaphore of type
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, the valid flag is
-        CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC which
-        indicates that while signaling the CUexternalSemaphore, no memory
-        synchronization operations should be performed for any external
-        memory object imported as CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF.
-        For all other types of CUexternalSemaphore, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._params = anon_struct15(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['params :\n' + '\n'.join(['    ' + line for line in str(self.params).splitlines()])]
-            except ValueError:
-                str_list += ['params : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def params(self):
-        return self._params
-    @params.setter
-    def params(self, params not None : anon_struct15):
-        string.memcpy(&self._ptr[0].params, <cydriver.anon_struct15*><void_ptr>params.getPtr(), sizeof(self._ptr[0].params))
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st' in found_types}}
-
-cdef class anon_struct16:
-    """
-    Attributes
-    ----------
-    value : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.fence
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['value : ' + str(self.value)]
-            except ValueError:
-                str_list += ['value : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def value(self):
-        return self._ptr[0].params.fence.value
-    @value.setter
-    def value(self, unsigned long long value):
-        self._ptr[0].params.fence.value = value
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st' in found_types}}
-
-cdef class anon_union8:
-    """
-    Attributes
-    ----------
-    fence : Any
-
-    reserved : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.nvSciSync
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fence : ' + hex(self.fence)]
-            except ValueError:
-                str_list += ['fence : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fence(self):
-        return <void_ptr>self._ptr[0].params.nvSciSync.fence
-    @fence.setter
-    def fence(self, fence):
-        _cyfence = utils.HelperInputVoidPtr(fence)
-        self._ptr[0].params.nvSciSync.fence = <void*><void_ptr>_cyfence.cptr
-    @property
-    def reserved(self):
-        return self._ptr[0].params.nvSciSync.reserved
-    @reserved.setter
-    def reserved(self, unsigned long long reserved):
-        self._ptr[0].params.nvSciSync.reserved = reserved
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st' in found_types}}
-
-cdef class anon_struct17:
-    """
-    Attributes
-    ----------
-    key : unsigned long long
-
-    timeoutMs : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.keyedMutex
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['key : ' + str(self.key)]
-            except ValueError:
-                str_list += ['key : <ValueError>']
-            try:
-                str_list += ['timeoutMs : ' + str(self.timeoutMs)]
-            except ValueError:
-                str_list += ['timeoutMs : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def key(self):
-        return self._ptr[0].params.keyedMutex.key
-    @key.setter
-    def key(self, unsigned long long key):
-        self._ptr[0].params.keyedMutex.key = key
-    @property
-    def timeoutMs(self):
-        return self._ptr[0].params.keyedMutex.timeoutMs
-    @timeoutMs.setter
-    def timeoutMs(self, unsigned int timeoutMs):
-        self._ptr[0].params.keyedMutex.timeoutMs = timeoutMs
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st' in found_types}}
-
-cdef class anon_struct18:
-    """
-    Attributes
-    ----------
-    fence : anon_struct16
-
-    nvSciSync : anon_union8
-
-    keyedMutex : anon_struct17
-
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._fence = anon_struct16(_ptr=<void_ptr>self._ptr)
-        self._nvSciSync = anon_union8(_ptr=<void_ptr>self._ptr)
-        self._keyedMutex = anon_struct17(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fence :\n' + '\n'.join(['    ' + line for line in str(self.fence).splitlines()])]
-            except ValueError:
-                str_list += ['fence : <ValueError>']
-            try:
-                str_list += ['nvSciSync :\n' + '\n'.join(['    ' + line for line in str(self.nvSciSync).splitlines()])]
-            except ValueError:
-                str_list += ['nvSciSync : <ValueError>']
-            try:
-                str_list += ['keyedMutex :\n' + '\n'.join(['    ' + line for line in str(self.keyedMutex).splitlines()])]
-            except ValueError:
-                str_list += ['keyedMutex : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fence(self):
-        return self._fence
-    @fence.setter
-    def fence(self, fence not None : anon_struct16):
-        string.memcpy(&self._ptr[0].params.fence, <cydriver.anon_struct16*><void_ptr>fence.getPtr(), sizeof(self._ptr[0].params.fence))
-    @property
-    def nvSciSync(self):
-        return self._nvSciSync
-    @nvSciSync.setter
-    def nvSciSync(self, nvSciSync not None : anon_union8):
-        string.memcpy(&self._ptr[0].params.nvSciSync, <cydriver.anon_union8*><void_ptr>nvSciSync.getPtr(), sizeof(self._ptr[0].params.nvSciSync))
-    @property
-    def keyedMutex(self):
-        return self._keyedMutex
-    @keyedMutex.setter
-    def keyedMutex(self, keyedMutex not None : anon_struct17):
-        string.memcpy(&self._ptr[0].params.keyedMutex, <cydriver.anon_struct17*><void_ptr>keyedMutex.getPtr(), sizeof(self._ptr[0].params.keyedMutex))
-    @property
-    def reserved(self):
-        return self._ptr[0].params.reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].params.reserved = reserved
-{{endif}}
-{{if 'struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st' in found_types}}
-
-cdef class CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st:
-    """
-    External semaphore wait parameters
-
-    Attributes
-    ----------
-    params : anon_struct18
-
-    flags : unsigned int
-        Only when ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS is used to wait on
-        a CUexternalSemaphore of type
-        CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC, the valid flag is
-        CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC which indicates
-        that while waiting for the CUexternalSemaphore, no memory
-        synchronization operations should be performed for any external
-        memory object imported as CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF.
-        For all other types of CUexternalSemaphore, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._params = anon_struct18(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['params :\n' + '\n'.join(['    ' + line for line in str(self.params).splitlines()])]
-            except ValueError:
-                str_list += ['params : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def params(self):
-        return self._params
-    @params.setter
-    def params(self, params not None : anon_struct18):
-        string.memcpy(&self._ptr[0].params, <cydriver.anon_struct18*><void_ptr>params.getPtr(), sizeof(self._ptr[0].params))
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st:
-    """
-    Semaphore signal node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS
-        Array of external semaphore signal parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        if self._extSemArray is not NULL:
-            free(self._extSemArray)
-        if self._paramsArray is not NULL:
-            free(self._paramsArray)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['extSemArray : ' + str(self.extSemArray)]
-            except ValueError:
-                str_list += ['extSemArray : <ValueError>']
-            try:
-                str_list += ['paramsArray : ' + str(self.paramsArray)]
-            except ValueError:
-                str_list += ['paramsArray : <ValueError>']
-            try:
-                str_list += ['numExtSems : ' + str(self.numExtSems)]
-            except ValueError:
-                str_list += ['numExtSems : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def extSemArray(self):
-        arrs = [<void_ptr>self._ptr[0].extSemArray + x*sizeof(cydriver.CUexternalSemaphore) for x in range(self._extSemArray_length)]
-        return [CUexternalSemaphore(_ptr=arr) for arr in arrs]
-    @extSemArray.setter
-    def extSemArray(self, val):
-        if len(val) == 0:
-            free(self._extSemArray)
-            self._extSemArray_length = 0
-            self._ptr[0].extSemArray = NULL
-        else:
-            if self._extSemArray_length != <size_t>len(val):
-                free(self._extSemArray)
-                self._extSemArray = <cydriver.CUexternalSemaphore*> calloc(len(val), sizeof(cydriver.CUexternalSemaphore))
-                if self._extSemArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore)))
-                self._extSemArray_length = <size_t>len(val)
-                self._ptr[0].extSemArray = self._extSemArray
-            for idx in range(len(val)):
-                self._extSemArray[idx] = (<CUexternalSemaphore>val[idx])._ptr[0]
-
-    @property
-    def paramsArray(self):
-        arrs = [<void_ptr>self._ptr[0].paramsArray + x*sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS) for x in range(self._paramsArray_length)]
-        return [CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS(_ptr=arr) for arr in arrs]
-    @paramsArray.setter
-    def paramsArray(self, val):
-        if len(val) == 0:
-            free(self._paramsArray)
-            self._paramsArray_length = 0
-            self._ptr[0].paramsArray = NULL
-        else:
-            if self._paramsArray_length != <size_t>len(val):
-                free(self._paramsArray)
-                self._paramsArray = <cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS*> calloc(len(val), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS))
-                if self._paramsArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS)))
-                self._paramsArray_length = <size_t>len(val)
-                self._ptr[0].paramsArray = self._paramsArray
-            for idx in range(len(val)):
-                string.memcpy(&self._paramsArray[idx], (<CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS>val[idx])._ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS))
-
-    @property
-    def numExtSems(self):
-        return self._ptr[0].numExtSems
-    @numExtSems.setter
-    def numExtSems(self, unsigned int numExtSems):
-        self._ptr[0].numExtSems = numExtSems
-{{endif}}
-{{if 'struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st:
-    """
-    Semaphore signal node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS
-        Array of external semaphore signal parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        if self._extSemArray is not NULL:
-            free(self._extSemArray)
-        if self._paramsArray is not NULL:
-            free(self._paramsArray)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['extSemArray : ' + str(self.extSemArray)]
-            except ValueError:
-                str_list += ['extSemArray : <ValueError>']
-            try:
-                str_list += ['paramsArray : ' + str(self.paramsArray)]
-            except ValueError:
-                str_list += ['paramsArray : <ValueError>']
-            try:
-                str_list += ['numExtSems : ' + str(self.numExtSems)]
-            except ValueError:
-                str_list += ['numExtSems : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def extSemArray(self):
-        arrs = [<void_ptr>self._ptr[0].extSemArray + x*sizeof(cydriver.CUexternalSemaphore) for x in range(self._extSemArray_length)]
-        return [CUexternalSemaphore(_ptr=arr) for arr in arrs]
-    @extSemArray.setter
-    def extSemArray(self, val):
-        if len(val) == 0:
-            free(self._extSemArray)
-            self._extSemArray_length = 0
-            self._ptr[0].extSemArray = NULL
-        else:
-            if self._extSemArray_length != <size_t>len(val):
-                free(self._extSemArray)
-                self._extSemArray = <cydriver.CUexternalSemaphore*> calloc(len(val), sizeof(cydriver.CUexternalSemaphore))
-                if self._extSemArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore)))
-                self._extSemArray_length = <size_t>len(val)
-                self._ptr[0].extSemArray = self._extSemArray
-            for idx in range(len(val)):
-                self._extSemArray[idx] = (<CUexternalSemaphore>val[idx])._ptr[0]
-
-    @property
-    def paramsArray(self):
-        arrs = [<void_ptr>self._ptr[0].paramsArray + x*sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS) for x in range(self._paramsArray_length)]
-        return [CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS(_ptr=arr) for arr in arrs]
-    @paramsArray.setter
-    def paramsArray(self, val):
-        if len(val) == 0:
-            free(self._paramsArray)
-            self._paramsArray_length = 0
-            self._ptr[0].paramsArray = NULL
-        else:
-            if self._paramsArray_length != <size_t>len(val):
-                free(self._paramsArray)
-                self._paramsArray = <cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS*> calloc(len(val), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS))
-                if self._paramsArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS)))
-                self._paramsArray_length = <size_t>len(val)
-                self._ptr[0].paramsArray = self._paramsArray
-            for idx in range(len(val)):
-                string.memcpy(&self._paramsArray[idx], (<CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS>val[idx])._ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS))
-
-    @property
-    def numExtSems(self):
-        return self._ptr[0].numExtSems
-    @numExtSems.setter
-    def numExtSems(self, unsigned int numExtSems):
-        self._ptr[0].numExtSems = numExtSems
-{{endif}}
-{{if 'struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_EXT_SEM_WAIT_NODE_PARAMS_st:
-    """
-    Semaphore wait node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS
-        Array of external semaphore wait parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        if self._extSemArray is not NULL:
-            free(self._extSemArray)
-        if self._paramsArray is not NULL:
-            free(self._paramsArray)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['extSemArray : ' + str(self.extSemArray)]
-            except ValueError:
-                str_list += ['extSemArray : <ValueError>']
-            try:
-                str_list += ['paramsArray : ' + str(self.paramsArray)]
-            except ValueError:
-                str_list += ['paramsArray : <ValueError>']
-            try:
-                str_list += ['numExtSems : ' + str(self.numExtSems)]
-            except ValueError:
-                str_list += ['numExtSems : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def extSemArray(self):
-        arrs = [<void_ptr>self._ptr[0].extSemArray + x*sizeof(cydriver.CUexternalSemaphore) for x in range(self._extSemArray_length)]
-        return [CUexternalSemaphore(_ptr=arr) for arr in arrs]
-    @extSemArray.setter
-    def extSemArray(self, val):
-        if len(val) == 0:
-            free(self._extSemArray)
-            self._extSemArray_length = 0
-            self._ptr[0].extSemArray = NULL
-        else:
-            if self._extSemArray_length != <size_t>len(val):
-                free(self._extSemArray)
-                self._extSemArray = <cydriver.CUexternalSemaphore*> calloc(len(val), sizeof(cydriver.CUexternalSemaphore))
-                if self._extSemArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore)))
-                self._extSemArray_length = <size_t>len(val)
-                self._ptr[0].extSemArray = self._extSemArray
-            for idx in range(len(val)):
-                self._extSemArray[idx] = (<CUexternalSemaphore>val[idx])._ptr[0]
-
-    @property
-    def paramsArray(self):
-        arrs = [<void_ptr>self._ptr[0].paramsArray + x*sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS) for x in range(self._paramsArray_length)]
-        return [CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS(_ptr=arr) for arr in arrs]
-    @paramsArray.setter
-    def paramsArray(self, val):
-        if len(val) == 0:
-            free(self._paramsArray)
-            self._paramsArray_length = 0
-            self._ptr[0].paramsArray = NULL
-        else:
-            if self._paramsArray_length != <size_t>len(val):
-                free(self._paramsArray)
-                self._paramsArray = <cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS*> calloc(len(val), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS))
-                if self._paramsArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS)))
-                self._paramsArray_length = <size_t>len(val)
-                self._ptr[0].paramsArray = self._paramsArray
-            for idx in range(len(val)):
-                string.memcpy(&self._paramsArray[idx], (<CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS>val[idx])._ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS))
-
-    @property
-    def numExtSems(self):
-        return self._ptr[0].numExtSems
-    @numExtSems.setter
-    def numExtSems(self, unsigned int numExtSems):
-        self._ptr[0].numExtSems = numExtSems
-{{endif}}
-{{if 'struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st:
-    """
-    Semaphore wait node parameters
-
-    Attributes
-    ----------
-    extSemArray : CUexternalSemaphore
-        Array of external semaphore handles.
-    paramsArray : CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS
-        Array of external semaphore wait parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        if self._extSemArray is not NULL:
-            free(self._extSemArray)
-        if self._paramsArray is not NULL:
-            free(self._paramsArray)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['extSemArray : ' + str(self.extSemArray)]
-            except ValueError:
-                str_list += ['extSemArray : <ValueError>']
-            try:
-                str_list += ['paramsArray : ' + str(self.paramsArray)]
-            except ValueError:
-                str_list += ['paramsArray : <ValueError>']
-            try:
-                str_list += ['numExtSems : ' + str(self.numExtSems)]
-            except ValueError:
-                str_list += ['numExtSems : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def extSemArray(self):
-        arrs = [<void_ptr>self._ptr[0].extSemArray + x*sizeof(cydriver.CUexternalSemaphore) for x in range(self._extSemArray_length)]
-        return [CUexternalSemaphore(_ptr=arr) for arr in arrs]
-    @extSemArray.setter
-    def extSemArray(self, val):
-        if len(val) == 0:
-            free(self._extSemArray)
-            self._extSemArray_length = 0
-            self._ptr[0].extSemArray = NULL
-        else:
-            if self._extSemArray_length != <size_t>len(val):
-                free(self._extSemArray)
-                self._extSemArray = <cydriver.CUexternalSemaphore*> calloc(len(val), sizeof(cydriver.CUexternalSemaphore))
-                if self._extSemArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore)))
-                self._extSemArray_length = <size_t>len(val)
-                self._ptr[0].extSemArray = self._extSemArray
-            for idx in range(len(val)):
-                self._extSemArray[idx] = (<CUexternalSemaphore>val[idx])._ptr[0]
-
-    @property
-    def paramsArray(self):
-        arrs = [<void_ptr>self._ptr[0].paramsArray + x*sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS) for x in range(self._paramsArray_length)]
-        return [CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS(_ptr=arr) for arr in arrs]
-    @paramsArray.setter
-    def paramsArray(self, val):
-        if len(val) == 0:
-            free(self._paramsArray)
-            self._paramsArray_length = 0
-            self._ptr[0].paramsArray = NULL
-        else:
-            if self._paramsArray_length != <size_t>len(val):
-                free(self._paramsArray)
-                self._paramsArray = <cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS*> calloc(len(val), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS))
-                if self._paramsArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS)))
-                self._paramsArray_length = <size_t>len(val)
-                self._ptr[0].paramsArray = self._paramsArray
-            for idx in range(len(val)):
-                string.memcpy(&self._paramsArray[idx], (<CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS>val[idx])._ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS))
-
-    @property
-    def numExtSems(self):
-        return self._ptr[0].numExtSems
-    @numExtSems.setter
-    def numExtSems(self, unsigned int numExtSems):
-        self._ptr[0].numExtSems = numExtSems
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class anon_union9:
-    """
-    Attributes
-    ----------
-    mipmap : CUmipmappedArray
-
-    array : CUarray
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUarrayMapInfo_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._mipmap = CUmipmappedArray(_ptr=<void_ptr>&self._ptr[0].resource.mipmap)
-        self._array = CUarray(_ptr=<void_ptr>&self._ptr[0].resource.array)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].resource
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['mipmap : ' + str(self.mipmap)]
-            except ValueError:
-                str_list += ['mipmap : <ValueError>']
-            try:
-                str_list += ['array : ' + str(self.array)]
-            except ValueError:
-                str_list += ['array : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def mipmap(self):
-        return self._mipmap
-    @mipmap.setter
-    def mipmap(self, mipmap):
-        cdef cydriver.CUmipmappedArray cymipmap
-        if mipmap is None:
-            cymipmap = <cydriver.CUmipmappedArray><void_ptr>0
-        elif isinstance(mipmap, (CUmipmappedArray,)):
-            pmipmap = int(mipmap)
-            cymipmap = <cydriver.CUmipmappedArray><void_ptr>pmipmap
-        else:
-            pmipmap = int(CUmipmappedArray(mipmap))
-            cymipmap = <cydriver.CUmipmappedArray><void_ptr>pmipmap
-        self._mipmap._ptr[0] = cymipmap
-    @property
-    def array(self):
-        return self._array
-    @array.setter
-    def array(self, array):
-        cdef cydriver.CUarray cyarray
-        if array is None:
-            cyarray = <cydriver.CUarray><void_ptr>0
-        elif isinstance(array, (CUarray,)):
-            parray = int(array)
-            cyarray = <cydriver.CUarray><void_ptr>parray
-        else:
-            parray = int(CUarray(array))
-            cyarray = <cydriver.CUarray><void_ptr>parray
-        self._array._ptr[0] = cyarray
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class anon_struct19:
-    """
-    Attributes
-    ----------
-    level : unsigned int
-
-    layer : unsigned int
-
-    offsetX : unsigned int
-
-    offsetY : unsigned int
-
-    offsetZ : unsigned int
-
-    extentWidth : unsigned int
-
-    extentHeight : unsigned int
-
-    extentDepth : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUarrayMapInfo_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].subresource.sparseLevel
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['level : ' + str(self.level)]
-            except ValueError:
-                str_list += ['level : <ValueError>']
-            try:
-                str_list += ['layer : ' + str(self.layer)]
-            except ValueError:
-                str_list += ['layer : <ValueError>']
-            try:
-                str_list += ['offsetX : ' + str(self.offsetX)]
-            except ValueError:
-                str_list += ['offsetX : <ValueError>']
-            try:
-                str_list += ['offsetY : ' + str(self.offsetY)]
-            except ValueError:
-                str_list += ['offsetY : <ValueError>']
-            try:
-                str_list += ['offsetZ : ' + str(self.offsetZ)]
-            except ValueError:
-                str_list += ['offsetZ : <ValueError>']
-            try:
-                str_list += ['extentWidth : ' + str(self.extentWidth)]
-            except ValueError:
-                str_list += ['extentWidth : <ValueError>']
-            try:
-                str_list += ['extentHeight : ' + str(self.extentHeight)]
-            except ValueError:
-                str_list += ['extentHeight : <ValueError>']
-            try:
-                str_list += ['extentDepth : ' + str(self.extentDepth)]
-            except ValueError:
-                str_list += ['extentDepth : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def level(self):
-        return self._ptr[0].subresource.sparseLevel.level
-    @level.setter
-    def level(self, unsigned int level):
-        self._ptr[0].subresource.sparseLevel.level = level
-    @property
-    def layer(self):
-        return self._ptr[0].subresource.sparseLevel.layer
-    @layer.setter
-    def layer(self, unsigned int layer):
-        self._ptr[0].subresource.sparseLevel.layer = layer
-    @property
-    def offsetX(self):
-        return self._ptr[0].subresource.sparseLevel.offsetX
-    @offsetX.setter
-    def offsetX(self, unsigned int offsetX):
-        self._ptr[0].subresource.sparseLevel.offsetX = offsetX
-    @property
-    def offsetY(self):
-        return self._ptr[0].subresource.sparseLevel.offsetY
-    @offsetY.setter
-    def offsetY(self, unsigned int offsetY):
-        self._ptr[0].subresource.sparseLevel.offsetY = offsetY
-    @property
-    def offsetZ(self):
-        return self._ptr[0].subresource.sparseLevel.offsetZ
-    @offsetZ.setter
-    def offsetZ(self, unsigned int offsetZ):
-        self._ptr[0].subresource.sparseLevel.offsetZ = offsetZ
-    @property
-    def extentWidth(self):
-        return self._ptr[0].subresource.sparseLevel.extentWidth
-    @extentWidth.setter
-    def extentWidth(self, unsigned int extentWidth):
-        self._ptr[0].subresource.sparseLevel.extentWidth = extentWidth
-    @property
-    def extentHeight(self):
-        return self._ptr[0].subresource.sparseLevel.extentHeight
-    @extentHeight.setter
-    def extentHeight(self, unsigned int extentHeight):
-        self._ptr[0].subresource.sparseLevel.extentHeight = extentHeight
-    @property
-    def extentDepth(self):
-        return self._ptr[0].subresource.sparseLevel.extentDepth
-    @extentDepth.setter
-    def extentDepth(self, unsigned int extentDepth):
-        self._ptr[0].subresource.sparseLevel.extentDepth = extentDepth
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class anon_struct20:
-    """
-    Attributes
-    ----------
-    layer : unsigned int
-
-    offset : unsigned long long
-
-    size : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUarrayMapInfo_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].subresource.miptail
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['layer : ' + str(self.layer)]
-            except ValueError:
-                str_list += ['layer : <ValueError>']
-            try:
-                str_list += ['offset : ' + str(self.offset)]
-            except ValueError:
-                str_list += ['offset : <ValueError>']
-            try:
-                str_list += ['size : ' + str(self.size)]
-            except ValueError:
-                str_list += ['size : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def layer(self):
-        return self._ptr[0].subresource.miptail.layer
-    @layer.setter
-    def layer(self, unsigned int layer):
-        self._ptr[0].subresource.miptail.layer = layer
-    @property
-    def offset(self):
-        return self._ptr[0].subresource.miptail.offset
-    @offset.setter
-    def offset(self, unsigned long long offset):
-        self._ptr[0].subresource.miptail.offset = offset
-    @property
-    def size(self):
-        return self._ptr[0].subresource.miptail.size
-    @size.setter
-    def size(self, unsigned long long size):
-        self._ptr[0].subresource.miptail.size = size
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class anon_union10:
-    """
-    Attributes
-    ----------
-    sparseLevel : anon_struct19
-
-    miptail : anon_struct20
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUarrayMapInfo_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._sparseLevel = anon_struct19(_ptr=<void_ptr>self._ptr)
-        self._miptail = anon_struct20(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].subresource
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['sparseLevel :\n' + '\n'.join(['    ' + line for line in str(self.sparseLevel).splitlines()])]
-            except ValueError:
-                str_list += ['sparseLevel : <ValueError>']
-            try:
-                str_list += ['miptail :\n' + '\n'.join(['    ' + line for line in str(self.miptail).splitlines()])]
-            except ValueError:
-                str_list += ['miptail : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def sparseLevel(self):
-        return self._sparseLevel
-    @sparseLevel.setter
-    def sparseLevel(self, sparseLevel not None : anon_struct19):
-        string.memcpy(&self._ptr[0].subresource.sparseLevel, <cydriver.anon_struct19*><void_ptr>sparseLevel.getPtr(), sizeof(self._ptr[0].subresource.sparseLevel))
-    @property
-    def miptail(self):
-        return self._miptail
-    @miptail.setter
-    def miptail(self, miptail not None : anon_struct20):
-        string.memcpy(&self._ptr[0].subresource.miptail, <cydriver.anon_struct20*><void_ptr>miptail.getPtr(), sizeof(self._ptr[0].subresource.miptail))
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class anon_union11:
-    """
-    Attributes
-    ----------
-    memHandle : CUmemGenericAllocationHandle
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUarrayMapInfo_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._memHandle = CUmemGenericAllocationHandle(_ptr=<void_ptr>&self._ptr[0].memHandle.memHandle)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].memHandle
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['memHandle : ' + str(self.memHandle)]
-            except ValueError:
-                str_list += ['memHandle : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def memHandle(self):
-        return self._memHandle
-    @memHandle.setter
-    def memHandle(self, memHandle):
-        cdef cydriver.CUmemGenericAllocationHandle cymemHandle
-        if memHandle is None:
-            cymemHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>0
-        elif isinstance(memHandle, (CUmemGenericAllocationHandle)):
-            pmemHandle = int(memHandle)
-            cymemHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmemHandle
-        else:
-            pmemHandle = int(CUmemGenericAllocationHandle(memHandle))
-            cymemHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmemHandle
-        self._memHandle._ptr[0] = cymemHandle
-
-{{endif}}
-{{if 'struct CUarrayMapInfo_st' in found_types}}
-
-cdef class CUarrayMapInfo_st:
-    """
-    Specifies the CUDA array or CUDA mipmapped array memory mapping
-    information
-
-    Attributes
-    ----------
-    resourceType : CUresourcetype
-        Resource type
-    resource : anon_union9
-
-    subresourceType : CUarraySparseSubresourceType
-        Sparse subresource type
-    subresource : anon_union10
-
-    memOperationType : CUmemOperationType
-        Memory operation type
-    memHandleType : CUmemHandleType
-        Memory handle type
-    memHandle : anon_union11
-
-    offset : unsigned long long
-        Offset within mip tail  Offset within the memory
-    deviceBitMask : unsigned int
-        Device ordinal bit mask
-    flags : unsigned int
-        flags for future use, must be zero now.
-    reserved : List[unsigned int]
-        Reserved for future use, must be zero now.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cydriver.CUarrayMapInfo_st *>calloc(1, sizeof(cydriver.CUarrayMapInfo_st))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cydriver.CUarrayMapInfo_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._resource = anon_union9(_ptr=<void_ptr>self._ptr)
-        self._subresource = anon_union10(_ptr=<void_ptr>self._ptr)
-        self._memHandle = anon_union11(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['resourceType : ' + str(self.resourceType)]
-            except ValueError:
-                str_list += ['resourceType : <ValueError>']
-            try:
-                str_list += ['resource :\n' + '\n'.join(['    ' + line for line in str(self.resource).splitlines()])]
-            except ValueError:
-                str_list += ['resource : <ValueError>']
-            try:
-                str_list += ['subresourceType : ' + str(self.subresourceType)]
-            except ValueError:
-                str_list += ['subresourceType : <ValueError>']
-            try:
-                str_list += ['subresource :\n' + '\n'.join(['    ' + line for line in str(self.subresource).splitlines()])]
-            except ValueError:
-                str_list += ['subresource : <ValueError>']
-            try:
-                str_list += ['memOperationType : ' + str(self.memOperationType)]
-            except ValueError:
-                str_list += ['memOperationType : <ValueError>']
-            try:
-                str_list += ['memHandleType : ' + str(self.memHandleType)]
-            except ValueError:
-                str_list += ['memHandleType : <ValueError>']
-            try:
-                str_list += ['memHandle :\n' + '\n'.join(['    ' + line for line in str(self.memHandle).splitlines()])]
-            except ValueError:
-                str_list += ['memHandle : <ValueError>']
-            try:
-                str_list += ['offset : ' + str(self.offset)]
-            except ValueError:
-                str_list += ['offset : <ValueError>']
-            try:
-                str_list += ['deviceBitMask : ' + str(self.deviceBitMask)]
-            except ValueError:
-                str_list += ['deviceBitMask : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def resourceType(self):
-        return CUresourcetype(self._ptr[0].resourceType)
-    @resourceType.setter
-    def resourceType(self, resourceType not None : CUresourcetype):
-        self._ptr[0].resourceType = resourceType.value
-    @property
-    def resource(self):
-        return self._resource
-    @resource.setter
-    def resource(self, resource not None : anon_union9):
-        string.memcpy(&self._ptr[0].resource, <cydriver.anon_union9*><void_ptr>resource.getPtr(), sizeof(self._ptr[0].resource))
-    @property
-    def subresourceType(self):
-        return CUarraySparseSubresourceType(self._ptr[0].subresourceType)
-    @subresourceType.setter
-    def subresourceType(self, subresourceType not None : CUarraySparseSubresourceType):
-        self._ptr[0].subresourceType = subresourceType.value
-    @property
-    def subresource(self):
-        return self._subresource
-    @subresource.setter
-    def subresource(self, subresource not None : anon_union10):
-        string.memcpy(&self._ptr[0].subresource, <cydriver.anon_union10*><void_ptr>subresource.getPtr(), sizeof(self._ptr[0].subresource))
-    @property
-    def memOperationType(self):
-        return CUmemOperationType(self._ptr[0].memOperationType)
-    @memOperationType.setter
-    def memOperationType(self, memOperationType not None : CUmemOperationType):
-        self._ptr[0].memOperationType = memOperationType.value
-    @property
-    def memHandleType(self):
-        return CUmemHandleType(self._ptr[0].memHandleType)
-    @memHandleType.setter
-    def memHandleType(self, memHandleType not None : CUmemHandleType):
-        self._ptr[0].memHandleType = memHandleType.value
-    @property
-    def memHandle(self):
-        return self._memHandle
-    @memHandle.setter
-    def memHandle(self, memHandle not None : anon_union11):
-        string.memcpy(&self._ptr[0].memHandle, <cydriver.anon_union11*><void_ptr>memHandle.getPtr(), sizeof(self._ptr[0].memHandle))
-    @property
-    def offset(self):
-        return self._ptr[0].offset
-    @offset.setter
-    def offset(self, unsigned long long offset):
-        self._ptr[0].offset = offset
-    @property
-    def deviceBitMask(self):
-        return self._ptr[0].deviceBitMask
-    @deviceBitMask.setter
-    def deviceBitMask(self, unsigned int deviceBitMask):
-        self._ptr[0].deviceBitMask = deviceBitMask
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct CUmemLocation_st' in found_types}}
-
-cdef class CUmemLocation_st:
-    """
-    Specifies a memory location.
-
-    Attributes
-    ----------
-    type : CUmemLocationType
-        Specifies the location type, which modifies the meaning of id.
-    id : int
-        identifier for a given this location's CUmemLocationType.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUmemLocation_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['id : ' + str(self.id)]
-            except ValueError:
-                str_list += ['id : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return CUmemLocationType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : CUmemLocationType):
-        self._ptr[0].type = type.value
-    @property
-    def id(self):
-        return self._ptr[0].id
-    @id.setter
-    def id(self, int id):
-        self._ptr[0].id = id
-{{endif}}
-{{if 'struct CUmemAllocationProp_st' in found_types}}
-
-cdef class anon_struct21:
-    """
-    Attributes
-    ----------
-    compressionType : bytes
-
-    gpuDirectRDMACapable : bytes
-
-    usage : unsigned short
-
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUmemAllocationProp_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].allocFlags
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['compressionType : ' + str(self.compressionType)]
-            except ValueError:
-                str_list += ['compressionType : <ValueError>']
-            try:
-                str_list += ['gpuDirectRDMACapable : ' + str(self.gpuDirectRDMACapable)]
-            except ValueError:
-                str_list += ['gpuDirectRDMACapable : <ValueError>']
-            try:
-                str_list += ['usage : ' + str(self.usage)]
-            except ValueError:
-                str_list += ['usage : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def compressionType(self):
-        return self._ptr[0].allocFlags.compressionType
-    @compressionType.setter
-    def compressionType(self, unsigned char compressionType):
-        self._ptr[0].allocFlags.compressionType = compressionType
-    @property
-    def gpuDirectRDMACapable(self):
-        return self._ptr[0].allocFlags.gpuDirectRDMACapable
-    @gpuDirectRDMACapable.setter
-    def gpuDirectRDMACapable(self, unsigned char gpuDirectRDMACapable):
-        self._ptr[0].allocFlags.gpuDirectRDMACapable = gpuDirectRDMACapable
-    @property
-    def usage(self):
-        return self._ptr[0].allocFlags.usage
-    @usage.setter
-    def usage(self, unsigned short usage):
-        self._ptr[0].allocFlags.usage = usage
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(<char*>self._ptr[0].allocFlags.reserved, 4)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 4:
-            raise ValueError("reserved length must be 4, is " + str(len(reserved)))
-        for i, b in enumerate(reserved):
-            self._ptr[0].allocFlags.reserved[i] = b
-{{endif}}
-{{if 'struct CUmemAllocationProp_st' in found_types}}
-
-cdef class CUmemAllocationProp_st:
-    """
-    Specifies the allocation properties for a allocation.
-
-    Attributes
-    ----------
-    type : CUmemAllocationType
-        Allocation type
-    requestedHandleTypes : CUmemAllocationHandleType
-        requested CUmemAllocationHandleType
-    location : CUmemLocation
-        Location of allocation
-    win32HandleMetaData : Any
-        Windows-specific POBJECT_ATTRIBUTES required when
-        CU_MEM_HANDLE_TYPE_WIN32 is specified. This object attributes
-        structure includes security attributes that define the scope of
-        which exported allocations may be transferred to other processes.
-        In all other cases, this field is required to be zero.
-    allocFlags : anon_struct21
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUmemAllocationProp_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._location = CUmemLocation(_ptr=<void_ptr>&self._ptr[0].location)
-        self._allocFlags = anon_struct21(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['requestedHandleTypes : ' + str(self.requestedHandleTypes)]
-            except ValueError:
-                str_list += ['requestedHandleTypes : <ValueError>']
-            try:
-                str_list += ['location :\n' + '\n'.join(['    ' + line for line in str(self.location).splitlines()])]
-            except ValueError:
-                str_list += ['location : <ValueError>']
-            try:
-                str_list += ['win32HandleMetaData : ' + hex(self.win32HandleMetaData)]
-            except ValueError:
-                str_list += ['win32HandleMetaData : <ValueError>']
-            try:
-                str_list += ['allocFlags :\n' + '\n'.join(['    ' + line for line in str(self.allocFlags).splitlines()])]
-            except ValueError:
-                str_list += ['allocFlags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return CUmemAllocationType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : CUmemAllocationType):
-        self._ptr[0].type = type.value
-    @property
-    def requestedHandleTypes(self):
-        return CUmemAllocationHandleType(self._ptr[0].requestedHandleTypes)
-    @requestedHandleTypes.setter
-    def requestedHandleTypes(self, requestedHandleTypes not None : CUmemAllocationHandleType):
-        self._ptr[0].requestedHandleTypes = requestedHandleTypes.value
-    @property
-    def location(self):
-        return self._location
-    @location.setter
-    def location(self, location not None : CUmemLocation):
-        string.memcpy(&self._ptr[0].location, <cydriver.CUmemLocation*><void_ptr>location.getPtr(), sizeof(self._ptr[0].location))
-    @property
-    def win32HandleMetaData(self):
-        return <void_ptr>self._ptr[0].win32HandleMetaData
-    @win32HandleMetaData.setter
-    def win32HandleMetaData(self, win32HandleMetaData):
-        _cywin32HandleMetaData = utils.HelperInputVoidPtr(win32HandleMetaData)
-        self._ptr[0].win32HandleMetaData = <void*><void_ptr>_cywin32HandleMetaData.cptr
-    @property
-    def allocFlags(self):
-        return self._allocFlags
-    @allocFlags.setter
-    def allocFlags(self, allocFlags not None : anon_struct21):
-        string.memcpy(&self._ptr[0].allocFlags, <cydriver.anon_struct21*><void_ptr>allocFlags.getPtr(), sizeof(self._ptr[0].allocFlags))
-{{endif}}
-{{if 'struct CUmulticastObjectProp_st' in found_types}}
-
-cdef class CUmulticastObjectProp_st:
-    """
-    Specifies the properties for a multicast object.
-
-    Attributes
-    ----------
-    numDevices : unsigned int
-        The number of devices in the multicast team that will bind memory
-        to this object
-    size : size_t
-        The maximum amount of memory that can be bound to this multicast
-        object per device
-    handleTypes : unsigned long long
-        Bitmask of exportable handle types (see CUmemAllocationHandleType)
-        for this object
-    flags : unsigned long long
-        Flags for future use, must be zero now
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUmulticastObjectProp_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['numDevices : ' + str(self.numDevices)]
-            except ValueError:
-                str_list += ['numDevices : <ValueError>']
-            try:
-                str_list += ['size : ' + str(self.size)]
-            except ValueError:
-                str_list += ['size : <ValueError>']
-            try:
-                str_list += ['handleTypes : ' + str(self.handleTypes)]
-            except ValueError:
-                str_list += ['handleTypes : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def numDevices(self):
-        return self._ptr[0].numDevices
-    @numDevices.setter
-    def numDevices(self, unsigned int numDevices):
-        self._ptr[0].numDevices = numDevices
-    @property
-    def size(self):
-        return self._ptr[0].size
-    @size.setter
-    def size(self, size_t size):
-        self._ptr[0].size = size
-    @property
-    def handleTypes(self):
-        return self._ptr[0].handleTypes
-    @handleTypes.setter
-    def handleTypes(self, unsigned long long handleTypes):
-        self._ptr[0].handleTypes = handleTypes
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned long long flags):
-        self._ptr[0].flags = flags
-{{endif}}
-{{if 'struct CUmemAccessDesc_st' in found_types}}
-
-cdef class CUmemAccessDesc_st:
-    """
-    Memory access descriptor
-
-    Attributes
-    ----------
-    location : CUmemLocation
-        Location on which the request is to change it's accessibility
-    flags : CUmemAccess_flags
-        ::CUmemProt accessibility flags to set on the request
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUmemAccessDesc_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._location = CUmemLocation(_ptr=<void_ptr>&self._ptr[0].location)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['location :\n' + '\n'.join(['    ' + line for line in str(self.location).splitlines()])]
-            except ValueError:
-                str_list += ['location : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def location(self):
-        return self._location
-    @location.setter
-    def location(self, location not None : CUmemLocation):
-        string.memcpy(&self._ptr[0].location, <cydriver.CUmemLocation*><void_ptr>location.getPtr(), sizeof(self._ptr[0].location))
-    @property
-    def flags(self):
-        return CUmemAccess_flags(self._ptr[0].flags)
-    @flags.setter
-    def flags(self, flags not None : CUmemAccess_flags):
-        self._ptr[0].flags = flags.value
-{{endif}}
-{{if 'struct CUgraphExecUpdateResultInfo_st' in found_types}}
-
-cdef class CUgraphExecUpdateResultInfo_st:
-    """
-    Result information returned by cuGraphExecUpdate
-
-    Attributes
-    ----------
-    result : CUgraphExecUpdateResult
-        Gives more specific detail when a cuda graph update fails.
-    errorNode : CUgraphNode
-        The "to node" of the error edge when the topologies do not match.
-        The error node when the error is associated with a specific node.
-        NULL when the error is generic.
-    errorFromNode : CUgraphNode
-        The from node of error edge when the topologies do not match.
-        Otherwise NULL.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUgraphExecUpdateResultInfo_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._errorNode = CUgraphNode(_ptr=<void_ptr>&self._ptr[0].errorNode)
-        self._errorFromNode = CUgraphNode(_ptr=<void_ptr>&self._ptr[0].errorFromNode)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['result : ' + str(self.result)]
-            except ValueError:
-                str_list += ['result : <ValueError>']
-            try:
-                str_list += ['errorNode : ' + str(self.errorNode)]
-            except ValueError:
-                str_list += ['errorNode : <ValueError>']
-            try:
-                str_list += ['errorFromNode : ' + str(self.errorFromNode)]
-            except ValueError:
-                str_list += ['errorFromNode : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def result(self):
-        return CUgraphExecUpdateResult(self._ptr[0].result)
-    @result.setter
-    def result(self, result not None : CUgraphExecUpdateResult):
-        self._ptr[0].result = result.value
-    @property
-    def errorNode(self):
-        return self._errorNode
-    @errorNode.setter
-    def errorNode(self, errorNode):
-        cdef cydriver.CUgraphNode cyerrorNode
-        if errorNode is None:
-            cyerrorNode = <cydriver.CUgraphNode><void_ptr>0
-        elif isinstance(errorNode, (CUgraphNode,)):
-            perrorNode = int(errorNode)
-            cyerrorNode = <cydriver.CUgraphNode><void_ptr>perrorNode
-        else:
-            perrorNode = int(CUgraphNode(errorNode))
-            cyerrorNode = <cydriver.CUgraphNode><void_ptr>perrorNode
-        self._errorNode._ptr[0] = cyerrorNode
-    @property
-    def errorFromNode(self):
-        return self._errorFromNode
-    @errorFromNode.setter
-    def errorFromNode(self, errorFromNode):
-        cdef cydriver.CUgraphNode cyerrorFromNode
-        if errorFromNode is None:
-            cyerrorFromNode = <cydriver.CUgraphNode><void_ptr>0
-        elif isinstance(errorFromNode, (CUgraphNode,)):
-            perrorFromNode = int(errorFromNode)
-            cyerrorFromNode = <cydriver.CUgraphNode><void_ptr>perrorFromNode
-        else:
-            perrorFromNode = int(CUgraphNode(errorFromNode))
-            cyerrorFromNode = <cydriver.CUgraphNode><void_ptr>perrorFromNode
-        self._errorFromNode._ptr[0] = cyerrorFromNode
-{{endif}}
-{{if 'struct CUmemPoolProps_st' in found_types}}
-
-cdef class CUmemPoolProps_st:
-    """
-    Specifies the properties of allocations made from the pool.
-
-    Attributes
-    ----------
-    allocType : CUmemAllocationType
-        Allocation type. Currently must be specified as
-        CU_MEM_ALLOCATION_TYPE_PINNED
-    handleTypes : CUmemAllocationHandleType
-        Handle types that will be supported by allocations from the pool.
-    location : CUmemLocation
-        Location where allocations should reside.
-    win32SecurityAttributes : Any
-        Windows-specific LPSECURITYATTRIBUTES required when
-        CU_MEM_HANDLE_TYPE_WIN32 is specified. This security attribute
-        defines the scope of which exported allocations may be transferred
-        to other processes. In all other cases, this field is required to
-        be zero.
-    maxSize : size_t
-        Maximum pool size. When set to 0, defaults to a system dependent
-        value.
-    usage : unsigned short
-        Bitmask indicating intended usage for the pool.
-    reserved : bytes
-        reserved for future use, must be 0
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUmemPoolProps_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._location = CUmemLocation(_ptr=<void_ptr>&self._ptr[0].location)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['allocType : ' + str(self.allocType)]
-            except ValueError:
-                str_list += ['allocType : <ValueError>']
-            try:
-                str_list += ['handleTypes : ' + str(self.handleTypes)]
-            except ValueError:
-                str_list += ['handleTypes : <ValueError>']
-            try:
-                str_list += ['location :\n' + '\n'.join(['    ' + line for line in str(self.location).splitlines()])]
-            except ValueError:
-                str_list += ['location : <ValueError>']
-            try:
-                str_list += ['win32SecurityAttributes : ' + hex(self.win32SecurityAttributes)]
-            except ValueError:
-                str_list += ['win32SecurityAttributes : <ValueError>']
-            try:
-                str_list += ['maxSize : ' + str(self.maxSize)]
-            except ValueError:
-                str_list += ['maxSize : <ValueError>']
-            try:
-                str_list += ['usage : ' + str(self.usage)]
-            except ValueError:
-                str_list += ['usage : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def allocType(self):
-        return CUmemAllocationType(self._ptr[0].allocType)
-    @allocType.setter
-    def allocType(self, allocType not None : CUmemAllocationType):
-        self._ptr[0].allocType = allocType.value
-    @property
-    def handleTypes(self):
-        return CUmemAllocationHandleType(self._ptr[0].handleTypes)
-    @handleTypes.setter
-    def handleTypes(self, handleTypes not None : CUmemAllocationHandleType):
-        self._ptr[0].handleTypes = handleTypes.value
-    @property
-    def location(self):
-        return self._location
-    @location.setter
-    def location(self, location not None : CUmemLocation):
-        string.memcpy(&self._ptr[0].location, <cydriver.CUmemLocation*><void_ptr>location.getPtr(), sizeof(self._ptr[0].location))
-    @property
-    def win32SecurityAttributes(self):
-        return <void_ptr>self._ptr[0].win32SecurityAttributes
-    @win32SecurityAttributes.setter
-    def win32SecurityAttributes(self, win32SecurityAttributes):
-        _cywin32SecurityAttributes = utils.HelperInputVoidPtr(win32SecurityAttributes)
-        self._ptr[0].win32SecurityAttributes = <void*><void_ptr>_cywin32SecurityAttributes.cptr
-    @property
-    def maxSize(self):
-        return self._ptr[0].maxSize
-    @maxSize.setter
-    def maxSize(self, size_t maxSize):
-        self._ptr[0].maxSize = maxSize
-    @property
-    def usage(self):
-        return self._ptr[0].usage
-    @usage.setter
-    def usage(self, unsigned short usage):
-        self._ptr[0].usage = usage
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(<char*>self._ptr[0].reserved, 54)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 54:
-            raise ValueError("reserved length must be 54, is " + str(len(reserved)))
-        for i, b in enumerate(reserved):
-            self._ptr[0].reserved[i] = b
-{{endif}}
-{{if 'struct CUmemPoolPtrExportData_st' in found_types}}
-
-cdef class CUmemPoolPtrExportData_st:
-    """
-    Opaque data for exporting a pool allocation
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUmemPoolPtrExportData_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(<char*>self._ptr[0].reserved, 64)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 64:
-            raise ValueError("reserved length must be 64, is " + str(len(reserved)))
-        for i, b in enumerate(reserved):
-            self._ptr[0].reserved[i] = b
-{{endif}}
-{{if 'struct CUDA_MEM_ALLOC_NODE_PARAMS_v1_st' in found_types}}
-
-cdef class CUDA_MEM_ALLOC_NODE_PARAMS_v1_st:
-    """
-    Memory allocation node parameters
-
-    Attributes
-    ----------
-    poolProps : CUmemPoolProps
-        in: location where the allocation should reside (specified in
-        ::location). ::handleTypes must be CU_MEM_HANDLE_TYPE_NONE. IPC is
-        not supported.
-    accessDescs : CUmemAccessDesc
-        in: array of memory access descriptors. Used to describe peer GPU
-        access
-    accessDescCount : size_t
-        in: number of memory access descriptors. Must not exceed the number
-        of GPUs.
-    bytesize : size_t
-        in: size in bytes of the requested allocation
-    dptr : CUdeviceptr
-        out: address of the allocation returned by CUDA
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v1_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._poolProps = CUmemPoolProps(_ptr=<void_ptr>&self._ptr[0].poolProps)
-        self._dptr = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].dptr)
-    def __dealloc__(self):
-        if self._accessDescs is not NULL:
-            free(self._accessDescs)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['poolProps :\n' + '\n'.join(['    ' + line for line in str(self.poolProps).splitlines()])]
-            except ValueError:
-                str_list += ['poolProps : <ValueError>']
-            try:
-                str_list += ['accessDescs : ' + str(self.accessDescs)]
-            except ValueError:
-                str_list += ['accessDescs : <ValueError>']
-            try:
-                str_list += ['accessDescCount : ' + str(self.accessDescCount)]
-            except ValueError:
-                str_list += ['accessDescCount : <ValueError>']
-            try:
-                str_list += ['bytesize : ' + str(self.bytesize)]
-            except ValueError:
-                str_list += ['bytesize : <ValueError>']
-            try:
-                str_list += ['dptr : ' + str(self.dptr)]
-            except ValueError:
-                str_list += ['dptr : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def poolProps(self):
-        return self._poolProps
-    @poolProps.setter
-    def poolProps(self, poolProps not None : CUmemPoolProps):
-        string.memcpy(&self._ptr[0].poolProps, <cydriver.CUmemPoolProps*><void_ptr>poolProps.getPtr(), sizeof(self._ptr[0].poolProps))
-    @property
-    def accessDescs(self):
-        arrs = [<void_ptr>self._ptr[0].accessDescs + x*sizeof(cydriver.CUmemAccessDesc) for x in range(self._accessDescs_length)]
-        return [CUmemAccessDesc(_ptr=arr) for arr in arrs]
-    @accessDescs.setter
-    def accessDescs(self, val):
-        if len(val) == 0:
-            free(self._accessDescs)
-            self._accessDescs_length = 0
-            self._ptr[0].accessDescs = NULL
-        else:
-            if self._accessDescs_length != <size_t>len(val):
-                free(self._accessDescs)
-                self._accessDescs = <cydriver.CUmemAccessDesc*> calloc(len(val), sizeof(cydriver.CUmemAccessDesc))
-                if self._accessDescs is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUmemAccessDesc)))
-                self._accessDescs_length = <size_t>len(val)
-                self._ptr[0].accessDescs = self._accessDescs
-            for idx in range(len(val)):
-                string.memcpy(&self._accessDescs[idx], (<CUmemAccessDesc>val[idx])._ptr, sizeof(cydriver.CUmemAccessDesc))
-
-    @property
-    def accessDescCount(self):
-        return self._ptr[0].accessDescCount
-    @accessDescCount.setter
-    def accessDescCount(self, size_t accessDescCount):
-        self._ptr[0].accessDescCount = accessDescCount
-    @property
-    def bytesize(self):
-        return self._ptr[0].bytesize
-    @bytesize.setter
-    def bytesize(self, size_t bytesize):
-        self._ptr[0].bytesize = bytesize
-    @property
-    def dptr(self):
-        return self._dptr
-    @dptr.setter
-    def dptr(self, dptr):
-        cdef cydriver.CUdeviceptr cydptr
-        if dptr is None:
-            cydptr = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(dptr, (CUdeviceptr)):
-            pdptr = int(dptr)
-            cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-        else:
-            pdptr = int(CUdeviceptr(dptr))
-            cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-        self._dptr._ptr[0] = cydptr
-
-{{endif}}
-{{if 'struct CUDA_MEM_ALLOC_NODE_PARAMS_v2_st' in found_types}}
-
-cdef class CUDA_MEM_ALLOC_NODE_PARAMS_v2_st:
-    """
-    Memory allocation node parameters
-
-    Attributes
-    ----------
-    poolProps : CUmemPoolProps
-        in: location where the allocation should reside (specified in
-        ::location). ::handleTypes must be CU_MEM_HANDLE_TYPE_NONE. IPC is
-        not supported.
-    accessDescs : CUmemAccessDesc
-        in: array of memory access descriptors. Used to describe peer GPU
-        access
-    accessDescCount : size_t
-        in: number of memory access descriptors. Must not exceed the number
-        of GPUs.
-    bytesize : size_t
-        in: size in bytes of the requested allocation
-    dptr : CUdeviceptr
-        out: address of the allocation returned by CUDA
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v2_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._poolProps = CUmemPoolProps(_ptr=<void_ptr>&self._ptr[0].poolProps)
-        self._dptr = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].dptr)
-    def __dealloc__(self):
-        if self._accessDescs is not NULL:
-            free(self._accessDescs)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['poolProps :\n' + '\n'.join(['    ' + line for line in str(self.poolProps).splitlines()])]
-            except ValueError:
-                str_list += ['poolProps : <ValueError>']
-            try:
-                str_list += ['accessDescs : ' + str(self.accessDescs)]
-            except ValueError:
-                str_list += ['accessDescs : <ValueError>']
-            try:
-                str_list += ['accessDescCount : ' + str(self.accessDescCount)]
-            except ValueError:
-                str_list += ['accessDescCount : <ValueError>']
-            try:
-                str_list += ['bytesize : ' + str(self.bytesize)]
-            except ValueError:
-                str_list += ['bytesize : <ValueError>']
-            try:
-                str_list += ['dptr : ' + str(self.dptr)]
-            except ValueError:
-                str_list += ['dptr : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def poolProps(self):
-        return self._poolProps
-    @poolProps.setter
-    def poolProps(self, poolProps not None : CUmemPoolProps):
-        string.memcpy(&self._ptr[0].poolProps, <cydriver.CUmemPoolProps*><void_ptr>poolProps.getPtr(), sizeof(self._ptr[0].poolProps))
-    @property
-    def accessDescs(self):
-        arrs = [<void_ptr>self._ptr[0].accessDescs + x*sizeof(cydriver.CUmemAccessDesc) for x in range(self._accessDescs_length)]
-        return [CUmemAccessDesc(_ptr=arr) for arr in arrs]
-    @accessDescs.setter
-    def accessDescs(self, val):
-        if len(val) == 0:
-            free(self._accessDescs)
-            self._accessDescs_length = 0
-            self._ptr[0].accessDescs = NULL
-        else:
-            if self._accessDescs_length != <size_t>len(val):
-                free(self._accessDescs)
-                self._accessDescs = <cydriver.CUmemAccessDesc*> calloc(len(val), sizeof(cydriver.CUmemAccessDesc))
-                if self._accessDescs is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cydriver.CUmemAccessDesc)))
-                self._accessDescs_length = <size_t>len(val)
-                self._ptr[0].accessDescs = self._accessDescs
-            for idx in range(len(val)):
-                string.memcpy(&self._accessDescs[idx], (<CUmemAccessDesc>val[idx])._ptr, sizeof(cydriver.CUmemAccessDesc))
-
-    @property
-    def accessDescCount(self):
-        return self._ptr[0].accessDescCount
-    @accessDescCount.setter
-    def accessDescCount(self, size_t accessDescCount):
-        self._ptr[0].accessDescCount = accessDescCount
-    @property
-    def bytesize(self):
-        return self._ptr[0].bytesize
-    @bytesize.setter
-    def bytesize(self, size_t bytesize):
-        self._ptr[0].bytesize = bytesize
-    @property
-    def dptr(self):
-        return self._dptr
-    @dptr.setter
-    def dptr(self, dptr):
-        cdef cydriver.CUdeviceptr cydptr
-        if dptr is None:
-            cydptr = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(dptr, (CUdeviceptr)):
-            pdptr = int(dptr)
-            cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-        else:
-            pdptr = int(CUdeviceptr(dptr))
-            cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-        self._dptr._ptr[0] = cydptr
-
-{{endif}}
-{{if 'struct CUDA_MEM_FREE_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_MEM_FREE_NODE_PARAMS_st:
-    """
-    Memory free node parameters
-
-    Attributes
-    ----------
-    dptr : CUdeviceptr
-        in: the pointer to free
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_MEM_FREE_NODE_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._dptr = CUdeviceptr(_ptr=<void_ptr>&self._ptr[0].dptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['dptr : ' + str(self.dptr)]
-            except ValueError:
-                str_list += ['dptr : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def dptr(self):
-        return self._dptr
-    @dptr.setter
-    def dptr(self, dptr):
-        cdef cydriver.CUdeviceptr cydptr
-        if dptr is None:
-            cydptr = <cydriver.CUdeviceptr><void_ptr>0
-        elif isinstance(dptr, (CUdeviceptr)):
-            pdptr = int(dptr)
-            cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-        else:
-            pdptr = int(CUdeviceptr(dptr))
-            cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-        self._dptr._ptr[0] = cydptr
-
-{{endif}}
-{{if 'struct CUDA_CHILD_GRAPH_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_CHILD_GRAPH_NODE_PARAMS_st:
-    """
-    Child graph node parameters
-
-    Attributes
-    ----------
-    graph : CUgraph
-        The child graph to clone into the node for node creation, or a
-        handle to the graph owned by the node for node query
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_CHILD_GRAPH_NODE_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._graph = CUgraph(_ptr=<void_ptr>&self._ptr[0].graph)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['graph : ' + str(self.graph)]
-            except ValueError:
-                str_list += ['graph : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def graph(self):
-        return self._graph
-    @graph.setter
-    def graph(self, graph):
-        cdef cydriver.CUgraph cygraph
-        if graph is None:
-            cygraph = <cydriver.CUgraph><void_ptr>0
-        elif isinstance(graph, (CUgraph,)):
-            pgraph = int(graph)
-            cygraph = <cydriver.CUgraph><void_ptr>pgraph
-        else:
-            pgraph = int(CUgraph(graph))
-            cygraph = <cydriver.CUgraph><void_ptr>pgraph
-        self._graph._ptr[0] = cygraph
-{{endif}}
-{{if 'struct CUDA_EVENT_RECORD_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_EVENT_RECORD_NODE_PARAMS_st:
-    """
-    Event record node parameters
-
-    Attributes
-    ----------
-    event : CUevent
-        The event to record when the node executes
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_EVENT_RECORD_NODE_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._event = CUevent(_ptr=<void_ptr>&self._ptr[0].event)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['event : ' + str(self.event)]
-            except ValueError:
-                str_list += ['event : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def event(self):
-        return self._event
-    @event.setter
-    def event(self, event):
-        cdef cydriver.CUevent cyevent
-        if event is None:
-            cyevent = <cydriver.CUevent><void_ptr>0
-        elif isinstance(event, (CUevent,)):
-            pevent = int(event)
-            cyevent = <cydriver.CUevent><void_ptr>pevent
-        else:
-            pevent = int(CUevent(event))
-            cyevent = <cydriver.CUevent><void_ptr>pevent
-        self._event._ptr[0] = cyevent
-{{endif}}
-{{if 'struct CUDA_EVENT_WAIT_NODE_PARAMS_st' in found_types}}
-
-cdef class CUDA_EVENT_WAIT_NODE_PARAMS_st:
-    """
-    Event wait node parameters
-
-    Attributes
-    ----------
-    event : CUevent
-        The event to wait on from the node
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUDA_EVENT_WAIT_NODE_PARAMS_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._event = CUevent(_ptr=<void_ptr>&self._ptr[0].event)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['event : ' + str(self.event)]
-            except ValueError:
-                str_list += ['event : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def event(self):
-        return self._event
-    @event.setter
-    def event(self, event):
-        cdef cydriver.CUevent cyevent
-        if event is None:
-            cyevent = <cydriver.CUevent><void_ptr>0
-        elif isinstance(event, (CUevent,)):
-            pevent = int(event)
-            cyevent = <cydriver.CUevent><void_ptr>pevent
-        else:
-            pevent = int(CUevent(event))
-            cyevent = <cydriver.CUevent><void_ptr>pevent
-        self._event._ptr[0] = cyevent
-{{endif}}
-{{if 'struct CUgraphNodeParams_st' in found_types}}
-
-cdef class CUgraphNodeParams_st:
-    """
-    Graph node parameters. See cuGraphAddNode.
-
-    Attributes
-    ----------
-    type : CUgraphNodeType
-        Type of the node
-    reserved0 : List[int]
-        Reserved. Must be zero.
-    reserved1 : List[long long]
-        Padding. Unused bytes must be zero.
-    kernel : CUDA_KERNEL_NODE_PARAMS_v3
-        Kernel node parameters.
-    memcpy : CUDA_MEMCPY_NODE_PARAMS
-        Memcpy node parameters.
-    memset : CUDA_MEMSET_NODE_PARAMS_v2
-        Memset node parameters.
-    host : CUDA_HOST_NODE_PARAMS_v2
-        Host node parameters.
-    graph : CUDA_CHILD_GRAPH_NODE_PARAMS
-        Child graph node parameters.
-    eventWait : CUDA_EVENT_WAIT_NODE_PARAMS
-        Event wait node parameters.
-    eventRecord : CUDA_EVENT_RECORD_NODE_PARAMS
-        Event record node parameters.
-    extSemSignal : CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2
-        External semaphore signal node parameters.
-    extSemWait : CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2
-        External semaphore wait node parameters.
-    alloc : CUDA_MEM_ALLOC_NODE_PARAMS_v2
-        Memory allocation node parameters.
-    free : CUDA_MEM_FREE_NODE_PARAMS
-        Memory free node parameters.
-    memOp : CUDA_BATCH_MEM_OP_NODE_PARAMS_v2
-        MemOp node parameters.
-    conditional : CUDA_CONDITIONAL_NODE_PARAMS
-        Conditional node parameters.
-    reserved2 : long long
-        Reserved bytes. Must be zero.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cydriver.CUgraphNodeParams_st *>calloc(1, sizeof(cydriver.CUgraphNodeParams_st))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cydriver.CUgraphNodeParams_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._kernel = CUDA_KERNEL_NODE_PARAMS_v3(_ptr=<void_ptr>&self._ptr[0].kernel)
-        self._memcpy = CUDA_MEMCPY_NODE_PARAMS(_ptr=<void_ptr>&self._ptr[0].memcpy)
-        self._memset = CUDA_MEMSET_NODE_PARAMS_v2(_ptr=<void_ptr>&self._ptr[0].memset)
-        self._host = CUDA_HOST_NODE_PARAMS_v2(_ptr=<void_ptr>&self._ptr[0].host)
-        self._graph = CUDA_CHILD_GRAPH_NODE_PARAMS(_ptr=<void_ptr>&self._ptr[0].graph)
-        self._eventWait = CUDA_EVENT_WAIT_NODE_PARAMS(_ptr=<void_ptr>&self._ptr[0].eventWait)
-        self._eventRecord = CUDA_EVENT_RECORD_NODE_PARAMS(_ptr=<void_ptr>&self._ptr[0].eventRecord)
-        self._extSemSignal = CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2(_ptr=<void_ptr>&self._ptr[0].extSemSignal)
-        self._extSemWait = CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2(_ptr=<void_ptr>&self._ptr[0].extSemWait)
-        self._alloc = CUDA_MEM_ALLOC_NODE_PARAMS_v2(_ptr=<void_ptr>&self._ptr[0].alloc)
-        self._free = CUDA_MEM_FREE_NODE_PARAMS(_ptr=<void_ptr>&self._ptr[0].free)
-        self._memOp = CUDA_BATCH_MEM_OP_NODE_PARAMS_v2(_ptr=<void_ptr>&self._ptr[0].memOp)
-        self._conditional = CUDA_CONDITIONAL_NODE_PARAMS(_ptr=<void_ptr>&self._ptr[0].conditional)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['reserved0 : ' + str(self.reserved0)]
-            except ValueError:
-                str_list += ['reserved0 : <ValueError>']
-            try:
-                str_list += ['reserved1 : ' + str(self.reserved1)]
-            except ValueError:
-                str_list += ['reserved1 : <ValueError>']
-            try:
-                str_list += ['kernel :\n' + '\n'.join(['    ' + line for line in str(self.kernel).splitlines()])]
-            except ValueError:
-                str_list += ['kernel : <ValueError>']
-            try:
-                str_list += ['memcpy :\n' + '\n'.join(['    ' + line for line in str(self.memcpy).splitlines()])]
-            except ValueError:
-                str_list += ['memcpy : <ValueError>']
-            try:
-                str_list += ['memset :\n' + '\n'.join(['    ' + line for line in str(self.memset).splitlines()])]
-            except ValueError:
-                str_list += ['memset : <ValueError>']
-            try:
-                str_list += ['host :\n' + '\n'.join(['    ' + line for line in str(self.host).splitlines()])]
-            except ValueError:
-                str_list += ['host : <ValueError>']
-            try:
-                str_list += ['graph :\n' + '\n'.join(['    ' + line for line in str(self.graph).splitlines()])]
-            except ValueError:
-                str_list += ['graph : <ValueError>']
-            try:
-                str_list += ['eventWait :\n' + '\n'.join(['    ' + line for line in str(self.eventWait).splitlines()])]
-            except ValueError:
-                str_list += ['eventWait : <ValueError>']
-            try:
-                str_list += ['eventRecord :\n' + '\n'.join(['    ' + line for line in str(self.eventRecord).splitlines()])]
-            except ValueError:
-                str_list += ['eventRecord : <ValueError>']
-            try:
-                str_list += ['extSemSignal :\n' + '\n'.join(['    ' + line for line in str(self.extSemSignal).splitlines()])]
-            except ValueError:
-                str_list += ['extSemSignal : <ValueError>']
-            try:
-                str_list += ['extSemWait :\n' + '\n'.join(['    ' + line for line in str(self.extSemWait).splitlines()])]
-            except ValueError:
-                str_list += ['extSemWait : <ValueError>']
-            try:
-                str_list += ['alloc :\n' + '\n'.join(['    ' + line for line in str(self.alloc).splitlines()])]
-            except ValueError:
-                str_list += ['alloc : <ValueError>']
-            try:
-                str_list += ['free :\n' + '\n'.join(['    ' + line for line in str(self.free).splitlines()])]
-            except ValueError:
-                str_list += ['free : <ValueError>']
-            try:
-                str_list += ['memOp :\n' + '\n'.join(['    ' + line for line in str(self.memOp).splitlines()])]
-            except ValueError:
-                str_list += ['memOp : <ValueError>']
-            try:
-                str_list += ['conditional :\n' + '\n'.join(['    ' + line for line in str(self.conditional).splitlines()])]
-            except ValueError:
-                str_list += ['conditional : <ValueError>']
-            try:
-                str_list += ['reserved2 : ' + str(self.reserved2)]
-            except ValueError:
-                str_list += ['reserved2 : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return CUgraphNodeType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : CUgraphNodeType):
-        self._ptr[0].type = type.value
-    @property
-    def reserved0(self):
-        return self._ptr[0].reserved0
-    @reserved0.setter
-    def reserved0(self, reserved0):
-        self._ptr[0].reserved0 = reserved0
-    @property
-    def reserved1(self):
-        return self._ptr[0].reserved1
-    @reserved1.setter
-    def reserved1(self, reserved1):
-        self._ptr[0].reserved1 = reserved1
-    @property
-    def kernel(self):
-        return self._kernel
-    @kernel.setter
-    def kernel(self, kernel not None : CUDA_KERNEL_NODE_PARAMS_v3):
-        string.memcpy(&self._ptr[0].kernel, <cydriver.CUDA_KERNEL_NODE_PARAMS_v3*><void_ptr>kernel.getPtr(), sizeof(self._ptr[0].kernel))
-    @property
-    def memcpy(self):
-        return self._memcpy
-    @memcpy.setter
-    def memcpy(self, memcpy not None : CUDA_MEMCPY_NODE_PARAMS):
-        string.memcpy(&self._ptr[0].memcpy, <cydriver.CUDA_MEMCPY_NODE_PARAMS*><void_ptr>memcpy.getPtr(), sizeof(self._ptr[0].memcpy))
-    @property
-    def memset(self):
-        return self._memset
-    @memset.setter
-    def memset(self, memset not None : CUDA_MEMSET_NODE_PARAMS_v2):
-        string.memcpy(&self._ptr[0].memset, <cydriver.CUDA_MEMSET_NODE_PARAMS_v2*><void_ptr>memset.getPtr(), sizeof(self._ptr[0].memset))
-    @property
-    def host(self):
-        return self._host
-    @host.setter
-    def host(self, host not None : CUDA_HOST_NODE_PARAMS_v2):
-        string.memcpy(&self._ptr[0].host, <cydriver.CUDA_HOST_NODE_PARAMS_v2*><void_ptr>host.getPtr(), sizeof(self._ptr[0].host))
-    @property
-    def graph(self):
-        return self._graph
-    @graph.setter
-    def graph(self, graph not None : CUDA_CHILD_GRAPH_NODE_PARAMS):
-        string.memcpy(&self._ptr[0].graph, <cydriver.CUDA_CHILD_GRAPH_NODE_PARAMS*><void_ptr>graph.getPtr(), sizeof(self._ptr[0].graph))
-    @property
-    def eventWait(self):
-        return self._eventWait
-    @eventWait.setter
-    def eventWait(self, eventWait not None : CUDA_EVENT_WAIT_NODE_PARAMS):
-        string.memcpy(&self._ptr[0].eventWait, <cydriver.CUDA_EVENT_WAIT_NODE_PARAMS*><void_ptr>eventWait.getPtr(), sizeof(self._ptr[0].eventWait))
-    @property
-    def eventRecord(self):
-        return self._eventRecord
-    @eventRecord.setter
-    def eventRecord(self, eventRecord not None : CUDA_EVENT_RECORD_NODE_PARAMS):
-        string.memcpy(&self._ptr[0].eventRecord, <cydriver.CUDA_EVENT_RECORD_NODE_PARAMS*><void_ptr>eventRecord.getPtr(), sizeof(self._ptr[0].eventRecord))
-    @property
-    def extSemSignal(self):
-        return self._extSemSignal
-    @extSemSignal.setter
-    def extSemSignal(self, extSemSignal not None : CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2):
-        string.memcpy(&self._ptr[0].extSemSignal, <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2*><void_ptr>extSemSignal.getPtr(), sizeof(self._ptr[0].extSemSignal))
-    @property
-    def extSemWait(self):
-        return self._extSemWait
-    @extSemWait.setter
-    def extSemWait(self, extSemWait not None : CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2):
-        string.memcpy(&self._ptr[0].extSemWait, <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2*><void_ptr>extSemWait.getPtr(), sizeof(self._ptr[0].extSemWait))
-    @property
-    def alloc(self):
-        return self._alloc
-    @alloc.setter
-    def alloc(self, alloc not None : CUDA_MEM_ALLOC_NODE_PARAMS_v2):
-        string.memcpy(&self._ptr[0].alloc, <cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v2*><void_ptr>alloc.getPtr(), sizeof(self._ptr[0].alloc))
-    @property
-    def free(self):
-        return self._free
-    @free.setter
-    def free(self, free not None : CUDA_MEM_FREE_NODE_PARAMS):
-        string.memcpy(&self._ptr[0].free, <cydriver.CUDA_MEM_FREE_NODE_PARAMS*><void_ptr>free.getPtr(), sizeof(self._ptr[0].free))
-    @property
-    def memOp(self):
-        return self._memOp
-    @memOp.setter
-    def memOp(self, memOp not None : CUDA_BATCH_MEM_OP_NODE_PARAMS_v2):
-        string.memcpy(&self._ptr[0].memOp, <cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v2*><void_ptr>memOp.getPtr(), sizeof(self._ptr[0].memOp))
-    @property
-    def conditional(self):
-        return self._conditional
-    @conditional.setter
-    def conditional(self, conditional not None : CUDA_CONDITIONAL_NODE_PARAMS):
-        string.memcpy(&self._ptr[0].conditional, <cydriver.CUDA_CONDITIONAL_NODE_PARAMS*><void_ptr>conditional.getPtr(), sizeof(self._ptr[0].conditional))
-    @property
-    def reserved2(self):
-        return self._ptr[0].reserved2
-    @reserved2.setter
-    def reserved2(self, long long reserved2):
-        self._ptr[0].reserved2 = reserved2
-{{endif}}
-{{if 'struct CUdevSmResource_st' in found_types}}
-
-cdef class CUdevSmResource_st:
-    """
-    Attributes
-    ----------
-    smCount : unsigned int
-        The amount of streaming multiprocessors available in this resource.
-        This is an output parameter only, do not write to this field.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUdevSmResource_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['smCount : ' + str(self.smCount)]
-            except ValueError:
-                str_list += ['smCount : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def smCount(self):
-        return self._ptr[0].smCount
-    @smCount.setter
-    def smCount(self, unsigned int smCount):
-        self._ptr[0].smCount = smCount
-{{endif}}
-{{if 'struct CUdevResource_st' in found_types}}
-
-cdef class CUdevResource_st:
-    """
-    Attributes
-    ----------
-    type : CUdevResourceType
-        Type of resource, dictates which union field was last set
-    _internal_padding : bytes
-
-    sm : CUdevSmResource
-        Resource corresponding to CU_DEV_RESOURCE_TYPE_SM ``. type.
-    _oversize : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cydriver.CUdevResource_st *>calloc(1, sizeof(cydriver.CUdevResource_st))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cydriver.CUdevResource_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._sm = CUdevSmResource(_ptr=<void_ptr>&self._ptr[0].sm)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['_internal_padding : ' + str(self._internal_padding)]
-            except ValueError:
-                str_list += ['_internal_padding : <ValueError>']
-            try:
-                str_list += ['sm :\n' + '\n'.join(['    ' + line for line in str(self.sm).splitlines()])]
-            except ValueError:
-                str_list += ['sm : <ValueError>']
-            try:
-                str_list += ['_oversize : ' + str(self._oversize)]
-            except ValueError:
-                str_list += ['_oversize : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return CUdevResourceType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : CUdevResourceType):
-        self._ptr[0].type = type.value
-    @property
-    def _internal_padding(self):
-        return PyBytes_FromStringAndSize(<char*>self._ptr[0]._internal_padding, 92)
-    @_internal_padding.setter
-    def _internal_padding(self, _internal_padding):
-        if len(_internal_padding) != 92:
-            raise ValueError("_internal_padding length must be 92, is " + str(len(_internal_padding)))
-        for i, b in enumerate(_internal_padding):
-            self._ptr[0]._internal_padding[i] = b
-    @property
-    def sm(self):
-        return self._sm
-    @sm.setter
-    def sm(self, sm not None : CUdevSmResource):
-        string.memcpy(&self._ptr[0].sm, <cydriver.CUdevSmResource*><void_ptr>sm.getPtr(), sizeof(self._ptr[0].sm))
-    @property
-    def _oversize(self):
-        return PyBytes_FromStringAndSize(<char*>self._ptr[0]._oversize, 48)
-    @_oversize.setter
-    def _oversize(self, _oversize):
-        if len(_oversize) != 48:
-            raise ValueError("_oversize length must be 48, is " + str(len(_oversize)))
-        for i, b in enumerate(_oversize):
-            self._ptr[0]._oversize[i] = b
-{{endif}}
-{{if True}}
-
-cdef class anon_union14:
-    """
-    Attributes
-    ----------
-    pArray : List[CUarray]
-
-    pPitch : List[Any]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cydriver.CUeglFrame_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].frame
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['pArray : ' + str(self.pArray)]
-            except ValueError:
-                str_list += ['pArray : <ValueError>']
-            try:
-                str_list += ['pPitch : ' + hex(self.pPitch)]
-            except ValueError:
-                str_list += ['pPitch : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def pArray(self):
-        return [CUarray(init_value=<void_ptr>_pArray) for _pArray in self._ptr[0].frame.pArray]
-    @pArray.setter
-    def pArray(self, pArray : List[CUarray]):
-        if len(pArray) != 3:
-            raise IndexError('not enough values found during array assignment, expected 3, got', len(pArray))
-        pArray = [int(_pArray) for _pArray in pArray]
-        for _idx, _pArray in enumerate(pArray):
-            self._ptr[0].frame.pArray[_idx] = <cydriver.CUarray><void_ptr>_pArray
-
-    @property
-    def pPitch(self):
-        return [<void_ptr>_pPitch for _pPitch in self._ptr[0].frame.pPitch]
-    @pPitch.setter
-    def pPitch(self, pPitch : List[int]):
-        if len(pPitch) != 3:
-            raise IndexError('not enough values found during array assignment, expected 3, got', len(pPitch))
-        pPitch = [<void_ptr>_pPitch for _pPitch in pPitch]
-        for _idx, _pPitch in enumerate(pPitch):
-            self._ptr[0].frame.pPitch[_idx] = <void*><void_ptr>_pPitch
-{{endif}}
-{{if True}}
-
-cdef class CUeglFrame_st:
-    """
-    CUDA EGLFrame structure Descriptor - structure defining one frame
-    of EGL.  Each frame may contain one or more planes depending on
-    whether the surface * is Multiplanar or not.
-
-    Attributes
-    ----------
-    frame : anon_union14
-
-    width : unsigned int
-        Width of first plane
-    height : unsigned int
-        Height of first plane
-    depth : unsigned int
-        Depth of first plane
-    pitch : unsigned int
-        Pitch of first plane
-    planeCount : unsigned int
-        Number of planes
-    numChannels : unsigned int
-        Number of channels for the plane
-    frameType : CUeglFrameType
-        Array or Pitch
-    eglColorFormat : CUeglColorFormat
-        CUDA EGL Color Format
-    cuFormat : CUarray_format
-        CUDA Array Format
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cydriver.CUeglFrame_st *>calloc(1, sizeof(cydriver.CUeglFrame_st))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cydriver.CUeglFrame_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._frame = anon_union14(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['frame :\n' + '\n'.join(['    ' + line for line in str(self.frame).splitlines()])]
-            except ValueError:
-                str_list += ['frame : <ValueError>']
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            try:
-                str_list += ['depth : ' + str(self.depth)]
-            except ValueError:
-                str_list += ['depth : <ValueError>']
-            try:
-                str_list += ['pitch : ' + str(self.pitch)]
-            except ValueError:
-                str_list += ['pitch : <ValueError>']
-            try:
-                str_list += ['planeCount : ' + str(self.planeCount)]
-            except ValueError:
-                str_list += ['planeCount : <ValueError>']
-            try:
-                str_list += ['numChannels : ' + str(self.numChannels)]
-            except ValueError:
-                str_list += ['numChannels : <ValueError>']
-            try:
-                str_list += ['frameType : ' + str(self.frameType)]
-            except ValueError:
-                str_list += ['frameType : <ValueError>']
-            try:
-                str_list += ['eglColorFormat : ' + str(self.eglColorFormat)]
-            except ValueError:
-                str_list += ['eglColorFormat : <ValueError>']
-            try:
-                str_list += ['cuFormat : ' + str(self.cuFormat)]
-            except ValueError:
-                str_list += ['cuFormat : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def frame(self):
-        return self._frame
-    @frame.setter
-    def frame(self, frame not None : anon_union14):
-        string.memcpy(&self._ptr[0].frame, <cydriver.anon_union14*><void_ptr>frame.getPtr(), sizeof(self._ptr[0].frame))
-    @property
-    def width(self):
-        return self._ptr[0].width
-    @width.setter
-    def width(self, unsigned int width):
-        self._ptr[0].width = width
-    @property
-    def height(self):
-        return self._ptr[0].height
-    @height.setter
-    def height(self, unsigned int height):
-        self._ptr[0].height = height
-    @property
-    def depth(self):
-        return self._ptr[0].depth
-    @depth.setter
-    def depth(self, unsigned int depth):
-        self._ptr[0].depth = depth
-    @property
-    def pitch(self):
-        return self._ptr[0].pitch
-    @pitch.setter
-    def pitch(self, unsigned int pitch):
-        self._ptr[0].pitch = pitch
-    @property
-    def planeCount(self):
-        return self._ptr[0].planeCount
-    @planeCount.setter
-    def planeCount(self, unsigned int planeCount):
-        self._ptr[0].planeCount = planeCount
-    @property
-    def numChannels(self):
-        return self._ptr[0].numChannels
-    @numChannels.setter
-    def numChannels(self, unsigned int numChannels):
-        self._ptr[0].numChannels = numChannels
-    @property
-    def frameType(self):
-        return CUeglFrameType(self._ptr[0].frameType)
-    @frameType.setter
-    def frameType(self, frameType not None : CUeglFrameType):
-        self._ptr[0].frameType = frameType.value
-    @property
-    def eglColorFormat(self):
-        return CUeglColorFormat(self._ptr[0].eglColorFormat)
-    @eglColorFormat.setter
-    def eglColorFormat(self, eglColorFormat not None : CUeglColorFormat):
-        self._ptr[0].eglColorFormat = eglColorFormat.value
-    @property
-    def cuFormat(self):
-        return CUarray_format(self._ptr[0].cuFormat)
-    @cuFormat.setter
-    def cuFormat(self, cuFormat not None : CUarray_format):
-        self._ptr[0].cuFormat = cuFormat.value
-{{endif}}
-{{if 'cuuint32_t' in found_types}}
-
-cdef class cuuint32_t:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.cuuint32_t *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<cuuint32_t ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <uint32_t>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cuuint64_t' in found_types}}
-
-cdef class cuuint64_t:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, uint64_t init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.cuuint64_t *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<cuuint64_t ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <uint64_t>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUdeviceptr_v2' in found_types}}
-
-cdef class CUdeviceptr_v2:
-    """
-
-    CUDA device pointer CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUdeviceptr_v2 *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<CUdeviceptr_v2 ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUdevice_v1' in found_types}}
-
-cdef class CUdevice_v1:
-    """
-
-    CUDA device
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, int init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUdevice_v1 *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<CUdevice_v1 ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <int>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUtexObject_v1' in found_types}}
-
-cdef class CUtexObject_v1:
-    """
-
-    An opaque value that represents a CUDA texture object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUtexObject_v1 *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<CUtexObject_v1 ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUsurfObject_v1' in found_types}}
-
-cdef class CUsurfObject_v1:
-    """
-
-    An opaque value that represents a CUDA surface object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUsurfObject_v1 *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<CUsurfObject_v1 ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'CUmemGenericAllocationHandle_v1' in found_types}}
-
-cdef class CUmemGenericAllocationHandle_v1:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.CUmemGenericAllocationHandle_v1 *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<CUmemGenericAllocationHandle_v1 ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class GLenum:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.GLenum *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<GLenum ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned int>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class GLuint:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.GLuint *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<GLuint ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned int>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLint:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.EGLint *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<EGLint ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned int>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpDevice:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.VdpDevice *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<VdpDevice ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <uint32_t>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpGetProcAddress:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.VdpGetProcAddress *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<VdpGetProcAddress ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpVideoSurface:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.VdpVideoSurface *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<VdpVideoSurface ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <uint32_t>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpOutputSurface:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cydriver.VdpOutputSurface *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<VdpOutputSurface ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <uint32_t>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cuGetErrorString' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGetErrorString(error not None : CUresult):
-    """ Gets the string description of an error code.
-
-    Sets `*pStr` to the address of a NULL-terminated string description of
-    the error code `error`. If the error code is not recognized,
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be returned and `*pStr` will
-    be set to the NULL address.
-
-    Parameters
-    ----------
-    error : :py:obj:`~.CUresult`
-        Error code to convert to string
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pStr : bytes
-        Address of the string pointer.
-
-    See Also
-    --------
-    :py:obj:`~.CUresult`, :py:obj:`~.cudaGetErrorString`
-    """
-    cdef cydriver.CUresult cyerror = error.value
-    cdef const char* pStr = NULL
-    err = cydriver.cuGetErrorString(cyerror, &pStr)
-    return (CUresult(err), <bytes>pStr)
-{{endif}}
-
-{{if 'cuGetErrorName' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGetErrorName(error not None : CUresult):
-    """ Gets the string representation of an error code enum name.
-
-    Sets `*pStr` to the address of a NULL-terminated string representation
-    of the name of the enum error code `error`. If the error code is not
-    recognized, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be returned and
-    `*pStr` will be set to the NULL address.
-
-    Parameters
-    ----------
-    error : :py:obj:`~.CUresult`
-        Error code to convert to string
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pStr : bytes
-        Address of the string pointer.
-
-    See Also
-    --------
-    :py:obj:`~.CUresult`, :py:obj:`~.cudaGetErrorName`
-    """
-    cdef cydriver.CUresult cyerror = error.value
-    cdef const char* pStr = NULL
-    err = cydriver.cuGetErrorName(cyerror, &pStr)
-    return (CUresult(err), <bytes>pStr)
-{{endif}}
-
-{{if 'cuInit' in found_functions}}
-
-@cython.embedsignature(True)
-def cuInit(unsigned int Flags):
-    """ Initialize the CUDA driver API Initializes the driver API and must be called before any other function from the driver API in the current process. Currently, the `Flags` parameter must be 0. If :py:obj:`~.cuInit()` has not been called, any function from the driver API will return :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`.
-
-    Parameters
-    ----------
-    Flags : unsigned int
-        Initialization flag for CUDA.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH`, :py:obj:`~.CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE`
-    """
-    err = cydriver.cuInit(Flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuDriverGetVersion' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDriverGetVersion():
-    """ Returns the latest CUDA version supported by driver.
-
-    Returns in `*driverVersion` the version of CUDA supported by the
-    driver. The version is returned as (1000 * major + 10 * minor). For
-    example, CUDA 9.2 would be represented by 9020.
-
-    This function automatically returns
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` if `driverVersion` is NULL.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    driverVersion : int
-        Returns the CUDA driver version
-
-    See Also
-    --------
-    :py:obj:`~.cudaDriverGetVersion`, :py:obj:`~.cudaRuntimeGetVersion`
-    """
-    cdef int driverVersion = 0
-    err = cydriver.cuDriverGetVersion(&driverVersion)
-    return (CUresult(err), driverVersion)
-{{endif}}
-
-{{if 'cuDeviceGet' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGet(int ordinal):
-    """ Returns a handle to a compute device.
-
-    Returns in `*device` a device handle given an ordinal in the range [0,
-    :py:obj:`~.cuDeviceGetCount()`-1].
-
-    Parameters
-    ----------
-    ordinal : int
-        Device number to get handle for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    device : :py:obj:`~.CUdevice`
-        Returned device handle
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGetLuid`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport`
-    """
-    cdef CUdevice device = CUdevice()
-    err = cydriver.cuDeviceGet(<cydriver.CUdevice*>device._ptr, ordinal)
-    return (CUresult(err), device)
-{{endif}}
-
-{{if 'cuDeviceGetCount' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetCount():
-    """ Returns the number of compute-capable devices.
-
-    Returns in `*count` the number of devices with compute capability
-    greater than or equal to 2.0 that are available for execution. If there
-    is no such device, :py:obj:`~.cuDeviceGetCount()` returns 0.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    count : int
-        Returned number of compute-capable devices
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGetLuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport`, :py:obj:`~.cudaGetDeviceCount`
-    """
-    cdef int count = 0
-    err = cydriver.cuDeviceGetCount(&count)
-    return (CUresult(err), count)
-{{endif}}
-
-{{if 'cuDeviceGetName' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetName(int length, dev):
-    """ Returns an identifier string for the device.
-
-    Returns an ASCII string identifying the device `dev` in the NULL-
-    terminated string pointed to by `name`. `length` specifies the maximum
-    length of the string that may be returned.
-
-    Parameters
-    ----------
-    length : int
-        Maximum length of string to store in `name`
-    dev : :py:obj:`~.CUdevice`
-        Device to get identifier string for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    name : bytes
-        Returned identifier string for the device
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGetLuid`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport`, :py:obj:`~.cudaGetDeviceProperties`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    pyname = b" " * length
-    cdef char* name = pyname
-    err = cydriver.cuDeviceGetName(name, length, cydev)
-    return (CUresult(err), pyname)
-{{endif}}
-
-{{if 'cuDeviceGetUuid' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetUuid(dev):
-    """ Return an UUID for the device.
-
-    Note there is a later version of this API,
-    :py:obj:`~.cuDeviceGetUuid_v2`. It will supplant this version in 12.0,
-    which is retained for minor version compatibility.
-
-    Returns 16-octets identifying the device `dev` in the structure pointed
-    by the `uuid`.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device to get identifier string for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    uuid : :py:obj:`~.CUuuid`
-        Returned UUID
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetUuid_v2` :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetLuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport`, :py:obj:`~.cudaGetDeviceProperties`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef CUuuid uuid = CUuuid()
-    err = cydriver.cuDeviceGetUuid(<cydriver.CUuuid*>uuid._ptr, cydev)
-    return (CUresult(err), uuid)
-{{endif}}
-
-{{if 'cuDeviceGetUuid_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetUuid_v2(dev):
-    """ Return an UUID for the device (11.4+)
-
-    Returns 16-octets identifying the device `dev` in the structure pointed
-    by the `uuid`. If the device is in MIG mode, returns its MIG UUID which
-    uniquely identifies the subscribed MIG compute instance.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device to get identifier string for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    uuid : :py:obj:`~.CUuuid`
-        Returned UUID
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetLuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cudaGetDeviceProperties`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef CUuuid uuid = CUuuid()
-    err = cydriver.cuDeviceGetUuid_v2(<cydriver.CUuuid*>uuid._ptr, cydev)
-    return (CUresult(err), uuid)
-{{endif}}
-
-{{if 'cuDeviceGetLuid' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetLuid(dev):
-    """ Return an LUID and device node mask for the device.
-
-    Return identifying information (`luid` and `deviceNodeMask`) to allow
-    matching device with graphics APIs.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device to get identifier string for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    luid : bytes
-        Returned LUID
-    deviceNodeMask : unsigned int
-        Returned device node mask
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport`, :py:obj:`~.cudaGetDeviceProperties`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef char luid[8]
-    cdef unsigned int deviceNodeMask = 0
-    err = cydriver.cuDeviceGetLuid(luid, &deviceNodeMask, cydev)
-    return (CUresult(err), <bytes>luid, deviceNodeMask)
-{{endif}}
-
-{{if 'cuDeviceTotalMem_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceTotalMem(dev):
-    """ Returns the total amount of memory on the device.
-
-    Returns in `*bytes` the total amount of memory available on the device
-    `dev` in bytes.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device handle
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    numbytes : int
-        Returned memory available on device in bytes
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceGetExecAffinitySupport`, :py:obj:`~.cudaMemGetInfo`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef size_t numbytes = 0
-    err = cydriver.cuDeviceTotalMem(&numbytes, cydev)
-    return (CUresult(err), numbytes)
-{{endif}}
-
-{{if 'cuDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetTexture1DLinearMaxWidth(pformat not None : CUarray_format, unsigned numChannels, dev):
-    """ Returns the maximum number of elements allocatable in a 1D linear texture for a given texture element size.
-
-    Returns in `maxWidthInElements` the maximum number of texture elements
-    allocatable in a 1D linear texture for given `pformat` and
-    `numChannels`.
-
-    Parameters
-    ----------
-    pformat : :py:obj:`~.CUarray_format`
-        Texture format.
-    numChannels : unsigned
-        Number of channels per texture element.
-    dev : :py:obj:`~.CUdevice`
-        Device handle.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    maxWidthInElements : int
-        Returned maximum number of texture elements allocatable for given
-        `pformat` and `numChannels`.
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cudaMemGetInfo`, :py:obj:`~.cuDeviceTotalMem`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef size_t maxWidthInElements = 0
-    cdef cydriver.CUarray_format cypformat = pformat.value
-    err = cydriver.cuDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, cypformat, numChannels, cydev)
-    return (CUresult(err), maxWidthInElements)
-{{endif}}
-
-{{if 'cuDeviceGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetAttribute(attrib not None : CUdevice_attribute, dev):
-    """ Returns information about the device.
-
-    Returns in `*pi` the integer value of the attribute `attrib` on device
-    `dev`. The supported attributes are:
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK`: Maximum number
-      of threads per block;
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X`: Maximum x-dimension
-      of a block
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y`: Maximum y-dimension
-      of a block
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z`: Maximum z-dimension
-      of a block
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X`: Maximum x-dimension
-      of a grid
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y`: Maximum y-dimension
-      of a grid
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z`: Maximum z-dimension
-      of a grid
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK`: Maximum
-      amount of shared memory available to a thread block in bytes
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY`: Memory
-      available on device for constant variables in a CUDA C kernel in
-      bytes
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_WARP_SIZE`: Warp size in threads
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_PITCH`: Maximum pitch in bytes
-      allowed by the memory copy functions that involve memory regions
-      allocated through :py:obj:`~.cuMemAllocPitch()`
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH`: Maximum 1D
-      texture width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH`:
-      Maximum width for a 1D texture bound to linear memory
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH`:
-      Maximum mipmapped 1D texture width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH`: Maximum 2D
-      texture width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT`: Maximum 2D
-      texture height
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH`:
-      Maximum width for a 2D texture bound to linear memory
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT`:
-      Maximum height for a 2D texture bound to linear memory
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH`:
-      Maximum pitch in bytes for a 2D texture bound to linear memory
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH`:
-      Maximum mipmapped 2D texture width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT`:
-      Maximum mipmapped 2D texture height
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH`: Maximum 3D
-      texture width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT`: Maximum 3D
-      texture height
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH`: Maximum 3D
-      texture depth
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE`:
-      Alternate maximum 3D texture width, 0 if no alternate maximum 3D
-      texture size is supported
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE`:
-      Alternate maximum 3D texture height, 0 if no alternate maximum 3D
-      texture size is supported
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE`:
-      Alternate maximum 3D texture depth, 0 if no alternate maximum 3D
-      texture size is supported
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH`: Maximum
-      cubemap texture width or height
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH`:
-      Maximum 1D layered texture width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS`:
-      Maximum layers in a 1D layered texture
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH`:
-      Maximum 2D layered texture width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT`:
-      Maximum 2D layered texture height
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS`:
-      Maximum layers in a 2D layered texture
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH`:
-      Maximum cubemap layered texture width or height
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS`:
-      Maximum layers in a cubemap layered texture
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH`: Maximum 1D
-      surface width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH`: Maximum 2D
-      surface width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT`: Maximum 2D
-      surface height
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH`: Maximum 3D
-      surface width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT`: Maximum 3D
-      surface height
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH`: Maximum 3D
-      surface depth
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH`:
-      Maximum 1D layered surface width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS`:
-      Maximum layers in a 1D layered surface
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH`:
-      Maximum 2D layered surface width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT`:
-      Maximum 2D layered surface height
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS`:
-      Maximum layers in a 2D layered surface
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH`: Maximum
-      cubemap surface width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH`:
-      Maximum cubemap layered surface width
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS`:
-      Maximum layers in a cubemap layered surface
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK`: Maximum
-      number of 32-bit registers available to a thread block
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_CLOCK_RATE`: The typical clock
-      frequency in kilohertz
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT`: Alignment
-      requirement; texture base addresses aligned to
-      :py:obj:`~.textureAlign` bytes do not need an offset applied to
-      texture fetches
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT`: Pitch
-      alignment requirement for 2D texture references bound to pitched
-      memory
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP`: 1 if the device can
-      concurrently copy memory between host and device while executing a
-      kernel, or 0 if not
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT`: Number of
-      multiprocessors on the device
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT`: 1 if there is a
-      run time limit for kernels executed on the device, or 0 if not
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_INTEGRATED`: 1 if the device is
-      integrated with the memory subsystem, or 0 if not
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY`: 1 if the device
-      can map host memory into the CUDA address space, or 0 if not
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE`: Compute mode that
-      device is currently in. Available modes are as follows:
-
-      - :py:obj:`~.CU_COMPUTEMODE_DEFAULT`: Default mode - Device is not
-        restricted and can have multiple CUDA contexts present at a single
-        time.
-
-      - :py:obj:`~.CU_COMPUTEMODE_PROHIBITED`: Compute-prohibited mode -
-        Device is prohibited from creating new CUDA contexts.
-
-      - :py:obj:`~.CU_COMPUTEMODE_EXCLUSIVE_PROCESS`: Compute-exclusive-
-        process mode - Device can have only one context used by a single
-        process at a time.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS`: 1 if the device
-      supports executing multiple kernels within the same context
-      simultaneously, or 0 if not. It is not guaranteed that multiple
-      kernels will be resident on the device concurrently so this feature
-      should not be relied upon for correctness.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_ECC_ENABLED`: 1 if error correction is
-      enabled on the device, 0 if error correction is disabled or not
-      supported by the device
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID`: PCI bus identifier of the
-      device
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID`: PCI device (also known
-      as slot) identifier of the device
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID`: PCI domain identifier
-      of the device
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_TCC_DRIVER`: 1 if the device is using
-      a TCC driver. TCC is only available on Tesla hardware running Windows
-      Vista or later
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE`: Peak memory clock
-      frequency in kilohertz
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH`: Global
-      memory bus width in bits
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE`: Size of L2 cache in
-      bytes. 0 if the device doesn't have L2 cache
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR`:
-      Maximum resident threads per multiprocessor
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING`: 1 if the device
-      shares a unified address space with the host, or 0 if not
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR`: Major
-      compute capability version number
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR`: Minor
-      compute capability version number
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED`: 1 if
-      device supports caching globals in L1 cache, 0 if caching globals in
-      L1 cache is not supported by the device
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED`: 1 if device
-      supports caching locals in L1 cache, 0 if caching locals in L1 cache
-      is not supported by the device
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR`:
-      Maximum amount of shared memory available to a multiprocessor in
-      bytes; this amount is shared by all thread blocks simultaneously
-      resident on a multiprocessor
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR`:
-      Maximum number of 32-bit registers available to a multiprocessor;
-      this number is shared by all thread blocks simultaneously resident on
-      a multiprocessor
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY`: 1 if device supports
-      allocating managed memory on this system, 0 if allocating managed
-      memory is not supported by the device on this system.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD`: 1 if device is on a
-      multi-GPU board, 0 if not.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID`: Unique
-      identifier for a group of devices associated with the same board.
-      Devices on the same multi-GPU board will share the same identifier.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED`: 1 if
-      Link between the device and the host supports native atomic
-      operations.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO`:
-      Ratio of single precision performance (in floating-point operations
-      per second) to double precision performance.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS`: Device
-      supports coherently accessing pageable memory without calling
-      cudaHostRegister on it.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`: Device can
-      coherently access managed memory concurrently with the CPU.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED`: Device
-      supports Compute Preemption.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM`:
-      Device can access host registered memory at the same virtual address
-      as the CPU.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN`:
-      The maximum per block shared memory size supported on this device.
-      This is the maximum value that can be opted into when using the
-      :py:obj:`~.cuFuncSetAttribute()` or
-      :py:obj:`~.cuKernelSetAttribute()` call. For more details see
-      :py:obj:`~.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`:
-      Device accesses pageable memory via the host's page tables.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST`:
-      The host can directly access managed memory on the device without
-      migration.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED`:
-      Device supports virtual memory management APIs like
-      :py:obj:`~.cuMemAddressReserve`, :py:obj:`~.cuMemCreate`,
-      :py:obj:`~.cuMemMap` and related APIs
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED`:
-      Device supports exporting memory to a posix file descriptor with
-      :py:obj:`~.cuMemExportToShareableHandle`, if requested via
-      :py:obj:`~.cuMemCreate`
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED`:
-      Device supports exporting memory to a Win32 NT handle with
-      :py:obj:`~.cuMemExportToShareableHandle`, if requested via
-      :py:obj:`~.cuMemCreate`
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED`:
-      Device supports exporting memory to a Win32 KMT handle with
-      :py:obj:`~.cuMemExportToShareableHandle`, if requested via
-      :py:obj:`~.cuMemCreate`
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR`:
-      Maximum number of thread blocks that can reside on a multiprocessor
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED`: Device
-      supports compressible memory allocation via :py:obj:`~.cuMemCreate`
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE`: Maximum
-      L2 persisting lines capacity setting in bytes
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE`:
-      Maximum value of :py:obj:`~.CUaccessPolicyWindow.num_bytes`
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED`:
-      Device supports specifying the GPUDirect RDMA flag with
-      :py:obj:`~.cuMemCreate`.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK`:
-      Amount of shared memory per block reserved by CUDA driver in bytes
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED`: Device
-      supports sparse CUDA arrays and sparse CUDA mipmapped arrays.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED`:
-      Device supports using the :py:obj:`~.cuMemHostRegister` flag
-      :py:obj:`~.CU_MEMHOSTERGISTER_READ_ONLY` to register memory that must
-      be mapped as read-only to the GPU
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED`: Device
-      supports using the :py:obj:`~.cuMemAllocAsync` and
-      :py:obj:`~.cuMemPool` family of APIs
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED`: Device
-      supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see
-      https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS`:
-      The returned attribute shall be interpreted as a bitmask, where the
-      individual bits are described by the
-      :py:obj:`~.CUflushGPUDirectRDMAWritesOptions` enum
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING`:
-      GPUDirect RDMA writes to the device do not need to be flushed for
-      consumers within the scope indicated by the returned attribute. See
-      :py:obj:`~.CUGPUDirectRDMAWritesOrdering` for the numerical values
-      returned here.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES`:
-      Bitmask of handle types supported with mempool based IPC
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED`:
-      Device supports deferred mapping CUDA arrays and CUDA mipmapped
-      arrays.
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG`: NUMA configuration of a
-      device: value is of type :py:obj:`~.CUdeviceNumaConfig` enum
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_NUMA_ID`: NUMA node ID of the GPU
-      memory
-
-    - :py:obj:`~.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED`: Device supports
-      switch multicast and reduction operations.
-
-    Parameters
-    ----------
-    attrib : :py:obj:`~.CUdevice_attribute`
-        Device attribute to query
-    dev : :py:obj:`~.CUdevice`
-        Device handle
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    pi : int
-        Returned device attribute value
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport`, :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaGetDeviceProperties`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef int pi = 0
-    cdef cydriver.CUdevice_attribute cyattrib = attrib.value
-    err = cydriver.cuDeviceGetAttribute(&pi, cyattrib, cydev)
-    return (CUresult(err), pi)
-{{endif}}
-
-{{if 'cuDeviceGetNvSciSyncAttributes' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, dev, int flags):
-    """ Return NvSciSync attributes that this device can support.
-
-    Returns in `nvSciSyncAttrList`, the properties of NvSciSync that this
-    CUDA device, `dev` can support. The returned `nvSciSyncAttrList` can be
-    used to create an NvSciSync object that matches this device's
-    capabilities.
-
-    If NvSciSyncAttrKey_RequiredPerm field in `nvSciSyncAttrList` is
-    already set this API will return :py:obj:`~.CUDA_ERROR_INVALID_VALUE`.
-
-    The applications should set `nvSciSyncAttrList` to a valid
-    NvSciSyncAttrList failing which this API will return
-    :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`.
-
-    The `flags` controls how applications intends to use the NvSciSync
-    created from the `nvSciSyncAttrList`. The valid flags are:
-
-    - :py:obj:`~.CUDA_NVSCISYNC_ATTR_SIGNAL`, specifies that the
-      applications intends to signal an NvSciSync on this CUDA device.
-
-    - :py:obj:`~.CUDA_NVSCISYNC_ATTR_WAIT`, specifies that the applications
-      intends to wait on an NvSciSync on this CUDA device.
-
-    At least one of these flags must be set, failing which the API returns
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE`. Both the flags are orthogonal to
-    one another: a developer may set both these flags that allows to set
-    both wait and signal specific attributes in the same
-    `nvSciSyncAttrList`.
-
-    Note that this API updates the input `nvSciSyncAttrList` with values
-    equivalent to the following public attribute key-values:
-    NvSciSyncAttrKey_RequiredPerm is set to
-
-    - NvSciSyncAccessPerm_SignalOnly if
-      :py:obj:`~.CUDA_NVSCISYNC_ATTR_SIGNAL` is set in `flags`.
-
-    - NvSciSyncAccessPerm_WaitOnly if :py:obj:`~.CUDA_NVSCISYNC_ATTR_WAIT`
-      is set in `flags`.
-
-    - NvSciSyncAccessPerm_WaitSignal if both
-      :py:obj:`~.CUDA_NVSCISYNC_ATTR_WAIT` and
-      :py:obj:`~.CUDA_NVSCISYNC_ATTR_SIGNAL` are set in `flags`.
-      NvSciSyncAttrKey_PrimitiveInfo is set to
-
-    - NvSciSyncAttrValPrimitiveType_SysmemSemaphore on any valid `device`.
-
-    - NvSciSyncAttrValPrimitiveType_Syncpoint if `device` is a Tegra
-      device.
-
-    - NvSciSyncAttrValPrimitiveType_SysmemSemaphorePayload64b if `device`
-      is GA10X+. NvSciSyncAttrKey_GpuId is set to the same UUID that is
-      returned for this `device` from :py:obj:`~.cuDeviceGetUuid`.
-
-    :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`,
-    :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`,
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`,
-    :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`,
-    :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`,
-    :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-
-    Parameters
-    ----------
-    nvSciSyncAttrList : Any
-        Return NvSciSync attributes supported.
-    dev : :py:obj:`~.CUdevice`
-        Valid Cuda Device to get NvSciSync attributes for.
-    flags : int
-        flags describing NvSciSync usage.
-
-    Returns
-    -------
-    CUresult
-
-
-    See Also
-    --------
-    :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuDestroyExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cynvSciSyncAttrList = utils.HelperInputVoidPtr(nvSciSyncAttrList)
-    cdef void* cynvSciSyncAttrList_ptr = <void*><void_ptr>cynvSciSyncAttrList.cptr
-    err = cydriver.cuDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList_ptr, cydev, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuDeviceSetMemPool' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceSetMemPool(dev, pool):
-    """ Sets the current memory pool of a device.
-
-    The memory pool must be local to the specified device.
-    :py:obj:`~.cuMemAllocAsync` allocates from the current mempool of the
-    provided stream's device. By default, a device's current memory pool is
-    its default memory pool.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        None
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        None
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuMemPoolDestroy`, :py:obj:`~.cuMemAllocFromPoolAsync`
-
-    Notes
-    -----
-    Use :py:obj:`~.cuMemAllocFromPoolAsync` to specify asynchronous allocations from a device different than the one the stream runs on.
-    """
-    cdef cydriver.CUmemoryPool cypool
-    if pool is None:
-        cypool = <cydriver.CUmemoryPool><void_ptr>0
-    elif isinstance(pool, (CUmemoryPool,)):
-        ppool = int(pool)
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    else:
-        ppool = int(CUmemoryPool(pool))
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    err = cydriver.cuDeviceSetMemPool(cydev, cypool)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuDeviceGetMemPool' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetMemPool(dev):
-    """ Gets the current mempool for a device.
-
-    Returns the last pool provided to :py:obj:`~.cuDeviceSetMemPool` for
-    this device or the device's default memory pool if
-    :py:obj:`~.cuDeviceSetMemPool` has never been called. By default the
-    current mempool is the default mempool for a device. Otherwise the
-    returned pool must have been set with :py:obj:`~.cuDeviceSetMemPool`.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        None
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pool : :py:obj:`~.CUmemoryPool`
-        None
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuDeviceSetMemPool`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef CUmemoryPool pool = CUmemoryPool()
-    err = cydriver.cuDeviceGetMemPool(<cydriver.CUmemoryPool*>pool._ptr, cydev)
-    return (CUresult(err), pool)
-{{endif}}
-
-{{if 'cuDeviceGetDefaultMemPool' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetDefaultMemPool(dev):
-    """ Returns the default mempool of a device.
-
-    The default mempool of a device contains device memory from that
-    device.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        None
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED` :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    pool_out : :py:obj:`~.CUmemoryPool`
-        None
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemPoolTrimTo`, :py:obj:`~.cuMemPoolGetAttribute`, :py:obj:`~.cuMemPoolSetAttribute`, :py:obj:`~.cuMemPoolSetAccess`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef CUmemoryPool pool_out = CUmemoryPool()
-    err = cydriver.cuDeviceGetDefaultMemPool(<cydriver.CUmemoryPool*>pool_out._ptr, cydev)
-    return (CUresult(err), pool_out)
-{{endif}}
-
-{{if 'cuDeviceGetExecAffinitySupport' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetExecAffinitySupport(typename not None : CUexecAffinityType, dev):
-    """ Returns information about the execution affinity support of the device.
-
-    Returns in `*pi` whether execution affinity type `typename` is
-    supported by device `dev`. The supported types are:
-
-    - :py:obj:`~.CU_EXEC_AFFINITY_TYPE_SM_COUNT`: 1 if context with limited
-      SMs is supported by the device, or 0 if not;
-
-    Parameters
-    ----------
-    typename : :py:obj:`~.CUexecAffinityType`
-        Execution affinity type to query
-    dev : :py:obj:`~.CUdevice`
-        Device handle
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    pi : int
-        1 if the execution affinity type `typename` is supported by the
-        device, or 0 if not
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef int pi = 0
-    cdef cydriver.CUexecAffinityType cytypename = typename.value
-    err = cydriver.cuDeviceGetExecAffinitySupport(&pi, cytypename, cydev)
-    return (CUresult(err), pi)
-{{endif}}
-
-{{if 'cuFlushGPUDirectRDMAWrites' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFlushGPUDirectRDMAWrites(target not None : CUflushGPUDirectRDMAWritesTarget, scope not None : CUflushGPUDirectRDMAWritesScope):
-    """ Blocks until remote writes are visible to the specified scope.
-
-    Blocks until GPUDirect RDMA writes to the target context via mappings
-    created through APIs like nvidia_p2p_get_pages (see
-    https://docs.nvidia.com/cuda/gpudirect-rdma for more information), are
-    visible to the specified scope.
-
-    If the scope equals or lies within the scope indicated by
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING`, the
-    call will be a no-op and can be safely omitted for performance. This
-    can be determined by comparing the numerical values between the two
-    enums, with smaller scopes having smaller values.
-
-    Users may query support for this API via
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_FLUSH_FLUSH_GPU_DIRECT_RDMA_OPTIONS`.
-
-    Parameters
-    ----------
-    target : :py:obj:`~.CUflushGPUDirectRDMAWritesTarget`
-        The target of the operation, see
-        :py:obj:`~.CUflushGPUDirectRDMAWritesTarget`
-    scope : :py:obj:`~.CUflushGPUDirectRDMAWritesScope`
-        The scope of the operation, see
-        :py:obj:`~.CUflushGPUDirectRDMAWritesScope`
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    """
-    cdef cydriver.CUflushGPUDirectRDMAWritesTarget cytarget = target.value
-    cdef cydriver.CUflushGPUDirectRDMAWritesScope cyscope = scope.value
-    err = cydriver.cuFlushGPUDirectRDMAWrites(cytarget, cyscope)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuDeviceGetProperties' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetProperties(dev):
-    """ Returns properties for a selected device.
-
-    [Deprecated]
-
-    This function was deprecated as of CUDA 5.0 and replaced by
-    :py:obj:`~.cuDeviceGetAttribute()`.
-
-    Returns in `*prop` the properties of device `dev`. The
-    :py:obj:`~.CUdevprop` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.maxThreadsPerBlock` is the maximum number of threads per
-      block;
-
-    - :py:obj:`~.maxThreadsDim`[3] is the maximum sizes of each dimension
-      of a block;
-
-    - :py:obj:`~.maxGridSize`[3] is the maximum sizes of each dimension of
-      a grid;
-
-    - :py:obj:`~.sharedMemPerBlock` is the total amount of shared memory
-      available per block in bytes;
-
-    - :py:obj:`~.totalConstantMemory` is the total amount of constant
-      memory available on the device in bytes;
-
-    - :py:obj:`~.SIMDWidth` is the warp size;
-
-    - :py:obj:`~.memPitch` is the maximum pitch allowed by the memory copy
-      functions that involve memory regions allocated through
-      :py:obj:`~.cuMemAllocPitch()`;
-
-    - :py:obj:`~.regsPerBlock` is the total number of registers available
-      per block;
-
-    - :py:obj:`~.clockRate` is the clock frequency in kilohertz;
-
-    - :py:obj:`~.textureAlign` is the alignment requirement; texture base
-      addresses that are aligned to :py:obj:`~.textureAlign` bytes do not
-      need an offset applied to texture fetches.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device to get properties for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    prop : :py:obj:`~.CUdevprop`
-        Returned properties of device
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef CUdevprop prop = CUdevprop()
-    err = cydriver.cuDeviceGetProperties(<cydriver.CUdevprop*>prop._ptr, cydev)
-    return (CUresult(err), prop)
-{{endif}}
-
-{{if 'cuDeviceComputeCapability' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceComputeCapability(dev):
-    """ Returns the compute capability of the device.
-
-    [Deprecated]
-
-    This function was deprecated as of CUDA 5.0 and its functionality
-    superseded by :py:obj:`~.cuDeviceGetAttribute()`.
-
-    Returns in `*major` and `*minor` the major and minor revision numbers
-    that define the compute capability of the device `dev`.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device handle
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    major : int
-        Major revision number
-    minor : int
-        Minor revision number
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef int major = 0
-    cdef int minor = 0
-    err = cydriver.cuDeviceComputeCapability(&major, &minor, cydev)
-    return (CUresult(err), major, minor)
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxRetain' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDevicePrimaryCtxRetain(dev):
-    """ Retain the primary context on the GPU.
-
-    Retains the primary context on the device. Once the user successfully
-    retains the primary context, the primary context will be active and
-    available to the user until the user releases it with
-    :py:obj:`~.cuDevicePrimaryCtxRelease()` or resets it with
-    :py:obj:`~.cuDevicePrimaryCtxReset()`. Unlike :py:obj:`~.cuCtxCreate()`
-    the newly retained context is not pushed onto the stack.
-
-    Retaining the primary context for the first time will fail with
-    :py:obj:`~.CUDA_ERROR_UNKNOWN` if the compute mode of the device is
-    :py:obj:`~.CU_COMPUTEMODE_PROHIBITED`. The function
-    :py:obj:`~.cuDeviceGetAttribute()` can be used with
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE` to determine the compute
-    mode of the device. The `nvidia-smi` tool can be used to set the
-    compute mode for devices. Documentation for `nvidia-smi` can be
-    obtained by passing a -h option to it.
-
-    Please note that the primary context always supports pinned
-    allocations. Other flags can be specified by
-    :py:obj:`~.cuDevicePrimaryCtxSetFlags()`.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device for which primary context is requested
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    pctx : :py:obj:`~.CUcontext`
-        Returned context handle of the new context
-
-    See Also
-    --------
-    :py:obj:`~.cuDevicePrimaryCtxRelease`, :py:obj:`~.cuDevicePrimaryCtxSetFlags`, :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef CUcontext pctx = CUcontext()
-    err = cydriver.cuDevicePrimaryCtxRetain(<cydriver.CUcontext*>pctx._ptr, cydev)
-    return (CUresult(err), pctx)
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxRelease_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDevicePrimaryCtxRelease(dev):
-    """ Release the primary context on the GPU.
-
-    Releases the primary context interop on the device. A retained context
-    should always be released once the user is done using it. The context
-    is automatically reset once the last reference to it is released. This
-    behavior is different when the primary context was retained by the CUDA
-    runtime from CUDA 4.0 and earlier. In this case, the primary context
-    remains always active.
-
-    Releasing a primary context that has not been previously retained will
-    fail with :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`.
-
-    Please note that unlike :py:obj:`~.cuCtxDestroy()` this method does not
-    pop the context from stack in any circumstances.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device which primary context is released
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-
-    See Also
-    --------
-    :py:obj:`~.cuDevicePrimaryCtxRetain`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    err = cydriver.cuDevicePrimaryCtxRelease(cydev)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxSetFlags_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDevicePrimaryCtxSetFlags(dev, unsigned int flags):
-    """ Set flags for the primary context.
-
-    Sets the flags for the primary context on the device overwriting
-    perviously set ones.
-
-    The three LSBs of the `flags` parameter can be used to control how the
-    OS thread, which owns the CUDA context at the time of an API call,
-    interacts with the OS scheduler when waiting for results from the GPU.
-    Only one of the scheduling flags can be set when creating a context.
-
-    - :py:obj:`~.CU_CTX_SCHED_SPIN`: Instruct CUDA to actively spin when
-      waiting for results from the GPU. This can decrease latency when
-      waiting for the GPU, but may lower the performance of CPU threads if
-      they are performing work in parallel with the CUDA thread.
-
-    - :py:obj:`~.CU_CTX_SCHED_YIELD`: Instruct CUDA to yield its thread
-      when waiting for results from the GPU. This can increase latency when
-      waiting for the GPU, but can increase the performance of CPU threads
-      performing work in parallel with the GPU.
-
-    - :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC`: Instruct CUDA to block the
-      CPU thread on a synchronization primitive when waiting for the GPU to
-      finish work.
-
-    - :py:obj:`~.CU_CTX_BLOCKING_SYNC`: Instruct CUDA to block the CPU
-      thread on a synchronization primitive when waiting for the GPU to
-      finish work.   Deprecated: This flag was deprecated as of CUDA 4.0
-      and was replaced with :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC`.
-
-    - :py:obj:`~.CU_CTX_SCHED_AUTO`: The default value if the `flags`
-      parameter is zero, uses a heuristic based on the number of active
-      CUDA contexts in the process `C` and the number of logical processors
-      in the system `P`. If `C` > `P`, then CUDA will yield to other OS
-      threads when waiting for the GPU (:py:obj:`~.CU_CTX_SCHED_YIELD`),
-      otherwise CUDA will not yield while waiting for results and actively
-      spin on the processor (:py:obj:`~.CU_CTX_SCHED_SPIN`). Additionally,
-      on Tegra devices, :py:obj:`~.CU_CTX_SCHED_AUTO` uses a heuristic
-      based on the power profile of the platform and may choose
-      :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC` for low-powered devices.
-
-    - :py:obj:`~.CU_CTX_LMEM_RESIZE_TO_MAX`: Instruct CUDA to not reduce
-      local memory after resizing local memory for a kernel. This can
-      prevent thrashing by local memory allocations when launching many
-      kernels with high local memory usage at the cost of potentially
-      increased memory usage.   Deprecated: This flag is deprecated and the
-      behavior enabled by this flag is now the default and cannot be
-      disabled.
-
-    - :py:obj:`~.CU_CTX_COREDUMP_ENABLE`: If GPU coredumps have not been
-      enabled globally with :py:obj:`~.cuCoredumpSetAttributeGlobal` or
-      environment variables, this flag can be set during context creation
-      to instruct CUDA to create a coredump if this context raises an
-      exception during execution. These environment variables are described
-      in the CUDA-GDB user guide under the "GPU core dump support" section.
-      The initial settings will be taken from the global settings at the
-      time of context creation. The other settings that control coredump
-      output can be modified by calling :py:obj:`~.cuCoredumpSetAttribute`
-      from the created context after it becomes current.
-
-    - :py:obj:`~.CU_CTX_USER_COREDUMP_ENABLE`: If user-triggered GPU
-      coredumps have not been enabled globally with
-      :py:obj:`~.cuCoredumpSetAttributeGlobal` or environment variables,
-      this flag can be set during context creation to instruct CUDA to
-      create a coredump if data is written to a certain pipe that is
-      present in the OS space. These environment variables are described in
-      the CUDA-GDB user guide under the "GPU core dump support" section. It
-      is important to note that the pipe name `must` be set with
-      :py:obj:`~.cuCoredumpSetAttributeGlobal` before creating the context
-      if this flag is used. Setting this flag implies that
-      :py:obj:`~.CU_CTX_COREDUMP_ENABLE` is set. The initial settings will
-      be taken from the global settings at the time of context creation.
-      The other settings that control coredump output can be modified by
-      calling :py:obj:`~.cuCoredumpSetAttribute` from the created context
-      after it becomes current.
-
-    - :py:obj:`~.CU_CTX_SYNC_MEMOPS`: Ensures that synchronous memory
-      operations initiated on this context will always synchronize. See
-      further documentation in the section titled "API Synchronization
-      behavior" to learn more about cases when synchronous memory
-      operations can exhibit asynchronous behavior.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device for which the primary context flags are set
-    flags : unsigned int
-        New flags for the device
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuDevicePrimaryCtxRetain`, :py:obj:`~.cuDevicePrimaryCtxGetState`, :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxSetFlags`, :py:obj:`~.cudaSetDeviceFlags`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    err = cydriver.cuDevicePrimaryCtxSetFlags(cydev, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxGetState' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDevicePrimaryCtxGetState(dev):
-    """ Get the state of the primary context.
-
-    Returns in `*flags` the flags for the primary context of `dev`, and in
-    `*active` whether it is active. See
-    :py:obj:`~.cuDevicePrimaryCtxSetFlags` for flag values.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device to get primary context flags for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    flags : unsigned int
-        Pointer to store flags
-    active : int
-        Pointer to store context state; 0 = inactive, 1 = active
-
-    See Also
-    --------
-    :py:obj:`~.cuDevicePrimaryCtxSetFlags`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxSetFlags`, :py:obj:`~.cudaGetDeviceFlags`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef unsigned int flags = 0
-    cdef int active = 0
-    err = cydriver.cuDevicePrimaryCtxGetState(cydev, &flags, &active)
-    return (CUresult(err), flags, active)
-{{endif}}
-
-{{if 'cuDevicePrimaryCtxReset_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDevicePrimaryCtxReset(dev):
-    """ Destroy all allocations and reset all state on the primary context.
-
-    Explicitly destroys and cleans up all resources associated with the
-    current device in the current process.
-
-    Note that it is responsibility of the calling function to ensure that
-    no other module in the process is using the device any more. For that
-    reason it is recommended to use :py:obj:`~.cuDevicePrimaryCtxRelease()`
-    in most cases. However it is safe for other modules to call
-    :py:obj:`~.cuDevicePrimaryCtxRelease()` even after resetting the
-    device. Resetting the primary context does not release it, an
-    application that has retained the primary context should explicitly
-    release its usage.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device for which primary context is destroyed
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE`
-
-    See Also
-    --------
-    :py:obj:`~.cuDevicePrimaryCtxRetain`, :py:obj:`~.cuDevicePrimaryCtxRelease`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cudaDeviceReset`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    err = cydriver.cuDevicePrimaryCtxReset(cydev)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxCreate_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxCreate(unsigned int flags, dev):
-    """ Create a CUDA context.
-
-    Creates a new CUDA context and associates it with the calling thread.
-    The `flags` parameter is described below. The context is created with a
-    usage count of 1 and the caller of :py:obj:`~.cuCtxCreate()` must call
-    :py:obj:`~.cuCtxDestroy()` when done using the context. If a context is
-    already current to the thread, it is supplanted by the newly created
-    context and may be restored by a subsequent call to
-    :py:obj:`~.cuCtxPopCurrent()`.
-
-    The three LSBs of the `flags` parameter can be used to control how the
-    OS thread, which owns the CUDA context at the time of an API call,
-    interacts with the OS scheduler when waiting for results from the GPU.
-    Only one of the scheduling flags can be set when creating a context.
-
-    - :py:obj:`~.CU_CTX_SCHED_SPIN`: Instruct CUDA to actively spin when
-      waiting for results from the GPU. This can decrease latency when
-      waiting for the GPU, but may lower the performance of CPU threads if
-      they are performing work in parallel with the CUDA thread.
-
-    - :py:obj:`~.CU_CTX_SCHED_YIELD`: Instruct CUDA to yield its thread
-      when waiting for results from the GPU. This can increase latency when
-      waiting for the GPU, but can increase the performance of CPU threads
-      performing work in parallel with the GPU.
-
-    - :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC`: Instruct CUDA to block the
-      CPU thread on a synchronization primitive when waiting for the GPU to
-      finish work.
-
-    - :py:obj:`~.CU_CTX_BLOCKING_SYNC`: Instruct CUDA to block the CPU
-      thread on a synchronization primitive when waiting for the GPU to
-      finish work.   Deprecated: This flag was deprecated as of CUDA 4.0
-      and was replaced with :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC`.
-
-    - :py:obj:`~.CU_CTX_SCHED_AUTO`: The default value if the `flags`
-      parameter is zero, uses a heuristic based on the number of active
-      CUDA contexts in the process `C` and the number of logical processors
-      in the system `P`. If `C` > `P`, then CUDA will yield to other OS
-      threads when waiting for the GPU (:py:obj:`~.CU_CTX_SCHED_YIELD`),
-      otherwise CUDA will not yield while waiting for results and actively
-      spin on the processor (:py:obj:`~.CU_CTX_SCHED_SPIN`). Additionally,
-      on Tegra devices, :py:obj:`~.CU_CTX_SCHED_AUTO` uses a heuristic
-      based on the power profile of the platform and may choose
-      :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC` for low-powered devices.
-
-    - :py:obj:`~.CU_CTX_MAP_HOST`: Instruct CUDA to support mapped pinned
-      allocations. This flag must be set in order to allocate pinned host
-      memory that is accessible to the GPU.
-
-    - :py:obj:`~.CU_CTX_LMEM_RESIZE_TO_MAX`: Instruct CUDA to not reduce
-      local memory after resizing local memory for a kernel. This can
-      prevent thrashing by local memory allocations when launching many
-      kernels with high local memory usage at the cost of potentially
-      increased memory usage.   Deprecated: This flag is deprecated and the
-      behavior enabled by this flag is now the default and cannot be
-      disabled. Instead, the per-thread stack size can be controlled with
-      :py:obj:`~.cuCtxSetLimit()`.
-
-    - :py:obj:`~.CU_CTX_COREDUMP_ENABLE`: If GPU coredumps have not been
-      enabled globally with :py:obj:`~.cuCoredumpSetAttributeGlobal` or
-      environment variables, this flag can be set during context creation
-      to instruct CUDA to create a coredump if this context raises an
-      exception during execution. These environment variables are described
-      in the CUDA-GDB user guide under the "GPU core dump support" section.
-      The initial attributes will be taken from the global attributes at
-      the time of context creation. The other attributes that control
-      coredump output can be modified by calling
-      :py:obj:`~.cuCoredumpSetAttribute` from the created context after it
-      becomes current.
-
-    - :py:obj:`~.CU_CTX_USER_COREDUMP_ENABLE`: If user-triggered GPU
-      coredumps have not been enabled globally with
-      :py:obj:`~.cuCoredumpSetAttributeGlobal` or environment variables,
-      this flag can be set during context creation to instruct CUDA to
-      create a coredump if data is written to a certain pipe that is
-      present in the OS space. These environment variables are described in
-      the CUDA-GDB user guide under the "GPU core dump support" section. It
-      is important to note that the pipe name `must` be set with
-      :py:obj:`~.cuCoredumpSetAttributeGlobal` before creating the context
-      if this flag is used. Setting this flag implies that
-      :py:obj:`~.CU_CTX_COREDUMP_ENABLE` is set. The initial attributes
-      will be taken from the global attributes at the time of context
-      creation. The other attributes that control coredump output can be
-      modified by calling :py:obj:`~.cuCoredumpSetAttribute` from the
-      created context after it becomes current. Setting this flag on any
-      context creation is equivalent to setting the
-      :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER` attribute to `true`
-      globally.
-
-    - :py:obj:`~.CU_CTX_SYNC_MEMOPS`: Ensures that synchronous memory
-      operations initiated on this context will always synchronize. See
-      further documentation in the section titled "API Synchronization
-      behavior" to learn more about cases when synchronous memory
-      operations can exhibit asynchronous behavior.
-
-    Context creation will fail with :py:obj:`~.CUDA_ERROR_UNKNOWN` if the
-    compute mode of the device is :py:obj:`~.CU_COMPUTEMODE_PROHIBITED`.
-    The function :py:obj:`~.cuDeviceGetAttribute()` can be used with
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE` to determine the compute
-    mode of the device. The `nvidia-smi` tool can be used to set the
-    compute mode for * devices. Documentation for `nvidia-smi` can be
-    obtained by passing a -h option to it.
-
-    Parameters
-    ----------
-    flags : unsigned int
-        Context creation flags
-    dev : :py:obj:`~.CUdevice`
-        Device to create context on
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    pctx : :py:obj:`~.CUcontext`
-        Returned context handle of the new context
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCoredumpSetAttributeGlobal`, :py:obj:`~.cuCoredumpSetAttribute`, :py:obj:`~.cuCtxSynchronize`
-
-    Notes
-    -----
-    In most cases it is recommended to use :py:obj:`~.cuDevicePrimaryCtxRetain`.
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef CUcontext pctx = CUcontext()
-    err = cydriver.cuCtxCreate(<cydriver.CUcontext*>pctx._ptr, flags, cydev)
-    return (CUresult(err), pctx)
-{{endif}}
-
-{{if 'cuCtxCreate_v3' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxCreate_v3(paramsArray : Optional[Tuple[CUexecAffinityParam] | List[CUexecAffinityParam]], int numParams, unsigned int flags, dev):
-    """ Create a CUDA context with execution affinity.
-
-    Creates a new CUDA context with execution affinity and associates it
-    with the calling thread. The `paramsArray` and `flags` parameter are
-    described below. The context is created with a usage count of 1 and the
-    caller of :py:obj:`~.cuCtxCreate()` must call
-    :py:obj:`~.cuCtxDestroy()` when done using the context. If a context is
-    already current to the thread, it is supplanted by the newly created
-    context and may be restored by a subsequent call to
-    :py:obj:`~.cuCtxPopCurrent()`.
-
-    The type and the amount of execution resource the context can use is
-    limited by `paramsArray` and `numParams`. The `paramsArray` is an array
-    of `CUexecAffinityParam` and the `numParams` describes the size of the
-    array. If two `CUexecAffinityParam` in the array have the same type,
-    the latter execution affinity parameter overrides the former execution
-    affinity parameter. The supported execution affinity types are:
-
-    - :py:obj:`~.CU_EXEC_AFFINITY_TYPE_SM_COUNT` limits the portion of SMs
-      that the context can use. The portion of SMs is specified as the
-      number of SMs via `CUexecAffinitySmCount`. This limit will be
-      internally rounded up to the next hardware-supported amount. Hence,
-      it is imperative to query the actual execution affinity of the
-      context via `cuCtxGetExecAffinity` after context creation. Currently,
-      this attribute is only supported under Volta+ MPS.
-
-    The three LSBs of the `flags` parameter can be used to control how the
-    OS thread, which owns the CUDA context at the time of an API call,
-    interacts with the OS scheduler when waiting for results from the GPU.
-    Only one of the scheduling flags can be set when creating a context.
-
-    - :py:obj:`~.CU_CTX_SCHED_SPIN`: Instruct CUDA to actively spin when
-      waiting for results from the GPU. This can decrease latency when
-      waiting for the GPU, but may lower the performance of CPU threads if
-      they are performing work in parallel with the CUDA thread.
-
-    - :py:obj:`~.CU_CTX_SCHED_YIELD`: Instruct CUDA to yield its thread
-      when waiting for results from the GPU. This can increase latency when
-      waiting for the GPU, but can increase the performance of CPU threads
-      performing work in parallel with the GPU.
-
-    - :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC`: Instruct CUDA to block the
-      CPU thread on a synchronization primitive when waiting for the GPU to
-      finish work.
-
-    - :py:obj:`~.CU_CTX_BLOCKING_SYNC`: Instruct CUDA to block the CPU
-      thread on a synchronization primitive when waiting for the GPU to
-      finish work.   Deprecated: This flag was deprecated as of CUDA 4.0
-      and was replaced with :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC`.
-
-    - :py:obj:`~.CU_CTX_SCHED_AUTO`: The default value if the `flags`
-      parameter is zero, uses a heuristic based on the number of active
-      CUDA contexts in the process `C` and the number of logical processors
-      in the system `P`. If `C` > `P`, then CUDA will yield to other OS
-      threads when waiting for the GPU (:py:obj:`~.CU_CTX_SCHED_YIELD`),
-      otherwise CUDA will not yield while waiting for results and actively
-      spin on the processor (:py:obj:`~.CU_CTX_SCHED_SPIN`). Additionally,
-      on Tegra devices, :py:obj:`~.CU_CTX_SCHED_AUTO` uses a heuristic
-      based on the power profile of the platform and may choose
-      :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC` for low-powered devices.
-
-    - :py:obj:`~.CU_CTX_MAP_HOST`: Instruct CUDA to support mapped pinned
-      allocations. This flag must be set in order to allocate pinned host
-      memory that is accessible to the GPU.
-
-    - :py:obj:`~.CU_CTX_LMEM_RESIZE_TO_MAX`: Instruct CUDA to not reduce
-      local memory after resizing local memory for a kernel. This can
-      prevent thrashing by local memory allocations when launching many
-      kernels with high local memory usage at the cost of potentially
-      increased memory usage.   Deprecated: This flag is deprecated and the
-      behavior enabled by this flag is now the default and cannot be
-      disabled. Instead, the per-thread stack size can be controlled with
-      :py:obj:`~.cuCtxSetLimit()`.
-
-    - :py:obj:`~.CU_CTX_COREDUMP_ENABLE`: If GPU coredumps have not been
-      enabled globally with :py:obj:`~.cuCoredumpSetAttributeGlobal` or
-      environment variables, this flag can be set during context creation
-      to instruct CUDA to create a coredump if this context raises an
-      exception during execution. These environment variables are described
-      in the CUDA-GDB user guide under the "GPU core dump support" section.
-      The initial attributes will be taken from the global attributes at
-      the time of context creation. The other attributes that control
-      coredump output can be modified by calling
-      :py:obj:`~.cuCoredumpSetAttribute` from the created context after it
-      becomes current.
-
-    - :py:obj:`~.CU_CTX_USER_COREDUMP_ENABLE`: If user-triggered GPU
-      coredumps have not been enabled globally with
-      :py:obj:`~.cuCoredumpSetAttributeGlobal` or environment variables,
-      this flag can be set during context creation to instruct CUDA to
-      create a coredump if data is written to a certain pipe that is
-      present in the OS space. These environment variables are described in
-      the CUDA-GDB user guide under the "GPU core dump support" section. It
-      is important to note that the pipe name `must` be set with
-      :py:obj:`~.cuCoredumpSetAttributeGlobal` before creating the context
-      if this flag is used. Setting this flag implies that
-      :py:obj:`~.CU_CTX_COREDUMP_ENABLE` is set. The initial attributes
-      will be taken from the global attributes at the time of context
-      creation. The other attributes that control coredump output can be
-      modified by calling :py:obj:`~.cuCoredumpSetAttribute` from the
-      created context after it becomes current. Setting this flag on any
-      context creation is equivalent to setting the
-      :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER` attribute to `true`
-      globally.
-
-    Context creation will fail with :py:obj:`~.CUDA_ERROR_UNKNOWN` if the
-    compute mode of the device is :py:obj:`~.CU_COMPUTEMODE_PROHIBITED`.
-    The function :py:obj:`~.cuDeviceGetAttribute()` can be used with
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE` to determine the compute
-    mode of the device. The `nvidia-smi` tool can be used to set the
-    compute mode for * devices. Documentation for `nvidia-smi` can be
-    obtained by passing a -h option to it.
-
-    Parameters
-    ----------
-    paramsArray : List[:py:obj:`~.CUexecAffinityParam`]
-        Execution affinity parameters
-    numParams : int
-        Number of execution affinity parameters
-    flags : unsigned int
-        Context creation flags
-    dev : :py:obj:`~.CUdevice`
-        Device to create context on
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    pctx : :py:obj:`~.CUcontext`
-        Returned context handle of the new context
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cuCoredumpSetAttributeGlobal`, :py:obj:`~.cuCoredumpSetAttribute`, :py:obj:`~.CUexecAffinityParam`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    paramsArray = [] if paramsArray is None else paramsArray
-    if not all(isinstance(_x, (CUexecAffinityParam,)) for _x in paramsArray):
-        raise TypeError("Argument 'paramsArray' is not instance of type (expected Tuple[cydriver.CUexecAffinityParam,] or List[cydriver.CUexecAffinityParam,]")
-    cdef CUcontext pctx = CUcontext()
-    cdef cydriver.CUexecAffinityParam* cyparamsArray = NULL
-    if len(paramsArray) > 0:
-        cyparamsArray = <cydriver.CUexecAffinityParam*> calloc(len(paramsArray), sizeof(cydriver.CUexecAffinityParam))
-        if cyparamsArray is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cydriver.CUexecAffinityParam)))
-        for idx in range(len(paramsArray)):
-            string.memcpy(&cyparamsArray[idx], (<CUexecAffinityParam>paramsArray[idx])._ptr, sizeof(cydriver.CUexecAffinityParam))
-    err = cydriver.cuCtxCreate_v3(<cydriver.CUcontext*>pctx._ptr, (<CUexecAffinityParam>paramsArray[0])._ptr if len(paramsArray) == 1 else cyparamsArray, numParams, flags, cydev)
-    if cyparamsArray is not NULL:
-        free(cyparamsArray)
-    return (CUresult(err), pctx)
-{{endif}}
-
-{{if 'cuCtxCreate_v4' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxCreate_v4(ctxCreateParams : Optional[CUctxCreateParams], unsigned int flags, dev):
-    """ Create a CUDA context.
-
-    Creates a new CUDA context and associates it with the calling thread.
-    The `flags` parameter is described below. The context is created with a
-    usage count of 1 and the caller of :py:obj:`~.cuCtxCreate()` must call
-    :py:obj:`~.cuCtxDestroy()` when done using the context. If a context is
-    already current to the thread, it is supplanted by the newly created
-    context and may be restored by a subsequent call to
-    :py:obj:`~.cuCtxPopCurrent()`.
-
-    CUDA context can be created with execution affinity. The type and the
-    amount of execution resource the context can use is limited by
-    `paramsArray` and `numExecAffinityParams` in `execAffinity`. The
-    `paramsArray` is an array of `CUexecAffinityParam` and the
-    `numExecAffinityParams` describes the size of the paramsArray. If two
-    `CUexecAffinityParam` in the array have the same type, the latter
-    execution affinity parameter overrides the former execution affinity
-    parameter. The supported execution affinity types are:
-
-    - :py:obj:`~.CU_EXEC_AFFINITY_TYPE_SM_COUNT` limits the portion of SMs
-      that the context can use. The portion of SMs is specified as the
-      number of SMs via `CUexecAffinitySmCount`. This limit will be
-      internally rounded up to the next hardware-supported amount. Hence,
-      it is imperative to query the actual execution affinity of the
-      context via `cuCtxGetExecAffinity` after context creation. Currently,
-      this attribute is only supported under Volta+ MPS.
-
-    CUDA context can be created in CIG(CUDA in Graphics) mode by setting /p
-    cigParams. Hardware support and software support for graphics clients
-    can be determined using :py:obj:`~.cuDeviceGetAttribute()` with
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED`. Data from graphics
-    client is shared with CUDA via the /p sharedData in /pcigParams. For
-    D3D12, /p sharedData is a ID3D12CommandQueue handle.
-
-    Either /p execAffinityParams or /p cigParams can be set to a non-null
-    value. Setting both to a non-null value will result in an undefined
-    behavior.
-
-    The three LSBs of the `flags` parameter can be used to control how the
-    OS thread, which owns the CUDA context at the time of an API call,
-    interacts with the OS scheduler when waiting for results from the GPU.
-    Only one of the scheduling flags can be set when creating a context.
-
-    - :py:obj:`~.CU_CTX_SCHED_SPIN`: Instruct CUDA to actively spin when
-      waiting for results from the GPU. This can decrease latency when
-      waiting for the GPU, but may lower the performance of CPU threads if
-      they are performing work in parallel with the CUDA thread.
-
-    - :py:obj:`~.CU_CTX_SCHED_YIELD`: Instruct CUDA to yield its thread
-      when waiting for results from the GPU. This can increase latency when
-      waiting for the GPU, but can increase the performance of CPU threads
-      performing work in parallel with the GPU.
-
-    - :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC`: Instruct CUDA to block the
-      CPU thread on a synchronization primitive when waiting for the GPU to
-      finish work.
-
-    - :py:obj:`~.CU_CTX_BLOCKING_SYNC`: Instruct CUDA to block the CPU
-      thread on a synchronization primitive when waiting for the GPU to
-      finish work.   Deprecated: This flag was deprecated as of CUDA 4.0
-      and was replaced with :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC`.
-
-    - :py:obj:`~.CU_CTX_SCHED_AUTO`: The default value if the `flags`
-      parameter is zero, uses a heuristic based on the number of active
-      CUDA contexts in the process `C` and the number of logical processors
-      in the system `P`. If `C` > `P`, then CUDA will yield to other OS
-      threads when waiting for the GPU (:py:obj:`~.CU_CTX_SCHED_YIELD`),
-      otherwise CUDA will not yield while waiting for results and actively
-      spin on the processor (:py:obj:`~.CU_CTX_SCHED_SPIN`). Additionally,
-      on Tegra devices, :py:obj:`~.CU_CTX_SCHED_AUTO` uses a heuristic
-      based on the power profile of the platform and may choose
-      :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC` for low-powered devices.
-
-    - :py:obj:`~.CU_CTX_MAP_HOST`: Instruct CUDA to support mapped pinned
-      allocations. This flag must be set in order to allocate pinned host
-      memory that is accessible to the GPU.
-
-    - :py:obj:`~.CU_CTX_LMEM_RESIZE_TO_MAX`: Instruct CUDA to not reduce
-      local memory after resizing local memory for a kernel. This can
-      prevent thrashing by local memory allocations when launching many
-      kernels with high local memory usage at the cost of potentially
-      increased memory usage.   Deprecated: This flag is deprecated and the
-      behavior enabled by this flag is now the default and cannot be
-      disabled. Instead, the per-thread stack size can be controlled with
-      :py:obj:`~.cuCtxSetLimit()`.
-
-    - :py:obj:`~.CU_CTX_COREDUMP_ENABLE`: If GPU coredumps have not been
-      enabled globally with :py:obj:`~.cuCoredumpSetAttributeGlobal` or
-      environment variables, this flag can be set during context creation
-      to instruct CUDA to create a coredump if this context raises an
-      exception during execution. These environment variables are described
-      in the CUDA-GDB user guide under the "GPU core dump support" section.
-      The initial attributes will be taken from the global attributes at
-      the time of context creation. The other attributes that control
-      coredump output can be modified by calling
-      :py:obj:`~.cuCoredumpSetAttribute` from the created context after it
-      becomes current. This flag is not supported when CUDA context is
-      created in CIG(CUDA in Graphics) mode.
-
-    - :py:obj:`~.CU_CTX_USER_COREDUMP_ENABLE`: If user-triggered GPU
-      coredumps have not been enabled globally with
-      :py:obj:`~.cuCoredumpSetAttributeGlobal` or environment variables,
-      this flag can be set during context creation to instruct CUDA to
-      create a coredump if data is written to a certain pipe that is
-      present in the OS space. These environment variables are described in
-      the CUDA-GDB user guide under the "GPU core dump support" section. It
-      is important to note that the pipe name `must` be set with
-      :py:obj:`~.cuCoredumpSetAttributeGlobal` before creating the context
-      if this flag is used. Setting this flag implies that
-      :py:obj:`~.CU_CTX_COREDUMP_ENABLE` is set. The initial attributes
-      will be taken from the global attributes at the time of context
-      creation. The other attributes that control coredump output can be
-      modified by calling :py:obj:`~.cuCoredumpSetAttribute` from the
-      created context after it becomes current. Setting this flag on any
-      context creation is equivalent to setting the
-      :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER` attribute to `true`
-      globally. This flag is not supported when CUDA context is created in
-      CIG(CUDA in Graphics) mode.
-
-    - :py:obj:`~.CU_CTX_SYNC_MEMOPS`: Ensures that synchronous memory
-      operations initiated on this context will always synchronize. See
-      further documentation in the section titled "API Synchronization
-      behavior" to learn more about cases when synchronous memory
-      operations can exhibit asynchronous behavior.
-
-    Context creation will fail with :py:obj:`~.CUDA_ERROR_UNKNOWN` if the
-    compute mode of the device is :py:obj:`~.CU_COMPUTEMODE_PROHIBITED`.
-    The function :py:obj:`~.cuDeviceGetAttribute()` can be used with
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE` to determine the compute
-    mode of the device. The `nvidia-smi` tool can be used to set the
-    compute mode for * devices. Documentation for `nvidia-smi` can be
-    obtained by passing a -h option to it.
-
-    Context creation will fail with :: CUDA_ERROR_INVALID_VALUE if invalid
-    parameter was passed by client to create the CUDA context.
-
-    Context creation in CIG mode will fail with
-    :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` if CIG is not supported by the
-    device or the driver.
-
-    Parameters
-    ----------
-    ctxCreateParams : :py:obj:`~.CUctxCreateParams`
-        Context creation parameters
-    flags : unsigned int
-        Context creation flags
-    dev : :py:obj:`~.CUdevice`
-        Device to create context on
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    pctx : :py:obj:`~.CUcontext`
-        Returned context handle of the new context
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCoredumpSetAttributeGlobal`, :py:obj:`~.cuCoredumpSetAttribute`, :py:obj:`~.cuCtxSynchronize`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef CUcontext pctx = CUcontext()
-    cdef cydriver.CUctxCreateParams* cyctxCreateParams_ptr = ctxCreateParams._ptr if ctxCreateParams != None else NULL
-    err = cydriver.cuCtxCreate_v4(<cydriver.CUcontext*>pctx._ptr, cyctxCreateParams_ptr, flags, cydev)
-    return (CUresult(err), pctx)
-{{endif}}
-
-{{if 'cuCtxDestroy_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxDestroy(ctx):
-    """ Destroy a CUDA context.
-
-    Destroys the CUDA context specified by `ctx`. The context `ctx` will be
-    destroyed regardless of how many threads it is current to. It is the
-    responsibility of the calling function to ensure that no API call
-    issues using `ctx` while :py:obj:`~.cuCtxDestroy()` is executing.
-
-    Destroys and cleans up all resources associated with the context. It is
-    the caller's responsibility to ensure that the context or its resources
-    are not accessed or passed in subsequent API calls and doing so will
-    result in undefined behavior. These resources include CUDA types
-    :py:obj:`~.CUmodule`, :py:obj:`~.CUfunction`, :py:obj:`~.CUstream`,
-    :py:obj:`~.CUevent`, :py:obj:`~.CUarray`, :py:obj:`~.CUmipmappedArray`,
-    :py:obj:`~.CUtexObject`, :py:obj:`~.CUsurfObject`,
-    :py:obj:`~.CUtexref`, :py:obj:`~.CUsurfref`,
-    :py:obj:`~.CUgraphicsResource`, :py:obj:`~.CUlinkState`,
-    :py:obj:`~.CUexternalMemory` and :py:obj:`~.CUexternalSemaphore`. These
-    resources also include memory allocations by :py:obj:`~.cuMemAlloc()`,
-    :py:obj:`~.cuMemAllocHost()`, :py:obj:`~.cuMemAllocManaged()` and
-    :py:obj:`~.cuMemAllocPitch()`.
-
-    If `ctx` is current to the calling thread then `ctx` will also be
-    popped from the current thread's context stack (as though
-    :py:obj:`~.cuCtxPopCurrent()` were called). If `ctx` is current to
-    other threads, then `ctx` will remain current to those threads, and
-    attempting to access `ctx` from those threads will result in the error
-    :py:obj:`~.CUDA_ERROR_CONTEXT_IS_DESTROYED`.
-
-    Parameters
-    ----------
-    ctx : :py:obj:`~.CUcontext`
-        Context to destroy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`
-
-    Notes
-    -----
-    :py:obj:`~.cuCtxDestroy()` will not destroy memory allocations by :py:obj:`~.cuMemCreate()`, :py:obj:`~.cuMemAllocAsync()` and :py:obj:`~.cuMemAllocFromPoolAsync()`. These memory allocations are not associated with any CUDA context and need to be destroyed explicitly.
-    """
-    cdef cydriver.CUcontext cyctx
-    if ctx is None:
-        cyctx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(ctx, (CUcontext,)):
-        pctx = int(ctx)
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    else:
-        pctx = int(CUcontext(ctx))
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    err = cydriver.cuCtxDestroy(cyctx)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxPushCurrent_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxPushCurrent(ctx):
-    """ Pushes a context on the current CPU thread.
-
-    Pushes the given context `ctx` onto the CPU thread's stack of current
-    contexts. The specified context becomes the CPU thread's current
-    context, so all CUDA functions that operate on the current context are
-    affected.
-
-    The previous current context may be made current again by calling
-    :py:obj:`~.cuCtxDestroy()` or :py:obj:`~.cuCtxPopCurrent()`.
-
-    Parameters
-    ----------
-    ctx : :py:obj:`~.CUcontext`
-        Context to push
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`
-    """
-    cdef cydriver.CUcontext cyctx
-    if ctx is None:
-        cyctx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(ctx, (CUcontext,)):
-        pctx = int(ctx)
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    else:
-        pctx = int(CUcontext(ctx))
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    err = cydriver.cuCtxPushCurrent(cyctx)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxPopCurrent_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxPopCurrent():
-    """ Pops the current CUDA context from the current CPU thread.
-
-    Pops the current CUDA context from the CPU thread and passes back the
-    old context handle in `*pctx`. That context may then be made current to
-    a different CPU thread by calling :py:obj:`~.cuCtxPushCurrent()`.
-
-    If a context was current to the CPU thread before
-    :py:obj:`~.cuCtxCreate()` or :py:obj:`~.cuCtxPushCurrent()` was called,
-    this function makes that context current to the CPU thread again.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-    pctx : :py:obj:`~.CUcontext`
-        Returned popped context handle
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`
-    """
-    cdef CUcontext pctx = CUcontext()
-    err = cydriver.cuCtxPopCurrent(<cydriver.CUcontext*>pctx._ptr)
-    return (CUresult(err), pctx)
-{{endif}}
-
-{{if 'cuCtxSetCurrent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxSetCurrent(ctx):
-    """ Binds the specified CUDA context to the calling CPU thread.
-
-    Binds the specified CUDA context to the calling CPU thread. If `ctx` is
-    NULL then the CUDA context previously bound to the calling CPU thread
-    is unbound and :py:obj:`~.CUDA_SUCCESS` is returned.
-
-    If there exists a CUDA context stack on the calling CPU thread, this
-    will replace the top of that stack with `ctx`. If `ctx` is NULL then
-    this will be equivalent to popping the top of the calling CPU thread's
-    CUDA context stack (or a no-op if the calling CPU thread's CUDA context
-    stack is empty).
-
-    Parameters
-    ----------
-    ctx : :py:obj:`~.CUcontext`
-        Context to bind to the calling CPU thread
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxGetCurrent`, :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cudaSetDevice`
-    """
-    cdef cydriver.CUcontext cyctx
-    if ctx is None:
-        cyctx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(ctx, (CUcontext,)):
-        pctx = int(ctx)
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    else:
-        pctx = int(CUcontext(ctx))
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    err = cydriver.cuCtxSetCurrent(cyctx)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxGetCurrent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxGetCurrent():
-    """ Returns the CUDA context bound to the calling CPU thread.
-
-    Returns in `*pctx` the CUDA context bound to the calling CPU thread. If
-    no context is bound to the calling CPU thread then `*pctx` is set to
-    NULL and :py:obj:`~.CUDA_SUCCESS` is returned.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`,
-    pctx : :py:obj:`~.CUcontext`
-        Returned context handle
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxSetCurrent`, :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cudaGetDevice`
-    """
-    cdef CUcontext pctx = CUcontext()
-    err = cydriver.cuCtxGetCurrent(<cydriver.CUcontext*>pctx._ptr)
-    return (CUresult(err), pctx)
-{{endif}}
-
-{{if 'cuCtxGetDevice' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxGetDevice():
-    """ Returns the device ID for the current context.
-
-    Returns in `*device` the ordinal of the current context's device.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    device : :py:obj:`~.CUdevice`
-        Returned device ID for the current context
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cudaGetDevice`
-    """
-    cdef CUdevice device = CUdevice()
-    err = cydriver.cuCtxGetDevice(<cydriver.CUdevice*>device._ptr)
-    return (CUresult(err), device)
-{{endif}}
-
-{{if 'cuCtxGetFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxGetFlags():
-    """ Returns the flags for the current context.
-
-    Returns in `*flags` the flags of the current context. See
-    :py:obj:`~.cuCtxCreate` for flag values.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    flags : unsigned int
-        Pointer to store flags of current context
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetCurrent`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxGetSharedMemConfig`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuCtxSetFlags`, :py:obj:`~.cudaGetDeviceFlags`
-    """
-    cdef unsigned int flags = 0
-    err = cydriver.cuCtxGetFlags(&flags)
-    return (CUresult(err), flags)
-{{endif}}
-
-{{if 'cuCtxSetFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxSetFlags(unsigned int flags):
-    """ Sets the flags for the current context.
-
-    Sets the flags for the current context overwriting previously set ones.
-    See :py:obj:`~.cuDevicePrimaryCtxSetFlags` for flag values.
-
-    Parameters
-    ----------
-    flags : unsigned int
-        Flags to set on the current context
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetCurrent`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxGetSharedMemConfig`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cudaGetDeviceFlags`, :py:obj:`~.cuDevicePrimaryCtxSetFlags`,
-    """
-    err = cydriver.cuCtxSetFlags(flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxGetId' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxGetId(ctx):
-    """ Returns the unique Id associated with the context supplied.
-
-    Returns in `ctxId` the unique Id which is associated with a given
-    context. The Id is unique for the life of the program for this instance
-    of CUDA. If context is supplied as NULL and there is one current, the
-    Id of the current context is returned.
-
-    Parameters
-    ----------
-    ctx : :py:obj:`~.CUcontext`
-        Context for which to obtain the Id
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_CONTEXT_IS_DESTROYED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    ctxId : unsigned long long
-        Pointer to store the Id of the context
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPushCurrent`
-    """
-    cdef cydriver.CUcontext cyctx
-    if ctx is None:
-        cyctx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(ctx, (CUcontext,)):
-        pctx = int(ctx)
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    else:
-        pctx = int(CUcontext(ctx))
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    cdef unsigned long long ctxId = 0
-    err = cydriver.cuCtxGetId(cyctx, &ctxId)
-    return (CUresult(err), ctxId)
-{{endif}}
-
-{{if 'cuCtxSynchronize' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxSynchronize():
-    """ Block for the current context's tasks to complete.
-
-    Blocks until the current context has completed all preceding requested
-    tasks. If the current context is the primary context, green contexts
-    that have been created will also be synchronized.
-    :py:obj:`~.cuCtxSynchronize()` returns an error if one of the preceding
-    tasks failed. If the context was created with the
-    :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC` flag, the CPU thread will block
-    until the GPU context has finished its work.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cudaDeviceSynchronize`
-    """
-    err = cydriver.cuCtxSynchronize()
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxSetLimit' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxSetLimit(limit not None : CUlimit, size_t value):
-    """ Set resource limits.
-
-    Setting `limit` to `value` is a request by the application to update
-    the current limit maintained by the context. The driver is free to
-    modify the requested value to meet h/w requirements (this could be
-    clamping to minimum or maximum values, rounding up to nearest element
-    size, etc). The application can use :py:obj:`~.cuCtxGetLimit()` to find
-    out exactly what the limit has been set to.
-
-    Setting each :py:obj:`~.CUlimit` has its own specific restrictions, so
-    each is discussed here.
-
-    - :py:obj:`~.CU_LIMIT_STACK_SIZE` controls the stack size in bytes of
-      each GPU thread. The driver automatically increases the per-thread
-      stack size for each kernel launch as needed. This size isn't reset
-      back to the original value after each launch. Setting this value will
-      take effect immediately, and if necessary, the device will block
-      until all preceding requested tasks are complete.
-
-    - :py:obj:`~.CU_LIMIT_PRINTF_FIFO_SIZE` controls the size in bytes of
-      the FIFO used by the :py:obj:`~.printf()` device system call. Setting
-      :py:obj:`~.CU_LIMIT_PRINTF_FIFO_SIZE` must be performed before
-      launching any kernel that uses the :py:obj:`~.printf()` device system
-      call, otherwise :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be
-      returned.
-
-    - :py:obj:`~.CU_LIMIT_MALLOC_HEAP_SIZE` controls the size in bytes of
-      the heap used by the :py:obj:`~.malloc()` and :py:obj:`~.free()`
-      device system calls. Setting :py:obj:`~.CU_LIMIT_MALLOC_HEAP_SIZE`
-      must be performed before launching any kernel that uses the
-      :py:obj:`~.malloc()` or :py:obj:`~.free()` device system calls,
-      otherwise :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be returned.
-
-    - :py:obj:`~.CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH` controls the maximum
-      nesting depth of a grid at which a thread can safely call
-      :py:obj:`~.cudaDeviceSynchronize()`. Setting this limit must be
-      performed before any launch of a kernel that uses the device runtime
-      and calls :py:obj:`~.cudaDeviceSynchronize()` above the default sync
-      depth, two levels of grids. Calls to
-      :py:obj:`~.cudaDeviceSynchronize()` will fail with error code
-      :py:obj:`~.cudaErrorSyncDepthExceeded` if the limitation is violated.
-      This limit can be set smaller than the default or up the maximum
-      launch depth of 24. When setting this limit, keep in mind that
-      additional levels of sync depth require the driver to reserve large
-      amounts of device memory which can no longer be used for user
-      allocations. If these reservations of device memory fail,
-      :py:obj:`~.cuCtxSetLimit()` will return
-      :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, and the limit can be reset to a
-      lower value. This limit is only applicable to devices of compute
-      capability < 9.0. Attempting to set this limit on devices of other
-      compute capability versions will result in the error
-      :py:obj:`~.CUDA_ERROR_UNSUPPORTED_LIMIT` being returned.
-
-    - :py:obj:`~.CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT` controls the
-      maximum number of outstanding device runtime launches that can be
-      made from the current context. A grid is outstanding from the point
-      of launch up until the grid is known to have been completed. Device
-      runtime launches which violate this limitation fail and return
-      :py:obj:`~.cudaErrorLaunchPendingCountExceeded` when
-      :py:obj:`~.cudaGetLastError()` is called after launch. If more
-      pending launches than the default (2048 launches) are needed for a
-      module using the device runtime, this limit can be increased. Keep in
-      mind that being able to sustain additional pending launches will
-      require the driver to reserve larger amounts of device memory upfront
-      which can no longer be used for allocations. If these reservations
-      fail, :py:obj:`~.cuCtxSetLimit()` will return
-      :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, and the limit can be reset to a
-      lower value. This limit is only applicable to devices of compute
-      capability 3.5 and higher. Attempting to set this limit on devices of
-      compute capability less than 3.5 will result in the error
-      :py:obj:`~.CUDA_ERROR_UNSUPPORTED_LIMIT` being returned.
-
-    - :py:obj:`~.CU_LIMIT_MAX_L2_FETCH_GRANULARITY` controls the L2 cache
-      fetch granularity. Values can range from 0B to 128B. This is purely a
-      performance hint and it can be ignored or clamped depending on the
-      platform.
-
-    - :py:obj:`~.CU_LIMIT_PERSISTING_L2_CACHE_SIZE` controls size in bytes
-      available for persisting L2 cache. This is purely a performance hint
-      and it can be ignored or clamped depending on the platform.
-
-    Parameters
-    ----------
-    limit : :py:obj:`~.CUlimit`
-        Limit to set
-    value : size_t
-        Size of limit
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_LIMIT`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cudaDeviceSetLimit`
-    """
-    cdef cydriver.CUlimit cylimit = limit.value
-    err = cydriver.cuCtxSetLimit(cylimit, value)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxGetLimit' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxGetLimit(limit not None : CUlimit):
-    """ Returns resource limits.
-
-    Returns in `*pvalue` the current size of `limit`. The supported
-    :py:obj:`~.CUlimit` values are:
-
-    - :py:obj:`~.CU_LIMIT_STACK_SIZE`: stack size in bytes of each GPU
-      thread.
-
-    - :py:obj:`~.CU_LIMIT_PRINTF_FIFO_SIZE`: size in bytes of the FIFO used
-      by the :py:obj:`~.printf()` device system call.
-
-    - :py:obj:`~.CU_LIMIT_MALLOC_HEAP_SIZE`: size in bytes of the heap used
-      by the :py:obj:`~.malloc()` and :py:obj:`~.free()` device system
-      calls.
-
-    - :py:obj:`~.CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH`: maximum grid depth at
-      which a thread can issue the device runtime call
-      :py:obj:`~.cudaDeviceSynchronize()` to wait on child grid launches to
-      complete.
-
-    - :py:obj:`~.CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT`: maximum number
-      of outstanding device runtime launches that can be made from this
-      context.
-
-    - :py:obj:`~.CU_LIMIT_MAX_L2_FETCH_GRANULARITY`: L2 cache fetch
-      granularity.
-
-    - :py:obj:`~.CU_LIMIT_PERSISTING_L2_CACHE_SIZE`: Persisting L2 cache
-      size in bytes
-
-    Parameters
-    ----------
-    limit : :py:obj:`~.CUlimit`
-        Limit to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_LIMIT`
-    pvalue : int
-        Returned size of limit
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cudaDeviceGetLimit`
-    """
-    cdef size_t pvalue = 0
-    cdef cydriver.CUlimit cylimit = limit.value
-    err = cydriver.cuCtxGetLimit(&pvalue, cylimit)
-    return (CUresult(err), pvalue)
-{{endif}}
-
-{{if 'cuCtxGetCacheConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxGetCacheConfig():
-    """ Returns the preferred cache configuration for the current context.
-
-    On devices where the L1 cache and shared memory use the same hardware
-    resources, this function returns through `pconfig` the preferred cache
-    configuration for the current context. This is only a preference. The
-    driver will use the requested configuration if possible, but it is free
-    to choose a different configuration if required to execute functions.
-
-    This will return a `pconfig` of :py:obj:`~.CU_FUNC_CACHE_PREFER_NONE`
-    on devices where the size of the L1 cache and shared memory are fixed.
-
-    The supported cache configurations are:
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_NONE`: no preference for shared
-      memory or L1 (default)
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_SHARED`: prefer larger shared memory
-      and smaller L1 cache
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_L1`: prefer larger L1 cache and
-      smaller shared memory
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_EQUAL`: prefer equal sized L1 cache
-      and shared memory
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pconfig : :py:obj:`~.CUfunc_cache`
-        Returned cache configuration
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`
-    """
-    cdef cydriver.CUfunc_cache pconfig
-    err = cydriver.cuCtxGetCacheConfig(&pconfig)
-    return (CUresult(err), CUfunc_cache(pconfig))
-{{endif}}
-
-{{if 'cuCtxSetCacheConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxSetCacheConfig(config not None : CUfunc_cache):
-    """ Sets the preferred cache configuration for the current context.
-
-    On devices where the L1 cache and shared memory use the same hardware
-    resources, this sets through `config` the preferred cache configuration
-    for the current context. This is only a preference. The driver will use
-    the requested configuration if possible, but it is free to choose a
-    different configuration if required to execute the function. Any
-    function preference set via :py:obj:`~.cuFuncSetCacheConfig()` or
-    :py:obj:`~.cuKernelSetCacheConfig()` will be preferred over this
-    context-wide setting. Setting the context-wide cache configuration to
-    :py:obj:`~.CU_FUNC_CACHE_PREFER_NONE` will cause subsequent kernel
-    launches to prefer to not change the cache configuration unless
-    required to launch the kernel.
-
-    This setting does nothing on devices where the size of the L1 cache and
-    shared memory are fixed.
-
-    Launching a kernel with a different preference than the most recent
-    preference setting may insert a device-side synchronization point.
-
-    The supported cache configurations are:
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_NONE`: no preference for shared
-      memory or L1 (default)
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_SHARED`: prefer larger shared memory
-      and smaller L1 cache
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_L1`: prefer larger L1 cache and
-      smaller shared memory
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_EQUAL`: prefer equal sized L1 cache
-      and shared memory
-
-    Parameters
-    ----------
-    config : :py:obj:`~.CUfunc_cache`
-        Requested cache configuration
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cuKernelSetCacheConfig`
-    """
-    cdef cydriver.CUfunc_cache cyconfig = config.value
-    err = cydriver.cuCtxSetCacheConfig(cyconfig)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxGetApiVersion' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxGetApiVersion(ctx):
-    """ Gets the context's API version.
-
-    Returns a version number in `version` corresponding to the capabilities
-    of the context (e.g. 3010 or 3020), which library developers can use to
-    direct callers to a specific API version. If `ctx` is NULL, returns the
-    API version used to create the currently bound context.
-
-    Note that new API versions are only introduced when context
-    capabilities are changed that break binary compatibility, so the API
-    version and driver version may be different. For example, it is valid
-    for the API version to be 3020 while the driver version is 4020.
-
-    Parameters
-    ----------
-    ctx : :py:obj:`~.CUcontext`
-        Context to check
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    version : unsigned int
-        Pointer to version
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`
-    """
-    cdef cydriver.CUcontext cyctx
-    if ctx is None:
-        cyctx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(ctx, (CUcontext,)):
-        pctx = int(ctx)
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    else:
-        pctx = int(CUcontext(ctx))
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    cdef unsigned int version = 0
-    err = cydriver.cuCtxGetApiVersion(cyctx, &version)
-    return (CUresult(err), version)
-{{endif}}
-
-{{if 'cuCtxGetStreamPriorityRange' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxGetStreamPriorityRange():
-    """ Returns numerical values that correspond to the least and greatest stream priorities.
-
-    Returns in `*leastPriority` and `*greatestPriority` the numerical
-    values that correspond to the least and greatest stream priorities
-    respectively. Stream priorities follow a convention where lower numbers
-    imply greater priorities. The range of meaningful stream priorities is
-    given by [`*greatestPriority`, `*leastPriority`]. If the user attempts
-    to create a stream with a priority value that is outside the meaningful
-    range as specified by this API, the priority is automatically clamped
-    down or up to either `*leastPriority` or `*greatestPriority`
-    respectively. See :py:obj:`~.cuStreamCreateWithPriority` for details on
-    creating a priority stream. A NULL may be passed in for
-    `*leastPriority` or `*greatestPriority` if the value is not desired.
-
-    This function will return '0' in both `*leastPriority` and
-    `*greatestPriority` if the current context's device does not support
-    stream priorities (see :py:obj:`~.cuDeviceGetAttribute`).
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    leastPriority : int
-        Pointer to an int in which the numerical value for least stream
-        priority is returned
-    greatestPriority : int
-        Pointer to an int in which the numerical value for greatest stream
-        priority is returned
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cudaDeviceGetStreamPriorityRange`
-    """
-    cdef int leastPriority = 0
-    cdef int greatestPriority = 0
-    err = cydriver.cuCtxGetStreamPriorityRange(&leastPriority, &greatestPriority)
-    return (CUresult(err), leastPriority, greatestPriority)
-{{endif}}
-
-{{if 'cuCtxResetPersistingL2Cache' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxResetPersistingL2Cache():
-    """ Resets all persisting lines in cache to normal status.
-
-    :py:obj:`~.cuCtxResetPersistingL2Cache` Resets all persisting lines in
-    cache to normal status. Takes effect on function return.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.CUaccessPolicyWindow`
-    """
-    err = cydriver.cuCtxResetPersistingL2Cache()
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxGetExecAffinity' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxGetExecAffinity(typename not None : CUexecAffinityType):
-    """ Returns the execution affinity setting for the current context.
-
-    Returns in `*pExecAffinity` the current value of `typename`. The
-    supported :py:obj:`~.CUexecAffinityType` values are:
-
-    - :py:obj:`~.CU_EXEC_AFFINITY_TYPE_SM_COUNT`: number of SMs the context
-      is limited to use.
-
-    Parameters
-    ----------
-    typename : :py:obj:`~.CUexecAffinityType`
-        Execution affinity type to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY`
-    pExecAffinity : :py:obj:`~.CUexecAffinityParam`
-        Returned execution affinity
-
-    See Also
-    --------
-    :py:obj:`~.CUexecAffinityParam`
-    """
-    cdef CUexecAffinityParam pExecAffinity = CUexecAffinityParam()
-    cdef cydriver.CUexecAffinityType cytypename = typename.value
-    err = cydriver.cuCtxGetExecAffinity(<cydriver.CUexecAffinityParam*>pExecAffinity._ptr, cytypename)
-    return (CUresult(err), pExecAffinity)
-{{endif}}
-
-{{if 'cuCtxRecordEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxRecordEvent(hCtx, hEvent):
-    """ Records an event.
-
-    Captures in `hEvent` all the activities of the context `hCtx` at the
-    time of this call. `hEvent` and `hCtx` must be from the same CUDA
-    context, otherwise :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` will be
-    returned. Calls such as :py:obj:`~.cuEventQuery()` or
-    :py:obj:`~.cuCtxWaitEvent()` will then examine or wait for completion
-    of the work that was captured. Uses of `hCtx` after this call do not
-    modify `hEvent`. If the context passed to `hCtx` is the primary
-    context, `hEvent` will capture all the activities of the primary
-    context and its green contexts. If the context passed to `hCtx` is a
-    context converted from green context via
-    :py:obj:`~.cuCtxFromGreenCtx()`, `hEvent` will capture only the
-    activities of the green context.
-
-    Parameters
-    ----------
-    hCtx : :py:obj:`~.CUcontext`
-        Context to record event for
-    hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to record
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxWaitEvent`, :py:obj:`~.cuGreenCtxRecordEvent`, :py:obj:`~.cuGreenCtxWaitEvent`, :py:obj:`~.cuEventRecord`
-
-    Notes
-    -----
-    The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` if the specified context `hCtx` has a stream in the capture mode. In such a case, the call will invalidate all the conflicting captures.
-    """
-    cdef cydriver.CUevent cyhEvent
-    if hEvent is None:
-        cyhEvent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hEvent, (CUevent,)):
-        phEvent = int(hEvent)
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    else:
-        phEvent = int(CUevent(hEvent))
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    cdef cydriver.CUcontext cyhCtx
-    if hCtx is None:
-        cyhCtx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(hCtx, (CUcontext,)):
-        phCtx = int(hCtx)
-        cyhCtx = <cydriver.CUcontext><void_ptr>phCtx
-    else:
-        phCtx = int(CUcontext(hCtx))
-        cyhCtx = <cydriver.CUcontext><void_ptr>phCtx
-    err = cydriver.cuCtxRecordEvent(cyhCtx, cyhEvent)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxWaitEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxWaitEvent(hCtx, hEvent):
-    """ Make a context wait on an event.
-
-    Makes all future work submitted to context `hCtx` wait for all work
-    captured in `hEvent`. The synchronization will be performed on the
-    device and will not block the calling CPU thread. See
-    :py:obj:`~.cuCtxRecordEvent()` for details on what is captured by an
-    event. If the context passed to `hCtx` is the primary context, the
-    primary context and its green contexts will wait for `hEvent`. If the
-    context passed to `hCtx` is a context converted from green context via
-    :py:obj:`~.cuCtxFromGreenCtx()`, the green context will wait for
-    `hEvent`.
-
-    Parameters
-    ----------
-    hCtx : :py:obj:`~.CUcontext`
-        Context to wait
-    hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to wait on
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxRecordEvent`, :py:obj:`~.cuGreenCtxRecordEvent`, :py:obj:`~.cuGreenCtxWaitEvent`, :py:obj:`~.cuStreamWaitEvent`
-
-    Notes
-    -----
-    `hEvent` may be from a different context or device than `hCtx`.
-
-    The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` and invalidate the capture if the specified event `hEvent` is part of an ongoing capture sequence or if the specified context `hCtx` has a stream in the capture mode.
-    """
-    cdef cydriver.CUevent cyhEvent
-    if hEvent is None:
-        cyhEvent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hEvent, (CUevent,)):
-        phEvent = int(hEvent)
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    else:
-        phEvent = int(CUevent(hEvent))
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    cdef cydriver.CUcontext cyhCtx
-    if hCtx is None:
-        cyhCtx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(hCtx, (CUcontext,)):
-        phCtx = int(hCtx)
-        cyhCtx = <cydriver.CUcontext><void_ptr>phCtx
-    else:
-        phCtx = int(CUcontext(hCtx))
-        cyhCtx = <cydriver.CUcontext><void_ptr>phCtx
-    err = cydriver.cuCtxWaitEvent(cyhCtx, cyhEvent)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxAttach' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxAttach(unsigned int flags):
-    """ Increment a context's usage-count.
-
-    [Deprecated]
-
-    Note that this function is deprecated and should not be used.
-
-    Increments the usage count of the context and passes back a context
-    handle in `*pctx` that must be passed to :py:obj:`~.cuCtxDetach()` when
-    the application is done with the context. :py:obj:`~.cuCtxAttach()`
-    fails if there is no context current to the thread.
-
-    Currently, the `flags` parameter must be 0.
-
-    Parameters
-    ----------
-    flags : unsigned int
-        Context attach flags (must be 0)
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pctx : :py:obj:`~.CUcontext`
-        Returned context handle of the current context
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxDetach`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`
-    """
-    cdef CUcontext pctx = CUcontext()
-    err = cydriver.cuCtxAttach(<cydriver.CUcontext*>pctx._ptr, flags)
-    return (CUresult(err), pctx)
-{{endif}}
-
-{{if 'cuCtxDetach' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxDetach(ctx):
-    """ Decrement a context's usage-count.
-
-    [Deprecated]
-
-    Note that this function is deprecated and should not be used.
-
-    Decrements the usage count of the context `ctx`, and destroys the
-    context if the usage count goes to 0. The context must be a handle that
-    was passed back by :py:obj:`~.cuCtxCreate()` or
-    :py:obj:`~.cuCtxAttach()`, and must be current to the calling thread.
-
-    Parameters
-    ----------
-    ctx : :py:obj:`~.CUcontext`
-        Context to destroy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`
-    """
-    cdef cydriver.CUcontext cyctx
-    if ctx is None:
-        cyctx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(ctx, (CUcontext,)):
-        pctx = int(ctx)
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    else:
-        pctx = int(CUcontext(ctx))
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    err = cydriver.cuCtxDetach(cyctx)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxGetSharedMemConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxGetSharedMemConfig():
-    """ Returns the current shared memory configuration for the current context.
-
-    [Deprecated]
-
-    This function will return in `pConfig` the current size of shared
-    memory banks in the current context. On devices with configurable
-    shared memory banks, :py:obj:`~.cuCtxSetSharedMemConfig` can be used to
-    change this setting, so that all subsequent kernel launches will by
-    default use the new bank size. When :py:obj:`~.cuCtxGetSharedMemConfig`
-    is called on devices without configurable shared memory, it will return
-    the fixed bank size of the hardware.
-
-    The returned bank configurations can be either:
-
-    - :py:obj:`~.CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE`: shared memory
-      bank width is four bytes.
-
-    - :py:obj:`~.CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE`: shared memory
-      bank width will eight bytes.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pConfig : :py:obj:`~.CUsharedconfig`
-        returned shared memory configuration
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cuCtxGetSharedMemConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cudaDeviceGetSharedMemConfig`
-    """
-    cdef cydriver.CUsharedconfig pConfig
-    err = cydriver.cuCtxGetSharedMemConfig(&pConfig)
-    return (CUresult(err), CUsharedconfig(pConfig))
-{{endif}}
-
-{{if 'cuCtxSetSharedMemConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxSetSharedMemConfig(config not None : CUsharedconfig):
-    """ Sets the shared memory configuration for the current context.
-
-    [Deprecated]
-
-    On devices with configurable shared memory banks, this function will
-    set the context's shared memory bank size which is used for subsequent
-    kernel launches.
-
-    Changed the shared memory configuration between launches may insert a
-    device side synchronization point between those launches.
-
-    Changing the shared memory bank size will not increase shared memory
-    usage or affect occupancy of kernels, but may have major effects on
-    performance. Larger bank sizes will allow for greater potential
-    bandwidth to shared memory, but will change what kinds of accesses to
-    shared memory will result in bank conflicts.
-
-    This function will do nothing on devices with fixed shared memory bank
-    size.
-
-    The supported bank configurations are:
-
-    - :py:obj:`~.CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE`: set bank width to
-      the default initial setting (currently, four bytes).
-
-    - :py:obj:`~.CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE`: set shared
-      memory bank width to be natively four bytes.
-
-    - :py:obj:`~.CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE`: set shared
-      memory bank width to be natively eight bytes.
-
-    Parameters
-    ----------
-    config : :py:obj:`~.CUsharedconfig`
-        requested shared memory configuration
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cuCtxGetSharedMemConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cudaDeviceSetSharedMemConfig`
-    """
-    cdef cydriver.CUsharedconfig cyconfig = config.value
-    err = cydriver.cuCtxSetSharedMemConfig(cyconfig)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuModuleLoad' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleLoad(char* fname):
-    """ Loads a compute module.
-
-    Takes a filename `fname` and loads the corresponding module `module`
-    into the current context. The CUDA driver API does not attempt to
-    lazily allocate the resources needed by a module; if the memory for
-    functions and data (constant and global) needed by the module cannot be
-    allocated, :py:obj:`~.cuModuleLoad()` fails. The file should be a
-    `cubin` file as output by nvcc, or a `PTX` file either as output by
-    nvcc or handwritten, or a `fatbin` file as output by nvcc from
-    toolchain 4.0 or later.
-
-    Parameters
-    ----------
-    fname : bytes
-        Filename of module to load
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_FILE_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_JIT_COMPILER_NOT_FOUND`
-    module : :py:obj:`~.CUmodule`
-        Returned module
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload`
-    """
-    cdef CUmodule module = CUmodule()
-    err = cydriver.cuModuleLoad(<cydriver.CUmodule*>module._ptr, fname)
-    return (CUresult(err), module)
-{{endif}}
-
-{{if 'cuModuleLoadData' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleLoadData(image):
-    """ Load a module's data.
-
-    Takes a pointer `image` and loads the corresponding module `module`
-    into the current context. The `image` may be a `cubin` or `fatbin` as
-    output by nvcc, or a NULL-terminated `PTX`, either as output by nvcc or
-    hand-written.
-
-    Parameters
-    ----------
-    image : Any
-        Module data to load
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_JIT_COMPILER_NOT_FOUND`
-    module : :py:obj:`~.CUmodule`
-        Returned module
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload`
-    """
-    cdef CUmodule module = CUmodule()
-    cyimage = utils.HelperInputVoidPtr(image)
-    cdef void* cyimage_ptr = <void*><void_ptr>cyimage.cptr
-    err = cydriver.cuModuleLoadData(<cydriver.CUmodule*>module._ptr, cyimage_ptr)
-    return (CUresult(err), module)
-{{endif}}
-
-{{if 'cuModuleLoadDataEx' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleLoadDataEx(image, unsigned int numOptions, options : Optional[Tuple[CUjit_option] | List[CUjit_option]], optionValues : Optional[Tuple[Any] | List[Any]]):
-    """ Load a module's data with options.
-
-    Takes a pointer `image` and loads the corresponding module `module`
-    into the current context. The `image` may be a `cubin` or `fatbin` as
-    output by nvcc, or a NULL-terminated `PTX`, either as output by nvcc or
-    hand-written.
-
-    Parameters
-    ----------
-    image : Any
-        Module data to load
-    numOptions : unsigned int
-        Number of options
-    options : List[:py:obj:`~.CUjit_option`]
-        Options for JIT
-    optionValues : List[Any]
-        Option values for JIT
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_JIT_COMPILER_NOT_FOUND`
-    module : :py:obj:`~.CUmodule`
-        Returned module
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload`
-    """
-    optionValues = [] if optionValues is None else optionValues
-    options = [] if options is None else options
-    if not all(isinstance(_x, (CUjit_option)) for _x in options):
-        raise TypeError("Argument 'options' is not instance of type (expected Tuple[cydriver.CUjit_option] or List[cydriver.CUjit_option]")
-    cdef CUmodule module = CUmodule()
-    cyimage = utils.HelperInputVoidPtr(image)
-    cdef void* cyimage_ptr = <void*><void_ptr>cyimage.cptr
-    if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions))
-    if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions))
-    cdef vector[cydriver.CUjit_option] cyoptions = [pyoptions.value for pyoptions in (options)]
-    pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)]
-    cdef utils.InputVoidPtrPtrHelper voidStarHelperoptionValues = utils.InputVoidPtrPtrHelper(pylist)
-    err = cydriver.cuModuleLoadDataEx(<cydriver.CUmodule*>module._ptr, cyimage_ptr, numOptions, cyoptions.data(), <void**><void_ptr>voidStarHelperoptionValues.cptr)
-    return (CUresult(err), module)
-{{endif}}
-
-{{if 'cuModuleLoadFatBinary' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleLoadFatBinary(fatCubin):
-    """ Load a module's data.
-
-    Takes a pointer `fatCubin` and loads the corresponding module `module`
-    into the current context. The pointer represents a `fat binary` object,
-    which is a collection of different `cubin` and/or `PTX` files, all
-    representing the same device code, but compiled and optimized for
-    different architectures.
-
-    Prior to CUDA 4.0, there was no documented API for constructing and
-    using fat binary objects by programmers. Starting with CUDA 4.0, fat
-    binary objects can be constructed by providing the `-fatbin option` to
-    nvcc. More information can be found in the nvcc document.
-
-    Parameters
-    ----------
-    fatCubin : Any
-        Fat binary to load
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_JIT_COMPILER_NOT_FOUND`
-    module : :py:obj:`~.CUmodule`
-        Returned module
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleUnload`
-    """
-    cdef CUmodule module = CUmodule()
-    cyfatCubin = utils.HelperInputVoidPtr(fatCubin)
-    cdef void* cyfatCubin_ptr = <void*><void_ptr>cyfatCubin.cptr
-    err = cydriver.cuModuleLoadFatBinary(<cydriver.CUmodule*>module._ptr, cyfatCubin_ptr)
-    return (CUresult(err), module)
-{{endif}}
-
-{{if 'cuModuleUnload' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleUnload(hmod):
-    """ Unloads a module.
-
-    Unloads a module `hmod` from the current context. Attempting to unload
-    a module which was obtained from the Library Management API such as
-    :py:obj:`~.cuLibraryGetModule` will return
-    :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`.
-
-    Parameters
-    ----------
-    hmod : :py:obj:`~.CUmodule`
-        Module to unload
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`
-    """
-    cdef cydriver.CUmodule cyhmod
-    if hmod is None:
-        cyhmod = <cydriver.CUmodule><void_ptr>0
-    elif isinstance(hmod, (CUmodule,)):
-        phmod = int(hmod)
-        cyhmod = <cydriver.CUmodule><void_ptr>phmod
-    else:
-        phmod = int(CUmodule(hmod))
-        cyhmod = <cydriver.CUmodule><void_ptr>phmod
-    err = cydriver.cuModuleUnload(cyhmod)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuModuleGetLoadingMode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleGetLoadingMode():
-    """ Query lazy loading mode.
-
-    Returns lazy loading mode Module loading mode is controlled by
-    CUDA_MODULE_LOADING env variable
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    mode : :py:obj:`~.CUmoduleLoadingMode`
-        Returns the lazy loading mode
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleLoad`,
-    """
-    cdef cydriver.CUmoduleLoadingMode mode
-    err = cydriver.cuModuleGetLoadingMode(&mode)
-    return (CUresult(err), CUmoduleLoadingMode(mode))
-{{endif}}
-
-{{if 'cuModuleGetFunction' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleGetFunction(hmod, char* name):
-    """ Returns a function handle.
-
-    Returns in `*hfunc` the handle of the function of name `name` located
-    in module `hmod`. If no function of that name exists,
-    :py:obj:`~.cuModuleGetFunction()` returns
-    :py:obj:`~.CUDA_ERROR_NOT_FOUND`.
-
-    Parameters
-    ----------
-    hmod : :py:obj:`~.CUmodule`
-        Module to retrieve function from
-    name : bytes
-        Name of function to retrieve
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-    hfunc : :py:obj:`~.CUfunction`
-        Returned function handle
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload`
-    """
-    cdef cydriver.CUmodule cyhmod
-    if hmod is None:
-        cyhmod = <cydriver.CUmodule><void_ptr>0
-    elif isinstance(hmod, (CUmodule,)):
-        phmod = int(hmod)
-        cyhmod = <cydriver.CUmodule><void_ptr>phmod
-    else:
-        phmod = int(CUmodule(hmod))
-        cyhmod = <cydriver.CUmodule><void_ptr>phmod
-    cdef CUfunction hfunc = CUfunction()
-    err = cydriver.cuModuleGetFunction(<cydriver.CUfunction*>hfunc._ptr, cyhmod, name)
-    return (CUresult(err), hfunc)
-{{endif}}
-
-{{if 'cuModuleGetFunctionCount' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleGetFunctionCount(mod):
-    """ Returns the number of functions within a module.
-
-    Returns in `count` the number of functions in `mod`.
-
-    Parameters
-    ----------
-    mod : :py:obj:`~.CUmodule`
-        Module to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    count : unsigned int
-        Number of functions found within the module
-    """
-    cdef cydriver.CUmodule cymod
-    if mod is None:
-        cymod = <cydriver.CUmodule><void_ptr>0
-    elif isinstance(mod, (CUmodule,)):
-        pmod = int(mod)
-        cymod = <cydriver.CUmodule><void_ptr>pmod
-    else:
-        pmod = int(CUmodule(mod))
-        cymod = <cydriver.CUmodule><void_ptr>pmod
-    cdef unsigned int count = 0
-    err = cydriver.cuModuleGetFunctionCount(&count, cymod)
-    return (CUresult(err), count)
-{{endif}}
-
-{{if 'cuModuleEnumerateFunctions' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleEnumerateFunctions(unsigned int numFunctions, mod):
-    """ Returns the function handles within a module.
-
-    Returns in `functions` a maximum number of `numFunctions` function
-    handles within `mod`. When function loading mode is set to LAZY the
-    function retrieved may be partially loaded. The loading state of a
-    function can be queried using :py:obj:`~.cuFunctionIsLoaded`. CUDA APIs
-    may load the function automatically when called with partially loaded
-    function handle which may incur additional latency. Alternatively,
-    :py:obj:`~.cuFunctionLoad` can be used to explicitly load a function.
-    The returned function handles become invalid when the module is
-    unloaded.
-
-    Parameters
-    ----------
-    numFunctions : unsigned int
-        Maximum number of function handles may be returned to the buffer
-    mod : :py:obj:`~.CUmodule`
-        Module to query from
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    functions : List[:py:obj:`~.CUfunction`]
-        Buffer where the function handles are returned to
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetFunctionCount`, :py:obj:`~.cuFuncIsLoaded`, :py:obj:`~.cuFuncLoad`
-    """
-    cdef cydriver.CUmodule cymod
-    if mod is None:
-        cymod = <cydriver.CUmodule><void_ptr>0
-    elif isinstance(mod, (CUmodule,)):
-        pmod = int(mod)
-        cymod = <cydriver.CUmodule><void_ptr>pmod
-    else:
-        pmod = int(CUmodule(mod))
-        cymod = <cydriver.CUmodule><void_ptr>pmod
-    cdef cydriver.CUfunction* cyfunctions = NULL
-    pyfunctions = []
-    if numFunctions != 0:
-        cyfunctions = <cydriver.CUfunction*>calloc(numFunctions, sizeof(cydriver.CUfunction))
-        if cyfunctions is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(numFunctions) + 'x' + str(sizeof(cydriver.CUfunction)))
-    err = cydriver.cuModuleEnumerateFunctions(cyfunctions, numFunctions, cymod)
-    if CUresult(err) == CUresult(0):
-        pyfunctions = [CUfunction(init_value=<void_ptr>cyfunctions[idx]) for idx in range(numFunctions)]
-    if cyfunctions is not NULL:
-        free(cyfunctions)
-    return (CUresult(err), pyfunctions)
-{{endif}}
-
-{{if 'cuModuleGetGlobal_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleGetGlobal(hmod, char* name):
-    """ Returns a global pointer from a module.
-
-    Returns in `*dptr` and `*bytes` the base pointer and size of the global
-    of name `name` located in module `hmod`. If no variable of that name
-    exists, :py:obj:`~.cuModuleGetGlobal()` returns
-    :py:obj:`~.CUDA_ERROR_NOT_FOUND`. One of the parameters `dptr` or
-    `numbytes` (not both) can be NULL in which case it is ignored.
-
-    Parameters
-    ----------
-    hmod : :py:obj:`~.CUmodule`
-        Module to retrieve global from
-    name : bytes
-        Name of global to retrieve
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-    dptr : :py:obj:`~.CUdeviceptr`
-        Returned global device pointer
-    numbytes : int
-        Returned global size in bytes
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload`, :py:obj:`~.cudaGetSymbolAddress`, :py:obj:`~.cudaGetSymbolSize`
-    """
-    cdef cydriver.CUmodule cyhmod
-    if hmod is None:
-        cyhmod = <cydriver.CUmodule><void_ptr>0
-    elif isinstance(hmod, (CUmodule,)):
-        phmod = int(hmod)
-        cyhmod = <cydriver.CUmodule><void_ptr>phmod
-    else:
-        phmod = int(CUmodule(hmod))
-        cyhmod = <cydriver.CUmodule><void_ptr>phmod
-    cdef CUdeviceptr dptr = CUdeviceptr()
-    cdef size_t numbytes = 0
-    err = cydriver.cuModuleGetGlobal(<cydriver.CUdeviceptr*>dptr._ptr, &numbytes, cyhmod, name)
-    return (CUresult(err), dptr, numbytes)
-{{endif}}
-
-{{if 'cuLinkCreate_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLinkCreate(unsigned int numOptions, options : Optional[Tuple[CUjit_option] | List[CUjit_option]], optionValues : Optional[Tuple[Any] | List[Any]]):
-    """ Creates a pending JIT linker invocation.
-
-    If the call is successful, the caller owns the returned CUlinkState,
-    which should eventually be destroyed with :py:obj:`~.cuLinkDestroy`.
-    The device code machine size (32 or 64 bit) will match the calling
-    application.
-
-    Both linker and compiler options may be specified. Compiler options
-    will be applied to inputs to this linker action which must be compiled
-    from PTX. The options :py:obj:`~.CU_JIT_WALL_TIME`,
-    :py:obj:`~.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`, and
-    :py:obj:`~.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES` will accumulate data
-    until the CUlinkState is destroyed.
-
-    The data passed in via :py:obj:`~.cuLinkAddData` and
-    :py:obj:`~.cuLinkAddFile` will be treated as relocatable (-rdc=true to
-    nvcc) when linking the final cubin during :py:obj:`~.cuLinkComplete`
-    and will have similar consequences as offline relocatable device code
-    linking.
-
-    `optionValues` must remain valid for the life of the CUlinkState if
-    output options are used. No other references to inputs are maintained
-    after this call returns.
-
-    Parameters
-    ----------
-    numOptions : unsigned int
-        Size of options arrays
-    options : List[:py:obj:`~.CUjit_option`]
-        Array of linker and compiler options
-    optionValues : List[Any]
-        Array of option values, each cast to void *
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_JIT_COMPILER_NOT_FOUND`
-    stateOut : :py:obj:`~.CUlinkState`
-        On success, this will contain a CUlinkState to specify and complete
-        this action
-
-    See Also
-    --------
-    :py:obj:`~.cuLinkAddData`, :py:obj:`~.cuLinkAddFile`, :py:obj:`~.cuLinkComplete`, :py:obj:`~.cuLinkDestroy`
-
-    Notes
-    -----
-    For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted
-    """
-    optionValues = [] if optionValues is None else optionValues
-    options = [] if options is None else options
-    if not all(isinstance(_x, (CUjit_option)) for _x in options):
-        raise TypeError("Argument 'options' is not instance of type (expected Tuple[cydriver.CUjit_option] or List[cydriver.CUjit_option]")
-    if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions))
-    if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions))
-    cdef vector[cydriver.CUjit_option] cyoptions = [pyoptions.value for pyoptions in (options)]
-    pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)]
-    cdef utils.InputVoidPtrPtrHelper voidStarHelperoptionValues = utils.InputVoidPtrPtrHelper(pylist)
-    cdef CUlinkState stateOut = CUlinkState()
-    err = cydriver.cuLinkCreate(numOptions, cyoptions.data(), <void**><void_ptr>voidStarHelperoptionValues.cptr, stateOut._ptr)
-    stateOut._keepalive.append(voidStarHelperoptionValues)
-    for option in pylist:
-        stateOut._keepalive.append(option)
-    return (CUresult(err), stateOut)
-{{endif}}
-
-{{if 'cuLinkAddData_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLinkAddData(state, typename not None : CUjitInputType, data, size_t size, char* name, unsigned int numOptions, options : Optional[Tuple[CUjit_option] | List[CUjit_option]], optionValues : Optional[Tuple[Any] | List[Any]]):
-    """ Add an input to a pending linker invocation.
-
-    Ownership of `data` is retained by the caller. No reference is retained
-    to any inputs after this call returns.
-
-    This method accepts only compiler options, which are used if the data
-    must be compiled from PTX, and does not accept any of
-    :py:obj:`~.CU_JIT_WALL_TIME`, :py:obj:`~.CU_JIT_INFO_LOG_BUFFER`,
-    :py:obj:`~.CU_JIT_ERROR_LOG_BUFFER`,
-    :py:obj:`~.CU_JIT_TARGET_FROM_CUCONTEXT`, or :py:obj:`~.CU_JIT_TARGET`.
-
-    Parameters
-    ----------
-    state : :py:obj:`~.CUlinkState`
-        A pending linker action.
-    typename : :py:obj:`~.CUjitInputType`
-        The type of the input data.
-    data : Any
-        The input data. PTX must be NULL-terminated.
-    size : size_t
-        The length of the input data.
-    name : bytes
-        An optional name for this input in log messages.
-    numOptions : unsigned int
-        Size of options.
-    options : List[:py:obj:`~.CUjit_option`]
-        Options to be applied only for this input (overrides options from
-        :py:obj:`~.cuLinkCreate`).
-    optionValues : List[Any]
-        Array of option values, each cast to void *.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_IMAGE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`
-
-    See Also
-    --------
-    :py:obj:`~.cuLinkCreate`, :py:obj:`~.cuLinkAddFile`, :py:obj:`~.cuLinkComplete`, :py:obj:`~.cuLinkDestroy`
-
-    Notes
-    -----
-    For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted
-    """
-    optionValues = [] if optionValues is None else optionValues
-    options = [] if options is None else options
-    if not all(isinstance(_x, (CUjit_option)) for _x in options):
-        raise TypeError("Argument 'options' is not instance of type (expected Tuple[cydriver.CUjit_option] or List[cydriver.CUjit_option]")
-    cdef cydriver.CUlinkState cystate
-    if state is None:
-        cystate = <cydriver.CUlinkState><void_ptr>0
-    elif isinstance(state, (CUlinkState,)):
-        pstate = int(state)
-        cystate = <cydriver.CUlinkState><void_ptr>pstate
-    else:
-        pstate = int(CUlinkState(state))
-        cystate = <cydriver.CUlinkState><void_ptr>pstate
-    cdef cydriver.CUjitInputType cytypename = typename.value
-    cydata = utils.HelperInputVoidPtr(data)
-    cdef void* cydata_ptr = <void*><void_ptr>cydata.cptr
-    if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions))
-    if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions))
-    cdef vector[cydriver.CUjit_option] cyoptions = [pyoptions.value for pyoptions in (options)]
-    pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)]
-    cdef utils.InputVoidPtrPtrHelper voidStarHelperoptionValues = utils.InputVoidPtrPtrHelper(pylist)
-    err = cydriver.cuLinkAddData(cystate, cytypename, cydata_ptr, size, name, numOptions, cyoptions.data(), <void**><void_ptr>voidStarHelperoptionValues.cptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuLinkAddFile_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLinkAddFile(state, typename not None : CUjitInputType, char* path, unsigned int numOptions, options : Optional[Tuple[CUjit_option] | List[CUjit_option]], optionValues : Optional[Tuple[Any] | List[Any]]):
-    """ Add a file input to a pending linker invocation.
-
-    No reference is retained to any inputs after this call returns.
-
-    This method accepts only compiler options, which are used if the input
-    must be compiled from PTX, and does not accept any of
-    :py:obj:`~.CU_JIT_WALL_TIME`, :py:obj:`~.CU_JIT_INFO_LOG_BUFFER`,
-    :py:obj:`~.CU_JIT_ERROR_LOG_BUFFER`,
-    :py:obj:`~.CU_JIT_TARGET_FROM_CUCONTEXT`, or :py:obj:`~.CU_JIT_TARGET`.
-
-    This method is equivalent to invoking :py:obj:`~.cuLinkAddData` on the
-    contents of the file.
-
-    Parameters
-    ----------
-    state : :py:obj:`~.CUlinkState`
-        A pending linker action
-    typename : :py:obj:`~.CUjitInputType`
-        The type of the input data
-    path : bytes
-        Path to the input file
-    numOptions : unsigned int
-        Size of options
-    options : List[:py:obj:`~.CUjit_option`]
-        Options to be applied only for this input (overrides options from
-        :py:obj:`~.cuLinkCreate`)
-    optionValues : List[Any]
-        Array of option values, each cast to void *
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_FILE_NOT_FOUND` :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_IMAGE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`
-
-    See Also
-    --------
-    :py:obj:`~.cuLinkCreate`, :py:obj:`~.cuLinkAddData`, :py:obj:`~.cuLinkComplete`, :py:obj:`~.cuLinkDestroy`
-
-    Notes
-    -----
-    For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted
-    """
-    optionValues = [] if optionValues is None else optionValues
-    options = [] if options is None else options
-    if not all(isinstance(_x, (CUjit_option)) for _x in options):
-        raise TypeError("Argument 'options' is not instance of type (expected Tuple[cydriver.CUjit_option] or List[cydriver.CUjit_option]")
-    cdef cydriver.CUlinkState cystate
-    if state is None:
-        cystate = <cydriver.CUlinkState><void_ptr>0
-    elif isinstance(state, (CUlinkState,)):
-        pstate = int(state)
-        cystate = <cydriver.CUlinkState><void_ptr>pstate
-    else:
-        pstate = int(CUlinkState(state))
-        cystate = <cydriver.CUlinkState><void_ptr>pstate
-    cdef cydriver.CUjitInputType cytypename = typename.value
-    if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions))
-    if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions))
-    cdef vector[cydriver.CUjit_option] cyoptions = [pyoptions.value for pyoptions in (options)]
-    pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)]
-    cdef utils.InputVoidPtrPtrHelper voidStarHelperoptionValues = utils.InputVoidPtrPtrHelper(pylist)
-    err = cydriver.cuLinkAddFile(cystate, cytypename, path, numOptions, cyoptions.data(), <void**><void_ptr>voidStarHelperoptionValues.cptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuLinkComplete' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLinkComplete(state):
-    """ Complete a pending linker invocation.
-
-    Completes the pending linker action and returns the cubin image for the
-    linked device code, which can be used with
-    :py:obj:`~.cuModuleLoadData`. The cubin is owned by `state`, so it
-    should be loaded before `state` is destroyed via
-    :py:obj:`~.cuLinkDestroy`. This call does not destroy `state`.
-
-    Parameters
-    ----------
-    state : :py:obj:`~.CUlinkState`
-        A pending linker invocation
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    cubinOut : Any
-        On success, this will point to the output image
-    sizeOut : int
-        Optional parameter to receive the size of the generated image
-
-    See Also
-    --------
-    :py:obj:`~.cuLinkCreate`, :py:obj:`~.cuLinkAddData`, :py:obj:`~.cuLinkAddFile`, :py:obj:`~.cuLinkDestroy`, :py:obj:`~.cuModuleLoadData`
-    """
-    cdef cydriver.CUlinkState cystate
-    if state is None:
-        cystate = <cydriver.CUlinkState><void_ptr>0
-    elif isinstance(state, (CUlinkState,)):
-        pstate = int(state)
-        cystate = <cydriver.CUlinkState><void_ptr>pstate
-    else:
-        pstate = int(CUlinkState(state))
-        cystate = <cydriver.CUlinkState><void_ptr>pstate
-    cdef void_ptr cubinOut = 0
-    cdef size_t sizeOut = 0
-    err = cydriver.cuLinkComplete(cystate, <void**>&cubinOut, &sizeOut)
-    return (CUresult(err), cubinOut, sizeOut)
-{{endif}}
-
-{{if 'cuLinkDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLinkDestroy(state):
-    """ Destroys state for a JIT linker invocation.
-
-    Parameters
-    ----------
-    state : :py:obj:`~.CUlinkState`
-        State object for the linker invocation
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuLinkCreate`
-    """
-    cdef cydriver.CUlinkState cystate
-    if state is None:
-        cystate = <cydriver.CUlinkState><void_ptr>0
-    elif isinstance(state, (CUlinkState,)):
-        pstate = int(state)
-        cystate = <cydriver.CUlinkState><void_ptr>pstate
-    else:
-        pstate = int(CUlinkState(state))
-        cystate = <cydriver.CUlinkState><void_ptr>pstate
-    err = cydriver.cuLinkDestroy(cystate)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuModuleGetTexRef' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleGetTexRef(hmod, char* name):
-    """ Returns a handle to a texture reference.
-
-    [Deprecated]
-
-    Returns in `*pTexRef` the handle of the texture reference of name
-    `name` in the module `hmod`. If no texture reference of that name
-    exists, :py:obj:`~.cuModuleGetTexRef()` returns
-    :py:obj:`~.CUDA_ERROR_NOT_FOUND`. This texture reference handle should
-    not be destroyed, since it will be destroyed when the module is
-    unloaded.
-
-    Parameters
-    ----------
-    hmod : :py:obj:`~.CUmodule`
-        Module to retrieve texture reference from
-    name : bytes
-        Name of texture reference to retrieve
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-    pTexRef : :py:obj:`~.CUtexref`
-        Returned texture reference
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetSurfRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload`
-    """
-    cdef cydriver.CUmodule cyhmod
-    if hmod is None:
-        cyhmod = <cydriver.CUmodule><void_ptr>0
-    elif isinstance(hmod, (CUmodule,)):
-        phmod = int(hmod)
-        cyhmod = <cydriver.CUmodule><void_ptr>phmod
-    else:
-        phmod = int(CUmodule(hmod))
-        cyhmod = <cydriver.CUmodule><void_ptr>phmod
-    cdef CUtexref pTexRef = CUtexref()
-    err = cydriver.cuModuleGetTexRef(<cydriver.CUtexref*>pTexRef._ptr, cyhmod, name)
-    return (CUresult(err), pTexRef)
-{{endif}}
-
-{{if 'cuModuleGetSurfRef' in found_functions}}
-
-@cython.embedsignature(True)
-def cuModuleGetSurfRef(hmod, char* name):
-    """ Returns a handle to a surface reference.
-
-    [Deprecated]
-
-    Returns in `*pSurfRef` the handle of the surface reference of name
-    `name` in the module `hmod`. If no surface reference of that name
-    exists, :py:obj:`~.cuModuleGetSurfRef()` returns
-    :py:obj:`~.CUDA_ERROR_NOT_FOUND`.
-
-    Parameters
-    ----------
-    hmod : :py:obj:`~.CUmodule`
-        Module to retrieve surface reference from
-    name : bytes
-        Name of surface reference to retrieve
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-    pSurfRef : :py:obj:`~.CUsurfref`
-        Returned surface reference
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload`
-    """
-    cdef cydriver.CUmodule cyhmod
-    if hmod is None:
-        cyhmod = <cydriver.CUmodule><void_ptr>0
-    elif isinstance(hmod, (CUmodule,)):
-        phmod = int(hmod)
-        cyhmod = <cydriver.CUmodule><void_ptr>phmod
-    else:
-        phmod = int(CUmodule(hmod))
-        cyhmod = <cydriver.CUmodule><void_ptr>phmod
-    cdef CUsurfref pSurfRef = CUsurfref()
-    err = cydriver.cuModuleGetSurfRef(<cydriver.CUsurfref*>pSurfRef._ptr, cyhmod, name)
-    return (CUresult(err), pSurfRef)
-{{endif}}
-
-{{if 'cuLibraryLoadData' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLibraryLoadData(code, jitOptions : Optional[Tuple[CUjit_option] | List[CUjit_option]], jitOptionsValues : Optional[Tuple[Any] | List[Any]], unsigned int numJitOptions, libraryOptions : Optional[Tuple[CUlibraryOption] | List[CUlibraryOption]], libraryOptionValues : Optional[Tuple[Any] | List[Any]], unsigned int numLibraryOptions):
-    """ Load a library with specified code and options.
-
-    Takes a pointer `code` and loads the corresponding library `library`
-    based on the application defined library loading mode:
-
-    - If module loading is set to EAGER, via the environment variables
-      described in "Module loading", `library` is loaded eagerly into all
-      contexts at the time of the call and future contexts at the time of
-      creation until the library is unloaded with
-      :py:obj:`~.cuLibraryUnload()`.
-
-    - If the environment variables are set to LAZY, `library` is not
-      immediately loaded onto all existent contexts and will only be loaded
-      when a function is needed for that context, such as a kernel launch.
-
-    These environment variables are described in the CUDA programming guide
-    under the "CUDA environment variables" section.
-
-    The `code` may be a `cubin` or `fatbin` as output by nvcc, or a NULL-
-    terminated `PTX`, either as output by nvcc or hand-written. A fatbin
-    should also contain relocatable code when doing separate compilation.
-
-    Options are passed as an array via `jitOptions` and any corresponding
-    parameters are passed in `jitOptionsValues`. The number of total JIT
-    options is supplied via `numJitOptions`. Any outputs will be returned
-    via `jitOptionsValues`.
-
-    Library load options are passed as an array via `libraryOptions` and
-    any corresponding parameters are passed in `libraryOptionValues`. The
-    number of total library load options is supplied via
-    `numLibraryOptions`.
-
-    Parameters
-    ----------
-    code : Any
-        Code to load
-    jitOptions : List[:py:obj:`~.CUjit_option`]
-        Options for JIT
-    jitOptionsValues : List[Any]
-        Option values for JIT
-    numJitOptions : unsigned int
-        Number of options
-    libraryOptions : List[:py:obj:`~.CUlibraryOption`]
-        Options for loading
-    libraryOptionValues : List[Any]
-        Option values for loading
-    numLibraryOptions : unsigned int
-        Number of options for loading
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_JIT_COMPILER_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    library : :py:obj:`~.CUlibrary`
-        Returned library
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`
-
-    Notes
-    -----
-    If the library contains managed variables and no device in the system supports managed variables this call is expected to return :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    """
-    libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues
-    libraryOptions = [] if libraryOptions is None else libraryOptions
-    if not all(isinstance(_x, (CUlibraryOption)) for _x in libraryOptions):
-        raise TypeError("Argument 'libraryOptions' is not instance of type (expected Tuple[cydriver.CUlibraryOption] or List[cydriver.CUlibraryOption]")
-    jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues
-    jitOptions = [] if jitOptions is None else jitOptions
-    if not all(isinstance(_x, (CUjit_option)) for _x in jitOptions):
-        raise TypeError("Argument 'jitOptions' is not instance of type (expected Tuple[cydriver.CUjit_option] or List[cydriver.CUjit_option]")
-    cdef CUlibrary library = CUlibrary()
-    cycode = utils.HelperInputVoidPtr(code)
-    cdef void* cycode_ptr = <void*><void_ptr>cycode.cptr
-    cdef vector[cydriver.CUjit_option] cyjitOptions = [pyjitOptions.value for pyjitOptions in (jitOptions)]
-    pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)]
-    cdef utils.InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = utils.InputVoidPtrPtrHelper(pylist)
-    if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions))
-    if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions))
-    cdef vector[cydriver.CUlibraryOption] cylibraryOptions = [pylibraryOptions.value for pylibraryOptions in (libraryOptions)]
-    pylist = [utils.HelperCUlibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)]
-    cdef utils.InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = utils.InputVoidPtrPtrHelper(pylist)
-    if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions))
-    if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions))
-    err = cydriver.cuLibraryLoadData(<cydriver.CUlibrary*>library._ptr, cycode_ptr, cyjitOptions.data(), <void**><void_ptr>voidStarHelperjitOptionsValues.cptr, numJitOptions, cylibraryOptions.data(), <void**><void_ptr>voidStarHelperlibraryOptionValues.cptr, numLibraryOptions)
-    return (CUresult(err), library)
-{{endif}}
-
-{{if 'cuLibraryLoadFromFile' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLibraryLoadFromFile(char* fileName, jitOptions : Optional[Tuple[CUjit_option] | List[CUjit_option]], jitOptionsValues : Optional[Tuple[Any] | List[Any]], unsigned int numJitOptions, libraryOptions : Optional[Tuple[CUlibraryOption] | List[CUlibraryOption]], libraryOptionValues : Optional[Tuple[Any] | List[Any]], unsigned int numLibraryOptions):
-    """ Load a library with specified file and options.
-
-    Takes a pointer `code` and loads the corresponding library `library`
-    based on the application defined library loading mode:
-
-    - If module loading is set to EAGER, via the environment variables
-      described in "Module loading", `library` is loaded eagerly into all
-      contexts at the time of the call and future contexts at the time of
-      creation until the library is unloaded with
-      :py:obj:`~.cuLibraryUnload()`.
-
-    - If the environment variables are set to LAZY, `library` is not
-      immediately loaded onto all existent contexts and will only be loaded
-      when a function is needed for that context, such as a kernel launch.
-
-    These environment variables are described in the CUDA programming guide
-    under the "CUDA environment variables" section.
-
-    The file should be a `cubin` file as output by nvcc, or a `PTX` file
-    either as output by nvcc or handwritten, or a `fatbin` file as output
-    by nvcc. A fatbin should also contain relocatable code when doing
-    separate compilation.
-
-    Options are passed as an array via `jitOptions` and any corresponding
-    parameters are passed in `jitOptionsValues`. The number of total
-    options is supplied via `numJitOptions`. Any outputs will be returned
-    via `jitOptionsValues`.
-
-    Library load options are passed as an array via `libraryOptions` and
-    any corresponding parameters are passed in `libraryOptionValues`. The
-    number of total library load options is supplied via
-    `numLibraryOptions`.
-
-    Parameters
-    ----------
-    fileName : bytes
-        File to load from
-    jitOptions : List[:py:obj:`~.CUjit_option`]
-        Options for JIT
-    jitOptionsValues : List[Any]
-        Option values for JIT
-    numJitOptions : unsigned int
-        Number of options
-    libraryOptions : List[:py:obj:`~.CUlibraryOption`]
-        Options for loading
-    libraryOptionValues : List[Any]
-        Option values for loading
-    numLibraryOptions : unsigned int
-        Number of options for loading
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_PTX`, :py:obj:`~.CUDA_ERROR_UNSUPPORTED_PTX_VERSION`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NO_BINARY_FOR_GPU`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_JIT_COMPILER_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    library : :py:obj:`~.CUlibrary`
-        Returned library
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`
-
-    Notes
-    -----
-    If the library contains managed variables and no device in the system supports managed variables this call is expected to return :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    """
-    libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues
-    libraryOptions = [] if libraryOptions is None else libraryOptions
-    if not all(isinstance(_x, (CUlibraryOption)) for _x in libraryOptions):
-        raise TypeError("Argument 'libraryOptions' is not instance of type (expected Tuple[cydriver.CUlibraryOption] or List[cydriver.CUlibraryOption]")
-    jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues
-    jitOptions = [] if jitOptions is None else jitOptions
-    if not all(isinstance(_x, (CUjit_option)) for _x in jitOptions):
-        raise TypeError("Argument 'jitOptions' is not instance of type (expected Tuple[cydriver.CUjit_option] or List[cydriver.CUjit_option]")
-    cdef CUlibrary library = CUlibrary()
-    cdef vector[cydriver.CUjit_option] cyjitOptions = [pyjitOptions.value for pyjitOptions in (jitOptions)]
-    pylist = [utils.HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)]
-    cdef utils.InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = utils.InputVoidPtrPtrHelper(pylist)
-    if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions))
-    if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions))
-    cdef vector[cydriver.CUlibraryOption] cylibraryOptions = [pylibraryOptions.value for pylibraryOptions in (libraryOptions)]
-    pylist = [utils.HelperCUlibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)]
-    cdef utils.InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = utils.InputVoidPtrPtrHelper(pylist)
-    if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions))
-    if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions))
-    err = cydriver.cuLibraryLoadFromFile(<cydriver.CUlibrary*>library._ptr, fileName, cyjitOptions.data(), <void**><void_ptr>voidStarHelperjitOptionsValues.cptr, numJitOptions, cylibraryOptions.data(), <void**><void_ptr>voidStarHelperlibraryOptionValues.cptr, numLibraryOptions)
-    return (CUresult(err), library)
-{{endif}}
-
-{{if 'cuLibraryUnload' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLibraryUnload(library):
-    """ Unloads a library.
-
-    Unloads the library specified with `library`
-
-    Parameters
-    ----------
-    library : :py:obj:`~.CUlibrary`
-        Library to unload
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuModuleUnload`
-    """
-    cdef cydriver.CUlibrary cylibrary
-    if library is None:
-        cylibrary = <cydriver.CUlibrary><void_ptr>0
-    elif isinstance(library, (CUlibrary,)):
-        plibrary = int(library)
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    else:
-        plibrary = int(CUlibrary(library))
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    err = cydriver.cuLibraryUnload(cylibrary)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuLibraryGetKernel' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLibraryGetKernel(library, char* name):
-    """ Returns a kernel handle.
-
-    Returns in `pKernel` the handle of the kernel with name `name` located
-    in library `library`. If kernel handle is not found, the call returns
-    :py:obj:`~.CUDA_ERROR_NOT_FOUND`.
-
-    Parameters
-    ----------
-    library : :py:obj:`~.CUlibrary`
-        Library to retrieve kernel from
-    name : bytes
-        Name of kernel to retrieve
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-    pKernel : :py:obj:`~.CUkernel`
-        Returned kernel handle
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuKernelGetFunction`, :py:obj:`~.cuLibraryGetModule`, :py:obj:`~.cuModuleGetFunction`
-    """
-    cdef cydriver.CUlibrary cylibrary
-    if library is None:
-        cylibrary = <cydriver.CUlibrary><void_ptr>0
-    elif isinstance(library, (CUlibrary,)):
-        plibrary = int(library)
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    else:
-        plibrary = int(CUlibrary(library))
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    cdef CUkernel pKernel = CUkernel()
-    err = cydriver.cuLibraryGetKernel(<cydriver.CUkernel*>pKernel._ptr, cylibrary, name)
-    return (CUresult(err), pKernel)
-{{endif}}
-
-{{if 'cuLibraryGetKernelCount' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLibraryGetKernelCount(lib):
-    """ Returns the number of kernels within a library.
-
-    Returns in `count` the number of kernels in `lib`.
-
-    Parameters
-    ----------
-    lib : :py:obj:`~.CUlibrary`
-        Library to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    count : unsigned int
-        Number of kernels found within the library
-    """
-    cdef cydriver.CUlibrary cylib
-    if lib is None:
-        cylib = <cydriver.CUlibrary><void_ptr>0
-    elif isinstance(lib, (CUlibrary,)):
-        plib = int(lib)
-        cylib = <cydriver.CUlibrary><void_ptr>plib
-    else:
-        plib = int(CUlibrary(lib))
-        cylib = <cydriver.CUlibrary><void_ptr>plib
-    cdef unsigned int count = 0
-    err = cydriver.cuLibraryGetKernelCount(&count, cylib)
-    return (CUresult(err), count)
-{{endif}}
-
-{{if 'cuLibraryEnumerateKernels' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLibraryEnumerateKernels(unsigned int numKernels, lib):
-    """ Retrieve the kernel handles within a library.
-
-    Returns in `kernels` a maximum number of `numKernels` kernel handles
-    within `lib`. The returned kernel handle becomes invalid when the
-    library is unloaded.
-
-    Parameters
-    ----------
-    numKernels : unsigned int
-        Maximum number of kernel handles may be returned to the buffer
-    lib : :py:obj:`~.CUlibrary`
-        Library to query from
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    kernels : List[:py:obj:`~.CUkernel`]
-        Buffer where the kernel handles are returned to
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryGetKernelCount`
-    """
-    cdef cydriver.CUlibrary cylib
-    if lib is None:
-        cylib = <cydriver.CUlibrary><void_ptr>0
-    elif isinstance(lib, (CUlibrary,)):
-        plib = int(lib)
-        cylib = <cydriver.CUlibrary><void_ptr>plib
-    else:
-        plib = int(CUlibrary(lib))
-        cylib = <cydriver.CUlibrary><void_ptr>plib
-    cdef cydriver.CUkernel* cykernels = NULL
-    pykernels = []
-    if numKernels != 0:
-        cykernels = <cydriver.CUkernel*>calloc(numKernels, sizeof(cydriver.CUkernel))
-        if cykernels is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(numKernels) + 'x' + str(sizeof(cydriver.CUkernel)))
-    err = cydriver.cuLibraryEnumerateKernels(cykernels, numKernels, cylib)
-    if CUresult(err) == CUresult(0):
-        pykernels = [CUkernel(init_value=<void_ptr>cykernels[idx]) for idx in range(numKernels)]
-    if cykernels is not NULL:
-        free(cykernels)
-    return (CUresult(err), pykernels)
-{{endif}}
-
-{{if 'cuLibraryGetModule' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLibraryGetModule(library):
-    """ Returns a module handle.
-
-    Returns in `pMod` the module handle associated with the current context
-    located in library `library`. If module handle is not found, the call
-    returns :py:obj:`~.CUDA_ERROR_NOT_FOUND`.
-
-    Parameters
-    ----------
-    library : :py:obj:`~.CUlibrary`
-        Library to retrieve module from
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_CONTEXT_IS_DESTROYED`
-    pMod : :py:obj:`~.CUmodule`
-        Returned module handle
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuModuleGetFunction`
-    """
-    cdef cydriver.CUlibrary cylibrary
-    if library is None:
-        cylibrary = <cydriver.CUlibrary><void_ptr>0
-    elif isinstance(library, (CUlibrary,)):
-        plibrary = int(library)
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    else:
-        plibrary = int(CUlibrary(library))
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    cdef CUmodule pMod = CUmodule()
-    err = cydriver.cuLibraryGetModule(<cydriver.CUmodule*>pMod._ptr, cylibrary)
-    return (CUresult(err), pMod)
-{{endif}}
-
-{{if 'cuKernelGetFunction' in found_functions}}
-
-@cython.embedsignature(True)
-def cuKernelGetFunction(kernel):
-    """ Returns a function handle.
-
-    Returns in `pFunc` the handle of the function for the requested kernel
-    `kernel` and the current context. If function handle is not found, the
-    call returns :py:obj:`~.CUDA_ERROR_NOT_FOUND`.
-
-    Parameters
-    ----------
-    kernel : :py:obj:`~.CUkernel`
-        Kernel to retrieve function for the requested context
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_CONTEXT_IS_DESTROYED`
-    pFunc : :py:obj:`~.CUfunction`
-        Returned function handle
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuLibraryGetKernel`, :py:obj:`~.cuLibraryGetModule`, :py:obj:`~.cuModuleGetFunction`
-    """
-    cdef cydriver.CUkernel cykernel
-    if kernel is None:
-        cykernel = <cydriver.CUkernel><void_ptr>0
-    elif isinstance(kernel, (CUkernel,)):
-        pkernel = int(kernel)
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    else:
-        pkernel = int(CUkernel(kernel))
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    cdef CUfunction pFunc = CUfunction()
-    err = cydriver.cuKernelGetFunction(<cydriver.CUfunction*>pFunc._ptr, cykernel)
-    return (CUresult(err), pFunc)
-{{endif}}
-
-{{if 'cuKernelGetLibrary' in found_functions}}
-
-@cython.embedsignature(True)
-def cuKernelGetLibrary(kernel):
-    """ Returns a library handle.
-
-    Returns in `pLib` the handle of the library for the requested kernel
-    `kernel`
-
-    Parameters
-    ----------
-    kernel : :py:obj:`~.CUkernel`
-        Kernel to retrieve library handle
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-    pLib : :py:obj:`~.CUlibrary`
-        Returned library handle
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuLibraryGetKernel`
-    """
-    cdef cydriver.CUkernel cykernel
-    if kernel is None:
-        cykernel = <cydriver.CUkernel><void_ptr>0
-    elif isinstance(kernel, (CUkernel,)):
-        pkernel = int(kernel)
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    else:
-        pkernel = int(CUkernel(kernel))
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    cdef CUlibrary pLib = CUlibrary()
-    err = cydriver.cuKernelGetLibrary(<cydriver.CUlibrary*>pLib._ptr, cykernel)
-    return (CUresult(err), pLib)
-{{endif}}
-
-{{if 'cuLibraryGetGlobal' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLibraryGetGlobal(library, char* name):
-    """ Returns a global device pointer.
-
-    Returns in `*dptr` and `*bytes` the base pointer and size of the global
-    with name `name` for the requested library `library` and the current
-    context. If no global for the requested name `name` exists, the call
-    returns :py:obj:`~.CUDA_ERROR_NOT_FOUND`. One of the parameters `dptr`
-    or `numbytes` (not both) can be NULL in which case it is ignored.
-
-    Parameters
-    ----------
-    library : :py:obj:`~.CUlibrary`
-        Library to retrieve global from
-    name : bytes
-        Name of global to retrieve
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_CONTEXT_IS_DESTROYED`
-    dptr : :py:obj:`~.CUdeviceptr`
-        Returned global device pointer for the requested context
-    numbytes : int
-        Returned global size in bytes
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuLibraryGetModule`, :py:obj:`~.cuModuleGetGlobal`
-    """
-    cdef cydriver.CUlibrary cylibrary
-    if library is None:
-        cylibrary = <cydriver.CUlibrary><void_ptr>0
-    elif isinstance(library, (CUlibrary,)):
-        plibrary = int(library)
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    else:
-        plibrary = int(CUlibrary(library))
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    cdef CUdeviceptr dptr = CUdeviceptr()
-    cdef size_t numbytes = 0
-    err = cydriver.cuLibraryGetGlobal(<cydriver.CUdeviceptr*>dptr._ptr, &numbytes, cylibrary, name)
-    return (CUresult(err), dptr, numbytes)
-{{endif}}
-
-{{if 'cuLibraryGetManaged' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLibraryGetManaged(library, char* name):
-    """ Returns a pointer to managed memory.
-
-    Returns in `*dptr` and `*bytes` the base pointer and size of the
-    managed memory with name `name` for the requested library `library`. If
-    no managed memory with the requested name `name` exists, the call
-    returns :py:obj:`~.CUDA_ERROR_NOT_FOUND`. One of the parameters `dptr`
-    or `numbytes` (not both) can be NULL in which case it is ignored. Note
-    that managed memory for library `library` is shared across devices and
-    is registered when the library is loaded into atleast one context.
-
-    Parameters
-    ----------
-    library : :py:obj:`~.CUlibrary`
-        Library to retrieve managed memory from
-    name : bytes
-        Name of managed memory to retrieve
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-    dptr : :py:obj:`~.CUdeviceptr`
-        Returned pointer to the managed memory
-    numbytes : int
-        Returned memory size in bytes
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`
-    """
-    cdef cydriver.CUlibrary cylibrary
-    if library is None:
-        cylibrary = <cydriver.CUlibrary><void_ptr>0
-    elif isinstance(library, (CUlibrary,)):
-        plibrary = int(library)
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    else:
-        plibrary = int(CUlibrary(library))
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    cdef CUdeviceptr dptr = CUdeviceptr()
-    cdef size_t numbytes = 0
-    err = cydriver.cuLibraryGetManaged(<cydriver.CUdeviceptr*>dptr._ptr, &numbytes, cylibrary, name)
-    return (CUresult(err), dptr, numbytes)
-{{endif}}
-
-{{if 'cuLibraryGetUnifiedFunction' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLibraryGetUnifiedFunction(library, char* symbol):
-    """ Returns a pointer to a unified function.
-
-    Returns in `*fptr` the function pointer to a unified function denoted
-    by `symbol`. If no unified function with name `symbol` exists, the call
-    returns :py:obj:`~.CUDA_ERROR_NOT_FOUND`. If there is no device with
-    attribute :py:obj:`~.CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS`
-    present in the system, the call may return
-    :py:obj:`~.CUDA_ERROR_NOT_FOUND`.
-
-    Parameters
-    ----------
-    library : :py:obj:`~.CUlibrary`
-        Library to retrieve function pointer memory from
-    symbol : bytes
-        Name of function pointer to retrieve
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-    fptr : Any
-        Returned pointer to a unified function
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`
-    """
-    cdef cydriver.CUlibrary cylibrary
-    if library is None:
-        cylibrary = <cydriver.CUlibrary><void_ptr>0
-    elif isinstance(library, (CUlibrary,)):
-        plibrary = int(library)
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    else:
-        plibrary = int(CUlibrary(library))
-        cylibrary = <cydriver.CUlibrary><void_ptr>plibrary
-    cdef void_ptr fptr = 0
-    err = cydriver.cuLibraryGetUnifiedFunction(<void**>&fptr, cylibrary, symbol)
-    return (CUresult(err), fptr)
-{{endif}}
-
-{{if 'cuKernelGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuKernelGetAttribute(attrib not None : CUfunction_attribute, kernel, dev):
-    """ Returns information about a kernel.
-
-    Returns in `*pi` the integer value of the attribute `attrib` for the
-    kernel `kernel` for the requested device `dev`. The supported
-    attributes are:
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK`: The maximum
-      number of threads per block, beyond which a launch of the kernel
-      would fail. This number depends on both the kernel and the requested
-      device.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES`: The size in bytes of
-      statically-allocated shared memory per block required by this kernel.
-      This does not include dynamically-allocated shared memory requested
-      by the user at runtime.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES`: The size in bytes of
-      user-allocated constant memory required by this kernel.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES`: The size in bytes of
-      local memory used by each thread of this kernel.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_NUM_REGS`: The number of registers used
-      by each thread of this kernel.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_PTX_VERSION`: The PTX virtual
-      architecture version for which the kernel was compiled. This value is
-      the major PTX version * 10
-
-      - the minor PTX version, so a PTX version 1.3 function would return
-        the value 13. Note that this may return the undefined value of 0
-        for cubins compiled prior to CUDA 3.0.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_BINARY_VERSION`: The binary architecture
-      version for which the kernel was compiled. This value is the major
-      binary version * 10 + the minor binary version, so a binary version
-      1.3 function would return the value 13. Note that this will return a
-      value of 10 for legacy cubins that do not have a properly-encoded
-      binary architecture version.
-
-    - :py:obj:`~.CU_FUNC_CACHE_MODE_CA`: The attribute to indicate whether
-      the kernel has been compiled with user specified option "-Xptxas
-      --dlcm=ca" set.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`: The
-      maximum size in bytes of dynamically-allocated shared memory.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`:
-      Preferred shared memory-L1 cache split ratio in percent of total
-      shared memory.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET`: If this
-      attribute is set, the kernel must launch with a valid cluster size
-      specified.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH`: The required
-      cluster width in blocks.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT`: The required
-      cluster height in blocks.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH`: The required
-      cluster depth in blocks.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED`:
-      Indicates whether the function can be launched with non-portable
-      cluster size. 1 is allowed, 0 is disallowed. A non-portable cluster
-      size may only function on the specific SKUs the program is tested on.
-      The launch might fail if the program is run on a different hardware
-      platform. CUDA API provides cudaOccupancyMaxActiveClusters to assist
-      with checking whether the desired size can be launched on the current
-      device. A portable cluster size is guaranteed to be functional on all
-      compute capabilities higher than the target compute capability. The
-      portable cluster size for sm_90 is 8 blocks per cluster. This value
-      may increase for future compute capabilities. The specific hardware
-      unit may support higher cluster sizes that’s not guaranteed to be
-      portable.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`:
-      The block scheduling policy of a function. The value type is
-      CUclusterSchedulingPolicy.
-
-    Parameters
-    ----------
-    attrib : :py:obj:`~.CUfunction_attribute`
-        Attribute requested
-    kernel : :py:obj:`~.CUkernel`
-        Kernel to query attribute of
-    dev : :py:obj:`~.CUdevice`
-        Device to query attribute of
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    pi : int
-        Returned attribute value
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuKernelSetAttribute`, :py:obj:`~.cuLibraryGetKernel`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cuKernelGetFunction`, :py:obj:`~.cuLibraryGetModule`, :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuFuncGetAttribute`
-
-    Notes
-    -----
-    If another thread is trying to set the same attribute on the same device using :py:obj:`~.cuKernelSetAttribute()` simultaneously, the attribute query will give the old or new value depending on the interleavings chosen by the OS scheduler and memory consistency.
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef cydriver.CUkernel cykernel
-    if kernel is None:
-        cykernel = <cydriver.CUkernel><void_ptr>0
-    elif isinstance(kernel, (CUkernel,)):
-        pkernel = int(kernel)
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    else:
-        pkernel = int(CUkernel(kernel))
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    cdef int pi = 0
-    cdef cydriver.CUfunction_attribute cyattrib = attrib.value
-    err = cydriver.cuKernelGetAttribute(&pi, cyattrib, cykernel, cydev)
-    return (CUresult(err), pi)
-{{endif}}
-
-{{if 'cuKernelSetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuKernelSetAttribute(attrib not None : CUfunction_attribute, int val, kernel, dev):
-    """ Sets information about a kernel.
-
-    This call sets the value of a specified attribute `attrib` on the
-    kernel `kernel` for the requested device `dev` to an integer value
-    specified by `val`. This function returns CUDA_SUCCESS if the new value
-    of the attribute could be successfully set. If the set fails, this call
-    will return an error. Not all attributes can have values set.
-    Attempting to set a value on a read-only attribute will result in an
-    error (CUDA_ERROR_INVALID_VALUE)
-
-    Note that attributes set using :py:obj:`~.cuFuncSetAttribute()` will
-    override the attribute set by this API irrespective of whether the call
-    to :py:obj:`~.cuFuncSetAttribute()` is made before or after this API
-    call. However, :py:obj:`~.cuKernelGetAttribute()` will always return
-    the attribute value set by this API.
-
-    Supported attributes are:
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`: This is
-      the maximum size in bytes of dynamically-allocated shared memory. The
-      value should contain the requested maximum size of dynamically-
-      allocated shared memory. The sum of this value and the function
-      attribute :py:obj:`~.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES` cannot
-      exceed the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN`.
-      The maximal size of requestable dynamic shared memory may differ by
-      GPU architecture.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`: On
-      devices where the L1 cache and shared memory use the same hardware
-      resources, this sets the shared memory carveout preference, in
-      percent of the total shared memory. See
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR`
-      This is only a hint, and the driver can choose a different ratio if
-      required to execute the function.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH`: The required
-      cluster width in blocks. The width, height, and depth values must
-      either all be 0 or all be positive. The validity of the cluster
-      dimensions is checked at launch time. If the value is set during
-      compile time, it cannot be set at runtime. Setting it at runtime will
-      return CUDA_ERROR_NOT_PERMITTED.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT`: The required
-      cluster height in blocks. The width, height, and depth values must
-      either all be 0 or all be positive. The validity of the cluster
-      dimensions is checked at launch time. If the value is set during
-      compile time, it cannot be set at runtime. Setting it at runtime will
-      return CUDA_ERROR_NOT_PERMITTED.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH`: The required
-      cluster depth in blocks. The width, height, and depth values must
-      either all be 0 or all be positive. The validity of the cluster
-      dimensions is checked at launch time. If the value is set during
-      compile time, it cannot be set at runtime. Setting it at runtime will
-      return CUDA_ERROR_NOT_PERMITTED.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED`:
-      Indicates whether the function can be launched with non-portable
-      cluster size. 1 is allowed, 0 is disallowed.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`:
-      The block scheduling policy of a function. The value type is
-      CUclusterSchedulingPolicy.
-
-    Parameters
-    ----------
-    attrib : :py:obj:`~.CUfunction_attribute`
-        Attribute requested
-    val : int
-        Value to set
-    kernel : :py:obj:`~.CUkernel`
-        Kernel to set attribute of
-    dev : :py:obj:`~.CUdevice`
-        Device to set attribute of
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuKernelGetAttribute`, :py:obj:`~.cuLibraryGetKernel`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cuKernelGetFunction`, :py:obj:`~.cuLibraryGetModule`, :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuFuncSetAttribute`
-
-    Notes
-    -----
-    The API has stricter locking requirements in comparison to its legacy counterpart :py:obj:`~.cuFuncSetAttribute()` due to device-wide semantics. If multiple threads are trying to set the same attribute on the same device simultaneously, the attribute setting will depend on the interleavings chosen by the OS scheduler and memory consistency.
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef cydriver.CUkernel cykernel
-    if kernel is None:
-        cykernel = <cydriver.CUkernel><void_ptr>0
-    elif isinstance(kernel, (CUkernel,)):
-        pkernel = int(kernel)
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    else:
-        pkernel = int(CUkernel(kernel))
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    cdef cydriver.CUfunction_attribute cyattrib = attrib.value
-    err = cydriver.cuKernelSetAttribute(cyattrib, val, cykernel, cydev)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuKernelSetCacheConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cuKernelSetCacheConfig(kernel, config not None : CUfunc_cache, dev):
-    """ Sets the preferred cache configuration for a device kernel.
-
-    On devices where the L1 cache and shared memory use the same hardware
-    resources, this sets through `config` the preferred cache configuration
-    for the device kernel `kernel` on the requested device `dev`. This is
-    only a preference. The driver will use the requested configuration if
-    possible, but it is free to choose a different configuration if
-    required to execute `kernel`. Any context-wide preference set via
-    :py:obj:`~.cuCtxSetCacheConfig()` will be overridden by this per-kernel
-    setting.
-
-    Note that attributes set using :py:obj:`~.cuFuncSetCacheConfig()` will
-    override the attribute set by this API irrespective of whether the call
-    to :py:obj:`~.cuFuncSetCacheConfig()` is made before or after this API
-    call.
-
-    This setting does nothing on devices where the size of the L1 cache and
-    shared memory are fixed.
-
-    Launching a kernel with a different preference than the most recent
-    preference setting may insert a device-side synchronization point.
-
-    The supported cache configurations are:
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_NONE`: no preference for shared
-      memory or L1 (default)
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_SHARED`: prefer larger shared memory
-      and smaller L1 cache
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_L1`: prefer larger L1 cache and
-      smaller shared memory
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_EQUAL`: prefer equal sized L1 cache
-      and shared memory
-
-    Parameters
-    ----------
-    kernel : :py:obj:`~.CUkernel`
-        Kernel to configure cache for
-    config : :py:obj:`~.CUfunc_cache`
-        Requested cache configuration
-    dev : :py:obj:`~.CUdevice`
-        Device to set attribute of
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-
-    See Also
-    --------
-    :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuLibraryGetKernel`, :py:obj:`~.cuKernelGetFunction`, :py:obj:`~.cuLibraryGetModule`, :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuLaunchKernel`
-
-    Notes
-    -----
-    The API has stricter locking requirements in comparison to its legacy counterpart :py:obj:`~.cuFuncSetCacheConfig()` due to device-wide semantics. If multiple threads are trying to set a config on the same device simultaneously, the cache config setting will depend on the interleavings chosen by the OS scheduler and memory consistency.
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef cydriver.CUkernel cykernel
-    if kernel is None:
-        cykernel = <cydriver.CUkernel><void_ptr>0
-    elif isinstance(kernel, (CUkernel,)):
-        pkernel = int(kernel)
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    else:
-        pkernel = int(CUkernel(kernel))
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    cdef cydriver.CUfunc_cache cyconfig = config.value
-    err = cydriver.cuKernelSetCacheConfig(cykernel, cyconfig, cydev)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuKernelGetName' in found_functions}}
-
-@cython.embedsignature(True)
-def cuKernelGetName(hfunc):
-    """ Returns the function name for a :py:obj:`~.CUkernel` handle.
-
-    Returns in `**name` the function name associated with the kernel handle
-    `hfunc` . The function name is returned as a null-terminated string.
-    The returned name is only valid when the kernel handle is valid. If the
-    library is unloaded or reloaded, one must call the API again to get the
-    updated name. This API may return a mangled name if the function is not
-    declared as having C linkage. If either `**name` or `hfunc` is NULL,
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUkernel`
-        The function handle to retrieve the name for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    name : bytes
-        The returned name of the function
-    """
-    cdef cydriver.CUkernel cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUkernel><void_ptr>0
-    elif isinstance(hfunc, (CUkernel,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUkernel><void_ptr>phfunc
-    else:
-        phfunc = int(CUkernel(hfunc))
-        cyhfunc = <cydriver.CUkernel><void_ptr>phfunc
-    cdef const char* name = NULL
-    err = cydriver.cuKernelGetName(&name, cyhfunc)
-    return (CUresult(err), <bytes>name)
-{{endif}}
-
-{{if 'cuKernelGetParamInfo' in found_functions}}
-
-@cython.embedsignature(True)
-def cuKernelGetParamInfo(kernel, size_t paramIndex):
-    """ Returns the offset and size of a kernel parameter in the device-side parameter layout.
-
-    Queries the kernel parameter at `paramIndex` into `kernel's` list of
-    parameters, and returns in `paramOffset` and `paramSize` the offset and
-    size, respectively, where the parameter will reside in the device-side
-    parameter layout. This information can be used to update kernel node
-    parameters from the device via
-    :py:obj:`~.cudaGraphKernelNodeSetParam()` and
-    :py:obj:`~.cudaGraphKernelNodeUpdatesApply()`. `paramIndex` must be
-    less than the number of parameters that `kernel` takes. `paramSize` can
-    be set to NULL if only the parameter offset is desired.
-
-    Parameters
-    ----------
-    kernel : :py:obj:`~.CUkernel`
-        The kernel to query
-    paramIndex : size_t
-        The parameter index to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    paramOffset : int
-        Returns the offset into the device-side parameter layout at which
-        the parameter resides
-    paramSize : int
-        Optionally returns the size of the parameter in the device-side
-        parameter layout
-
-    See Also
-    --------
-    :py:obj:`~.cuFuncGetParamInfo`
-    """
-    cdef cydriver.CUkernel cykernel
-    if kernel is None:
-        cykernel = <cydriver.CUkernel><void_ptr>0
-    elif isinstance(kernel, (CUkernel,)):
-        pkernel = int(kernel)
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    else:
-        pkernel = int(CUkernel(kernel))
-        cykernel = <cydriver.CUkernel><void_ptr>pkernel
-    cdef size_t paramOffset = 0
-    cdef size_t paramSize = 0
-    err = cydriver.cuKernelGetParamInfo(cykernel, paramIndex, &paramOffset, &paramSize)
-    return (CUresult(err), paramOffset, paramSize)
-{{endif}}
-
-{{if 'cuMemGetInfo_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemGetInfo():
-    """ Gets free and total memory.
-
-    Returns in `*total` the total amount of memory available to the the
-    current context. Returns in `*free` the amount of memory on the device
-    that is free according to the OS. CUDA is not guaranteed to be able to
-    allocate all of the memory that the OS reports as free. In a multi-
-    tenet situation, free estimate returned is prone to race condition
-    where a new allocation/free done by a different process or a different
-    thread in the same process between the time when free memory was
-    estimated and reported, will result in deviation in free value reported
-    and actual free memory.
-
-    The integrated GPU on Tegra shares memory with CPU and other component
-    of the SoC. The free and total values returned by the API excludes the
-    SWAP memory space maintained by the OS on some platforms. The OS may
-    move some of the memory pages into swap area as the GPU or CPU allocate
-    or access memory. See Tegra app note on how to calculate total and free
-    memory on Tegra.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    free : int
-        Returned free memory in bytes
-    total : int
-        Returned total memory in bytes
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemGetInfo`
-    """
-    cdef size_t free = 0
-    cdef size_t total = 0
-    err = cydriver.cuMemGetInfo(&free, &total)
-    return (CUresult(err), free, total)
-{{endif}}
-
-{{if 'cuMemAlloc_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemAlloc(size_t bytesize):
-    """ Allocates device memory.
-
-    Allocates `bytesize` bytes of linear memory on the device and returns
-    in `*dptr` a pointer to the allocated memory. The allocated memory is
-    suitably aligned for any kind of variable. The memory is not cleared.
-    If `bytesize` is 0, :py:obj:`~.cuMemAlloc()` returns
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE`.
-
-    Parameters
-    ----------
-    bytesize : size_t
-        Requested allocation size in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    dptr : :py:obj:`~.CUdeviceptr`
-        Returned device pointer
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMalloc`
-    """
-    cdef CUdeviceptr dptr = CUdeviceptr()
-    err = cydriver.cuMemAlloc(<cydriver.CUdeviceptr*>dptr._ptr, bytesize)
-    return (CUresult(err), dptr)
-{{endif}}
-
-{{if 'cuMemAllocPitch_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemAllocPitch(size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes):
-    """ Allocates pitched device memory.
-
-    Allocates at least `WidthInBytes` * `Height` bytes of linear memory on
-    the device and returns in `*dptr` a pointer to the allocated memory.
-    The function may pad the allocation to ensure that corresponding
-    pointers in any given row will continue to meet the alignment
-    requirements for coalescing as the address is updated from row to row.
-    `ElementSizeBytes` specifies the size of the largest reads and writes
-    that will be performed on the memory range. `ElementSizeBytes` may be
-    4, 8 or 16 (since coalesced memory transactions are not possible on
-    other data sizes). If `ElementSizeBytes` is smaller than the actual
-    read/write size of a kernel, the kernel will run correctly, but
-    possibly at reduced speed. The pitch returned in `*pPitch` by
-    :py:obj:`~.cuMemAllocPitch()` is the width in bytes of the allocation.
-    The intended usage of pitch is as a separate parameter of the
-    allocation, used to compute addresses within the 2D array. Given the
-    row and column of an array element of type T, the address is computed
-    as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    The pitch returned by :py:obj:`~.cuMemAllocPitch()` is guaranteed to
-    work with :py:obj:`~.cuMemcpy2D()` under all circumstances. For
-    allocations of 2D arrays, it is recommended that programmers consider
-    performing pitch allocations using :py:obj:`~.cuMemAllocPitch()`. Due
-    to alignment restrictions in the hardware, this is especially true if
-    the application will be performing 2D memory copies between different
-    regions of device memory (whether linear memory or CUDA arrays).
-
-    The byte alignment of the pitch returned by
-    :py:obj:`~.cuMemAllocPitch()` is guaranteed to match or exceed the
-    alignment requirement for texture binding with
-    :py:obj:`~.cuTexRefSetAddress2D()`.
-
-    Parameters
-    ----------
-    WidthInBytes : size_t
-        Requested allocation width in bytes
-    Height : size_t
-        Requested allocation height in rows
-    ElementSizeBytes : unsigned int
-        Size of largest reads/writes for range
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    dptr : :py:obj:`~.CUdeviceptr`
-        Returned device pointer
-    pPitch : int
-        Returned pitch of allocation in bytes
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMallocPitch`
-    """
-    cdef CUdeviceptr dptr = CUdeviceptr()
-    cdef size_t pPitch = 0
-    err = cydriver.cuMemAllocPitch(<cydriver.CUdeviceptr*>dptr._ptr, &pPitch, WidthInBytes, Height, ElementSizeBytes)
-    return (CUresult(err), dptr, pPitch)
-{{endif}}
-
-{{if 'cuMemFree_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemFree(dptr):
-    """ Frees device memory.
-
-    Frees the memory space pointed to by `dptr`, which must have been
-    returned by a previous call to one of the following memory allocation
-    APIs - :py:obj:`~.cuMemAlloc()`, :py:obj:`~.cuMemAllocPitch()`,
-    :py:obj:`~.cuMemAllocManaged()`, :py:obj:`~.cuMemAllocAsync()`,
-    :py:obj:`~.cuMemAllocFromPoolAsync()`
-
-    Note - This API will not perform any implict synchronization when the
-    pointer was allocated with :py:obj:`~.cuMemAllocAsync` or
-    :py:obj:`~.cuMemAllocFromPoolAsync`. Callers must ensure that all
-    accesses to these pointer have completed before invoking
-    :py:obj:`~.cuMemFree`. For best performance and memory reuse, users
-    should use :py:obj:`~.cuMemFreeAsync` to free memory allocated via the
-    stream ordered memory allocator. For all other pointers, this API may
-    perform implicit synchronization.
-
-    Parameters
-    ----------
-    dptr : :py:obj:`~.CUdeviceptr`
-        Pointer to memory to free
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemAllocManaged`, :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemAllocFromPoolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaFree`
-    """
-    cdef cydriver.CUdeviceptr cydptr
-    if dptr is None:
-        cydptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dptr, (CUdeviceptr,)):
-        pdptr = int(dptr)
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    else:
-        pdptr = int(CUdeviceptr(dptr))
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    err = cydriver.cuMemFree(cydptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemGetAddressRange_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemGetAddressRange(dptr):
-    """ Get information on memory allocations.
-
-    Returns the base address in `*pbase` and size in `*psize` of the
-    allocation by :py:obj:`~.cuMemAlloc()` or :py:obj:`~.cuMemAllocPitch()`
-    that contains the input pointer `dptr`. Both parameters `pbase` and
-    `psize` are optional. If one of them is NULL, it is ignored.
-
-    Parameters
-    ----------
-    dptr : :py:obj:`~.CUdeviceptr`
-        Device pointer to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pbase : :py:obj:`~.CUdeviceptr`
-        Returned base address
-    psize : int
-        Returned size of device memory allocation
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`
-    """
-    cdef cydriver.CUdeviceptr cydptr
-    if dptr is None:
-        cydptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dptr, (CUdeviceptr,)):
-        pdptr = int(dptr)
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    else:
-        pdptr = int(CUdeviceptr(dptr))
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    cdef CUdeviceptr pbase = CUdeviceptr()
-    cdef size_t psize = 0
-    err = cydriver.cuMemGetAddressRange(<cydriver.CUdeviceptr*>pbase._ptr, &psize, cydptr)
-    return (CUresult(err), pbase, psize)
-{{endif}}
-
-{{if 'cuMemAllocHost_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemAllocHost(size_t bytesize):
-    """ Allocates page-locked host memory.
-
-    Allocates `bytesize` bytes of host memory that is page-locked and
-    accessible to the device. The driver tracks the virtual memory ranges
-    allocated with this function and automatically accelerates calls to
-    functions such as :py:obj:`~.cuMemcpy()`. Since the memory can be
-    accessed directly by the device, it can be read or written with much
-    higher bandwidth than pageable memory obtained with functions such as
-    :py:obj:`~.malloc()`.
-
-    On systems where
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`
-    is true, :py:obj:`~.cuMemAllocHost` may not page-lock the allocated
-    memory.
-
-    Page-locking excessive amounts of memory with
-    :py:obj:`~.cuMemAllocHost()` may degrade system performance, since it
-    reduces the amount of memory available to the system for paging. As a
-    result, this function is best used sparingly to allocate staging areas
-    for data exchange between host and device.
-
-    Note all host memory allocated using :py:obj:`~.cuMemAllocHost()` will
-    automatically be immediately accessible to all contexts on all devices
-    which support unified addressing (as may be queried using
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING`). The device pointer
-    that may be used to access this host memory from those contexts is
-    always equal to the returned host pointer `*pp`. See :py:obj:`~.Unified
-    Addressing` for additional details.
-
-    Parameters
-    ----------
-    bytesize : size_t
-        Requested allocation size in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    pp : Any
-        Returned pointer to host memory
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMallocHost`
-    """
-    cdef void_ptr pp = 0
-    err = cydriver.cuMemAllocHost(<void**>&pp, bytesize)
-    return (CUresult(err), pp)
-{{endif}}
-
-{{if 'cuMemFreeHost' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemFreeHost(p):
-    """ Frees page-locked host memory.
-
-    Frees the memory space pointed to by `p`, which must have been returned
-    by a previous call to :py:obj:`~.cuMemAllocHost()`.
-
-    Parameters
-    ----------
-    p : Any
-        Pointer to memory to free
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaFreeHost`
-    """
-    cyp = utils.HelperInputVoidPtr(p)
-    cdef void* cyp_ptr = <void*><void_ptr>cyp.cptr
-    err = cydriver.cuMemFreeHost(cyp_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemHostAlloc' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemHostAlloc(size_t bytesize, unsigned int Flags):
-    """ Allocates page-locked host memory.
-
-    Allocates `bytesize` bytes of host memory that is page-locked and
-    accessible to the device. The driver tracks the virtual memory ranges
-    allocated with this function and automatically accelerates calls to
-    functions such as :py:obj:`~.cuMemcpyHtoD()`. Since the memory can be
-    accessed directly by the device, it can be read or written with much
-    higher bandwidth than pageable memory obtained with functions such as
-    :py:obj:`~.malloc()`.
-
-    On systems where
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`
-    is true, :py:obj:`~.cuMemHostAlloc` may not page-lock the allocated
-    memory.
-
-    Page-locking excessive amounts of memory may degrade system
-    performance, since it reduces the amount of memory available to the
-    system for paging. As a result, this function is best used sparingly to
-    allocate staging areas for data exchange between host and device.
-
-    The `Flags` parameter enables different options to be specified that
-    affect the allocation, as follows.
-
-    - :py:obj:`~.CU_MEMHOSTALLOC_PORTABLE`: The memory returned by this
-      call will be considered as pinned memory by all CUDA contexts, not
-      just the one that performed the allocation.
-
-    - :py:obj:`~.CU_MEMHOSTALLOC_DEVICEMAP`: Maps the allocation into the
-      CUDA address space. The device pointer to the memory may be obtained
-      by calling :py:obj:`~.cuMemHostGetDevicePointer()`.
-
-    - :py:obj:`~.CU_MEMHOSTALLOC_WRITECOMBINED`: Allocates the memory as
-      write-combined (WC). WC memory can be transferred across the PCI
-      Express bus more quickly on some system configurations, but cannot be
-      read efficiently by most CPUs. WC memory is a good option for buffers
-      that will be written by the CPU and read by the GPU via mapped pinned
-      memory or host->device transfers.
-
-    All of these flags are orthogonal to one another: a developer may
-    allocate memory that is portable, mapped and/or write-combined with no
-    restrictions.
-
-    The :py:obj:`~.CU_MEMHOSTALLOC_DEVICEMAP` flag may be specified on CUDA
-    contexts for devices that do not support mapped pinned memory. The
-    failure is deferred to :py:obj:`~.cuMemHostGetDevicePointer()` because
-    the memory may be mapped into other CUDA contexts via the
-    :py:obj:`~.CU_MEMHOSTALLOC_PORTABLE` flag.
-
-    The memory allocated by this function must be freed with
-    :py:obj:`~.cuMemFreeHost()`.
-
-    Note all host memory allocated using :py:obj:`~.cuMemHostAlloc()` will
-    automatically be immediately accessible to all contexts on all devices
-    which support unified addressing (as may be queried using
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING`). Unless the flag
-    :py:obj:`~.CU_MEMHOSTALLOC_WRITECOMBINED` is specified, the device
-    pointer that may be used to access this host memory from those contexts
-    is always equal to the returned host pointer `*pp`. If the flag
-    :py:obj:`~.CU_MEMHOSTALLOC_WRITECOMBINED` is specified, then the
-    function :py:obj:`~.cuMemHostGetDevicePointer()` must be used to query
-    the device pointer, even if the context supports unified addressing.
-    See :py:obj:`~.Unified Addressing` for additional details.
-
-    Parameters
-    ----------
-    bytesize : size_t
-        Requested allocation size in bytes
-    Flags : unsigned int
-        Flags for allocation request
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    pp : Any
-        Returned pointer to host memory
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaHostAlloc`
-    """
-    cdef void_ptr pp = 0
-    err = cydriver.cuMemHostAlloc(<void**>&pp, bytesize, Flags)
-    return (CUresult(err), pp)
-{{endif}}
-
-{{if 'cuMemHostGetDevicePointer_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemHostGetDevicePointer(p, unsigned int Flags):
-    """ Passes back device pointer of mapped pinned memory.
-
-    Passes back the device pointer `pdptr` corresponding to the mapped,
-    pinned host buffer `p` allocated by :py:obj:`~.cuMemHostAlloc`.
-
-    :py:obj:`~.cuMemHostGetDevicePointer()` will fail if the
-    :py:obj:`~.CU_MEMHOSTALLOC_DEVICEMAP` flag was not specified at the
-    time the memory was allocated, or if the function is called on a GPU
-    that does not support mapped pinned memory.
-
-    For devices that have a non-zero value for the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM`,
-    the memory can also be accessed from the device using the host pointer
-    `p`. The device pointer returned by
-    :py:obj:`~.cuMemHostGetDevicePointer()` may or may not match the
-    original host pointer `p` and depends on the devices visible to the
-    application. If all devices visible to the application have a non-zero
-    value for the device attribute, the device pointer returned by
-    :py:obj:`~.cuMemHostGetDevicePointer()` will match the original pointer
-    `p`. If any device visible to the application has a zero value for the
-    device attribute, the device pointer returned by
-    :py:obj:`~.cuMemHostGetDevicePointer()` will not match the original
-    host pointer `p`, but it will be suitable for use on all devices
-    provided Unified Virtual Addressing is enabled. In such systems, it is
-    valid to access the memory using either pointer on devices that have a
-    non-zero value for the device attribute. Note however that such devices
-    should access the memory using only one of the two pointers and not
-    both.
-
-    `Flags` provides for future releases. For now, it must be set to 0.
-
-    Parameters
-    ----------
-    p : Any
-        Host pointer
-    Flags : unsigned int
-        Options (must be 0)
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pdptr : :py:obj:`~.CUdeviceptr`
-        Returned device pointer
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaHostGetDevicePointer`
-    """
-    cdef CUdeviceptr pdptr = CUdeviceptr()
-    cyp = utils.HelperInputVoidPtr(p)
-    cdef void* cyp_ptr = <void*><void_ptr>cyp.cptr
-    err = cydriver.cuMemHostGetDevicePointer(<cydriver.CUdeviceptr*>pdptr._ptr, cyp_ptr, Flags)
-    return (CUresult(err), pdptr)
-{{endif}}
-
-{{if 'cuMemHostGetFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemHostGetFlags(p):
-    """ Passes back flags that were used for a pinned allocation.
-
-    Passes back the flags `pFlags` that were specified when allocating the
-    pinned host buffer `p` allocated by :py:obj:`~.cuMemHostAlloc`.
-
-    :py:obj:`~.cuMemHostGetFlags()` will fail if the pointer does not
-    reside in an allocation performed by :py:obj:`~.cuMemAllocHost()` or
-    :py:obj:`~.cuMemHostAlloc()`.
-
-    Parameters
-    ----------
-    p : Any
-        Host pointer
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pFlags : unsigned int
-        Returned flags word
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cudaHostGetFlags`
-    """
-    cdef unsigned int pFlags = 0
-    cyp = utils.HelperInputVoidPtr(p)
-    cdef void* cyp_ptr = <void*><void_ptr>cyp.cptr
-    err = cydriver.cuMemHostGetFlags(&pFlags, cyp_ptr)
-    return (CUresult(err), pFlags)
-{{endif}}
-
-{{if 'cuMemAllocManaged' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemAllocManaged(size_t bytesize, unsigned int flags):
-    """ Allocates memory that will be automatically managed by the Unified Memory system.
-
-    Allocates `bytesize` bytes of managed memory on the device and returns
-    in `*dptr` a pointer to the allocated memory. If the device doesn't
-    support allocating managed memory, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    is returned. Support for managed memory can be queried using the device
-    attribute :py:obj:`~.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY`. The allocated
-    memory is suitably aligned for any kind of variable. The memory is not
-    cleared. If `bytesize` is 0, :py:obj:`~.cuMemAllocManaged` returns
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE`. The pointer is valid on the CPU
-    and on all GPUs in the system that support managed memory. All accesses
-    to this pointer must obey the Unified Memory programming model.
-
-    `flags` specifies the default stream association for this allocation.
-    `flags` must be one of :py:obj:`~.CU_MEM_ATTACH_GLOBAL` or
-    :py:obj:`~.CU_MEM_ATTACH_HOST`. If :py:obj:`~.CU_MEM_ATTACH_GLOBAL` is
-    specified, then this memory is accessible from any stream on any
-    device. If :py:obj:`~.CU_MEM_ATTACH_HOST` is specified, then the
-    allocation should not be accessed from devices that have a zero value
-    for the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`; an explicit
-    call to :py:obj:`~.cuStreamAttachMemAsync` will be required to enable
-    access on such devices.
-
-    If the association is later changed via
-    :py:obj:`~.cuStreamAttachMemAsync` to a single stream, the default
-    association as specified during :py:obj:`~.cuMemAllocManaged` is
-    restored when that stream is destroyed. For managed variables, the
-    default association is always :py:obj:`~.CU_MEM_ATTACH_GLOBAL`. Note
-    that destroying a stream is an asynchronous operation, and as a result,
-    the change to default association won't happen until all work in the
-    stream has completed.
-
-    Memory allocated with :py:obj:`~.cuMemAllocManaged` should be released
-    with :py:obj:`~.cuMemFree`.
-
-    Device memory oversubscription is possible for GPUs that have a non-
-    zero value for the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`. Managed
-    memory on such GPUs may be evicted from device memory to host memory at
-    any time by the Unified Memory driver in order to make room for other
-    allocations.
-
-    In a system where all GPUs have a non-zero value for the device
-    attribute :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`,
-    managed memory may not be populated when this API returns and instead
-    may be populated on access. In such systems, managed memory can migrate
-    to any processor's memory at any time. The Unified Memory driver will
-    employ heuristics to maintain data locality and prevent excessive page
-    faults to the extent possible. The application can also guide the
-    driver about memory usage patterns via :py:obj:`~.cuMemAdvise`. The
-    application can also explicitly migrate memory to a desired processor's
-    memory via :py:obj:`~.cuMemPrefetchAsync`.
-
-    In a multi-GPU system where all of the GPUs have a zero value for the
-    device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS` and all the
-    GPUs have peer-to-peer support with each other, the physical storage
-    for managed memory is created on the GPU which is active at the time
-    :py:obj:`~.cuMemAllocManaged` is called. All other GPUs will reference
-    the data at reduced bandwidth via peer mappings over the PCIe bus. The
-    Unified Memory driver does not migrate memory among such GPUs.
-
-    In a multi-GPU system where not all GPUs have peer-to-peer support with
-    each other and where the value of the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS` is zero for
-    at least one of those GPUs, the location chosen for physical storage of
-    managed memory is system-dependent.
-
-    - On Linux, the location chosen will be device memory as long as the
-      current set of active contexts are on devices that either have peer-
-      to-peer support with each other or have a non-zero value for the
-      device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`. If there
-      is an active context on a GPU that does not have a non-zero value for
-      that device attribute and it does not have peer-to-peer support with
-      the other devices that have active contexts on them, then the
-      location for physical storage will be 'zero-copy' or host memory.
-      Note that this means that managed memory that is located in device
-      memory is migrated to host memory if a new context is created on a
-      GPU that doesn't have a non-zero value for the device attribute and
-      does not support peer-to-peer with at least one of the other devices
-      that has an active context. This in turn implies that context
-      creation may fail if there is insufficient host memory to migrate all
-      managed allocations.
-
-    - On Windows, the physical storage is always created in 'zero-copy' or
-      host memory. All GPUs will reference the data at reduced bandwidth
-      over the PCIe bus. In these circumstances, use of the environment
-      variable CUDA_VISIBLE_DEVICES is recommended to restrict CUDA to only
-      use those GPUs that have peer-to-peer support. Alternatively, users
-      can also set CUDA_MANAGED_FORCE_DEVICE_ALLOC to a non-zero value to
-      force the driver to always use device memory for physical storage.
-      When this environment variable is set to a non-zero value, all
-      contexts created in that process on devices that support managed
-      memory have to be peer-to-peer compatible with each other. Context
-      creation will fail if a context is created on a device that supports
-      managed memory and is not peer-to-peer compatible with any of the
-      other managed memory supporting devices on which contexts were
-      previously created, even if those contexts have been destroyed. These
-      environment variables are described in the CUDA programming guide
-      under the "CUDA environment variables" section.
-
-    - On ARM, managed memory is not available on discrete gpu with Drive
-      PX-2.
-
-    Parameters
-    ----------
-    bytesize : size_t
-        Requested allocation size in bytes
-    flags : unsigned int
-        Must be one of :py:obj:`~.CU_MEM_ATTACH_GLOBAL` or
-        :py:obj:`~.CU_MEM_ATTACH_HOST`
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    dptr : :py:obj:`~.CUdeviceptr`
-        Returned device pointer
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuStreamAttachMemAsync`, :py:obj:`~.cudaMallocManaged`
-    """
-    cdef CUdeviceptr dptr = CUdeviceptr()
-    err = cydriver.cuMemAllocManaged(<cydriver.CUdeviceptr*>dptr._ptr, bytesize, flags)
-    return (CUresult(err), dptr)
-{{endif}}
-
-{{if 'cuDeviceRegisterAsyncNotification' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceRegisterAsyncNotification(device, callbackFunc, userData):
-    """ Registers a callback function to receive async notifications.
-
-    Registers `callbackFunc` to receive async notifications.
-
-    When a notification is delivered, the callback function is handed
-    `userData` together with the `callback` handle; the handle lets it
-    distinguish between multiple registered callbacks.
-
-    The registered callback function should be designed to return quickly
-    (~10ms). Any long running tasks should be queued for execution on an
-    application thread.
-
-    Callbacks may not call cuDeviceRegisterAsyncNotification or
-    cuDeviceUnregisterAsyncNotification. Doing so will result in
-    :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`. Async notification callbacks
-    execute in an undefined order and may be serialized.
-
-    A handle representing the registered callback instance is returned as
-    `callback`.
-
-    Parameters
-    ----------
-    device : :py:obj:`~.CUdevice`
-        The device on which to register the callback
-    callbackFunc : :py:obj:`~.CUasyncCallback`
-        The function to register as a callback
-    userData : Any
-        A generic pointer to user data. This is passed into the callback
-        function.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    callback : :py:obj:`~.CUasyncCallbackHandle`
-        A handle representing the registered callback instance
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceUnregisterAsyncNotification`
-    """
-    # Each handle-like argument is first reduced to a plain integer: None
-    # becomes 0, an existing wrapper is read directly, and anything else is
-    # passed through the wrapper's constructor before being read.
-    if callbackFunc is None:
-        callback_addr = 0
-    elif isinstance(callbackFunc, (CUasyncCallback,)):
-        callback_addr = int(callbackFunc)
-    else:
-        callback_addr = int(CUasyncCallback(callbackFunc))
-    cdef cydriver.CUasyncCallback cycallbackFunc = <cydriver.CUasyncCallback><void_ptr>callback_addr
-
-    if device is None:
-        device_value = 0
-    elif isinstance(device, (CUdevice,)):
-        device_value = int(device)
-    else:
-        device_value = int(CUdevice(device))
-    cdef cydriver.CUdevice cydevice = <cydriver.CUdevice>device_value
-
-    # The helper exposes the address to hand to the driver as `.cptr`.
-    user_data_helper = utils.HelperInputVoidPtr(userData)
-    cdef void* cyuserData_ptr = <void*><void_ptr>user_data_helper.cptr
-
-    # The driver writes the new registration handle into this wrapper.
-    cdef CUasyncCallbackHandle callback = CUasyncCallbackHandle()
-    status = cydriver.cuDeviceRegisterAsyncNotification(
-        cydevice,
-        cycallbackFunc,
-        cyuserData_ptr,
-        <cydriver.CUasyncCallbackHandle*>callback._ptr,
-    )
-    return (CUresult(status), callback)
-{{endif}}
-
-{{if 'cuDeviceUnregisterAsyncNotification' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceUnregisterAsyncNotification(device, callback):
-    """ Unregisters an async notification callback.
-
-    Unregisters `callback`, so the callback function it corresponds to no
-    longer receives async notifications.
-
-    Parameters
-    ----------
-    device : :py:obj:`~.CUdevice`
-        The device from which to remove `callback`.
-    callback : :py:obj:`~.CUasyncCallbackHandle`
-        The callback instance to unregister from receiving async
-        notifications.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceRegisterAsyncNotification`
-    """
-    # None maps to 0; a non-wrapper value is passed through the wrapper's
-    # constructor before its integer value is read.
-    if callback is None:
-        callback_value = 0
-    elif isinstance(callback, (CUasyncCallbackHandle,)):
-        callback_value = int(callback)
-    else:
-        callback_value = int(CUasyncCallbackHandle(callback))
-    cdef cydriver.CUasyncCallbackHandle cycallback = <cydriver.CUasyncCallbackHandle><void_ptr>callback_value
-
-    if device is None:
-        device_value = 0
-    elif isinstance(device, (CUdevice,)):
-        device_value = int(device)
-    else:
-        device_value = int(CUdevice(device))
-    cdef cydriver.CUdevice cydevice = <cydriver.CUdevice>device_value
-
-    status = cydriver.cuDeviceUnregisterAsyncNotification(cydevice, cycallback)
-    return (CUresult(status),)
-{{endif}}
-
-{{if 'cuDeviceGetByPCIBusId' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetByPCIBusId(char* pciBusId):
-    """ Returns a handle to a compute device.
-
-    Looks up the device identified by a PCI bus ID string and returns its
-    handle as `dev`.
-
-    Parameters
-    ----------
-    pciBusId : bytes
-        String in one of the following forms:
-        `[domain]:[bus]:[device].[function]`, `[domain]:[bus]:[device]`
-        or `[bus]:[device].[function]`, where `domain`, `bus`, `device`,
-        and `function` are all hexadecimal values
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    dev : :py:obj:`~.CUdevice`
-        Returned device handle
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetPCIBusId`, :py:obj:`~.cudaDeviceGetByPCIBusId`
-    """
-    # The driver writes the resolved handle into this wrapper's storage.
-    cdef CUdevice found_device = CUdevice()
-    status = cydriver.cuDeviceGetByPCIBusId(<cydriver.CUdevice*>found_device._ptr, pciBusId)
-    return (CUresult(status), found_device)
-{{endif}}
-
-{{if 'cuDeviceGetPCIBusId' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetPCIBusId(int length, dev):
-    """ Returns a PCI Bus Id string for the device.
-
-    Returns an ASCII string identifying the device `dev` as a NULL-
-    terminated string in `pciBusId`. `length` specifies the maximum length
-    of the string that may be returned.
-
-    The identifier has the form `[domain]:[bus]:[device].[function]`,
-    where `domain`, `bus`, `device`, and `function` are all hexadecimal
-    values. `length` should be large enough to store 13 characters
-    including the NULL-terminator.
-
-    Parameters
-    ----------
-    length : int
-        Maximum length of string to store in `pciBusId`
-    dev : :py:obj:`~.CUdevice`
-        Device to get identifier string for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    pciBusId : bytes
-        Returned identifier string for the device. The object always
-        holds `length` bytes; any byte the driver does not write keeps
-        its initial space (``b" "``) value.
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetByPCIBusId`, :py:obj:`~.cudaDeviceGetPCIBusId`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    # Hand the driver a private, writable buffer. Writing through a char*
-    # into a bytes object mutates an immutable object, and for a length of
-    # 0 or 1 CPython may return a shared cached bytes instance. The space
-    # padding is kept so callers receive the same bytes as before.
-    pci_buffer = bytearray(b" " * length)
-    cdef char* pciBusId = pci_buffer
-    err = cydriver.cuDeviceGetPCIBusId(pciBusId, length, cydev)
-    # Snapshot the buffer so the returned value is still an immutable bytes.
-    return (CUresult(err), bytes(pci_buffer))
-{{endif}}
-
-{{if 'cuIpcGetEventHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cuIpcGetEventHandle(event):
-    """ Gets an interprocess handle for a previously allocated event.
-
-    Takes as input a previously allocated event, which must have been
-    created with the :py:obj:`~.CU_EVENT_INTERPROCESS` and
-    :py:obj:`~.CU_EVENT_DISABLE_TIMING` flags set. The resulting opaque
-    handle may be copied into other processes and opened with
-    :py:obj:`~.cuIpcOpenEventHandle` to allow efficient hardware
-    synchronization between GPU work in different processes.
-
-    After the event has been opened in the importing process,
-    :py:obj:`~.cuEventRecord`, :py:obj:`~.cuEventSynchronize`,
-    :py:obj:`~.cuStreamWaitEvent` and :py:obj:`~.cuEventQuery` may be used
-    in either process. Performing operations on the imported event after
-    the exported event has been freed with :py:obj:`~.cuEventDestroy` will
-    result in undefined behavior.
-
-    IPC functionality is restricted to devices with support for unified
-    addressing on Linux and Windows operating systems. IPC functionality on
-    Windows is supported for compatibility purposes but not recommended as
-    it comes with performance cost. Users can test their device for IPC
-    functionality by calling :py:obj:`~.cuDeviceGetAttribute` with
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED`
-
-    Parameters
-    ----------
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event allocated with :py:obj:`~.CU_EVENT_INTERPROCESS` and
-        :py:obj:`~.CU_EVENT_DISABLE_TIMING` flags.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_MAP_FAILED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pHandle : :py:obj:`~.CUipcEventHandle`
-        The opaque event handle
-
-    See Also
-    --------
-    :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuIpcOpenEventHandle`, :py:obj:`~.cuIpcGetMemHandle`, :py:obj:`~.cuIpcOpenMemHandle`, :py:obj:`~.cuIpcCloseMemHandle`, :py:obj:`~.cudaIpcGetEventHandle`
-    """
-    # None maps to 0; a non-CUevent value is passed through the CUevent
-    # constructor before its integer value is read.
-    if event is None:
-        event_value = 0
-    elif isinstance(event, (CUevent,)):
-        event_value = int(event)
-    else:
-        event_value = int(CUevent(event))
-    cdef cydriver.CUevent cyevent = <cydriver.CUevent><void_ptr>event_value
-
-    # The driver fills this wrapper with the exported handle.
-    cdef CUipcEventHandle pHandle = CUipcEventHandle()
-    status = cydriver.cuIpcGetEventHandle(<cydriver.CUipcEventHandle*>pHandle._ptr, cyevent)
-    return (CUresult(status), pHandle)
-{{endif}}
-
-{{if 'cuIpcOpenEventHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cuIpcOpenEventHandle(handle not None : CUipcEventHandle):
-    """ Opens an interprocess event handle for use in the current process.
-
-    Opens an interprocess event handle that another process exported with
-    :py:obj:`~.cuIpcGetEventHandle`. The returned :py:obj:`~.CUevent`
-    behaves like a locally created event with the
-    :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag specified, and must be freed
-    with :py:obj:`~.cuEventDestroy`.
-
-    Performing operations on the imported event after the exported event
-    has been freed with :py:obj:`~.cuEventDestroy` will result in undefined
-    behavior.
-
-    IPC functionality is restricted to devices with support for unified
-    addressing on Linux and Windows operating systems. IPC functionality on
-    Windows is supported for compatibility purposes but not recommended as
-    it comes with performance cost. Users can test their device for IPC
-    functionality by calling :py:obj:`~.cuDeviceGetAttribute` with
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED`
-
-    Parameters
-    ----------
-    handle : :py:obj:`~.CUipcEventHandle`
-        Interprocess handle to open
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_MAP_FAILED`, :py:obj:`~.CUDA_ERROR_PEER_ACCESS_UNSUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phEvent : :py:obj:`~.CUevent`
-        Returns the imported event
-
-    See Also
-    --------
-    :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuIpcGetEventHandle`, :py:obj:`~.cuIpcGetMemHandle`, :py:obj:`~.cuIpcOpenMemHandle`, :py:obj:`~.cuIpcCloseMemHandle`, :py:obj:`~.cudaIpcOpenEventHandle`
-    """
-    # The IPC handle is passed by value; the driver writes the imported
-    # event into the fresh CUevent wrapper.
-    cdef CUevent imported_event = CUevent()
-    status = cydriver.cuIpcOpenEventHandle(
-        <cydriver.CUevent*>imported_event._ptr,
-        handle._ptr[0],
-    )
-    return (CUresult(status), imported_event)
-{{endif}}
-
-{{if 'cuIpcGetMemHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cuIpcGetMemHandle(dptr):
-    """ Gets an interprocess memory handle for an existing device memory allocation.
-
-    Takes a pointer to the base of an existing device memory allocation
-    created with :py:obj:`~.cuMemAlloc` and exports it for use in another
-    process. This is a lightweight operation and may be called multiple
-    times on an allocation without adverse effects.
-
-    If a region of memory is freed with :py:obj:`~.cuMemFree` and a later
-    call to :py:obj:`~.cuMemAlloc` returns memory with the same device
-    address, :py:obj:`~.cuIpcGetMemHandle` will return a unique handle for
-    the new memory.
-
-    IPC functionality is restricted to devices with support for unified
-    addressing on Linux and Windows operating systems. IPC functionality on
-    Windows is supported for compatibility purposes but not recommended as
-    it comes with performance cost. Users can test their device for IPC
-    functionality by calling :py:obj:`~.cuDeviceGetAttribute` with
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED`
-
-    Parameters
-    ----------
-    dptr : :py:obj:`~.CUdeviceptr`
-        Base pointer to previously allocated device memory
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_MAP_FAILED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pHandle : :py:obj:`~.CUipcMemHandle`
-        The :py:obj:`~.CUipcMemHandle` exported for the allocation
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuIpcGetEventHandle`, :py:obj:`~.cuIpcOpenEventHandle`, :py:obj:`~.cuIpcOpenMemHandle`, :py:obj:`~.cuIpcCloseMemHandle`, :py:obj:`~.cudaIpcGetMemHandle`
-    """
-    # None maps to 0; a non-CUdeviceptr value is passed through the
-    # CUdeviceptr constructor before its integer value is read.
-    if dptr is None:
-        dptr_value = 0
-    elif isinstance(dptr, (CUdeviceptr,)):
-        dptr_value = int(dptr)
-    else:
-        dptr_value = int(CUdeviceptr(dptr))
-    cdef cydriver.CUdeviceptr cydptr = <cydriver.CUdeviceptr><void_ptr>dptr_value
-
-    # The driver fills this wrapper with the exported handle.
-    cdef CUipcMemHandle pHandle = CUipcMemHandle()
-    status = cydriver.cuIpcGetMemHandle(<cydriver.CUipcMemHandle*>pHandle._ptr, cydptr)
-    return (CUresult(status), pHandle)
-{{endif}}
-
-{{if 'cuIpcOpenMemHandle_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuIpcOpenMemHandle(handle not None : CUipcMemHandle, unsigned int Flags):
-    """ Opens an interprocess memory handle exported from another process and returns a device pointer usable in the local process.
-
-    Maps memory exported from another process with
-    :py:obj:`~.cuIpcGetMemHandle` into the current device address space.
-    For contexts on different devices :py:obj:`~.cuIpcOpenMemHandle` can
-    attempt to enable peer access between the devices as if the user called
-    :py:obj:`~.cuCtxEnablePeerAccess`. This behavior is controlled by the
-    :py:obj:`~.CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS` flag.
-    :py:obj:`~.cuDeviceCanAccessPeer` can determine if a mapping is
-    possible.
-
-    Contexts that may open :py:obj:`~.CUipcMemHandles` are restricted in
-    the following way. :py:obj:`~.CUipcMemHandles` from each
-    :py:obj:`~.CUdevice` in a given process may only be opened by one
-    :py:obj:`~.CUcontext` per :py:obj:`~.CUdevice` per other process.
-
-    If the current context has already opened the memory handle, the
-    reference count on the handle is incremented by 1 and the existing
-    device pointer is returned.
-
-    Memory returned from :py:obj:`~.cuIpcOpenMemHandle` must be freed with
-    :py:obj:`~.cuIpcCloseMemHandle`.
-
-    Calling :py:obj:`~.cuMemFree` on an exported memory region before
-    calling :py:obj:`~.cuIpcCloseMemHandle` in the importing context will
-    result in undefined behavior.
-
-    IPC functionality is restricted to devices with support for unified
-    addressing on Linux and Windows operating systems. IPC functionality on
-    Windows is supported for compatibility purposes but not recommended as
-    it comes with performance cost. Users can test their device for IPC
-    functionality by calling :py:obj:`~.cuDeviceGetAttribute` with
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED`
-
-    Parameters
-    ----------
-    handle : :py:obj:`~.CUipcMemHandle`
-        :py:obj:`~.CUipcMemHandle` to open
-    Flags : unsigned int
-        Flags for this operation. Must be specified as
-        :py:obj:`~.CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS`
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_MAP_FAILED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_TOO_MANY_PEERS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pdptr : :py:obj:`~.CUdeviceptr`
-        Returned device pointer
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuIpcGetEventHandle`, :py:obj:`~.cuIpcOpenEventHandle`, :py:obj:`~.cuIpcGetMemHandle`, :py:obj:`~.cuIpcCloseMemHandle`, :py:obj:`~.cuCtxEnablePeerAccess`, :py:obj:`~.cuDeviceCanAccessPeer`, :py:obj:`~.cudaIpcOpenMemHandle`
-
-    Notes
-    -----
-    No guarantees are made about the address returned in `*pdptr`. In particular, multiple processes may not receive the same address for the same `handle`.
-    """
-    # The IPC handle is passed by value; the driver writes the mapped
-    # address into the fresh CUdeviceptr wrapper.
-    cdef CUdeviceptr mapped_ptr = CUdeviceptr()
-    status = cydriver.cuIpcOpenMemHandle(
-        <cydriver.CUdeviceptr*>mapped_ptr._ptr,
-        handle._ptr[0],
-        Flags,
-    )
-    return (CUresult(status), mapped_ptr)
-{{endif}}
-
-{{if 'cuIpcCloseMemHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cuIpcCloseMemHandle(dptr):
-    """ Attempts to close memory mapped with :py:obj:`~.cuIpcOpenMemHandle`.
-
-    Decrements the reference count of the memory returned by
-    :py:obj:`~.cuIpcOpenMemHandle` by 1. When the reference count reaches
-    0, this API unmaps the memory. Neither the original allocation in the
-    exporting process nor imported mappings in other processes are
-    affected.
-
-    Any resources used to enable peer access will be freed if this is the
-    last mapping using them.
-
-    IPC functionality is restricted to devices with support for unified
-    addressing on Linux and Windows operating systems. IPC functionality on
-    Windows is supported for compatibility purposes but not recommended as
-    it comes with performance cost. Users can test their device for IPC
-    functionality by calling :py:obj:`~.cuDeviceGetAttribute` with
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED`
-
-    Parameters
-    ----------
-    dptr : :py:obj:`~.CUdeviceptr`
-        Device pointer returned by :py:obj:`~.cuIpcOpenMemHandle`
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_MAP_FAILED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuIpcGetEventHandle`, :py:obj:`~.cuIpcOpenEventHandle`, :py:obj:`~.cuIpcGetMemHandle`, :py:obj:`~.cuIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`
-    """
-    # None maps to 0; a non-CUdeviceptr value is passed through the
-    # CUdeviceptr constructor before its integer value is read.
-    if dptr is None:
-        dptr_value = 0
-    elif isinstance(dptr, (CUdeviceptr,)):
-        dptr_value = int(dptr)
-    else:
-        dptr_value = int(CUdeviceptr(dptr))
-    cdef cydriver.CUdeviceptr cydptr = <cydriver.CUdeviceptr><void_ptr>dptr_value
-
-    status = cydriver.cuIpcCloseMemHandle(cydptr)
-    return (CUresult(status),)
-{{endif}}
-
-{{if 'cuMemHostRegister_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemHostRegister(p, size_t bytesize, unsigned int Flags):
-    """ Registers an existing host memory range for use by CUDA.
-
-    Page-locks the memory range specified by `p` and `bytesize` and maps it
-    for the device(s) as specified by `Flags`. This memory range also is
-    added to the same tracking mechanism as :py:obj:`~.cuMemHostAlloc` to
-    automatically accelerate calls to functions such as
-    :py:obj:`~.cuMemcpyHtoD()`. Since the memory can be accessed directly
-    by the device, it can be read or written with much higher bandwidth
-    than pageable memory that has not been registered. Page-locking
-    excessive amounts of memory may degrade system performance, since it
-    reduces the amount of memory available to the system for paging. As a
-    result, this function is best used sparingly to register staging areas
-    for data exchange between host and device.
-
-    On systems where
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`
-    is true, :py:obj:`~.cuMemHostRegister` will not page-lock the memory
-    range specified by `p` but only populate unpopulated pages.
-
-    The `Flags` parameter enables different options to be specified that
-    affect the allocation, as follows.
-
-    - :py:obj:`~.CU_MEMHOSTREGISTER_PORTABLE`: The memory returned by this
-      call will be considered as pinned memory by all CUDA contexts, not
-      just the one that performed the allocation.
-
-    - :py:obj:`~.CU_MEMHOSTREGISTER_DEVICEMAP`: Maps the allocation into
-      the CUDA address space. The device pointer to the memory may be
-      obtained by calling :py:obj:`~.cuMemHostGetDevicePointer()`.
-
-    - :py:obj:`~.CU_MEMHOSTREGISTER_IOMEMORY`: The pointer is treated as
-      pointing to some I/O memory space, e.g. the PCI Express resource of a
-      3rd party device.
-
-    - :py:obj:`~.CU_MEMHOSTREGISTER_READ_ONLY`: The pointer is treated as
-      pointing to memory that is considered read-only by the device. On
-      platforms without
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`,
-      this flag is required in order to register memory mapped to the CPU
-      as read-only. Support for the use of this flag can be queried from
-      the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED`.
-      Using this flag with a current context associated with a device that
-      does not have this attribute set will cause
-      :py:obj:`~.cuMemHostRegister` to error with CUDA_ERROR_NOT_SUPPORTED.
-
-    All of these flags are orthogonal to one another: a developer may page-
-    lock memory that is portable or mapped with no restrictions.
-
-    The :py:obj:`~.CU_MEMHOSTREGISTER_DEVICEMAP` flag may be specified on
-    CUDA contexts for devices that do not support mapped pinned memory. The
-    failure is deferred to :py:obj:`~.cuMemHostGetDevicePointer()` because
-    the memory may be mapped into other CUDA contexts via the
-    :py:obj:`~.CU_MEMHOSTREGISTER_PORTABLE` flag.
-
-    For devices that have a non-zero value for the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM`,
-    the memory can also be accessed from the device using the host pointer
-    `p`. The device pointer returned by
-    :py:obj:`~.cuMemHostGetDevicePointer()` may or may not match the
-    original host pointer `p` and depends on the devices visible to the
-    application. If all devices visible to the application have a non-zero
-    value for the device attribute, the device pointer returned by
-    :py:obj:`~.cuMemHostGetDevicePointer()` will match the original pointer
-    `p`. If any device visible to the application has a zero value for
-    the device attribute, the device pointer returned by
-    :py:obj:`~.cuMemHostGetDevicePointer()` will not match the original
-    host pointer `p`, but it will be suitable for use on all devices
-    provided Unified Virtual Addressing is enabled. In such systems, it is
-    valid to access the memory using either pointer on devices that have a
-    non-zero value for the device attribute. Note however that such devices
-    should access the memory using only one of the two pointers and not
-    both.
-
-    The memory page-locked by this function must be unregistered with
-    :py:obj:`~.cuMemHostUnregister()`.
-
-    Parameters
-    ----------
-    p : Any
-        Host pointer to memory to page-lock
-    bytesize : size_t
-        Size in bytes of the address range to page-lock
-    Flags : unsigned int
-        Flags for allocation request
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemHostUnregister`, :py:obj:`~.cuMemHostGetFlags`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cudaHostRegister`
-    """
-    # The helper exposes the address to hand to the driver as `.cptr`.
-    host_ptr_helper = utils.HelperInputVoidPtr(p)
-    cdef void* host_ptr = <void*><void_ptr>host_ptr_helper.cptr
-    status = cydriver.cuMemHostRegister(host_ptr, bytesize, Flags)
-    return (CUresult(status),)
-{{endif}}
-
-{{if 'cuMemHostUnregister' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemHostUnregister(p):
-    """ Unregisters a memory range that was registered with cuMemHostRegister.
-
-    Unmaps the memory range whose base address is specified by `p`, and
-    makes it pageable again.
-
-    `p` must be the same base address that was passed to
-    :py:obj:`~.cuMemHostRegister()`.
-
-    Parameters
-    ----------
-    p : Any
-        Host pointer to memory to unregister
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cudaHostUnregister`
-    """
-    # The helper exposes the address to hand to the driver as `.cptr`.
-    host_ptr_helper = utils.HelperInputVoidPtr(p)
-    cdef void* host_ptr = <void*><void_ptr>host_ptr_helper.cptr
-    status = cydriver.cuMemHostUnregister(host_ptr)
-    return (CUresult(status),)
-{{endif}}
-
-{{if 'cuMemcpy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpy(dst, src, size_t ByteCount):
-    """ Copies memory.
-
-    Copies data between two pointers. `dst` and `src` are the base
-    pointers of the destination and source, respectively, and `ByteCount`
-    is the number of bytes to copy. The type of the transfer (host to
-    host, host to device, device to device, or device to host) is inferred
-    from the pointer values. This function is only allowed in contexts
-    which support unified addressing.
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.CUdeviceptr`
-        Destination unified virtual address space pointer
-    src : :py:obj:`~.CUdeviceptr`
-        Source unified virtual address space pointer
-    ByteCount : size_t
-        Size of memory copy in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`
-    """
-    # Both pointers are reduced to plain integers the same way: None maps
-    # to 0, and a non-CUdeviceptr value is passed through the CUdeviceptr
-    # constructor before its integer value is read.
-    if src is None:
-        src_value = 0
-    elif isinstance(src, (CUdeviceptr,)):
-        src_value = int(src)
-    else:
-        src_value = int(CUdeviceptr(src))
-    cdef cydriver.CUdeviceptr cysrc = <cydriver.CUdeviceptr><void_ptr>src_value
-
-    if dst is None:
-        dst_value = 0
-    elif isinstance(dst, (CUdeviceptr,)):
-        dst_value = int(dst)
-    else:
-        dst_value = int(CUdeviceptr(dst))
-    cdef cydriver.CUdeviceptr cydst = <cydriver.CUdeviceptr><void_ptr>dst_value
-
-    status = cydriver.cuMemcpy(cydst, cysrc, ByteCount)
-    return (CUresult(status),)
-{{endif}}
-
-{{if 'cuMemcpyPeer' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyPeer(dstDevice, dstContext, srcDevice, srcContext, size_t ByteCount):
-    """ Copies device memory between two contexts.
-
-    Copies from device memory in one context to device memory in another
-    context. `dstDevice` is the base device pointer of the destination
-    memory and `dstContext` is the destination context. `srcDevice` is the
-    base device pointer of the source memory and `srcContext` is the source
-    pointer. `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    dstContext : :py:obj:`~.CUcontext`
-        Destination context
-    srcDevice : :py:obj:`~.CUdeviceptr`
-        Source device pointer
-    srcContext : :py:obj:`~.CUcontext`
-        Source context
-    ByteCount : size_t
-        Size of memory copy in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpy3DPeer`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyPeerAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cudaMemcpyPeer`
-    """
-    cdef cydriver.CUcontext cysrcContext
-    if srcContext is None:
-        cysrcContext = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(srcContext, (CUcontext,)):
-        psrcContext = int(srcContext)
-        cysrcContext = <cydriver.CUcontext><void_ptr>psrcContext
-    else:
-        psrcContext = int(CUcontext(srcContext))
-        cysrcContext = <cydriver.CUcontext><void_ptr>psrcContext
-    cdef cydriver.CUdeviceptr cysrcDevice
-    if srcDevice is None:
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(srcDevice, (CUdeviceptr,)):
-        psrcDevice = int(srcDevice)
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    else:
-        psrcDevice = int(CUdeviceptr(srcDevice))
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    cdef cydriver.CUcontext cydstContext
-    if dstContext is None:
-        cydstContext = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(dstContext, (CUcontext,)):
-        pdstContext = int(dstContext)
-        cydstContext = <cydriver.CUcontext><void_ptr>pdstContext
-    else:
-        pdstContext = int(CUcontext(dstContext))
-        cydstContext = <cydriver.CUcontext><void_ptr>pdstContext
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemcpyPeer(cydstDevice, cydstContext, cysrcDevice, cysrcContext, ByteCount)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyHtoD_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyHtoD(dstDevice, srcHost, size_t ByteCount):
-    """ Copies memory from Host to Device.
-
-    Copies from host memory to device memory. `dstDevice` and `srcHost` are
-    the base addresses of the destination and source, respectively.
-    `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    srcHost : Any
-        Source host pointer
-    ByteCount : size_t
-        Size of memory copy in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyToSymbol`
-    """
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    cysrcHost = utils.HelperInputVoidPtr(srcHost)
-    cdef void* cysrcHost_ptr = <void*><void_ptr>cysrcHost.cptr
-    err = cydriver.cuMemcpyHtoD(cydstDevice, cysrcHost_ptr, ByteCount)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyDtoH_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyDtoH(dstHost, srcDevice, size_t ByteCount):
-    """ Copies memory from Device to Host.
-
-    Copies from device to host memory. `dstHost` and `srcDevice` specify
-    the base pointers of the destination and source, respectively.
-    `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstHost : Any
-        Destination host pointer
-    srcDevice : :py:obj:`~.CUdeviceptr`
-        Source device pointer
-    ByteCount : size_t
-        Size of memory copy in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyFromSymbol`
-    """
-    cdef cydriver.CUdeviceptr cysrcDevice
-    if srcDevice is None:
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(srcDevice, (CUdeviceptr,)):
-        psrcDevice = int(srcDevice)
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    else:
-        psrcDevice = int(CUdeviceptr(srcDevice))
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    cydstHost = utils.HelperInputVoidPtr(dstHost)
-    cdef void* cydstHost_ptr = <void*><void_ptr>cydstHost.cptr
-    err = cydriver.cuMemcpyDtoH(cydstHost_ptr, cysrcDevice, ByteCount)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyDtoD_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyDtoD(dstDevice, srcDevice, size_t ByteCount):
-    """ Copies memory from Device to Device.
-
-    Copies from device memory to device memory. `dstDevice` and `srcDevice`
-    are the base pointers of the destination and source, respectively.
-    `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    srcDevice : :py:obj:`~.CUdeviceptr`
-        Source device pointer
-    ByteCount : size_t
-        Size of memory copy in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`
-    """
-    cdef cydriver.CUdeviceptr cysrcDevice
-    if srcDevice is None:
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(srcDevice, (CUdeviceptr,)):
-        psrcDevice = int(srcDevice)
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    else:
-        psrcDevice = int(CUdeviceptr(srcDevice))
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemcpyDtoD(cydstDevice, cysrcDevice, ByteCount)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyDtoA_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyDtoA(dstArray, size_t dstOffset, srcDevice, size_t ByteCount):
-    """ Copies memory from Device to Array.
-
-    Copies from device memory to a 1D CUDA array. `dstArray` and
-    `dstOffset` specify the CUDA array handle and starting index of the
-    destination data. `srcDevice` specifies the base pointer of the source.
-    `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstArray : :py:obj:`~.CUarray`
-        Destination array
-    dstOffset : size_t
-        Offset in bytes of destination array
-    srcDevice : :py:obj:`~.CUdeviceptr`
-        Source device pointer
-    ByteCount : size_t
-        Size of memory copy in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpyToArray`
-    """
-    cdef cydriver.CUdeviceptr cysrcDevice
-    if srcDevice is None:
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(srcDevice, (CUdeviceptr,)):
-        psrcDevice = int(srcDevice)
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    else:
-        psrcDevice = int(CUdeviceptr(srcDevice))
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    cdef cydriver.CUarray cydstArray
-    if dstArray is None:
-        cydstArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(dstArray, (CUarray,)):
-        pdstArray = int(dstArray)
-        cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-    else:
-        pdstArray = int(CUarray(dstArray))
-        cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-    err = cydriver.cuMemcpyDtoA(cydstArray, dstOffset, cysrcDevice, ByteCount)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyAtoD_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyAtoD(dstDevice, srcArray, size_t srcOffset, size_t ByteCount):
-    """ Copies memory from Array to Device.
-
-    Copies from one 1D CUDA array to device memory. `dstDevice` specifies
-    the base pointer of the destination and must be naturally aligned with
-    the CUDA array elements. `srcArray` and `srcOffset` specify the CUDA
-    array handle and the offset in bytes into the array where the copy is
-    to begin. `ByteCount` specifies the number of bytes to copy and must be
-    evenly divisible by the array element size.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    srcArray : :py:obj:`~.CUarray`
-        Source array
-    srcOffset : size_t
-        Offset in bytes of source array
-    ByteCount : size_t
-        Size of memory copy in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpyFromArray`
-    """
-    cdef cydriver.CUarray cysrcArray
-    if srcArray is None:
-        cysrcArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(srcArray, (CUarray,)):
-        psrcArray = int(srcArray)
-        cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-    else:
-        psrcArray = int(CUarray(srcArray))
-        cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemcpyAtoD(cydstDevice, cysrcArray, srcOffset, ByteCount)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyHtoA_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyHtoA(dstArray, size_t dstOffset, srcHost, size_t ByteCount):
-    """ Copies memory from Host to Array.
-
-    Copies from host memory to a 1D CUDA array. `dstArray` and `dstOffset`
-    specify the CUDA array handle and starting offset in bytes of the
-    destination data. `pSrc` specifies the base address of the source.
-    `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstArray : :py:obj:`~.CUarray`
-        Destination array
-    dstOffset : size_t
-        Offset in bytes of destination array
-    srcHost : Any
-        Source host pointer
-    ByteCount : size_t
-        Size of memory copy in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpyToArray`
-    """
-    cdef cydriver.CUarray cydstArray
-    if dstArray is None:
-        cydstArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(dstArray, (CUarray,)):
-        pdstArray = int(dstArray)
-        cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-    else:
-        pdstArray = int(CUarray(dstArray))
-        cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-    cysrcHost = utils.HelperInputVoidPtr(srcHost)
-    cdef void* cysrcHost_ptr = <void*><void_ptr>cysrcHost.cptr
-    err = cydriver.cuMemcpyHtoA(cydstArray, dstOffset, cysrcHost_ptr, ByteCount)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyAtoH_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyAtoH(dstHost, srcArray, size_t srcOffset, size_t ByteCount):
-    """ Copies memory from Array to Host.
-
-    Copies from one 1D CUDA array to host memory. `dstHost` specifies the
-    base pointer of the destination. `srcArray` and `srcOffset` specify the
-    CUDA array handle and starting offset in bytes of the source data.
-    `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstHost : Any
-        Destination device pointer
-    srcArray : :py:obj:`~.CUarray`
-        Source array
-    srcOffset : size_t
-        Offset in bytes of source array
-    ByteCount : size_t
-        Size of memory copy in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpyFromArray`
-    """
-    cdef cydriver.CUarray cysrcArray
-    if srcArray is None:
-        cysrcArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(srcArray, (CUarray,)):
-        psrcArray = int(srcArray)
-        cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-    else:
-        psrcArray = int(CUarray(srcArray))
-        cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-    cydstHost = utils.HelperInputVoidPtr(dstHost)
-    cdef void* cydstHost_ptr = <void*><void_ptr>cydstHost.cptr
-    err = cydriver.cuMemcpyAtoH(cydstHost_ptr, cysrcArray, srcOffset, ByteCount)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyAtoA_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyAtoA(dstArray, size_t dstOffset, srcArray, size_t srcOffset, size_t ByteCount):
-    """ Copies memory from Array to Array.
-
-    Copies from one 1D CUDA array to another. `dstArray` and `srcArray`
-    specify the handles of the destination and source CUDA arrays for the
-    copy, respectively. `dstOffset` and `srcOffset` specify the destination
-    and source offsets in bytes into the CUDA arrays. `ByteCount` is the
-    number of bytes to be copied. The size of the elements in the CUDA
-    arrays need not be the same format, but the elements must be the same
-    size; and count must be evenly divisible by that size.
-
-    Parameters
-    ----------
-    dstArray : :py:obj:`~.CUarray`
-        Destination array
-    dstOffset : size_t
-        Offset in bytes of destination array
-    srcArray : :py:obj:`~.CUarray`
-        Source array
-    srcOffset : size_t
-        Offset in bytes of source array
-    ByteCount : size_t
-        Size of memory copy in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpyArrayToArray`
-    """
-    cdef cydriver.CUarray cysrcArray
-    if srcArray is None:
-        cysrcArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(srcArray, (CUarray,)):
-        psrcArray = int(srcArray)
-        cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-    else:
-        psrcArray = int(CUarray(srcArray))
-        cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-    cdef cydriver.CUarray cydstArray
-    if dstArray is None:
-        cydstArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(dstArray, (CUarray,)):
-        pdstArray = int(dstArray)
-        cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-    else:
-        pdstArray = int(CUarray(dstArray))
-        cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-    err = cydriver.cuMemcpyAtoA(cydstArray, dstOffset, cysrcArray, srcOffset, ByteCount)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpy2D_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpy2D(pCopy : Optional[CUDA_MEMCPY2D]):
-    """ Copies memory for 2D arrays.
-
-    Perform a 2D memory copy according to the parameters specified in
-    `pCopy`. The :py:obj:`~.CUDA_MEMCPY2D` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.srcMemoryType` and :py:obj:`~.dstMemoryType` specify the
-      type of memory of the source and destination, respectively;
-      :py:obj:`~.CUmemorytype_enum` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_UNIFIED`,
-    :py:obj:`~.srcDevice` and :py:obj:`~.srcPitch` specify the (unified
-    virtual address space) base address of the source data and the bytes
-    per row to apply. :py:obj:`~.srcArray` is ignored. This value may be
-    used only if unified addressing is supported in the calling context.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_HOST`,
-    :py:obj:`~.srcHost` and :py:obj:`~.srcPitch` specify the (host) base
-    address of the source data and the bytes per row to apply.
-    :py:obj:`~.srcArray` is ignored.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_DEVICE`,
-    :py:obj:`~.srcDevice` and :py:obj:`~.srcPitch` specify the (device)
-    base address of the source data and the bytes per row to apply.
-    :py:obj:`~.srcArray` is ignored.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_ARRAY`,
-    :py:obj:`~.srcArray` specifies the handle of the source data.
-    :py:obj:`~.srcHost`, :py:obj:`~.srcDevice` and :py:obj:`~.srcPitch` are
-    ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_HOST`,
-    :py:obj:`~.dstHost` and :py:obj:`~.dstPitch` specify the (host) base
-    address of the destination data and the bytes per row to apply.
-    :py:obj:`~.dstArray` is ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_UNIFIED`,
-    :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` specify the (unified
-    virtual address space) base address of the source data and the bytes
-    per row to apply. :py:obj:`~.dstArray` is ignored. This value may be
-    used only if unified addressing is supported in the calling context.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_DEVICE`,
-    :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` specify the (device)
-    base address of the destination data and the bytes per row to apply.
-    :py:obj:`~.dstArray` is ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_ARRAY`,
-    :py:obj:`~.dstArray` specifies the handle of the destination data.
-    :py:obj:`~.dstHost`, :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` are
-    ignored.
-
-    - :py:obj:`~.srcXInBytes` and :py:obj:`~.srcY` specify the base address
-      of the source data for the copy.
-
-    For host pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For device pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For CUDA arrays, :py:obj:`~.srcXInBytes` must be evenly divisible by
-    the array element size.
-
-    - :py:obj:`~.dstXInBytes` and :py:obj:`~.dstY` specify the base address
-      of the destination data for the copy.
-
-    For host pointers, the base address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For device pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For CUDA arrays, :py:obj:`~.dstXInBytes` must be evenly divisible by
-    the array element size.
-
-    - :py:obj:`~.WidthInBytes` and :py:obj:`~.Height` specify the width (in
-      bytes) and height of the 2D copy being performed.
-
-    - If specified, :py:obj:`~.srcPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + :py:obj:`~.srcXInBytes`, and
-      :py:obj:`~.dstPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + dstXInBytes.
-
-    :py:obj:`~.cuMemcpy2D()` returns an error if any pitch is greater than
-    the maximum allowed (:py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_PITCH`).
-    :py:obj:`~.cuMemAllocPitch()` passes back pitches that always work with
-    :py:obj:`~.cuMemcpy2D()`. On intra-device memory copies (device to
-    device, CUDA array to device, CUDA array to CUDA array),
-    :py:obj:`~.cuMemcpy2D()` may fail for pitches not computed by
-    :py:obj:`~.cuMemAllocPitch()`. :py:obj:`~.cuMemcpy2DUnaligned()` does
-    not have this restriction, but may run significantly slower in the
-    cases where :py:obj:`~.cuMemcpy2D()` would have returned an error code.
-
-    Parameters
-    ----------
-    pCopy : :py:obj:`~.CUDA_MEMCPY2D`
-        Parameters for the memory copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`
-    """
-    cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr = pCopy._ptr if pCopy != None else NULL
-    err = cydriver.cuMemcpy2D(cypCopy_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpy2DUnaligned_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpy2DUnaligned(pCopy : Optional[CUDA_MEMCPY2D]):
-    """ Copies memory for 2D arrays.
-
-    Perform a 2D memory copy according to the parameters specified in
-    `pCopy`. The :py:obj:`~.CUDA_MEMCPY2D` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.srcMemoryType` and :py:obj:`~.dstMemoryType` specify the
-      type of memory of the source and destination, respectively;
-      :py:obj:`~.CUmemorytype_enum` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_UNIFIED`,
-    :py:obj:`~.srcDevice` and :py:obj:`~.srcPitch` specify the (unified
-    virtual address space) base address of the source data and the bytes
-    per row to apply. :py:obj:`~.srcArray` is ignored. This value may be
-    used only if unified addressing is supported in the calling context.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_HOST`,
-    :py:obj:`~.srcHost` and :py:obj:`~.srcPitch` specify the (host) base
-    address of the source data and the bytes per row to apply.
-    :py:obj:`~.srcArray` is ignored.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_DEVICE`,
-    :py:obj:`~.srcDevice` and :py:obj:`~.srcPitch` specify the (device)
-    base address of the source data and the bytes per row to apply.
-    :py:obj:`~.srcArray` is ignored.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_ARRAY`,
-    :py:obj:`~.srcArray` specifies the handle of the source data.
-    :py:obj:`~.srcHost`, :py:obj:`~.srcDevice` and :py:obj:`~.srcPitch` are
-    ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_UNIFIED`,
-    :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` specify the (unified
-    virtual address space) base address of the source data and the bytes
-    per row to apply. :py:obj:`~.dstArray` is ignored. This value may be
-    used only if unified addressing is supported in the calling context.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_HOST`,
-    :py:obj:`~.dstHost` and :py:obj:`~.dstPitch` specify the (host) base
-    address of the destination data and the bytes per row to apply.
-    :py:obj:`~.dstArray` is ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_DEVICE`,
-    :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` specify the (device)
-    base address of the destination data and the bytes per row to apply.
-    :py:obj:`~.dstArray` is ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_ARRAY`,
-    :py:obj:`~.dstArray` specifies the handle of the destination data.
-    :py:obj:`~.dstHost`, :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` are
-    ignored.
-
-    - :py:obj:`~.srcXInBytes` and :py:obj:`~.srcY` specify the base address
-      of the source data for the copy.
-
-    For host pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For device pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For CUDA arrays, :py:obj:`~.srcXInBytes` must be evenly divisible by
-    the array element size.
-
-    - :py:obj:`~.dstXInBytes` and :py:obj:`~.dstY` specify the base address
-      of the destination data for the copy.
-
-    For host pointers, the base address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For device pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For CUDA arrays, :py:obj:`~.dstXInBytes` must be evenly divisible by
-    the array element size.
-
-    - :py:obj:`~.WidthInBytes` and :py:obj:`~.Height` specify the width (in
-      bytes) and height of the 2D copy being performed.
-
-    - If specified, :py:obj:`~.srcPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + :py:obj:`~.srcXInBytes`, and
-      :py:obj:`~.dstPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + dstXInBytes.
-
-    :py:obj:`~.cuMemcpy2D()` returns an error if any pitch is greater than
-    the maximum allowed (:py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_PITCH`).
-    :py:obj:`~.cuMemAllocPitch()` passes back pitches that always work with
-    :py:obj:`~.cuMemcpy2D()`. On intra-device memory copies (device to
-    device, CUDA array to device, CUDA array to CUDA array),
-    :py:obj:`~.cuMemcpy2D()` may fail for pitches not computed by
-    :py:obj:`~.cuMemAllocPitch()`. :py:obj:`~.cuMemcpy2DUnaligned()` does
-    not have this restriction, but may run significantly slower in the
-    cases where :py:obj:`~.cuMemcpy2D()` would have returned an error code.
-
-    Parameters
-    ----------
-    pCopy : :py:obj:`~.CUDA_MEMCPY2D`
-        Parameters for the memory copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`
-    """
-    cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr = pCopy._ptr if pCopy != None else NULL
-    err = cydriver.cuMemcpy2DUnaligned(cypCopy_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpy3D_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpy3D(pCopy : Optional[CUDA_MEMCPY3D]):
-    """ Copies memory for 3D arrays.
-
-    Perform a 3D memory copy according to the parameters specified in
-    `pCopy`. The :py:obj:`~.CUDA_MEMCPY3D` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.srcMemoryType` and :py:obj:`~.dstMemoryType` specify the
-      type of memory of the source and destination, respectively;
-      :py:obj:`~.CUmemorytype_enum` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_UNIFIED`,
-    :py:obj:`~.srcDevice` and :py:obj:`~.srcPitch` specify the (unified
-    virtual address space) base address of the source data and the bytes
-    per row to apply. :py:obj:`~.srcArray` is ignored. This value may be
-    used only if unified addressing is supported in the calling context.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_HOST`,
-    :py:obj:`~.srcHost`, :py:obj:`~.srcPitch` and :py:obj:`~.srcHeight`
-    specify the (host) base address of the source data, the bytes per row,
-    and the height of each 2D slice of the 3D array. :py:obj:`~.srcArray`
-    is ignored.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_DEVICE`,
-    :py:obj:`~.srcDevice`, :py:obj:`~.srcPitch` and :py:obj:`~.srcHeight`
-    specify the (device) base address of the source data, the bytes per
-    row, and the height of each 2D slice of the 3D array.
-    :py:obj:`~.srcArray` is ignored.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_ARRAY`,
-    :py:obj:`~.srcArray` specifies the handle of the source data.
-    :py:obj:`~.srcHost`, :py:obj:`~.srcDevice`, :py:obj:`~.srcPitch` and
-    :py:obj:`~.srcHeight` are ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_UNIFIED`,
-    :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` specify the (unified
-    virtual address space) base address of the source data and the bytes
-    per row to apply. :py:obj:`~.dstArray` is ignored. This value may be
-    used only if unified addressing is supported in the calling context.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_HOST`,
-    :py:obj:`~.dstHost` and :py:obj:`~.dstPitch` specify the (host) base
-    address of the destination data, the bytes per row, and the height of
-    each 2D slice of the 3D array. :py:obj:`~.dstArray` is ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_DEVICE`,
-    :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` specify the (device)
-    base address of the destination data, the bytes per row, and the height
-    of each 2D slice of the 3D array. :py:obj:`~.dstArray` is ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_ARRAY`,
-    :py:obj:`~.dstArray` specifies the handle of the destination data.
-    :py:obj:`~.dstHost`, :py:obj:`~.dstDevice`, :py:obj:`~.dstPitch` and
-    :py:obj:`~.dstHeight` are ignored.
-
-    - :py:obj:`~.srcXInBytes`, :py:obj:`~.srcY` and :py:obj:`~.srcZ`
-      specify the base address of the source data for the copy.
-
-    For host pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For device pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For CUDA arrays, :py:obj:`~.srcXInBytes` must be evenly divisible by
-    the array element size.
-
-    - dstXInBytes, :py:obj:`~.dstY` and :py:obj:`~.dstZ` specify the base
-      address of the destination data for the copy.
-
-    For host pointers, the base address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For device pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For CUDA arrays, :py:obj:`~.dstXInBytes` must be evenly divisible by
-    the array element size.
-
-    - :py:obj:`~.WidthInBytes`, :py:obj:`~.Height` and :py:obj:`~.Depth`
-      specify the width (in bytes), height and depth of the 3D copy being
-      performed.
-
-    - If specified, :py:obj:`~.srcPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + :py:obj:`~.srcXInBytes`, and
-      :py:obj:`~.dstPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + dstXInBytes.
-
-    - If specified, :py:obj:`~.srcHeight` must be greater than or equal to
-      :py:obj:`~.Height` + :py:obj:`~.srcY`, and :py:obj:`~.dstHeight` must
-      be greater than or equal to :py:obj:`~.Height` + :py:obj:`~.dstY`.
-
-    :py:obj:`~.cuMemcpy3D()` returns an error if any pitch is greater than
-    the maximum allowed (:py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_PITCH`).
-
-    The :py:obj:`~.srcLOD` and :py:obj:`~.dstLOD` members of the
-    :py:obj:`~.CUDA_MEMCPY3D` structure must be set to 0.
-
-    Parameters
-    ----------
-    pCopy : :py:obj:`~.CUDA_MEMCPY3D`
-        Parameters for the memory copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy3D`
-    """
-    cdef cydriver.CUDA_MEMCPY3D* cypCopy_ptr = pCopy._ptr if pCopy != None else NULL
-    err = cydriver.cuMemcpy3D(cypCopy_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpy3DPeer' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpy3DPeer(pCopy : Optional[CUDA_MEMCPY3D_PEER]):
-    """ Copies memory between contexts.
-
-    Perform a 3D memory copy according to the parameters specified in
-    `pCopy`. See the definition of the :py:obj:`~.CUDA_MEMCPY3D_PEER`
-    structure for documentation of its parameters.
-
-    Parameters
-    ----------
-    pCopy : :py:obj:`~.CUDA_MEMCPY3D_PEER`
-        Parameters for the memory copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyPeerAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cudaMemcpy3DPeer`
-    """
-    cdef cydriver.CUDA_MEMCPY3D_PEER* cypCopy_ptr = pCopy._ptr if pCopy != None else NULL
-    err = cydriver.cuMemcpy3DPeer(cypCopy_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyAsync(dst, src, size_t ByteCount, hStream):
-    """ Copies memory asynchronously.
-
-    Copies data between two pointers. `dst` and `src` are base pointers of
-    the destination and source, respectively. `ByteCount` specifies the
-    number of bytes to copy. Note that this function infers the type of the
-    transfer (host to host, host to device, device to device, or device to
-    host) from the pointer values. This function is only allowed in
-    contexts which support unified addressing.
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.CUdeviceptr`
-        Destination unified virtual address space pointer
-    src : :py:obj:`~.CUdeviceptr`
-        Source unified virtual address space pointer
-    ByteCount : size_t
-        Size of memory copy in bytes
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cysrc
-    if src is None:
-        cysrc = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(src, (CUdeviceptr,)):
-        psrc = int(src)
-        cysrc = <cydriver.CUdeviceptr><void_ptr>psrc
-    else:
-        psrc = int(CUdeviceptr(src))
-        cysrc = <cydriver.CUdeviceptr><void_ptr>psrc
-    cdef cydriver.CUdeviceptr cydst
-    if dst is None:
-        cydst = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dst, (CUdeviceptr,)):
-        pdst = int(dst)
-        cydst = <cydriver.CUdeviceptr><void_ptr>pdst
-    else:
-        pdst = int(CUdeviceptr(dst))
-        cydst = <cydriver.CUdeviceptr><void_ptr>pdst
-    err = cydriver.cuMemcpyAsync(cydst, cysrc, ByteCount, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyPeerAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyPeerAsync(dstDevice, dstContext, srcDevice, srcContext, size_t ByteCount, hStream):
-    """ Copies device memory between two contexts asynchronously.
-
-    Copies from device memory in one context to device memory in another
-    context. `dstDevice` is the base device pointer of the destination
-    memory and `dstContext` is the destination context. `srcDevice` is the
-    base device pointer of the source memory and `srcContext` is the source
-    pointer. `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    dstContext : :py:obj:`~.CUcontext`
-        Destination context
-    srcDevice : :py:obj:`~.CUdeviceptr`
-        Source device pointer
-    srcContext : :py:obj:`~.CUcontext`
-        Source context
-    ByteCount : size_t
-        Size of memory copy in bytes
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpy3DPeer`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cudaMemcpyPeerAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUcontext cysrcContext
-    if srcContext is None:
-        cysrcContext = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(srcContext, (CUcontext,)):
-        psrcContext = int(srcContext)
-        cysrcContext = <cydriver.CUcontext><void_ptr>psrcContext
-    else:
-        psrcContext = int(CUcontext(srcContext))
-        cysrcContext = <cydriver.CUcontext><void_ptr>psrcContext
-    cdef cydriver.CUdeviceptr cysrcDevice
-    if srcDevice is None:
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(srcDevice, (CUdeviceptr,)):
-        psrcDevice = int(srcDevice)
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    else:
-        psrcDevice = int(CUdeviceptr(srcDevice))
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    cdef cydriver.CUcontext cydstContext
-    if dstContext is None:
-        cydstContext = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(dstContext, (CUcontext,)):
-        pdstContext = int(dstContext)
-        cydstContext = <cydriver.CUcontext><void_ptr>pdstContext
-    else:
-        pdstContext = int(CUcontext(dstContext))
-        cydstContext = <cydriver.CUcontext><void_ptr>pdstContext
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemcpyPeerAsync(cydstDevice, cydstContext, cysrcDevice, cysrcContext, ByteCount, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyHtoDAsync_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyHtoDAsync(dstDevice, srcHost, size_t ByteCount, hStream):
-    """ Copies memory from Host to Device.
-
-    Copies from host memory to device memory. `dstDevice` and `srcHost` are
-    the base addresses of the destination and source, respectively.
-    `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    srcHost : Any
-        Source host pointer
-    ByteCount : size_t
-        Size of memory copy in bytes
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    cysrcHost = utils.HelperInputVoidPtr(srcHost)
-    cdef void* cysrcHost_ptr = <void*><void_ptr>cysrcHost.cptr
-    err = cydriver.cuMemcpyHtoDAsync(cydstDevice, cysrcHost_ptr, ByteCount, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyDtoHAsync_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyDtoHAsync(dstHost, srcDevice, size_t ByteCount, hStream):
-    """ Copies memory from Device to Host.
-
-    Copies from device to host memory. `dstHost` and `srcDevice` specify
-    the base pointers of the destination and source, respectively.
-    `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstHost : Any
-        Destination host pointer
-    srcDevice : :py:obj:`~.CUdeviceptr`
-        Source device pointer
-    ByteCount : size_t
-        Size of memory copy in bytes
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cysrcDevice
-    if srcDevice is None:
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(srcDevice, (CUdeviceptr,)):
-        psrcDevice = int(srcDevice)
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    else:
-        psrcDevice = int(CUdeviceptr(srcDevice))
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    cydstHost = utils.HelperInputVoidPtr(dstHost)
-    cdef void* cydstHost_ptr = <void*><void_ptr>cydstHost.cptr
-    err = cydriver.cuMemcpyDtoHAsync(cydstHost_ptr, cysrcDevice, ByteCount, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyDtoDAsync_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyDtoDAsync(dstDevice, srcDevice, size_t ByteCount, hStream):
-    """ Copies memory from Device to Device.
-
-    Copies from device memory to device memory. `dstDevice` and `srcDevice`
-    are the base pointers of the destination and source, respectively.
-    `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    srcDevice : :py:obj:`~.CUdeviceptr`
-        Source device pointer
-    ByteCount : size_t
-        Size of memory copy in bytes
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cysrcDevice
-    if srcDevice is None:
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(srcDevice, (CUdeviceptr,)):
-        psrcDevice = int(srcDevice)
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    else:
-        psrcDevice = int(CUdeviceptr(srcDevice))
-        cysrcDevice = <cydriver.CUdeviceptr><void_ptr>psrcDevice
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemcpyDtoDAsync(cydstDevice, cysrcDevice, ByteCount, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyHtoAAsync_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyHtoAAsync(dstArray, size_t dstOffset, srcHost, size_t ByteCount, hStream):
-    """ Copies memory from Host to Array.
-
-    Copies from host memory to a 1D CUDA array. `dstArray` and `dstOffset`
-    specify the CUDA array handle and starting offset in bytes of the
-    destination data. `srcHost` specifies the base address of the source.
-    `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstArray : :py:obj:`~.CUarray`
-        Destination array
-    dstOffset : size_t
-        Offset in bytes of destination array
-    srcHost : Any
-        Source host pointer
-    ByteCount : size_t
-        Size of memory copy in bytes
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyToArrayAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUarray cydstArray
-    if dstArray is None:
-        cydstArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(dstArray, (CUarray,)):
-        pdstArray = int(dstArray)
-        cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-    else:
-        pdstArray = int(CUarray(dstArray))
-        cydstArray = <cydriver.CUarray><void_ptr>pdstArray
-    cysrcHost = utils.HelperInputVoidPtr(srcHost)
-    cdef void* cysrcHost_ptr = <void*><void_ptr>cysrcHost.cptr
-    err = cydriver.cuMemcpyHtoAAsync(cydstArray, dstOffset, cysrcHost_ptr, ByteCount, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpyAtoHAsync_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpyAtoHAsync(dstHost, srcArray, size_t srcOffset, size_t ByteCount, hStream):
-    """ Copies memory from Array to Host.
-
-    Copies from one 1D CUDA array to host memory. `dstHost` specifies the
-    base pointer of the destination. `srcArray` and `srcOffset` specify the
-    CUDA array handle and starting offset in bytes of the source data.
-    `ByteCount` specifies the number of bytes to copy.
-
-    Parameters
-    ----------
-    dstHost : Any
-        Destination pointer
-    srcArray : :py:obj:`~.CUarray`
-        Source array
-    srcOffset : size_t
-        Offset in bytes of source array
-    ByteCount : size_t
-        Size of memory copy in bytes
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyFromArrayAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUarray cysrcArray
-    if srcArray is None:
-        cysrcArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(srcArray, (CUarray,)):
-        psrcArray = int(srcArray)
-        cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-    else:
-        psrcArray = int(CUarray(srcArray))
-        cysrcArray = <cydriver.CUarray><void_ptr>psrcArray
-    cydstHost = utils.HelperInputVoidPtr(dstHost)
-    cdef void* cydstHost_ptr = <void*><void_ptr>cydstHost.cptr
-    err = cydriver.cuMemcpyAtoHAsync(cydstHost_ptr, cysrcArray, srcOffset, ByteCount, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpy2DAsync_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpy2DAsync(pCopy : Optional[CUDA_MEMCPY2D], hStream):
-    """ Copies memory for 2D arrays.
-
-    Perform a 2D memory copy according to the parameters specified in
-    `pCopy`. The :py:obj:`~.CUDA_MEMCPY2D` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.srcMemoryType` and :py:obj:`~.dstMemoryType` specify the
-      type of memory of the source and destination, respectively;
-      :py:obj:`~.CUmemorytype_enum` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_HOST`,
-    :py:obj:`~.srcHost` and :py:obj:`~.srcPitch` specify the (host) base
-    address of the source data and the bytes per row to apply.
-    :py:obj:`~.srcArray` is ignored.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_UNIFIED`,
-    :py:obj:`~.srcDevice` and :py:obj:`~.srcPitch` specify the (unified
-    virtual address space) base address of the source data and the bytes
-    per row to apply. :py:obj:`~.srcArray` is ignored. This value may be
-    used only if unified addressing is supported in the calling context.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_DEVICE`,
-    :py:obj:`~.srcDevice` and :py:obj:`~.srcPitch` specify the (device)
-    base address of the source data and the bytes per row to apply.
-    :py:obj:`~.srcArray` is ignored.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_ARRAY`,
-    :py:obj:`~.srcArray` specifies the handle of the source data.
-    :py:obj:`~.srcHost`, :py:obj:`~.srcDevice` and :py:obj:`~.srcPitch` are
-    ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_UNIFIED`,
-    :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` specify the (unified
-    virtual address space) base address of the source data and the bytes
-    per row to apply. :py:obj:`~.dstArray` is ignored. This value may be
-    used only if unified addressing is supported in the calling context.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_HOST`,
-    :py:obj:`~.dstHost` and :py:obj:`~.dstPitch` specify the (host) base
-    address of the destination data and the bytes per row to apply.
-    :py:obj:`~.dstArray` is ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_DEVICE`,
-    :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` specify the (device)
-    base address of the destination data and the bytes per row to apply.
-    :py:obj:`~.dstArray` is ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_ARRAY`,
-    :py:obj:`~.dstArray` specifies the handle of the destination data.
-    :py:obj:`~.dstHost`, :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` are
-    ignored.
-
-    - :py:obj:`~.srcXInBytes` and :py:obj:`~.srcY` specify the base address
-      of the source data for the copy.
-
-    For host pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For device pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For CUDA arrays, :py:obj:`~.srcXInBytes` must be evenly divisible by
-    the array element size.
-
-    - :py:obj:`~.dstXInBytes` and :py:obj:`~.dstY` specify the base address
-      of the destination data for the copy.
-
-    For host pointers, the base address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For device pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For CUDA arrays, :py:obj:`~.dstXInBytes` must be evenly divisible by
-    the array element size.
-
-    - :py:obj:`~.WidthInBytes` and :py:obj:`~.Height` specify the width (in
-      bytes) and height of the 2D copy being performed.
-
-    - If specified, :py:obj:`~.srcPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + :py:obj:`~.srcXInBytes`, and
-      :py:obj:`~.dstPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + dstXInBytes.
-
-    - If specified, :py:obj:`~.srcPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + :py:obj:`~.srcXInBytes`, and
-      :py:obj:`~.dstPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + dstXInBytes.
-
-    - If specified, :py:obj:`~.srcHeight` must be greater than or equal to
-      :py:obj:`~.Height` + :py:obj:`~.srcY`, and :py:obj:`~.dstHeight` must
-      be greater than or equal to :py:obj:`~.Height` + :py:obj:`~.dstY`.
-
-    :py:obj:`~.cuMemcpy2DAsync()` returns an error if any pitch is greater
-    than the maximum allowed (:py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_PITCH`).
-    :py:obj:`~.cuMemAllocPitch()` passes back pitches that always work with
-    :py:obj:`~.cuMemcpy2D()`. On intra-device memory copies (device to
-    device, CUDA array to device, CUDA array to CUDA array),
-    :py:obj:`~.cuMemcpy2DAsync()` may fail for pitches not computed by
-    :py:obj:`~.cuMemAllocPitch()`.
-
-    Parameters
-    ----------
-    pCopy : :py:obj:`~.CUDA_MEMCPY2D`
-        Parameters for the memory copy
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr = pCopy._ptr if pCopy != None else NULL
-    err = cydriver.cuMemcpy2DAsync(cypCopy_ptr, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpy3DAsync_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpy3DAsync(pCopy : Optional[CUDA_MEMCPY3D], hStream):
-    """ Copies memory for 3D arrays.
-
-    Perform a 3D memory copy according to the parameters specified in
-    `pCopy`. The :py:obj:`~.CUDA_MEMCPY3D` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.srcMemoryType` and :py:obj:`~.dstMemoryType` specify the
-      type of memory of the source and destination, respectively;
-      :py:obj:`~.CUmemorytype_enum` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_UNIFIED`,
-    :py:obj:`~.srcDevice` and :py:obj:`~.srcPitch` specify the (unified
-    virtual address space) base address of the source data and the bytes
-    per row to apply. :py:obj:`~.srcArray` is ignored. This value may be
-    used only if unified addressing is supported in the calling context.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_HOST`,
-    :py:obj:`~.srcHost`, :py:obj:`~.srcPitch` and :py:obj:`~.srcHeight`
-    specify the (host) base address of the source data, the bytes per row,
-    and the height of each 2D slice of the 3D array. :py:obj:`~.srcArray`
-    is ignored.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_DEVICE`,
-    :py:obj:`~.srcDevice`, :py:obj:`~.srcPitch` and :py:obj:`~.srcHeight`
-    specify the (device) base address of the source data, the bytes per
-    row, and the height of each 2D slice of the 3D array.
-    :py:obj:`~.srcArray` is ignored.
-
-    If :py:obj:`~.srcMemoryType` is :py:obj:`~.CU_MEMORYTYPE_ARRAY`,
-    :py:obj:`~.srcArray` specifies the handle of the source data.
-    :py:obj:`~.srcHost`, :py:obj:`~.srcDevice`, :py:obj:`~.srcPitch` and
-    :py:obj:`~.srcHeight` are ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_UNIFIED`,
-    :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` specify the (unified
-    virtual address space) base address of the source data and the bytes
-    per row to apply. :py:obj:`~.dstArray` is ignored. This value may be
-    used only if unified addressing is supported in the calling context.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_HOST`,
-    :py:obj:`~.dstHost` and :py:obj:`~.dstPitch` specify the (host) base
-    address of the destination data, the bytes per row, and the height of
-    each 2D slice of the 3D array. :py:obj:`~.dstArray` is ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_DEVICE`,
-    :py:obj:`~.dstDevice` and :py:obj:`~.dstPitch` specify the (device)
-    base address of the destination data, the bytes per row, and the height
-    of each 2D slice of the 3D array. :py:obj:`~.dstArray` is ignored.
-
-    If :py:obj:`~.dstMemoryType` is :py:obj:`~.CU_MEMORYTYPE_ARRAY`,
-    :py:obj:`~.dstArray` specifies the handle of the destination data.
-    :py:obj:`~.dstHost`, :py:obj:`~.dstDevice`, :py:obj:`~.dstPitch` and
-    :py:obj:`~.dstHeight` are ignored.
-
-    - :py:obj:`~.srcXInBytes`, :py:obj:`~.srcY` and :py:obj:`~.srcZ`
-      specify the base address of the source data for the copy.
-
-    For host pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For device pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For CUDA arrays, :py:obj:`~.srcXInBytes` must be evenly divisible by
-    the array element size.
-
-    - dstXInBytes, :py:obj:`~.dstY` and :py:obj:`~.dstZ` specify the base
-      address of the destination data for the copy.
-
-    For host pointers, the base address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For device pointers, the starting address is
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For CUDA arrays, :py:obj:`~.dstXInBytes` must be evenly divisible by
-    the array element size.
-
-    - :py:obj:`~.WidthInBytes`, :py:obj:`~.Height` and :py:obj:`~.Depth`
-      specify the width (in bytes), height and depth of the 3D copy being
-      performed.
-
-    - If specified, :py:obj:`~.srcPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + :py:obj:`~.srcXInBytes`, and
-      :py:obj:`~.dstPitch` must be greater than or equal to
-      :py:obj:`~.WidthInBytes` + dstXInBytes.
-
-    - If specified, :py:obj:`~.srcHeight` must be greater than or equal to
-      :py:obj:`~.Height` + :py:obj:`~.srcY`, and :py:obj:`~.dstHeight` must
-      be greater than or equal to :py:obj:`~.Height` + :py:obj:`~.dstY`.
-
-    :py:obj:`~.cuMemcpy3DAsync()` returns an error if any pitch is greater
-    than the maximum allowed (:py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_PITCH`).
-
-    The :py:obj:`~.srcLOD` and :py:obj:`~.dstLOD` members of the
-    :py:obj:`~.CUDA_MEMCPY3D` structure must be set to 0.
-
-    Parameters
-    ----------
-    pCopy : :py:obj:`~.CUDA_MEMCPY3D`
-        Parameters for the memory copy
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpy3DAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUDA_MEMCPY3D* cypCopy_ptr = pCopy._ptr if pCopy != None else NULL
-    err = cydriver.cuMemcpy3DAsync(cypCopy_ptr, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemcpy3DPeerAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemcpy3DPeerAsync(pCopy : Optional[CUDA_MEMCPY3D_PEER], hStream):
-    """ Copies memory between contexts asynchronously.
-
-    Perform a 3D memory copy according to the parameters specified in
-    `pCopy`. See the definition of the :py:obj:`~.CUDA_MEMCPY3D_PEER`
-    structure for documentation of its parameters.
-
-    Parameters
-    ----------
-    pCopy : :py:obj:`~.CUDA_MEMCPY3D_PEER`
-        Parameters for the memory copy
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyPeerAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUDA_MEMCPY3D_PEER* cypCopy_ptr = pCopy._ptr if pCopy != None else NULL
-    err = cydriver.cuMemcpy3DPeerAsync(cypCopy_ptr, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD8_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD8(dstDevice, unsigned char uc, size_t N):
-    """ Initializes device memory.
-
-    Sets the memory range of `N` 8-bit values to the specified value `uc`.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    uc : unsigned char
-        Value to set
-    N : size_t
-        Number of elements
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset`
-    """
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD8(cydstDevice, uc, N)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD16_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD16(dstDevice, unsigned short us, size_t N):
-    """ Initializes device memory.
-
-    Sets the memory range of `N` 16-bit values to the specified value `us`.
-    The `dstDevice` pointer must be two byte aligned.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    us : unsigned short
-        Value to set
-    N : size_t
-        Number of elements
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset`
-    """
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD16(cydstDevice, us, N)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD32_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD32(dstDevice, unsigned int ui, size_t N):
-    """ Initializes device memory.
-
-    Sets the memory range of `N` 32-bit values to the specified value `ui`.
-    The `dstDevice` pointer must be four byte aligned.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    ui : unsigned int
-        Value to set
-    N : size_t
-        Number of elements
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset`
-    """
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD32(cydstDevice, ui, N)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD2D8_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD2D8(dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height):
-    """ Initializes device memory.
-
-    Sets the 2D memory range of `Width` 8-bit values to the specified value
-    `uc`. `Height` specifies the number of rows to set, and `dstPitch`
-    specifies the number of bytes between each row. This function performs
-    fastest when the pitch is one that has been passed back by
-    :py:obj:`~.cuMemAllocPitch()`.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    dstPitch : size_t
-        Pitch of destination device pointer(Unused if `Height` is 1)
-    uc : unsigned char
-        Value to set
-    Width : size_t
-        Width of row
-    Height : size_t
-        Number of rows
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset2D`
-    """
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD2D8(cydstDevice, dstPitch, uc, Width, Height)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD2D16_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD2D16(dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height):
-    """ Initializes device memory.
-
-    Sets the 2D memory range of `Width` 16-bit values to the specified
-    value `us`. `Height` specifies the number of rows to set, and
-    `dstPitch` specifies the number of bytes between each row. The
-    `dstDevice` pointer and `dstPitch` offset must be two byte aligned.
-    This function performs fastest when the pitch is one that has been
-    passed back by :py:obj:`~.cuMemAllocPitch()`.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    dstPitch : size_t
-        Pitch of destination device pointer(Unused if `Height` is 1)
-    us : unsigned short
-        Value to set
-    Width : size_t
-        Width of row
-    Height : size_t
-        Number of rows
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset2D`
-    """
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD2D16(cydstDevice, dstPitch, us, Width, Height)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD2D32_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD2D32(dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height):
-    """ Initializes device memory.
-
-    Sets the 2D memory range of `Width` 32-bit values to the specified
-    value `ui`. `Height` specifies the number of rows to set, and
-    `dstPitch` specifies the number of bytes between each row. The
-    `dstDevice` pointer and `dstPitch` offset must be four byte aligned.
-    This function performs fastest when the pitch is one that has been
-    passed back by :py:obj:`~.cuMemAllocPitch()`.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    dstPitch : size_t
-        Pitch of destination device pointer(Unused if `Height` is 1)
-    ui : unsigned int
-        Value to set
-    Width : size_t
-        Width of row
-    Height : size_t
-        Number of rows
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset2D`
-    """
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD2D32(cydstDevice, dstPitch, ui, Width, Height)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD8Async' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD8Async(dstDevice, unsigned char uc, size_t N, hStream):
-    """ Sets device memory.
-
-    Sets the memory range of `N` 8-bit values to the specified value `uc`.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    uc : unsigned char
-        Value to set
-    N : size_t
-        Number of elements
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemsetAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD8Async(cydstDevice, uc, N, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD16Async' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD16Async(dstDevice, unsigned short us, size_t N, hStream):
-    """ Sets device memory.
-
-    Sets the memory range of `N` 16-bit values to the specified value `us`.
-    The `dstDevice` pointer must be two byte aligned.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    us : unsigned short
-        Value to set
-    N : size_t
-        Number of elements
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemsetAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD16Async(cydstDevice, us, N, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD32Async' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD32Async(dstDevice, unsigned int ui, size_t N, hStream):
-    """ Sets device memory.
-
-    Sets the memory range of `N` 32-bit values to the specified value `ui`.
-    The `dstDevice` pointer must be four byte aligned.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    ui : unsigned int
-        Value to set
-    N : size_t
-        Number of elements
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemsetAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD32Async(cydstDevice, ui, N, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD2D8Async' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD2D8Async(dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, hStream):
-    """ Sets device memory.
-
-    Sets the 2D memory range of `Width` 8-bit values to the specified value
-    `uc`. `Height` specifies the number of rows to set, and `dstPitch`
-    specifies the number of bytes between each row. This function performs
-    fastest when the pitch is one that has been passed back by
-    :py:obj:`~.cuMemAllocPitch()`.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    dstPitch : size_t
-        Pitch of destination device pointer(Unused if `Height` is 1)
-    uc : unsigned char
-        Value to set
-    Width : size_t
-        Width of row
-    Height : size_t
-        Number of rows
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset2DAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD2D8Async(cydstDevice, dstPitch, uc, Width, Height, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD2D16Async' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD2D16Async(dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, hStream):
-    """ Sets device memory.
-
-    Sets the 2D memory range of `Width` 16-bit values to the specified
-    value `us`. `Height` specifies the number of rows to set, and
-    `dstPitch` specifies the number of bytes between each row. The
-    `dstDevice` pointer and `dstPitch` offset must be two byte aligned.
-    This function performs fastest when the pitch is one that has been
-    passed back by :py:obj:`~.cuMemAllocPitch()`.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    dstPitch : size_t
-        Pitch of destination device pointer(Unused if `Height` is 1)
-    us : unsigned short
-        Value to set
-    Width : size_t
-        Width of row
-    Height : size_t
-        Number of rows
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset2DAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD2D16Async(cydstDevice, dstPitch, us, Width, Height, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemsetD2D32Async' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemsetD2D32Async(dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, hStream):
-    """ Sets device memory.
-
-    Sets the 2D memory range of `Width` 32-bit values to the specified
-    value `ui`. `Height` specifies the number of rows to set, and
-    `dstPitch` specifies the number of bytes between each row. The
-    `dstDevice` pointer and `dstPitch` offset must be four byte aligned.
-    This function performs fastest when the pitch is one that has been
-    passed back by :py:obj:`~.cuMemAllocPitch()`.
-
-    Parameters
-    ----------
-    dstDevice : :py:obj:`~.CUdeviceptr`
-        Destination device pointer
-    dstPitch : size_t
-        Pitch of destination device pointer(Unused if `Height` is 1)
-    ui : unsigned int
-        Value to set
-    Width : size_t
-        Width of row
-    Height : size_t
-        Number of rows
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset2DAsync`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dstDevice, (CUdeviceptr,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    else:
-        pdstDevice = int(CUdeviceptr(dstDevice))
-        cydstDevice = <cydriver.CUdeviceptr><void_ptr>pdstDevice
-    err = cydriver.cuMemsetD2D32Async(cydstDevice, dstPitch, ui, Width, Height, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuArrayCreate_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuArrayCreate(pAllocateArray : Optional[CUDA_ARRAY_DESCRIPTOR]):
-    """ Creates a 1D or 2D CUDA array.
-
-    Creates a CUDA array according to the :py:obj:`~.CUDA_ARRAY_DESCRIPTOR`
-    structure `pAllocateArray` and returns a handle to the new CUDA array
-    in `*pHandle`. The :py:obj:`~.CUDA_ARRAY_DESCRIPTOR` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - `Width`, and `Height` are the width, and height of the CUDA array (in
-      elements); the CUDA array is one-dimensional if height is 0, two-
-      dimensional otherwise;
-
-    - :py:obj:`~.Format` specifies the format of the elements;
-      :py:obj:`~.CUarray_format` is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - `NumChannels` specifies the number of packed components per CUDA
-      array element; it may be 1, 2, or 4;
-
-    Here are examples of CUDA array descriptions:
-
-    Description for a CUDA array of 2048 floats:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Description for a 64 x 64 CUDA array of floats:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Description for a `width` x `height` CUDA array of 64-bit, 4x16-bit
-    float16's:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Description for a `width` x `height` CUDA array of 16-bit elements,
-    each of which is two 8-bit unsigned chars:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Parameters
-    ----------
-    pAllocateArray : :py:obj:`~.CUDA_ARRAY_DESCRIPTOR`
-        Array descriptor
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    pHandle : :py:obj:`~.CUarray`
-        Returned array
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMallocArray`
-    """
-    cdef CUarray pHandle = CUarray()
-    cdef cydriver.CUDA_ARRAY_DESCRIPTOR* cypAllocateArray_ptr = pAllocateArray._ptr if pAllocateArray != None else NULL
-    err = cydriver.cuArrayCreate(<cydriver.CUarray*>pHandle._ptr, cypAllocateArray_ptr)
-    return (CUresult(err), pHandle)
-{{endif}}
-
-{{if 'cuArrayGetDescriptor_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuArrayGetDescriptor(hArray):
-    """ Get a 1D or 2D CUDA array descriptor.
-
-    Returns in `*pArrayDescriptor` a descriptor containing information on
-    the format and dimensions of the CUDA array `hArray`. It is useful for
-    subroutines that have been passed a CUDA array, but need to know the
-    CUDA array parameters for validation or other purposes.
-
-    Parameters
-    ----------
-    hArray : :py:obj:`~.CUarray`
-        Array to get descriptor of
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-    pArrayDescriptor : :py:obj:`~.CUDA_ARRAY_DESCRIPTOR`
-        Returned array descriptor
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaArrayGetInfo`
-    """
-    cdef cydriver.CUarray cyhArray
-    if hArray is None:
-        cyhArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(hArray, (CUarray,)):
-        phArray = int(hArray)
-        cyhArray = <cydriver.CUarray><void_ptr>phArray
-    else:
-        phArray = int(CUarray(hArray))
-        cyhArray = <cydriver.CUarray><void_ptr>phArray
-    cdef CUDA_ARRAY_DESCRIPTOR pArrayDescriptor = CUDA_ARRAY_DESCRIPTOR()
-    err = cydriver.cuArrayGetDescriptor(<cydriver.CUDA_ARRAY_DESCRIPTOR*>pArrayDescriptor._ptr, cyhArray)
-    return (CUresult(err), pArrayDescriptor)
-{{endif}}
-
-{{if 'cuArrayGetSparseProperties' in found_functions}}
-
-@cython.embedsignature(True)
-def cuArrayGetSparseProperties(array):
-    """ Returns the layout properties of a sparse CUDA array.
-
-    Returns the layout properties of a sparse CUDA array in
-    `sparseProperties` If the CUDA array is not allocated with flag
-    :py:obj:`~.CUDA_ARRAY3D_SPARSE` :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    will be returned.
-
-    If the returned value in :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES.flags`
-    contains :py:obj:`~.CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL`, then
-    :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES.miptailSize` represents the
-    total size of the array. Otherwise, it will be zero. Also, the returned
-    value in :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES.miptailFirstLevel` is
-    always zero. Note that the `array` must have been allocated using
-    :py:obj:`~.cuArrayCreate` or :py:obj:`~.cuArray3DCreate`. For CUDA
-    arrays obtained using :py:obj:`~.cuMipmappedArrayGetLevel`,
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be returned. Instead,
-    :py:obj:`~.cuMipmappedArrayGetSparseProperties` must be used to obtain
-    the sparse properties of the entire CUDA mipmapped array to which
-    `array` belongs to.
-
-    Parameters
-    ----------
-    array : :py:obj:`~.CUarray`
-        CUDA array to get the sparse properties of
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    sparseProperties : :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES`
-        Pointer to :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES`
-
-    See Also
-    --------
-    :py:obj:`~.cuMipmappedArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync`
-    """
-    cdef cydriver.CUarray cyarray
-    if array is None:
-        cyarray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(array, (CUarray,)):
-        parray = int(array)
-        cyarray = <cydriver.CUarray><void_ptr>parray
-    else:
-        parray = int(CUarray(array))
-        cyarray = <cydriver.CUarray><void_ptr>parray
-    cdef CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties = CUDA_ARRAY_SPARSE_PROPERTIES()
-    err = cydriver.cuArrayGetSparseProperties(<cydriver.CUDA_ARRAY_SPARSE_PROPERTIES*>sparseProperties._ptr, cyarray)
-    return (CUresult(err), sparseProperties)
-{{endif}}
-
-{{if 'cuMipmappedArrayGetSparseProperties' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMipmappedArrayGetSparseProperties(mipmap):
-    """ Returns the layout properties of a sparse CUDA mipmapped array.
-
-    Returns the sparse array layout properties in `sparseProperties` If the
-    CUDA mipmapped array is not allocated with flag
-    :py:obj:`~.CUDA_ARRAY3D_SPARSE` :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    will be returned.
-
-    For non-layered CUDA mipmapped arrays,
-    :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES.miptailSize` returns the size
-    of the mip tail region. The mip tail region includes all mip levels
-    whose width, height or depth is less than that of the tile. For layered
-    CUDA mipmapped arrays, if
-    :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES.flags` contains
-    :py:obj:`~.CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL`, then
-    :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES.miptailSize` specifies the size
-    of the mip tail of all layers combined. Otherwise,
-    :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES.miptailSize` specifies mip tail
-    size per layer. The returned value of
-    :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES.miptailFirstLevel` is valid
-    only if :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES.miptailSize` is non-
-    zero.
-
-    Parameters
-    ----------
-    mipmap : :py:obj:`~.CUmipmappedArray`
-        CUDA mipmapped array to get the sparse properties of
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    sparseProperties : :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES`
-        Pointer to :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES`
-
-    See Also
-    --------
-    :py:obj:`~.cuArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync`
-    """
-    cdef cydriver.CUmipmappedArray cymipmap
-    if mipmap is None:
-        cymipmap = <cydriver.CUmipmappedArray><void_ptr>0
-    elif isinstance(mipmap, (CUmipmappedArray,)):
-        pmipmap = int(mipmap)
-        cymipmap = <cydriver.CUmipmappedArray><void_ptr>pmipmap
-    else:
-        pmipmap = int(CUmipmappedArray(mipmap))
-        cymipmap = <cydriver.CUmipmappedArray><void_ptr>pmipmap
-    cdef CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties = CUDA_ARRAY_SPARSE_PROPERTIES()
-    err = cydriver.cuMipmappedArrayGetSparseProperties(<cydriver.CUDA_ARRAY_SPARSE_PROPERTIES*>sparseProperties._ptr, cymipmap)
-    return (CUresult(err), sparseProperties)
-{{endif}}
-
-{{if 'cuArrayGetMemoryRequirements' in found_functions}}
-
-@cython.embedsignature(True)
-def cuArrayGetMemoryRequirements(array, device):
-    """ Returns the memory requirements of a CUDA array.
-
-    Returns the memory requirements of a CUDA array in `memoryRequirements`
-    If the CUDA array is not allocated with flag
-    :py:obj:`~.CUDA_ARRAY3D_DEFERRED_MAPPING`
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be returned.
-
-    The returned value in :py:obj:`~.CUDA_ARRAY_MEMORY_REQUIREMENTS.size`
-    represents the total size of the CUDA array. The returned value in
-    :py:obj:`~.CUDA_ARRAY_MEMORY_REQUIREMENTS.alignment` represents the
-    alignment necessary for mapping the CUDA array.
-
-    Parameters
-    ----------
-    array : :py:obj:`~.CUarray`
-        CUDA array to get the memory requirements of
-    device : :py:obj:`~.CUdevice`
-        Device to get the memory requirements for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    memoryRequirements : :py:obj:`~.CUDA_ARRAY_MEMORY_REQUIREMENTS`
-        Pointer to :py:obj:`~.CUDA_ARRAY_MEMORY_REQUIREMENTS`
-
-    See Also
-    --------
-    :py:obj:`~.cuMipmappedArrayGetMemoryRequirements`, :py:obj:`~.cuMemMapArrayAsync`
-    """
-    cdef cydriver.CUdevice cydevice
-    if device is None:
-        cydevice = <cydriver.CUdevice>0
-    elif isinstance(device, (CUdevice,)):
-        pdevice = int(device)
-        cydevice = <cydriver.CUdevice>pdevice
-    else:
-        pdevice = int(CUdevice(device))
-        cydevice = <cydriver.CUdevice>pdevice
-    cdef cydriver.CUarray cyarray
-    if array is None:
-        cyarray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(array, (CUarray,)):
-        parray = int(array)
-        cyarray = <cydriver.CUarray><void_ptr>parray
-    else:
-        parray = int(CUarray(array))
-        cyarray = <cydriver.CUarray><void_ptr>parray
-    cdef CUDA_ARRAY_MEMORY_REQUIREMENTS memoryRequirements = CUDA_ARRAY_MEMORY_REQUIREMENTS()
-    err = cydriver.cuArrayGetMemoryRequirements(<cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS*>memoryRequirements._ptr, cyarray, cydevice)
-    return (CUresult(err), memoryRequirements)
-{{endif}}
-
-{{if 'cuMipmappedArrayGetMemoryRequirements' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMipmappedArrayGetMemoryRequirements(mipmap, device):
-    """ Returns the memory requirements of a CUDA mipmapped array.
-
-    Returns the memory requirements of a CUDA mipmapped array in
-    `memoryRequirements` If the CUDA mipmapped array is not allocated with
-    flag :py:obj:`~.CUDA_ARRAY3D_DEFERRED_MAPPING`
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be returned.
-
-    The returned value in :py:obj:`~.CUDA_ARRAY_MEMORY_REQUIREMENTS.size`
-    represents the total size of the CUDA mipmapped array. The returned
-    value in :py:obj:`~.CUDA_ARRAY_MEMORY_REQUIREMENTS.alignment`
-    represents the alignment necessary for mapping the CUDA mipmapped
-    array.
-
-    Parameters
-    ----------
-    mipmap : :py:obj:`~.CUmipmappedArray`
-        CUDA mipmapped array to get the memory requirements of
-    device : :py:obj:`~.CUdevice`
-        Device to get the memory requirements for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    memoryRequirements : :py:obj:`~.CUDA_ARRAY_MEMORY_REQUIREMENTS`
-        Pointer to :py:obj:`~.CUDA_ARRAY_MEMORY_REQUIREMENTS`
-
-    See Also
-    --------
-    :py:obj:`~.cuArrayGetMemoryRequirements`, :py:obj:`~.cuMemMapArrayAsync`
-    """
-    cdef cydriver.CUdevice cydevice
-    if device is None:
-        cydevice = <cydriver.CUdevice>0
-    elif isinstance(device, (CUdevice,)):
-        pdevice = int(device)
-        cydevice = <cydriver.CUdevice>pdevice
-    else:
-        pdevice = int(CUdevice(device))
-        cydevice = <cydriver.CUdevice>pdevice
-    cdef cydriver.CUmipmappedArray cymipmap
-    if mipmap is None:
-        cymipmap = <cydriver.CUmipmappedArray><void_ptr>0
-    elif isinstance(mipmap, (CUmipmappedArray,)):
-        pmipmap = int(mipmap)
-        cymipmap = <cydriver.CUmipmappedArray><void_ptr>pmipmap
-    else:
-        pmipmap = int(CUmipmappedArray(mipmap))
-        cymipmap = <cydriver.CUmipmappedArray><void_ptr>pmipmap
-    cdef CUDA_ARRAY_MEMORY_REQUIREMENTS memoryRequirements = CUDA_ARRAY_MEMORY_REQUIREMENTS()
-    err = cydriver.cuMipmappedArrayGetMemoryRequirements(<cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS*>memoryRequirements._ptr, cymipmap, cydevice)
-    return (CUresult(err), memoryRequirements)
-{{endif}}
-
-{{if 'cuArrayGetPlane' in found_functions}}
-
-@cython.embedsignature(True)
-def cuArrayGetPlane(hArray, unsigned int planeIdx):
-    """ Gets a CUDA array plane from a CUDA array.
-
-    Returns in `pPlaneArray` a CUDA array that represents a single format
-    plane of the CUDA array `hArray`.
-
-    If `planeIdx` is greater than the maximum number of planes in this
-    array or if the array does not have a multi-planar format e.g:
-    :py:obj:`~.CU_AD_FORMAT_NV12`, then
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    Note that if the `hArray` has format :py:obj:`~.CU_AD_FORMAT_NV12`,
-    then passing in 0 for `planeIdx` returns a CUDA array of the same size
-    as `hArray` but with one channel and
-    :py:obj:`~.CU_AD_FORMAT_UNSIGNED_INT8` as its format. If 1 is passed
-    for `planeIdx`, then the returned CUDA array has half the height and
-    width of `hArray` with two channels and
-    :py:obj:`~.CU_AD_FORMAT_UNSIGNED_INT8` as its format.
-
-    Parameters
-    ----------
-    hArray : :py:obj:`~.CUarray`
-        Multiplanar CUDA array
-    planeIdx : unsigned int
-        Plane index
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-    pPlaneArray : :py:obj:`~.CUarray`
-        Returned CUDA array referenced by the `planeIdx`
-
-    See Also
-    --------
-    :py:obj:`~.cuArrayCreate`, :py:obj:`~.cudaArrayGetPlane`
-    """
-    cdef cydriver.CUarray cyhArray
-    if hArray is None:
-        cyhArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(hArray, (CUarray,)):
-        phArray = int(hArray)
-        cyhArray = <cydriver.CUarray><void_ptr>phArray
-    else:
-        phArray = int(CUarray(hArray))
-        cyhArray = <cydriver.CUarray><void_ptr>phArray
-    cdef CUarray pPlaneArray = CUarray()
-    err = cydriver.cuArrayGetPlane(<cydriver.CUarray*>pPlaneArray._ptr, cyhArray, planeIdx)
-    return (CUresult(err), pPlaneArray)
-{{endif}}
-
-{{if 'cuArrayDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuArrayDestroy(hArray):
-    """ Destroys a CUDA array.
-
-    Destroys the CUDA array `hArray`.
-
-    Parameters
-    ----------
-    hArray : :py:obj:`~.CUarray`
-        Array to destroy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_ARRAY_IS_MAPPED`, :py:obj:`~.CUDA_ERROR_CONTEXT_IS_DESTROYED`
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaFreeArray`
-    """
-    cdef cydriver.CUarray cyhArray
-    if hArray is None:
-        cyhArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(hArray, (CUarray,)):
-        phArray = int(hArray)
-        cyhArray = <cydriver.CUarray><void_ptr>phArray
-    else:
-        phArray = int(CUarray(hArray))
-        cyhArray = <cydriver.CUarray><void_ptr>phArray
-    err = cydriver.cuArrayDestroy(cyhArray)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuArray3DCreate_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuArray3DCreate(pAllocateArray : Optional[CUDA_ARRAY3D_DESCRIPTOR]):
-    """ Creates a 3D CUDA array.
-
-    Creates a CUDA array according to the
-    :py:obj:`~.CUDA_ARRAY3D_DESCRIPTOR` structure `pAllocateArray` and
-    returns a handle to the new CUDA array in `*pHandle`. The
-    :py:obj:`~.CUDA_ARRAY3D_DESCRIPTOR` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - `Width`, `Height`, and `Depth` are the width, height, and depth of
-      the CUDA array (in elements); the following types of CUDA arrays can
-      be allocated:
-
-      - A 1D array is allocated if `Height` and `Depth` extents are both
-        zero.
-
-      - A 2D array is allocated if only `Depth` extent is zero.
-
-      - A 3D array is allocated if all three extents are non-zero.
-
-      - A 1D layered CUDA array is allocated if only `Height` is zero and
-        the :py:obj:`~.CUDA_ARRAY3D_LAYERED` flag is set. Each layer is a
-        1D array. The number of layers is determined by the depth extent.
-
-      - A 2D layered CUDA array is allocated if all three extents are non-
-        zero and the :py:obj:`~.CUDA_ARRAY3D_LAYERED` flag is set. Each
-        layer is a 2D array. The number of layers is determined by the
-        depth extent.
-
-      - A cubemap CUDA array is allocated if all three extents are non-zero
-        and the :py:obj:`~.CUDA_ARRAY3D_CUBEMAP` flag is set. `Width` must
-        be equal to `Height`, and `Depth` must be six. A cubemap is a
-        special type of 2D layered CUDA array, where the six layers
-        represent the six faces of a cube. The order of the six layers in
-        memory is the same as that listed in
-        :py:obj:`~.CUarray_cubemap_face`.
-
-      - A cubemap layered CUDA array is allocated if all three extents are
-        non-zero, and both, :py:obj:`~.CUDA_ARRAY3D_CUBEMAP` and
-        :py:obj:`~.CUDA_ARRAY3D_LAYERED` flags are set. `Width` must be
-        equal to `Height`, and `Depth` must be a multiple of six. A cubemap
-        layered CUDA array is a special type of 2D layered CUDA array that
-        consists of a collection of cubemaps. The first six layers
-        represent the first cubemap, the next six layers form the second
-        cubemap, and so on.
-
-    - :py:obj:`~.Format` specifies the format of the elements;
-      :py:obj:`~.CUarray_format` is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - `NumChannels` specifies the number of packed components per CUDA
-      array element; it may be 1, 2, or 4;
-
-    - :py:obj:`~.Flags` may be set to
-
-      - :py:obj:`~.CUDA_ARRAY3D_LAYERED` to enable creation of layered CUDA
-        arrays. If this flag is set, `Depth` specifies the number of
-        layers, not the depth of a 3D array.
-
-      - :py:obj:`~.CUDA_ARRAY3D_SURFACE_LDST` to enable surface references
-        to be bound to the CUDA array. If this flag is not set,
-        :py:obj:`~.cuSurfRefSetArray` will fail when attempting to bind the
-        CUDA array to a surface reference.
-
-      - :py:obj:`~.CUDA_ARRAY3D_CUBEMAP` to enable creation of cubemaps. If
-        this flag is set, `Width` must be equal to `Height`, and `Depth`
-        must be six. If the :py:obj:`~.CUDA_ARRAY3D_LAYERED` flag is also
-        set, then `Depth` must be a multiple of six.
-
-      - :py:obj:`~.CUDA_ARRAY3D_TEXTURE_GATHER` to indicate that the CUDA
-        array will be used for texture gather. Texture gather can only be
-        performed on 2D CUDA arrays.
-
-    `Width`, `Height` and `Depth` must meet certain size requirements as
-    listed in the following table. All values are specified in elements.
-    Note that for brevity's sake, the full name of the device attribute is
-    not specified. For ex., TEXTURE1D_WIDTH refers to the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH`.
-
-    Note that 2D CUDA arrays have different size requirements if the
-    :py:obj:`~.CUDA_ARRAY3D_TEXTURE_GATHER` flag is set. `Width` and
-    `Height` must not be greater than
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH` and
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT`
-    respectively, in that case.
-
-    **View CUDA Toolkit Documentation for a table example**
-
-    Here are examples of CUDA array descriptions:
-
-    Description for a CUDA array of 2048 floats:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Description for a 64 x 64 CUDA array of floats:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Description for a `width` x `height` x `depth` CUDA array of 64-bit,
-    4x16-bit float16's:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Parameters
-    ----------
-    pAllocateArray : :py:obj:`~.CUDA_ARRAY3D_DESCRIPTOR`
-        3D array descriptor
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    pHandle : :py:obj:`~.CUarray`
-        Returned array
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMalloc3DArray`
-    """
-    cdef CUarray pHandle = CUarray()
-    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR* cypAllocateArray_ptr = pAllocateArray._ptr if pAllocateArray != None else NULL
-    err = cydriver.cuArray3DCreate(<cydriver.CUarray*>pHandle._ptr, cypAllocateArray_ptr)
-    return (CUresult(err), pHandle)
-{{endif}}
-
-{{if 'cuArray3DGetDescriptor_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuArray3DGetDescriptor(hArray):
-    """ Get a 3D CUDA array descriptor.
-
-    Returns in `*pArrayDescriptor` a descriptor containing information on
-    the format and dimensions of the CUDA array `hArray`. It is useful for
-    subroutines that have been passed a CUDA array, but need to know the
-    CUDA array parameters for validation or other purposes.
-
-    This function may be called on 1D and 2D arrays, in which case the
-    `Height` and/or `Depth` members of the descriptor struct will be set to
-    0.
-
-    Parameters
-    ----------
-    hArray : :py:obj:`~.CUarray`
-        3D array to get descriptor of
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_CONTEXT_IS_DESTROYED`
-    pArrayDescriptor : :py:obj:`~.CUDA_ARRAY3D_DESCRIPTOR`
-        Returned 3D array descriptor
-
-    See Also
-    --------
-    :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaArrayGetInfo`
-    """
-    cdef cydriver.CUarray cyhArray
-    if hArray is None:
-        cyhArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(hArray, (CUarray,)):
-        phArray = int(hArray)
-        cyhArray = <cydriver.CUarray><void_ptr>phArray
-    else:
-        phArray = int(CUarray(hArray))
-        cyhArray = <cydriver.CUarray><void_ptr>phArray
-    cdef CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor = CUDA_ARRAY3D_DESCRIPTOR()
-    err = cydriver.cuArray3DGetDescriptor(<cydriver.CUDA_ARRAY3D_DESCRIPTOR*>pArrayDescriptor._ptr, cyhArray)
-    return (CUresult(err), pArrayDescriptor)
-{{endif}}
-
-{{if 'cuMipmappedArrayCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMipmappedArrayCreate(pMipmappedArrayDesc : Optional[CUDA_ARRAY3D_DESCRIPTOR], unsigned int numMipmapLevels):
-    """ Creates a CUDA mipmapped array.
-
-    Creates a CUDA mipmapped array according to the
-    :py:obj:`~.CUDA_ARRAY3D_DESCRIPTOR` structure `pMipmappedArrayDesc` and
-    returns a handle to the new CUDA mipmapped array in `*pHandle`.
-    `numMipmapLevels` specifies the number of mipmap levels to be
-    allocated. This value is clamped to the range [1, 1 +
-    floor(log2(max(width, height, depth)))].
-
-    The :py:obj:`~.CUDA_ARRAY3D_DESCRIPTOR` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - `Width`, `Height`, and `Depth` are the width, height, and depth of
-      the CUDA array (in elements); the following types of CUDA arrays can
-      be allocated:
-
-      - A 1D mipmapped array is allocated if `Height` and `Depth` extents
-        are both zero.
-
-      - A 2D mipmapped array is allocated if only `Depth` extent is zero.
-
-      - A 3D mipmapped array is allocated if all three extents are non-
-        zero.
-
-      - A 1D layered CUDA mipmapped array is allocated if only `Height` is
-        zero and the :py:obj:`~.CUDA_ARRAY3D_LAYERED` flag is set. Each
-        layer is a 1D array. The number of layers is determined by the
-        depth extent.
-
-      - A 2D layered CUDA mipmapped array is allocated if all three extents
-        are non-zero and the :py:obj:`~.CUDA_ARRAY3D_LAYERED` flag is set.
-        Each layer is a 2D array. The number of layers is determined by the
-        depth extent.
-
-      - A cubemap CUDA mipmapped array is allocated if all three extents
-        are non-zero and the :py:obj:`~.CUDA_ARRAY3D_CUBEMAP` flag is set.
-        `Width` must be equal to `Height`, and `Depth` must be six. A
-        cubemap is a special type of 2D layered CUDA array, where the six
-        layers represent the six faces of a cube. The order of the six
-        layers in memory is the same as that listed in
-        :py:obj:`~.CUarray_cubemap_face`.
-
-      - A cubemap layered CUDA mipmapped array is allocated if all three
-        extents are non-zero, and both, :py:obj:`~.CUDA_ARRAY3D_CUBEMAP`
-        and :py:obj:`~.CUDA_ARRAY3D_LAYERED` flags are set. `Width` must be
-        equal to `Height`, and `Depth` must be a multiple of six. A cubemap
-        layered CUDA array is a special type of 2D layered CUDA array that
-        consists of a collection of cubemaps. The first six layers
-        represent the first cubemap, the next six layers form the second
-        cubemap, and so on.
-
-    - :py:obj:`~.Format` specifies the format of the elements;
-      :py:obj:`~.CUarray_format` is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - `NumChannels` specifies the number of packed components per CUDA
-      array element; it may be 1, 2, or 4;
-
-    - :py:obj:`~.Flags` may be set to
-
-      - :py:obj:`~.CUDA_ARRAY3D_LAYERED` to enable creation of layered CUDA
-        mipmapped arrays. If this flag is set, `Depth` specifies the number
-        of layers, not the depth of a 3D array.
-
-      - :py:obj:`~.CUDA_ARRAY3D_SURFACE_LDST` to enable surface references
-        to be bound to individual mipmap levels of the CUDA mipmapped
-        array. If this flag is not set, :py:obj:`~.cuSurfRefSetArray` will
-        fail when attempting to bind a mipmap level of the CUDA mipmapped
-        array to a surface reference.
-
-    - :py:obj:`~.CUDA_ARRAY3D_CUBEMAP` to enable creation of mipmapped
-    cubemaps. If this flag is set, `Width` must be equal to `Height`, and
-    `Depth` must be six. If the :py:obj:`~.CUDA_ARRAY3D_LAYERED` flag is
-    also set, then `Depth` must be a multiple of six.
-
-      - :py:obj:`~.CUDA_ARRAY3D_TEXTURE_GATHER` to indicate that the CUDA
-        mipmapped array will be used for texture gather. Texture gather can
-        only be performed on 2D CUDA mipmapped arrays.
-
-    `Width`, `Height` and `Depth` must meet certain size requirements as
-    listed in the following table. All values are specified in elements.
-    Note that for brevity's sake, the full name of the device attribute is
-    not specified. For ex., TEXTURE1D_MIPMAPPED_WIDTH refers to the device
-    attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH`.
-
-    **View CUDA Toolkit Documentation for a table example**
-
-    Parameters
-    ----------
-    pMipmappedArrayDesc : :py:obj:`~.CUDA_ARRAY3D_DESCRIPTOR`
-        mipmapped array descriptor
-    numMipmapLevels : unsigned int
-        Number of mipmap levels
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    pHandle : :py:obj:`~.CUmipmappedArray`
-        Returned mipmapped array
-
-    See Also
-    --------
-    :py:obj:`~.cuMipmappedArrayDestroy`, :py:obj:`~.cuMipmappedArrayGetLevel`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cudaMallocMipmappedArray`
-    """
-    cdef CUmipmappedArray pHandle = CUmipmappedArray()
-    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR* cypMipmappedArrayDesc_ptr = pMipmappedArrayDesc._ptr if pMipmappedArrayDesc != None else NULL
-    err = cydriver.cuMipmappedArrayCreate(<cydriver.CUmipmappedArray*>pHandle._ptr, cypMipmappedArrayDesc_ptr, numMipmapLevels)
-    return (CUresult(err), pHandle)
-{{endif}}
-
-{{if 'cuMipmappedArrayGetLevel' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMipmappedArrayGetLevel(hMipmappedArray, unsigned int level):
-    """ Gets a mipmap level of a CUDA mipmapped array.
-
-    Returns in `*pLevelArray` a CUDA array that represents a single mipmap
-    level of the CUDA mipmapped array `hMipmappedArray`.
-
-    If `level` is greater than the maximum number of levels in this
-    mipmapped array, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    Parameters
-    ----------
-    hMipmappedArray : :py:obj:`~.CUmipmappedArray`
-        CUDA mipmapped array
-    level : unsigned int
-        Mipmap level
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-    pLevelArray : :py:obj:`~.CUarray`
-        Returned mipmap level CUDA array
-
-    See Also
-    --------
-    :py:obj:`~.cuMipmappedArrayCreate`, :py:obj:`~.cuMipmappedArrayDestroy`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cudaGetMipmappedArrayLevel`
-    """
-    cdef cydriver.CUmipmappedArray cyhMipmappedArray
-    if hMipmappedArray is None:
-        cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>0
-    elif isinstance(hMipmappedArray, (CUmipmappedArray,)):
-        phMipmappedArray = int(hMipmappedArray)
-        cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>phMipmappedArray
-    else:
-        phMipmappedArray = int(CUmipmappedArray(hMipmappedArray))
-        cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>phMipmappedArray
-    cdef CUarray pLevelArray = CUarray()
-    err = cydriver.cuMipmappedArrayGetLevel(<cydriver.CUarray*>pLevelArray._ptr, cyhMipmappedArray, level)
-    return (CUresult(err), pLevelArray)
-{{endif}}
-
-{{if 'cuMipmappedArrayDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMipmappedArrayDestroy(hMipmappedArray):
-    """ Destroys a CUDA mipmapped array.
-
-    Destroys the CUDA mipmapped array `hMipmappedArray`.
-
-    Parameters
-    ----------
-    hMipmappedArray : :py:obj:`~.CUmipmappedArray`
-        Mipmapped array to destroy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_ARRAY_IS_MAPPED`, :py:obj:`~.CUDA_ERROR_CONTEXT_IS_DESTROYED`
-
-    See Also
-    --------
-    :py:obj:`~.cuMipmappedArrayCreate`, :py:obj:`~.cuMipmappedArrayGetLevel`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cudaFreeMipmappedArray`
-    """
-    cdef cydriver.CUmipmappedArray cyhMipmappedArray
-    if hMipmappedArray is None:
-        cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>0
-    elif isinstance(hMipmappedArray, (CUmipmappedArray,)):
-        phMipmappedArray = int(hMipmappedArray)
-        cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>phMipmappedArray
-    else:
-        phMipmappedArray = int(CUmipmappedArray(hMipmappedArray))
-        cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>phMipmappedArray
-    err = cydriver.cuMipmappedArrayDestroy(cyhMipmappedArray)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemGetHandleForAddressRange' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemGetHandleForAddressRange(dptr, size_t size, handleType not None : CUmemRangeHandleType, unsigned long long flags):
-    """ Retrieve handle for an address range.
-
-    Get a handle of the specified type to an address range. The address
-    range must have been obtained by a prior call to either
-    :py:obj:`~.cuMemAlloc` or :py:obj:`~.cuMemAddressReserve`. If the
-    address range was obtained via :py:obj:`~.cuMemAddressReserve`, it must
-    also be fully mapped via :py:obj:`~.cuMemMap`. The address range must
-    have been obtained by a prior call to either :py:obj:`~.cuMemAllocHost`
-    or :py:obj:`~.cuMemHostAlloc` on Tegra.
-
-    Users must ensure the `dptr` and `size` are aligned to the host page
-    size.
-
-    When requesting
-    CUmemRangeHandleType::CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD, users are
-    expected to query for dma_buf support for the platform by using
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED` device attribute
-    before calling this API. The `handle` will be interpreted as a pointer
-    to an integer to store the dma_buf file descriptor. Users must ensure
-    the entire address range is backed and mapped when the address range is
-    allocated by :py:obj:`~.cuMemAddressReserve`. All the physical
-    allocations backing the address range must be resident on the same
-    device and have identical allocation properties. Users are also
-    expected to retrieve a new handle every time the underlying physical
-    allocation(s) corresponding to a previously queried VA range are
-    changed.
-
-    Parameters
-    ----------
-    dptr : :py:obj:`~.CUdeviceptr`
-        Pointer to a valid CUDA device allocation. Must be aligned to host
-        page size.
-    size : size_t
-        Length of the address range. Must be aligned to host page size.
-    handleType : :py:obj:`~.CUmemRangeHandleType`
-        Type of handle requested (defines type and size of the `handle`
-        output parameter)
-    flags : unsigned long long
-        Reserved, must be zero
-
-    Returns
-    -------
-    CUresult
-        CUDA_SUCCESS CUDA_ERROR_INVALID_VALUE CUDA_ERROR_NOT_SUPPORTED
-    handle : Any
-        Pointer to the location where the returned handle will be stored.
-    """
-    cdef cydriver.CUdeviceptr cydptr
-    if dptr is None:
-        cydptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dptr, (CUdeviceptr,)):
-        pdptr = int(dptr)
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    else:
-        pdptr = int(CUdeviceptr(dptr))
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    cdef int handle = 0
-    cdef void* cyhandle_ptr = <void*>&handle
-    cdef cydriver.CUmemRangeHandleType cyhandleType = handleType.value
-    err = cydriver.cuMemGetHandleForAddressRange(cyhandle_ptr, cydptr, size, cyhandleType, flags)
-    return (CUresult(err), handle)
-{{endif}}
-
-{{if 'cuMemAddressReserve' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemAddressReserve(size_t size, size_t alignment, addr, unsigned long long flags):
-    """ Allocate an address range reservation.
-
-    Reserves a virtual address range based on the given parameters, giving
-    the starting address of the range in `ptr`. This API requires a system
-    that supports UVA. The size and address parameters must be a multiple
-    of the host page size and the alignment must be a power of two or zero
-    for default alignment.
-
-    Parameters
-    ----------
-    size : size_t
-        Size of the reserved virtual address range requested
-    alignment : size_t
-        Alignment of the reserved virtual address range requested
-    addr : :py:obj:`~.CUdeviceptr`
-        Fixed starting address range requested
-    flags : unsigned long long
-        Currently unused, must be zero
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    ptr : :py:obj:`~.CUdeviceptr`
-        Resulting pointer to start of virtual address range allocated
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAddressFree`
-    """
-    cdef cydriver.CUdeviceptr cyaddr
-    if addr is None:
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(addr, (CUdeviceptr,)):
-        paddr = int(addr)
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>paddr
-    else:
-        paddr = int(CUdeviceptr(addr))
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>paddr
-    cdef CUdeviceptr ptr = CUdeviceptr()
-    err = cydriver.cuMemAddressReserve(<cydriver.CUdeviceptr*>ptr._ptr, size, alignment, cyaddr, flags)
-    return (CUresult(err), ptr)
-{{endif}}
-
-{{if 'cuMemAddressFree' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemAddressFree(ptr, size_t size):
-    """ Free an address range reservation.
-
-    Frees a virtual address range reserved by cuMemAddressReserve. The size
-    must match what was given to memAddressReserve and the ptr given must
-    match what was returned from memAddressReserve.
-
-    Parameters
-    ----------
-    ptr : :py:obj:`~.CUdeviceptr`
-        Starting address of the virtual address range to free
-    size : size_t
-        Size of the virtual address region to free
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAddressReserve`
-    """
-    cdef cydriver.CUdeviceptr cyptr
-    if ptr is None:
-        cyptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(ptr, (CUdeviceptr,)):
-        pptr = int(ptr)
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    else:
-        pptr = int(CUdeviceptr(ptr))
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    err = cydriver.cuMemAddressFree(cyptr, size)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemCreate(size_t size, prop : Optional[CUmemAllocationProp], unsigned long long flags):
-    """ Create a CUDA memory handle representing a memory allocation of a given size described by the given properties.
-
-    This creates a memory allocation on the target device specified through
-    the `prop` structure. The created allocation will not have any device
-    or host mappings. The generic memory `handle` for the allocation can be
-    mapped to the address space of calling process via
-    :py:obj:`~.cuMemMap`. This handle cannot be transmitted directly to
-    other processes (see :py:obj:`~.cuMemExportToShareableHandle`). On
-    Windows, the caller must also pass an LPSECURITYATTRIBUTE in `prop` to
-    be associated with this handle which limits or allows access to this
-    handle for a recipient process (see
-    :py:obj:`~.CUmemAllocationProp.win32HandleMetaData` for more). The
-    `size` of this allocation must be a multiple of the the value given via
-    :py:obj:`~.cuMemGetAllocationGranularity` with the
-    :py:obj:`~.CU_MEM_ALLOC_GRANULARITY_MINIMUM` flag. To create a CPU
-    allocation targeting a specific host NUMA node, applications must set
-    :py:obj:`~.CUmemAllocationProp`::CUmemLocation::type to
-    :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` and
-    :py:obj:`~.CUmemAllocationProp`::CUmemLocation::id must specify the
-    NUMA ID of the CPU. On systems where NUMA is not available
-    :py:obj:`~.CUmemAllocationProp`::CUmemLocation::id must be set to 0.
-    Specifying :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT` or
-    :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST` as the
-    :py:obj:`~.CUmemLocation.type` will result in
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE`.
-
-    Applications can set
-    :py:obj:`~.CUmemAllocationProp.requestedHandleTypes` to
-    :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` in order to create allocations
-    suitable for sharing within an IMEX domain. An IMEX domain is either an
-    OS instance or a group of securely connected OS instances using the
-    NVIDIA IMEX daemon. An IMEX channel is a global resource within the
-    IMEX domain that represents a logical entity that aims to provide fine
-    grained accessibility control for the participating processes. When
-    exporter and importer CUDA processes have been granted access to the
-    same IMEX channel, they can securely share memory. If the allocating
-    process does not have access setup for an IMEX channel, attempting to
-    create a :py:obj:`~.CUmemGenericAllocationHandle` with
-    :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` will result in
-    :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`. The nvidia-modprobe CLI provides
-    more information regarding setting up of IMEX channels.
-
-    If :py:obj:`~.CUmemAllocationProp`::allocFlags::usage contains
-    :py:obj:`~.CU_MEM_CREATE_USAGE_TILE_POOL` flag then the memory
-    allocation is intended only to be used as backing tile pool for sparse
-    CUDA arrays and sparse CUDA mipmapped arrays. (see
-    :py:obj:`~.cuMemMapArrayAsync`).
-
-    Parameters
-    ----------
-    size : size_t
-        Size of the allocation requested
-    prop : :py:obj:`~.CUmemAllocationProp`
-        Properties of the allocation to create.
-    flags : unsigned long long
-        flags for future use, must be zero now.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    handle : :py:obj:`~.CUmemGenericAllocationHandle`
-        Value of handle returned. All operations on this allocation are to
-        be performed using this handle.
-
-    See Also
-    --------
-    :py:obj:`~.cuMemRelease`, :py:obj:`~.cuMemExportToShareableHandle`, :py:obj:`~.cuMemImportFromShareableHandle`
-    """
-    cdef CUmemGenericAllocationHandle handle = CUmemGenericAllocationHandle()
-    cdef cydriver.CUmemAllocationProp* cyprop_ptr = prop._ptr if prop != None else NULL
-    err = cydriver.cuMemCreate(<cydriver.CUmemGenericAllocationHandle*>handle._ptr, size, cyprop_ptr, flags)
-    return (CUresult(err), handle)
-{{endif}}
-
-{{if 'cuMemRelease' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemRelease(handle):
-    """ Release a memory handle representing a memory allocation which was previously allocated through cuMemCreate.
-
-    Frees the memory that was allocated on a device through cuMemCreate.
-
-    The memory allocation will be freed when all outstanding mappings to
-    the memory are unmapped and when all outstanding references to the
-    handle (including it's shareable counterparts) are also released. The
-    generic memory handle can be freed when there are still outstanding
-    mappings made with this handle. Each time a recipient process imports a
-    shareable handle, it needs to pair it with :py:obj:`~.cuMemRelease` for
-    the handle to be freed. If `handle` is not a valid handle the behavior
-    is undefined.
-
-    Parameters
-    ----------
-    handle : :py:obj:`~.CUmemGenericAllocationHandle`
-        Value of handle which was returned previously by cuMemCreate.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemCreate`
-    """
-    cdef cydriver.CUmemGenericAllocationHandle cyhandle
-    if handle is None:
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>0
-    elif isinstance(handle, (CUmemGenericAllocationHandle,)):
-        phandle = int(handle)
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>phandle
-    else:
-        phandle = int(CUmemGenericAllocationHandle(handle))
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>phandle
-    err = cydriver.cuMemRelease(cyhandle)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemMap' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemMap(ptr, size_t size, size_t offset, handle, unsigned long long flags):
-    """ Maps an allocation handle to a reserved virtual address range.
-
-    Maps bytes of memory represented by `handle` starting from byte
-    `offset` to `size` to address range [`addr`, `addr` + `size`]. This
-    range must be an address reservation previously reserved with
-    :py:obj:`~.cuMemAddressReserve`, and `offset` + `size` must be less
-    than the size of the memory allocation. Both `ptr`, `size`, and
-    `offset` must be a multiple of the value given via
-    :py:obj:`~.cuMemGetAllocationGranularity` with the
-    :py:obj:`~.CU_MEM_ALLOC_GRANULARITY_MINIMUM` flag. If `handle`
-    represents a multicast object, `ptr`, `size` and `offset` must be
-    aligned to the value returned by :py:obj:`~.cuMulticastGetGranularity`
-    with the flag :py:obj:`~.CU_MULTICAST_MINIMUM_GRANULARITY`. For best
-    performance however, it is recommended that `ptr`, `size` and `offset`
-    be aligned to the value returned by
-    :py:obj:`~.cuMulticastGetGranularity` with the flag
-    :py:obj:`~.CU_MULTICAST_RECOMMENDED_GRANULARITY`.
-
-    Please note calling :py:obj:`~.cuMemMap` does not make the address
-    accessible, the caller needs to update accessibility of a contiguous
-    mapped VA range by calling :py:obj:`~.cuMemSetAccess`.
-
-    Once a recipient process obtains a shareable memory handle from
-    :py:obj:`~.cuMemImportFromShareableHandle`, the process must use
-    :py:obj:`~.cuMemMap` to map the memory into its address ranges before
-    setting accessibility with :py:obj:`~.cuMemSetAccess`.
-
-    :py:obj:`~.cuMemMap` can only create mappings on VA range reservations
-    that are not currently mapped.
-
-    Parameters
-    ----------
-    ptr : :py:obj:`~.CUdeviceptr`
-        Address where memory will be mapped.
-    size : size_t
-        Size of the memory mapping.
-    offset : size_t
-        Offset into the memory represented by
-    handle : :py:obj:`~.CUmemGenericAllocationHandle`
-        Handle to a shareable memory
-    flags : unsigned long long
-        flags for future use, must be zero now.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemUnmap`, :py:obj:`~.cuMemSetAccess`, :py:obj:`~.cuMemCreate`, :py:obj:`~.cuMemAddressReserve`, :py:obj:`~.cuMemImportFromShareableHandle`
-    """
-    cdef cydriver.CUmemGenericAllocationHandle cyhandle
-    if handle is None:
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>0
-    elif isinstance(handle, (CUmemGenericAllocationHandle,)):
-        phandle = int(handle)
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>phandle
-    else:
-        phandle = int(CUmemGenericAllocationHandle(handle))
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>phandle
-    cdef cydriver.CUdeviceptr cyptr
-    if ptr is None:
-        cyptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(ptr, (CUdeviceptr,)):
-        pptr = int(ptr)
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    else:
-        pptr = int(CUdeviceptr(ptr))
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    err = cydriver.cuMemMap(cyptr, size, offset, cyhandle, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemMapArrayAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemMapArrayAsync(mapInfoList : Optional[Tuple[CUarrayMapInfo] | List[CUarrayMapInfo]], unsigned int count, hStream):
-    """ Maps or unmaps subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays.
-
-    Performs map or unmap operations on subregions of sparse CUDA arrays
-    and sparse CUDA mipmapped arrays. Each operation is specified by a
-    :py:obj:`~.CUarrayMapInfo` entry in the `mapInfoList` array of size
-    `count`. The structure :py:obj:`~.CUarrayMapInfo` is defined as follow:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.CUarrayMapInfo.resourceType` specifies the type of
-    resource to be operated on. If :py:obj:`~.CUarrayMapInfo.resourceType`
-    is set to :py:obj:`~.CUresourcetype`::CU_RESOURCE_TYPE_ARRAY then
-    :py:obj:`~.CUarrayMapInfo`::resource::array must be set to a valid
-    sparse CUDA array handle. The CUDA array must be either a 2D, 2D
-    layered or 3D CUDA array and must have been allocated using
-    :py:obj:`~.cuArrayCreate` or :py:obj:`~.cuArray3DCreate` with the flag
-    :py:obj:`~.CUDA_ARRAY3D_SPARSE` or
-    :py:obj:`~.CUDA_ARRAY3D_DEFERRED_MAPPING`. For CUDA arrays obtained
-    using :py:obj:`~.cuMipmappedArrayGetLevel`,
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be returned. If
-    :py:obj:`~.CUarrayMapInfo.resourceType` is set to
-    :py:obj:`~.CUresourcetype`::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY then
-    :py:obj:`~.CUarrayMapInfo`::resource::mipmap must be set to a valid
-    sparse CUDA mipmapped array handle. The CUDA mipmapped array must be
-    either a 2D, 2D layered or 3D CUDA mipmapped array and must have been
-    allocated using :py:obj:`~.cuMipmappedArrayCreate` with the flag
-    :py:obj:`~.CUDA_ARRAY3D_SPARSE` or
-    :py:obj:`~.CUDA_ARRAY3D_DEFERRED_MAPPING`.
-
-    :py:obj:`~.CUarrayMapInfo.subresourceType` specifies the type of
-    subresource within the resource.
-    :py:obj:`~.CUarraySparseSubresourceType_enum` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where
-    :py:obj:`~.CUarraySparseSubresourceType`::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL
-    indicates a sparse-miplevel which spans at least one tile in every
-    dimension. The remaining miplevels which are too small to span at least
-    one tile in any dimension constitute the mip tail region as indicated
-    by
-    :py:obj:`~.CUarraySparseSubresourceType`::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL
-    subresource type.
-
-    If :py:obj:`~.CUarrayMapInfo.subresourceType` is set to
-    :py:obj:`~.CUarraySparseSubresourceType`::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL
-    then :py:obj:`~.CUarrayMapInfo`::subresource::sparseLevel struct must
-    contain valid array subregion offsets and extents. The
-    :py:obj:`~.CUarrayMapInfo`::subresource::sparseLevel::offsetX,
-    :py:obj:`~.CUarrayMapInfo`::subresource::sparseLevel::offsetY and
-    :py:obj:`~.CUarrayMapInfo`::subresource::sparseLevel::offsetZ must
-    specify valid X, Y and Z offsets respectively. The
-    :py:obj:`~.CUarrayMapInfo`::subresource::sparseLevel::extentWidth,
-    :py:obj:`~.CUarrayMapInfo`::subresource::sparseLevel::extentHeight and
-    :py:obj:`~.CUarrayMapInfo`::subresource::sparseLevel::extentDepth must
-    specify valid width, height and depth extents respectively. These
-    offsets and extents must be aligned to the corresponding tile
-    dimension. For CUDA mipmapped arrays
-    :py:obj:`~.CUarrayMapInfo`::subresource::sparseLevel::level must
-    specify a valid mip level index. Otherwise, must be zero. For layered
-    CUDA arrays and layered CUDA mipmapped arrays
-    :py:obj:`~.CUarrayMapInfo`::subresource::sparseLevel::layer must
-    specify a valid layer index. Otherwise, must be zero.
-    :py:obj:`~.CUarrayMapInfo`::subresource::sparseLevel::offsetZ must be
-    zero and
-    :py:obj:`~.CUarrayMapInfo`::subresource::sparseLevel::extentDepth must
-    be set to 1 for 2D and 2D layered CUDA arrays and CUDA mipmapped
-    arrays. Tile extents can be obtained by calling
-    :py:obj:`~.cuArrayGetSparseProperties` and
-    :py:obj:`~.cuMipmappedArrayGetSparseProperties`
-
-    If :py:obj:`~.CUarrayMapInfo.subresourceType` is set to
-    :py:obj:`~.CUarraySparseSubresourceType`::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL
-    then :py:obj:`~.CUarrayMapInfo`::subresource::miptail struct must
-    contain valid mip tail offset in
-    :py:obj:`~.CUarrayMapInfo`::subresource::miptail::offset and size in
-    :py:obj:`~.CUarrayMapInfo`::subresource::miptail::size. Both, mip tail
-    offset and mip tail size must be aligned to the tile size. For layered
-    CUDA mipmapped arrays which don't have the flag
-    :py:obj:`~.CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL` set in
-    :py:obj:`~.CUDA_ARRAY_SPARSE_PROPERTIES.flags` as returned by
-    :py:obj:`~.cuMipmappedArrayGetSparseProperties`,
-    :py:obj:`~.CUarrayMapInfo`::subresource::miptail::layer must specify a
-    valid layer index. Otherwise, must be zero.
-
-    If :py:obj:`~.CUarrayMapInfo`::resource::array or
-    :py:obj:`~.CUarrayMapInfo`::resource::mipmap was created with
-    :py:obj:`~.CUDA_ARRAY3D_DEFERRED_MAPPING` flag set the
-    :py:obj:`~.CUarrayMapInfo.subresourceType` and the contents of
-    :py:obj:`~.CUarrayMapInfo`::subresource will be ignored.
-
-    :py:obj:`~.CUarrayMapInfo.memOperationType` specifies the type of
-    operation. :py:obj:`~.CUmemOperationType` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.CUarrayMapInfo.memOperationType` is set to
-    :py:obj:`~.CUmemOperationType`::CU_MEM_OPERATION_TYPE_MAP then the
-    subresource will be mapped onto the tile pool memory specified by
-    :py:obj:`~.CUarrayMapInfo`::memHandle at offset
-    :py:obj:`~.CUarrayMapInfo.offset`. The tile pool allocation has to be
-    created by specifying the :py:obj:`~.CU_MEM_CREATE_USAGE_TILE_POOL`
-    flag when calling :py:obj:`~.cuMemCreate`. Also,
-    :py:obj:`~.CUarrayMapInfo.memHandleType` must be set to
-    :py:obj:`~.CUmemHandleType`::CU_MEM_HANDLE_TYPE_GENERIC.
-
-    If :py:obj:`~.CUarrayMapInfo.memOperationType` is set to
-    :py:obj:`~.CUmemOperationType`::CU_MEM_OPERATION_TYPE_UNMAP then an
-    unmapping operation is performed. :py:obj:`~.CUarrayMapInfo`::memHandle
-    must be NULL.
-
-    :py:obj:`~.CUarrayMapInfo.deviceBitMask` specifies the list of devices
-    that must map or unmap physical memory. Currently, this mask must have
-    exactly one bit set, and the corresponding device must match the device
-    associated with the stream. If
-    :py:obj:`~.CUarrayMapInfo.memOperationType` is set to
-    :py:obj:`~.CUmemOperationType`::CU_MEM_OPERATION_TYPE_MAP, the device
-    must also match the device associated with the tile pool memory
-    allocation as specified by :py:obj:`~.CUarrayMapInfo`::memHandle.
-
-    :py:obj:`~.CUarrayMapInfo.flags` and
-    :py:obj:`~.CUarrayMapInfo.reserved`[] are unused and must be set to
-    zero.
-
-    Parameters
-    ----------
-    mapInfoList : List[:py:obj:`~.CUarrayMapInfo`]
-        List of :py:obj:`~.CUarrayMapInfo`
-    count : unsigned int
-        Count of :py:obj:`~.CUarrayMapInfo` in `mapInfoList`
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier for the stream to use for map or unmap operations
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMipmappedArrayCreate`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuMemCreate`, :py:obj:`~.cuArrayGetSparseProperties`, :py:obj:`~.cuMipmappedArrayGetSparseProperties`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    mapInfoList = [] if mapInfoList is None else mapInfoList
-    if not all(isinstance(_x, (CUarrayMapInfo,)) for _x in mapInfoList):
-        raise TypeError("Argument 'mapInfoList' is not instance of type (expected Tuple[cydriver.CUarrayMapInfo,] or List[cydriver.CUarrayMapInfo,]")
-    cdef cydriver.CUarrayMapInfo* cymapInfoList = NULL
-    if len(mapInfoList) > 0:
-        cymapInfoList = <cydriver.CUarrayMapInfo*> calloc(len(mapInfoList), sizeof(cydriver.CUarrayMapInfo))
-        if cymapInfoList is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(mapInfoList)) + 'x' + str(sizeof(cydriver.CUarrayMapInfo)))
-        for idx in range(len(mapInfoList)):
-            string.memcpy(&cymapInfoList[idx], (<CUarrayMapInfo>mapInfoList[idx])._ptr, sizeof(cydriver.CUarrayMapInfo))
-    if count > len(mapInfoList): raise RuntimeError("List is too small: " + str(len(mapInfoList)) + " < " + str(count))
-    err = cydriver.cuMemMapArrayAsync((<CUarrayMapInfo>mapInfoList[0])._ptr if len(mapInfoList) == 1 else cymapInfoList, count, cyhStream)
-    if cymapInfoList is not NULL:
-        free(cymapInfoList)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemUnmap' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemUnmap(ptr, size_t size):
-    """ Unmap the backing memory of a given address range.
-
-    The range must be the entire contiguous address range that was mapped
-    to. In other words, :py:obj:`~.cuMemUnmap` cannot unmap a sub-range of
-    an address range mapped by :py:obj:`~.cuMemCreate` /
-    :py:obj:`~.cuMemMap`. Any backing memory allocations will be freed if
-    there are no existing mappings and there are no unreleased memory
-    handles.
-
-    When :py:obj:`~.cuMemUnmap` returns successfully the address range is
-    converted to an address reservation and can be used for a future calls
-    to :py:obj:`~.cuMemMap`. Any new mapping to this virtual address will
-    need to have access granted through :py:obj:`~.cuMemSetAccess`, as all
-    mappings start with no accessibility setup.
-
-    Parameters
-    ----------
-    ptr : :py:obj:`~.CUdeviceptr`
-        Starting address for the virtual address range to unmap
-    size : size_t
-        Size of the virtual address range to unmap
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemCreate`, :py:obj:`~.cuMemAddressReserve`
-    """
-    cdef cydriver.CUdeviceptr cyptr
-    if ptr is None:
-        cyptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(ptr, (CUdeviceptr,)):
-        pptr = int(ptr)
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    else:
-        pptr = int(CUdeviceptr(ptr))
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    err = cydriver.cuMemUnmap(cyptr, size)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemSetAccess' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemSetAccess(ptr, size_t size, desc : Optional[Tuple[CUmemAccessDesc] | List[CUmemAccessDesc]], size_t count):
-    """ Set the access flags for each location specified in `desc` for the given virtual address range.
-
-    Given the virtual address range via `ptr` and `size`, and the locations
-    in the array given by `desc` and `count`, set the access flags for the
-    target locations. The range must be a fully mapped address range
-    containing all allocations created by :py:obj:`~.cuMemMap` /
-    :py:obj:`~.cuMemCreate`. Users cannot specify
-    :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` accessibility for
-    allocations created on with other location types. Note: When
-    :py:obj:`~.CUmemAccessDesc`::CUmemLocation::type is
-    :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA`,
-    :py:obj:`~.CUmemAccessDesc`::CUmemLocation::id is ignored. When setting
-    the access flags for a virtual address range mapping a multicast
-    object, `ptr` and `size` must be aligned to the value returned by
-    :py:obj:`~.cuMulticastGetGranularity` with the flag
-    :py:obj:`~.CU_MULTICAST_MINIMUM_GRANULARITY`. For best performance
-    however, it is recommended that `ptr` and `size` be aligned to the
-    value returned by :py:obj:`~.cuMulticastGetGranularity` with the flag
-    :py:obj:`~.CU_MULTICAST_RECOMMENDED_GRANULARITY`.
-
-    Parameters
-    ----------
-    ptr : :py:obj:`~.CUdeviceptr`
-        Starting address for the virtual address range
-    size : size_t
-        Length of the virtual address range
-    desc : List[:py:obj:`~.CUmemAccessDesc`]
-        Array of :py:obj:`~.CUmemAccessDesc` that describe how to change
-        the
-    count : size_t
-        Number of :py:obj:`~.CUmemAccessDesc` in `desc`
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemSetAccess`, :py:obj:`~.cuMemCreate`, :py:obj:`~.py`:obj:`~.cuMemMap`
-    """
-    desc = [] if desc is None else desc
-    if not all(isinstance(_x, (CUmemAccessDesc,)) for _x in desc):
-        raise TypeError("Argument 'desc' is not instance of type (expected Tuple[cydriver.CUmemAccessDesc,] or List[cydriver.CUmemAccessDesc,]")
-    cdef cydriver.CUdeviceptr cyptr
-    if ptr is None:
-        cyptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(ptr, (CUdeviceptr,)):
-        pptr = int(ptr)
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    else:
-        pptr = int(CUdeviceptr(ptr))
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    cdef cydriver.CUmemAccessDesc* cydesc = NULL
-    if len(desc) > 0:
-        cydesc = <cydriver.CUmemAccessDesc*> calloc(len(desc), sizeof(cydriver.CUmemAccessDesc))
-        if cydesc is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(desc)) + 'x' + str(sizeof(cydriver.CUmemAccessDesc)))
-        for idx in range(len(desc)):
-            string.memcpy(&cydesc[idx], (<CUmemAccessDesc>desc[idx])._ptr, sizeof(cydriver.CUmemAccessDesc))
-    if count > <size_t>len(desc): raise RuntimeError("List is too small: " + str(len(desc)) + " < " + str(count))
-    err = cydriver.cuMemSetAccess(cyptr, size, (<CUmemAccessDesc>desc[0])._ptr if len(desc) == 1 else cydesc, count)
-    if cydesc is not NULL:
-        free(cydesc)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemGetAccess' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemGetAccess(location : Optional[CUmemLocation], ptr):
-    """ Get the access `flags` set for the given `location` and `ptr`.
-
-    Parameters
-    ----------
-    location : :py:obj:`~.CUmemLocation`
-        Location in which to check the flags for
-    ptr : :py:obj:`~.CUdeviceptr`
-        Address in which to check the access flags for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    flags : unsigned long long
-        Flags set for this location
-
-    See Also
-    --------
-    :py:obj:`~.cuMemSetAccess`
-    """
-    cdef cydriver.CUdeviceptr cyptr
-    if ptr is None:
-        cyptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(ptr, (CUdeviceptr,)):
-        pptr = int(ptr)
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    else:
-        pptr = int(CUdeviceptr(ptr))
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    cdef unsigned long long flags = 0
-    cdef cydriver.CUmemLocation* cylocation_ptr = location._ptr if location != None else NULL
-    err = cydriver.cuMemGetAccess(&flags, cylocation_ptr, cyptr)
-    return (CUresult(err), flags)
-{{endif}}
-
-{{if 'cuMemExportToShareableHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemExportToShareableHandle(handle, handleType not None : CUmemAllocationHandleType, unsigned long long flags):
-    """ Exports an allocation to a requested shareable handle type.
-
-    Given a CUDA memory handle, create a shareable memory allocation handle
-    that can be used to share the memory with other processes. The
-    recipient process can convert the shareable handle back into a CUDA
-    memory handle using :py:obj:`~.cuMemImportFromShareableHandle` and map
-    it with :py:obj:`~.cuMemMap`. The implementation of what this handle is
-    and how it can be transferred is defined by the requested handle type
-    in `handleType`
-
-    Once all shareable handles are closed and the allocation is released,
-    the allocated memory referenced will be released back to the OS and
-    uses of the CUDA handle afterward will lead to undefined behavior.
-
-    This API can also be used in conjunction with other APIs (e.g. Vulkan,
-    OpenGL) that support importing memory from the shareable type
-
-    Parameters
-    ----------
-    handle : :py:obj:`~.CUmemGenericAllocationHandle`
-        CUDA handle for the memory allocation
-    handleType : :py:obj:`~.CUmemAllocationHandleType`
-        Type of shareable handle requested (defines type and size of the
-        `shareableHandle` output parameter)
-    flags : unsigned long long
-        Reserved, must be zero
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    shareableHandle : Any
-        Pointer to the location in which to store the requested handle type
-
-    See Also
-    --------
-    :py:obj:`~.cuMemImportFromShareableHandle`
-    """
-    cdef cydriver.CUmemGenericAllocationHandle cyhandle
-    if handle is None:
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>0
-    elif isinstance(handle, (CUmemGenericAllocationHandle,)):
-        phandle = int(handle)
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>phandle
-    else:
-        phandle = int(CUmemGenericAllocationHandle(handle))
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>phandle
-    cdef utils.HelperCUmemAllocationHandleType cyshareableHandle = utils.HelperCUmemAllocationHandleType(handleType)
-    cdef void* cyshareableHandle_ptr = <void*><void_ptr>cyshareableHandle.cptr
-    cdef cydriver.CUmemAllocationHandleType cyhandleType = handleType.value
-    err = cydriver.cuMemExportToShareableHandle(cyshareableHandle_ptr, cyhandle, cyhandleType, flags)
-    return (CUresult(err), cyshareableHandle.pyObj())
-{{endif}}
-
-{{if 'cuMemImportFromShareableHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemImportFromShareableHandle(osHandle, shHandleType not None : CUmemAllocationHandleType):
-    """ Imports an allocation from a requested shareable handle type.
-
-    If the current process cannot support the memory described by this
-    shareable handle, this API will error as
-    :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`.
-
-    If `shHandleType` is :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` and the
-    importer process has not been granted access to the same IMEX channel
-    as the exporter process, this API will error as
-    :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`.
-
-    Parameters
-    ----------
-    osHandle : Any
-        Shareable Handle representing the memory allocation that is to be
-        imported.
-    shHandleType : :py:obj:`~.CUmemAllocationHandleType`
-        handle type of the exported handle
-        :py:obj:`~.CUmemAllocationHandleType`.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    handle : :py:obj:`~.CUmemGenericAllocationHandle`
-        CUDA Memory handle for the memory allocation.
-
-    See Also
-    --------
-    :py:obj:`~.cuMemExportToShareableHandle`, :py:obj:`~.cuMemMap`, :py:obj:`~.cuMemRelease`
-
-    Notes
-    -----
-    Importing shareable handles exported from some graphics APIs(VUlkan, OpenGL, etc) created on devices under an SLI group may not be supported, and thus this API will return CUDA_ERROR_NOT_SUPPORTED. There is no guarantee that the contents of `handle` will be the same CUDA memory handle for the same given OS shareable handle, or the same underlying allocation.
-    """
-    cdef CUmemGenericAllocationHandle handle = CUmemGenericAllocationHandle()
-    cyosHandle = utils.HelperInputVoidPtr(osHandle)
-    cdef void* cyosHandle_ptr = <void*><void_ptr>cyosHandle.cptr
-    cdef cydriver.CUmemAllocationHandleType cyshHandleType = shHandleType.value
-    err = cydriver.cuMemImportFromShareableHandle(<cydriver.CUmemGenericAllocationHandle*>handle._ptr, cyosHandle_ptr, cyshHandleType)
-    return (CUresult(err), handle)
-{{endif}}
-
-{{if 'cuMemGetAllocationGranularity' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemGetAllocationGranularity(prop : Optional[CUmemAllocationProp], option not None : CUmemAllocationGranularity_flags):
-    """ Calculates either the minimal or recommended granularity.
-
-    Calculates either the minimal or recommended granularity for a given
-    allocation specification and returns it in granularity. This
-    granularity can be used as a multiple for alignment, size, or address
-    mapping.
-
-    Parameters
-    ----------
-    prop : :py:obj:`~.CUmemAllocationProp`
-        Property for which to determine the granularity for
-    option : :py:obj:`~.CUmemAllocationGranularity_flags`
-        Determines which granularity to return
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    granularity : int
-        Returned granularity.
-
-    See Also
-    --------
-    :py:obj:`~.cuMemCreate`, :py:obj:`~.cuMemMap`
-    """
-    cdef size_t granularity = 0
-    cdef cydriver.CUmemAllocationProp* cyprop_ptr = prop._ptr if prop != None else NULL
-    cdef cydriver.CUmemAllocationGranularity_flags cyoption = option.value
-    err = cydriver.cuMemGetAllocationGranularity(&granularity, cyprop_ptr, cyoption)
-    return (CUresult(err), granularity)
-{{endif}}
-
-{{if 'cuMemGetAllocationPropertiesFromHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemGetAllocationPropertiesFromHandle(handle):
-    """ Retrieve the contents of the property structure defining properties for this handle.
-
-    Parameters
-    ----------
-    handle : :py:obj:`~.CUmemGenericAllocationHandle`
-        Handle which to perform the query on
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    prop : :py:obj:`~.CUmemAllocationProp`
-        Pointer to a properties structure which will hold the information
-        about this handle
-
-    See Also
-    --------
-    :py:obj:`~.cuMemCreate`, :py:obj:`~.cuMemImportFromShareableHandle`
-    """
-    cdef cydriver.CUmemGenericAllocationHandle cyhandle
-    if handle is None:
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>0
-    elif isinstance(handle, (CUmemGenericAllocationHandle,)):
-        phandle = int(handle)
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>phandle
-    else:
-        phandle = int(CUmemGenericAllocationHandle(handle))
-        cyhandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>phandle
-    cdef CUmemAllocationProp prop = CUmemAllocationProp()
-    err = cydriver.cuMemGetAllocationPropertiesFromHandle(<cydriver.CUmemAllocationProp*>prop._ptr, cyhandle)
-    return (CUresult(err), prop)
-{{endif}}
-
-{{if 'cuMemRetainAllocationHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemRetainAllocationHandle(addr):
-    """ Given an address `addr`, returns the allocation handle of the backing memory allocation.
-
-    The handle is guaranteed to be the same handle value used to map the
-    memory. If the address requested is not mapped, the function will fail.
-    The returned handle must be released with corresponding number of calls
-    to :py:obj:`~.cuMemRelease`.
-
-    Parameters
-    ----------
-    addr : Any
-        Memory address to query, that has been mapped previously.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    handle : :py:obj:`~.CUmemGenericAllocationHandle`
-        CUDA Memory handle for the backing memory allocation.
-
-    See Also
-    --------
-    :py:obj:`~.cuMemCreate`, :py:obj:`~.cuMemRelease`, :py:obj:`~.cuMemMap`
-
-    Notes
-    -----
-    The address `addr`, can be any address in a range previously mapped by :py:obj:`~.cuMemMap`, and not necessarily the start address.
-    """
-    cdef CUmemGenericAllocationHandle handle = CUmemGenericAllocationHandle()
-    cyaddr = utils.HelperInputVoidPtr(addr)
-    cdef void* cyaddr_ptr = <void*><void_ptr>cyaddr.cptr
-    err = cydriver.cuMemRetainAllocationHandle(<cydriver.CUmemGenericAllocationHandle*>handle._ptr, cyaddr_ptr)
-    return (CUresult(err), handle)
-{{endif}}
-
-{{if 'cuMemFreeAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemFreeAsync(dptr, hStream):
-    """ Frees memory with stream ordered semantics.
-
-    Inserts a free operation into `hStream`. The allocation must not be
-    accessed after stream execution reaches the free. After this API
-    returns, accessing the memory from any subsequent work launched on the
-    GPU or querying its pointer attributes results in undefined behavior.
-
-    Parameters
-    ----------
-    dptr : :py:obj:`~.CUdeviceptr`
-        memory to free
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream establishing the stream ordering contract.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT` (default stream specified with no current context), :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    Notes
-    -----
-    During stream capture, this function results in the creation of a free node and must therefore be passed the address of a graph allocation.
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cydptr
-    if dptr is None:
-        cydptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dptr, (CUdeviceptr,)):
-        pdptr = int(dptr)
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    else:
-        pdptr = int(CUdeviceptr(dptr))
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    err = cydriver.cuMemFreeAsync(cydptr, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemAllocAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemAllocAsync(size_t bytesize, hStream):
-    """ Allocates memory with stream ordered semantics.
-
-    Inserts an allocation operation into `hStream`. A pointer to the
-    allocated memory is returned immediately in *dptr. The allocation must
-    not be accessed until the the allocation operation completes. The
-    allocation comes from the memory pool current to the stream's device.
-
-    Parameters
-    ----------
-    bytesize : size_t
-        Number of bytes to allocate
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream establishing the stream ordering contract and the memory
-        pool to allocate from
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT` (default stream specified with no current context), :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    dptr : :py:obj:`~.CUdeviceptr`
-        Returned device pointer
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAllocFromPoolAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceSetMemPool`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuMemPoolSetAccess`, :py:obj:`~.cuMemPoolSetAttribute`
-
-    Notes
-    -----
-    The default memory pool of a device contains device memory from that device.
-
-    Basic stream ordering allows future work submitted into the same stream to use the allocation. Stream query, stream synchronize, and CUDA events can be used to guarantee that the allocation operation completes before work submitted in a separate stream runs.
-
-    During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef CUdeviceptr dptr = CUdeviceptr()
-    err = cydriver.cuMemAllocAsync(<cydriver.CUdeviceptr*>dptr._ptr, bytesize, cyhStream)
-    return (CUresult(err), dptr)
-{{endif}}
-
-{{if 'cuMemPoolTrimTo' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPoolTrimTo(pool, size_t minBytesToKeep):
-    """ Tries to release memory back to the OS.
-
-    Releases memory back to the OS until the pool contains fewer than
-    minBytesToKeep reserved bytes, or there is no more memory that the
-    allocator can safely release. The allocator cannot release OS
-    allocations that back outstanding asynchronous allocations. The OS
-    allocations may happen at different granularity from the user
-    allocations.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        The memory pool to trim
-    minBytesToKeep : size_t
-        If the pool has less than minBytesToKeep reserved, the TrimTo
-        operation is a no-op. Otherwise the pool will be guaranteed to have
-        at least minBytesToKeep bytes reserved after the operation.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate`
-
-    Notes
-    -----
-    : Allocations that have not been freed count as outstanding.
-
-    : Allocations that have been asynchronously freed but whose completion has not been observed on the host (eg. by a synchronize) can count as outstanding.
-    """
-    cdef cydriver.CUmemoryPool cypool
-    if pool is None:
-        cypool = <cydriver.CUmemoryPool><void_ptr>0
-    elif isinstance(pool, (CUmemoryPool,)):
-        ppool = int(pool)
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    else:
-        ppool = int(CUmemoryPool(pool))
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    err = cydriver.cuMemPoolTrimTo(cypool, minBytesToKeep)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemPoolSetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPoolSetAttribute(pool, attr not None : CUmemPool_attribute, value):
-    """ Sets attributes of a memory pool.
-
-    Supported attributes are:
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD`: (value type =
-      cuuint64_t) Amount of reserved memory in bytes to hold onto before
-      trying to release memory back to the OS. When more than the release
-      threshold bytes of memory are held by the memory pool, the allocator
-      will try to release memory back to the OS on the next call to stream,
-      event or context synchronize. (default 0)
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES`: (value
-      type = int) Allow :py:obj:`~.cuMemAllocAsync` to use memory
-      asynchronously freed in another stream as long as a stream ordering
-      dependency of the allocating stream on the free action exists. Cuda
-      events and null stream interactions can create the required stream
-      ordered dependencies. (default enabled)
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC`: (value type =
-      int) Allow reuse of already completed frees when there is no
-      dependency between the free and allocation. (default enabled)
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES`: (value
-      type = int) Allow :py:obj:`~.cuMemAllocAsync` to insert new stream
-      dependencies in order to establish the stream ordering required to
-      reuse a piece of memory released by :py:obj:`~.cuMemFreeAsync`
-      (default enabled).
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH`: (value type =
-      cuuint64_t) Reset the high watermark that tracks the amount of
-      backing memory that was allocated for the memory pool. It is illegal
-      to set this attribute to a non-zero value.
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_USED_MEM_HIGH`: (value type = cuuint64_t)
-      Reset the high watermark that tracks the amount of used memory that
-      was allocated for the memory pool.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        The memory pool to modify
-    attr : :py:obj:`~.CUmemPool_attribute`
-        The attribute to modify
-    value : Any
-        Pointer to the value to assign
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate`
-    """
-    cdef cydriver.CUmemoryPool cypool
-    if pool is None:
-        cypool = <cydriver.CUmemoryPool><void_ptr>0
-    elif isinstance(pool, (CUmemoryPool,)):
-        ppool = int(pool)
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    else:
-        ppool = int(CUmemoryPool(pool))
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    cdef cydriver.CUmemPool_attribute cyattr = attr.value
-    cdef utils.HelperCUmemPool_attribute cyvalue = utils.HelperCUmemPool_attribute(attr, value, is_getter=False)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    err = cydriver.cuMemPoolSetAttribute(cypool, cyattr, cyvalue_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemPoolGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPoolGetAttribute(pool, attr not None : CUmemPool_attribute):
-    """ Gets attributes of a memory pool.
-
-    Supported attributes are:
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD`: (value type =
-      cuuint64_t) Amount of reserved memory in bytes to hold onto before
-      trying to release memory back to the OS. When more than the release
-      threshold bytes of memory are held by the memory pool, the allocator
-      will try to release memory back to the OS on the next call to stream,
-      event or context synchronize. (default 0)
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES`: (value
-      type = int) Allow :py:obj:`~.cuMemAllocAsync` to use memory
-      asynchronously freed in another stream as long as a stream ordering
-      dependency of the allocating stream on the free action exists. Cuda
-      events and null stream interactions can create the required stream
-      ordered dependencies. (default enabled)
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC`: (value type =
-      int) Allow reuse of already completed frees when there is no
-      dependency between the free and allocation. (default enabled)
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES`: (value
-      type = int) Allow :py:obj:`~.cuMemAllocAsync` to insert new stream
-      dependencies in order to establish the stream ordering required to
-      reuse a piece of memory released by :py:obj:`~.cuMemFreeAsync`
-      (default enabled).
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT`: (value type =
-      cuuint64_t) Amount of backing memory currently allocated for the
-      mempool
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH`: (value type =
-      cuuint64_t) High watermark of backing memory allocated for the
-      mempool since the last time it was reset.
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_USED_MEM_CURRENT`: (value type =
-      cuuint64_t) Amount of memory from the pool that is currently in use
-      by the application.
-
-    - :py:obj:`~.CU_MEMPOOL_ATTR_USED_MEM_HIGH`: (value type = cuuint64_t)
-      High watermark of the amount of memory from the pool that was in use
-      by the application.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        The memory pool to get attributes of
-    attr : :py:obj:`~.CUmemPool_attribute`
-        The attribute to get
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    value : Any
-        Retrieved value
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate`
-    """
-    cdef cydriver.CUmemoryPool cypool
-    if pool is None:
-        cypool = <cydriver.CUmemoryPool><void_ptr>0
-    elif isinstance(pool, (CUmemoryPool,)):
-        ppool = int(pool)
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    else:
-        ppool = int(CUmemoryPool(pool))
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    cdef cydriver.CUmemPool_attribute cyattr = attr.value
-    cdef utils.HelperCUmemPool_attribute cyvalue = utils.HelperCUmemPool_attribute(attr, 0, is_getter=True)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    err = cydriver.cuMemPoolGetAttribute(cypool, cyattr, cyvalue_ptr)
-    return (CUresult(err), cyvalue.pyObj())
-{{endif}}
-
-{{if 'cuMemPoolSetAccess' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPoolSetAccess(pool, map : Optional[Tuple[CUmemAccessDesc] | List[CUmemAccessDesc]], size_t count):
-    """ Controls visibility of pools between devices.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        The pool being modified
-    map : List[:py:obj:`~.CUmemAccessDesc`]
-        Array of access descriptors. Each descriptor instructs the access
-        to enable for a single gpu.
-    count : size_t
-        Number of descriptors in the map array.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate`
-    """
-    map = [] if map is None else map
-    if not all(isinstance(_x, (CUmemAccessDesc,)) for _x in map):
-        raise TypeError("Argument 'map' is not instance of type (expected Tuple[cydriver.CUmemAccessDesc,] or List[cydriver.CUmemAccessDesc,]")
-    cdef cydriver.CUmemoryPool cypool
-    if pool is None:
-        cypool = <cydriver.CUmemoryPool><void_ptr>0
-    elif isinstance(pool, (CUmemoryPool,)):
-        ppool = int(pool)
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    else:
-        ppool = int(CUmemoryPool(pool))
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    cdef cydriver.CUmemAccessDesc* cymap = NULL
-    if len(map) > 0:
-        cymap = <cydriver.CUmemAccessDesc*> calloc(len(map), sizeof(cydriver.CUmemAccessDesc))
-        if cymap is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(map)) + 'x' + str(sizeof(cydriver.CUmemAccessDesc)))
-        for idx in range(len(map)):
-            string.memcpy(&cymap[idx], (<CUmemAccessDesc>map[idx])._ptr, sizeof(cydriver.CUmemAccessDesc))
-    if count > <size_t>len(map): raise RuntimeError("List is too small: " + str(len(map)) + " < " + str(count))
-    err = cydriver.cuMemPoolSetAccess(cypool, (<CUmemAccessDesc>map[0])._ptr if len(map) == 1 else cymap, count)
-    if cymap is not NULL:
-        free(cymap)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemPoolGetAccess' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPoolGetAccess(memPool, location : Optional[CUmemLocation]):
-    """ Returns the accessibility of a pool from a device.
-
-    Returns the accessibility of the pool's memory from the specified
-    location.
-
-    Parameters
-    ----------
-    memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        the pool being queried
-    location : :py:obj:`~.CUmemLocation`
-        the location accessing the pool
-
-    Returns
-    -------
-    CUresult
-
-    flags : :py:obj:`~.CUmemAccess_flags`
-        the accessibility of the pool from the specified location
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate`
-    """
-    cdef cydriver.CUmemoryPool cymemPool
-    if memPool is None:
-        cymemPool = <cydriver.CUmemoryPool><void_ptr>0
-    elif isinstance(memPool, (CUmemoryPool,)):
-        pmemPool = int(memPool)
-        cymemPool = <cydriver.CUmemoryPool><void_ptr>pmemPool
-    else:
-        pmemPool = int(CUmemoryPool(memPool))
-        cymemPool = <cydriver.CUmemoryPool><void_ptr>pmemPool
-    cdef cydriver.CUmemAccess_flags flags
-    cdef cydriver.CUmemLocation* cylocation_ptr = location._ptr if location != None else NULL
-    err = cydriver.cuMemPoolGetAccess(&flags, cymemPool, cylocation_ptr)
-    return (CUresult(err), CUmemAccess_flags(flags))
-{{endif}}
-
-{{if 'cuMemPoolCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPoolCreate(poolProps : Optional[CUmemPoolProps]):
-    """ Creates a memory pool.
-
-    Creates a CUDA memory pool and returns the handle in `pool`. The
-    `poolProps` determines the properties of the pool such as the backing
-    device and IPC capabilities.
-
-    To create a memory pool targeting a specific host NUMA node,
-    applications must set :py:obj:`~.CUmemPoolProps`::CUmemLocation::type
-    to :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` and
-    :py:obj:`~.CUmemPoolProps`::CUmemLocation::id must specify the NUMA ID
-    of the host memory node. Specifying
-    :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT` or
-    :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST` as the
-    :py:obj:`~.CUmemPoolProps`::CUmemLocation::type will result in
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE`. By default, the pool's memory
-    will be accessible from the device it is allocated on. In the case of
-    pools created with :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA`, their
-    default accessibility will be from the host CPU. Applications can
-    control the maximum size of the pool by specifying a non-zero value for
-    :py:obj:`~.CUmemPoolProps.maxSize`. If set to 0, the maximum size of
-    the pool will default to a system dependent value.
-
-    Applications can set :py:obj:`~.CUmemPoolProps.handleTypes` to
-    :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` in order to create
-    :py:obj:`~.CUmemoryPool` suitable for sharing within an IMEX domain. An
-    IMEX domain is either an OS instance or a group of securely connected
-    OS instances using the NVIDIA IMEX daemon. An IMEX channel is a global
-    resource within the IMEX domain that represents a logical entity that
-    aims to provide fine grained accessibility control for the
-    participating processes. When exporter and importer CUDA processes have
-    been granted access to the same IMEX channel, they can securely share
-    memory. If the allocating process does not have access setup for an
-    IMEX channel, attempting to export a :py:obj:`~.CUmemoryPool` with
-    :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` will result in
-    :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`. The nvidia-modprobe CLI provides
-    more information regarding setting up of IMEX channels.
-
-    Parameters
-    ----------
-    poolProps : :py:obj:`~.CUmemPoolProps`
-        None
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED` :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    pool : :py:obj:`~.CUmemoryPool`
-        None
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceSetMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuMemAllocFromPoolAsync`, :py:obj:`~.cuMemPoolExportToShareableHandle`
-
-    Notes
-    -----
-    Specifying CU_MEM_HANDLE_TYPE_NONE creates a memory pool that will not support IPC.
-    """
-    cdef CUmemoryPool pool = CUmemoryPool()
-    cdef cydriver.CUmemPoolProps* cypoolProps_ptr = poolProps._ptr if poolProps != None else NULL
-    err = cydriver.cuMemPoolCreate(<cydriver.CUmemoryPool*>pool._ptr, cypoolProps_ptr)
-    return (CUresult(err), pool)
-{{endif}}
-
-{{if 'cuMemPoolDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPoolDestroy(pool):
-    """ Destroys the specified memory pool.
-
-    If any pointers obtained from this pool haven't been freed or the pool
-    has free operations that haven't completed when
-    :py:obj:`~.cuMemPoolDestroy` is invoked, the function will return
-    immediately and the resources associated with the pool will be released
-    automatically once there are no more outstanding allocations.
-
-    Destroying the current mempool of a device sets the default mempool of
-    that device as the current mempool for that device.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        None
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceSetMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuMemPoolCreate`
-
-    Notes
-    -----
-    A device's default memory pool cannot be destroyed.
-    """
-    cdef cydriver.CUmemoryPool cypool
-    if pool is None:
-        cypool = <cydriver.CUmemoryPool><void_ptr>0
-    elif isinstance(pool, (CUmemoryPool,)):
-        ppool = int(pool)
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    else:
-        ppool = int(CUmemoryPool(pool))
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    err = cydriver.cuMemPoolDestroy(cypool)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemAllocFromPoolAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemAllocFromPoolAsync(size_t bytesize, pool, hStream):
-    """ Allocates memory from a specified pool with stream ordered semantics.
-
-    Inserts an allocation operation into `hStream`. A pointer to the
-    allocated memory is returned immediately in *dptr. The allocation must
-    not be accessed until the the allocation operation completes. The
-    allocation comes from the specified memory pool.
-
-    Parameters
-    ----------
-    bytesize : size_t
-        Number of bytes to allocate
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        The pool to allocate from
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream establishing the stream ordering semantic
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT` (default stream specified with no current context), :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    dptr : :py:obj:`~.CUdeviceptr`
-        Returned device pointer
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuMemPoolSetAccess`, :py:obj:`~.cuMemPoolSetAttribute`
-
-    Notes
-    -----
-    During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUmemoryPool cypool
-    if pool is None:
-        cypool = <cydriver.CUmemoryPool><void_ptr>0
-    elif isinstance(pool, (CUmemoryPool,)):
-        ppool = int(pool)
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    else:
-        ppool = int(CUmemoryPool(pool))
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    cdef CUdeviceptr dptr = CUdeviceptr()
-    err = cydriver.cuMemAllocFromPoolAsync(<cydriver.CUdeviceptr*>dptr._ptr, bytesize, cypool, cyhStream)
-    return (CUresult(err), dptr)
-{{endif}}
-
-{{if 'cuMemPoolExportToShareableHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPoolExportToShareableHandle(pool, handleType not None : CUmemAllocationHandleType, unsigned long long flags):
-    """ Exports a memory pool to the requested handle type.
-
-    Given an IPC capable mempool, create an OS handle to share the pool
-    with another process. A recipient process can convert the shareable
-    handle into a mempool with
-    :py:obj:`~.cuMemPoolImportFromShareableHandle`. Individual pointers can
-    then be shared with the :py:obj:`~.cuMemPoolExportPointer` and
-    :py:obj:`~.cuMemPoolImportPointer` APIs. The implementation of what the
-    shareable handle is and how it can be transferred is defined by the
-    requested handle type.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        pool to export
-    handleType : :py:obj:`~.CUmemAllocationHandleType`
-        the type of handle to create
-    flags : unsigned long long
-        must be 0
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    handle_out : Any
-        Returned OS handle
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolImportFromShareableHandle`, :py:obj:`~.cuMemPoolExportPointer`, :py:obj:`~.cuMemPoolImportPointer`, :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuMemPoolSetAccess`, :py:obj:`~.cuMemPoolSetAttribute`
-
-    Notes
-    -----
-    : To create an IPC capable mempool, create a mempool with a CUmemAllocationHandleType other than CU_MEM_HANDLE_TYPE_NONE.
-    """
-    cdef cydriver.CUmemoryPool cypool
-    if pool is None:
-        cypool = <cydriver.CUmemoryPool><void_ptr>0
-    elif isinstance(pool, (CUmemoryPool,)):
-        ppool = int(pool)
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    else:
-        ppool = int(CUmemoryPool(pool))
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    cdef utils.HelperCUmemAllocationHandleType cyhandle_out = utils.HelperCUmemAllocationHandleType(handleType)
-    cdef void* cyhandle_out_ptr = <void*><void_ptr>cyhandle_out.cptr
-    cdef cydriver.CUmemAllocationHandleType cyhandleType = handleType.value
-    err = cydriver.cuMemPoolExportToShareableHandle(cyhandle_out_ptr, cypool, cyhandleType, flags)
-    return (CUresult(err), cyhandle_out.pyObj())
-{{endif}}
-
-{{if 'cuMemPoolImportFromShareableHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPoolImportFromShareableHandle(handle, handleType not None : CUmemAllocationHandleType, unsigned long long flags):
-    """ imports a memory pool from a shared handle.
-
-    Specific allocations can be imported from the imported pool with
-    cuMemPoolImportPointer.
-
-    If `handleType` is :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC` and the
-    importer process has not been granted access to the same IMEX channel
-    as the exporter process, this API will error as
-    :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`.
-
-    Parameters
-    ----------
-    handle : Any
-        OS handle of the pool to open
-    handleType : :py:obj:`~.CUmemAllocationHandleType`
-        The type of handle being imported
-    flags : unsigned long long
-        must be 0
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    pool_out : :py:obj:`~.CUmemoryPool`
-        Returned memory pool
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolExportToShareableHandle`, :py:obj:`~.cuMemPoolExportPointer`, :py:obj:`~.cuMemPoolImportPointer`
-
-    Notes
-    -----
-    Imported memory pools do not support creating new allocations. As such imported memory pools may not be used in cuDeviceSetMemPool or :py:obj:`~.cuMemAllocFromPoolAsync` calls.
-    """
-    cdef CUmemoryPool pool_out = CUmemoryPool()
-    cyhandle = utils.HelperInputVoidPtr(handle)
-    cdef void* cyhandle_ptr = <void*><void_ptr>cyhandle.cptr
-    cdef cydriver.CUmemAllocationHandleType cyhandleType = handleType.value
-    err = cydriver.cuMemPoolImportFromShareableHandle(<cydriver.CUmemoryPool*>pool_out._ptr, cyhandle_ptr, cyhandleType, flags)
-    return (CUresult(err), pool_out)
-{{endif}}
-
-{{if 'cuMemPoolExportPointer' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPoolExportPointer(ptr):
-    """ Export data to share a memory pool allocation between processes.
-
-    Constructs `shareData_out` for sharing a specific allocation from an
-    already shared memory pool. The recipient process can import the
-    allocation with the :py:obj:`~.cuMemPoolImportPointer` api. The data is
-    not a handle and may be shared through any IPC mechanism.
-
-    Parameters
-    ----------
-    ptr : :py:obj:`~.CUdeviceptr`
-        pointer to memory being exported
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    shareData_out : :py:obj:`~.CUmemPoolPtrExportData`
-        Returned export data
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolExportToShareableHandle`, :py:obj:`~.cuMemPoolImportFromShareableHandle`, :py:obj:`~.cuMemPoolImportPointer`
-    """
-    cdef cydriver.CUdeviceptr cyptr
-    if ptr is None:
-        cyptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(ptr, (CUdeviceptr,)):
-        pptr = int(ptr)
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    else:
-        pptr = int(CUdeviceptr(ptr))
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    cdef CUmemPoolPtrExportData shareData_out = CUmemPoolPtrExportData()
-    err = cydriver.cuMemPoolExportPointer(<cydriver.CUmemPoolPtrExportData*>shareData_out._ptr, cyptr)
-    return (CUresult(err), shareData_out)
-{{endif}}
-
-{{if 'cuMemPoolImportPointer' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPoolImportPointer(pool, shareData : Optional[CUmemPoolPtrExportData]):
-    """ Import a memory pool allocation from another process.
-
-    Returns in `ptr_out` a pointer to the imported memory. The imported
-    memory must not be accessed before the allocation operation completes
-    in the exporting process. The imported memory must be freed from all
-    importing processes before being freed in the exporting process. The
-    pointer may be freed with cuMemFree or cuMemFreeAsync. If
-    cuMemFreeAsync is used, the free must be completed on the importing
-    process before the free operation on the exporting process.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        pool from which to import
-    shareData : :py:obj:`~.CUmemPoolPtrExportData`
-        data specifying the memory to import
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    ptr_out : :py:obj:`~.CUdeviceptr`
-        pointer to imported memory
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolExportToShareableHandle`, :py:obj:`~.cuMemPoolImportFromShareableHandle`, :py:obj:`~.cuMemPoolExportPointer`
-
-    Notes
-    -----
-    The cuMemFreeAsync api may be used in the exporting process before the cuMemFreeAsync operation completes in its stream as long as the cuMemFreeAsync in the exporting process specifies a stream with a stream dependency on the importing process's cuMemFreeAsync.
-    """
-    cdef cydriver.CUmemoryPool cypool
-    if pool is None:
-        cypool = <cydriver.CUmemoryPool><void_ptr>0
-    elif isinstance(pool, (CUmemoryPool,)):
-        ppool = int(pool)
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    else:
-        ppool = int(CUmemoryPool(pool))
-        cypool = <cydriver.CUmemoryPool><void_ptr>ppool
-    cdef CUdeviceptr ptr_out = CUdeviceptr()
-    cdef cydriver.CUmemPoolPtrExportData* cyshareData_ptr = shareData._ptr if shareData != None else NULL
-    err = cydriver.cuMemPoolImportPointer(<cydriver.CUdeviceptr*>ptr_out._ptr, cypool, cyshareData_ptr)
-    return (CUresult(err), ptr_out)
-{{endif}}
-
-{{if 'cuMulticastCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMulticastCreate(prop : Optional[CUmulticastObjectProp]):
-    """ Create a generic allocation handle representing a multicast object described by the given properties.
-
-    This creates a multicast object as described by `prop`. The number of
-    participating devices is specified by
-    :py:obj:`~.CUmulticastObjectProp.numDevices`. Devices can be added to
-    the multicast object via :py:obj:`~.cuMulticastAddDevice`. All
-    participating devices must be added to the multicast object before
-    memory can be bound to it. Memory is bound to the multicast object via
-    either :py:obj:`~.cuMulticastBindMem` or
-    :py:obj:`~.cuMulticastBindAddr`, and can be unbound via
-    :py:obj:`~.cuMulticastUnbind`. The total amount of memory that can be
-    bound per device is specified by
-    :py:obj:`~.py`:obj:`~.CUmulticastObjectProp.size`. This size must be a
-    multiple of the value returned by :py:obj:`~.cuMulticastGetGranularity`
-    with the flag :py:obj:`~.CU_MULTICAST_GRANULARITY_MINIMUM`. For best
-    performance however, the size should be aligned to the value returned
-    by :py:obj:`~.cuMulticastGetGranularity` with the flag
-    :py:obj:`~.CU_MULTICAST_GRANULARITY_RECOMMENDED`.
-
-    After all participating devices have been added, multicast objects can
-    also be mapped to a device's virtual address space using the virtual
-    memory management APIs (see :py:obj:`~.cuMemMap` and
-    :py:obj:`~.cuMemSetAccess`). Multicast objects can also be shared with
-    other processes by requesting a shareable handle via
-    :py:obj:`~.cuMemExportToShareableHandle`. Note that the desired types
-    of shareable handles must be specified in the bitmask
-    :py:obj:`~.CUmulticastObjectProp.handleTypes`. Multicast objects can be
-    released using the virtual memory management API
-    :py:obj:`~.cuMemRelease`.
-
-    Parameters
-    ----------
-    prop : :py:obj:`~.CUmulticastObjectProp`
-        Properties of the multicast object to create.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    mcHandle : :py:obj:`~.CUmemGenericAllocationHandle`
-        Value of handle returned.
-
-    See Also
-    --------
-    :py:obj:`~.cuMulticastAddDevice`, :py:obj:`~.cuMulticastBindMem`, :py:obj:`~.cuMulticastBindAddr`, :py:obj:`~.cuMulticastUnbind`
-
-    :py:obj:`~.cuMemCreate`, :py:obj:`~.cuMemRelease`, :py:obj:`~.cuMemExportToShareableHandle`, :py:obj:`~.cuMemImportFromShareableHandle`
-    """
-    cdef CUmemGenericAllocationHandle mcHandle = CUmemGenericAllocationHandle()
-    cdef cydriver.CUmulticastObjectProp* cyprop_ptr = prop._ptr if prop != None else NULL
-    err = cydriver.cuMulticastCreate(<cydriver.CUmemGenericAllocationHandle*>mcHandle._ptr, cyprop_ptr)
-    return (CUresult(err), mcHandle)
-{{endif}}
-
-{{if 'cuMulticastAddDevice' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMulticastAddDevice(mcHandle, dev):
-    """ Associate a device to a multicast object.
-
-    Associates a device to a multicast object. The added device will be a
-    part of the multicast team of size specified by
-    :py:obj:`~.CUmulticastObjectProp.numDevices` during
-    :py:obj:`~.cuMulticastCreate`. The association of the device to the
-    multicast object is permanent during the life time of the multicast
-    object. All devices must be added to the multicast team before any
-    memory can be bound to any device in the team. Any calls to
-    :py:obj:`~.cuMulticastBindMem` or :py:obj:`~.cuMulticastBindAddr` will
-    block until all devices have been added. Similarly all devices must be
-    added to the multicast team before a virtual address range can be
-    mapped to the multicast object. A call to :py:obj:`~.cuMemMap` will
-    block until all devices have been added.
-
-    Parameters
-    ----------
-    mcHandle : :py:obj:`~.CUmemGenericAllocationHandle`
-        Handle representing a multicast object.
-    dev : :py:obj:`~.CUdevice`
-        Device that will be associated to the multicast object.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuMulticastCreate`, :py:obj:`~.cuMulticastBindMem`, :py:obj:`~.cuMulticastBindAddr`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef cydriver.CUmemGenericAllocationHandle cymcHandle
-    if mcHandle is None:
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>0
-    elif isinstance(mcHandle, (CUmemGenericAllocationHandle,)):
-        pmcHandle = int(mcHandle)
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmcHandle
-    else:
-        pmcHandle = int(CUmemGenericAllocationHandle(mcHandle))
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmcHandle
-    err = cydriver.cuMulticastAddDevice(cymcHandle, cydev)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMulticastBindMem' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMulticastBindMem(mcHandle, size_t mcOffset, memHandle, size_t memOffset, size_t size, unsigned long long flags):
-    """ Bind a memory allocation represented by a handle to a multicast object.
-
-    Binds a memory allocation specified by `memHandle` and created via
-    :py:obj:`~.cuMemCreate` to a multicast object represented by `mcHandle`
-    and created via :py:obj:`~.cuMulticastCreate`. The intended `size` of
-    the bind, the offset in the multicast range `mcOffset` as well as the
-    offset in the memory `memOffset` must be a multiple of the value
-    returned by :py:obj:`~.cuMulticastGetGranularity` with the flag
-    :py:obj:`~.CU_MULTICAST_GRANULARITY_MINIMUM`. For best performance
-    however, `size`, `mcOffset` and `memOffset` should be aligned to the
-    granularity of the memory allocation(see
-    :py:obj:`~.cuMemGetAllocationGranularity`) or to the value returned by
-    :py:obj:`~.cuMulticastGetGranularity` with the flag
-    :py:obj:`~.CU_MULTICAST_GRANULARITY_RECOMMENDED`.
-
-    The `size` + `memOffset` cannot be larger than the size of the
-    allocated memory. Similarly the `size` + `mcOffset` cannot be larger
-    than the size of the multicast object. The memory allocation must have
-    beeen created on one of the devices that was added to the multicast
-    team via :py:obj:`~.cuMulticastAddDevice`. Externally shareable as well
-    as imported multicast objects can be bound only to externally shareable
-    memory. Note that this call will return CUDA_ERROR_OUT_OF_MEMORY if
-    there are insufficient resources required to perform the bind. This
-    call may also return CUDA_ERROR_SYSTEM_NOT_READY if the necessary
-    system software is not initialized or running.
-
-    Parameters
-    ----------
-    mcHandle : :py:obj:`~.CUmemGenericAllocationHandle`
-        Handle representing a multicast object.
-    mcOffset : size_t
-        Offset into the multicast object for attachment.
-    memHandle : :py:obj:`~.CUmemGenericAllocationHandle`
-        Handle representing a memory allocation.
-    memOffset : size_t
-        Offset into the memory for attachment.
-    size : size_t
-        Size of the memory that will be bound to the multicast object.
-    flags : unsigned long long
-        Flags for future use, must be zero for now.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_SYSTEM_NOT_READY`
-
-    See Also
-    --------
-    :py:obj:`~.cuMulticastCreate`, :py:obj:`~.cuMulticastAddDevice`, :py:obj:`~.cuMemCreate`
-    """
-    cdef cydriver.CUmemGenericAllocationHandle cymemHandle
-    if memHandle is None:
-        cymemHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>0
-    elif isinstance(memHandle, (CUmemGenericAllocationHandle,)):
-        pmemHandle = int(memHandle)
-        cymemHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmemHandle
-    else:
-        pmemHandle = int(CUmemGenericAllocationHandle(memHandle))
-        cymemHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmemHandle
-    cdef cydriver.CUmemGenericAllocationHandle cymcHandle
-    if mcHandle is None:
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>0
-    elif isinstance(mcHandle, (CUmemGenericAllocationHandle,)):
-        pmcHandle = int(mcHandle)
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmcHandle
-    else:
-        pmcHandle = int(CUmemGenericAllocationHandle(mcHandle))
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmcHandle
-    err = cydriver.cuMulticastBindMem(cymcHandle, mcOffset, cymemHandle, memOffset, size, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMulticastBindAddr' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMulticastBindAddr(mcHandle, size_t mcOffset, memptr, size_t size, unsigned long long flags):
-    """ Bind a memory allocation represented by a virtual address to a multicast object.
-
-    Binds a memory allocation specified by its mapped address `memptr` to a
-    multicast object represented by `mcHandle`. The memory must have been
-    allocated via :py:obj:`~.cuMemCreate` or :py:obj:`~.cudaMallocAsync`.
-    The intended `size` of the bind, the offset in the multicast range
-    `mcOffset` and `memptr` must be a multiple of the value returned by
-    :py:obj:`~.cuMulticastGetGranularity` with the flag
-    :py:obj:`~.CU_MULTICAST_GRANULARITY_MINIMUM`. For best performance
-    however, `size`, `mcOffset` and `memptr` should be aligned to the value
-    returned by :py:obj:`~.cuMulticastGetGranularity` with the flag
-    :py:obj:`~.CU_MULTICAST_GRANULARITY_RECOMMENDED`.
-
-    The `size` cannot be larger than the size of the allocated memory.
-    Similarly the `size` + `mcOffset` cannot be larger than the total size
-    of the multicast object. The memory allocation must have beeen created
-    on one of the devices that was added to the multicast team via
-    :py:obj:`~.cuMulticastAddDevice`. Externally shareable as well as
-    imported multicast objects can be bound only to externally shareable
-    memory. Note that this call will return CUDA_ERROR_OUT_OF_MEMORY if
-    there are insufficient resources required to perform the bind. This
-    call may also return CUDA_ERROR_SYSTEM_NOT_READY if the necessary
-    system software is not initialized or running.
-
-    Parameters
-    ----------
-    mcHandle : :py:obj:`~.CUmemGenericAllocationHandle`
-        Handle representing a multicast object.
-    mcOffset : size_t
-        Offset into multicast va range for attachment.
-    memptr : :py:obj:`~.CUdeviceptr`
-        Virtual address of the memory allocation.
-    size : size_t
-        Size of memory that will be bound to the multicast object.
-    flags : unsigned long long
-        Flags for future use, must be zero now.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`, :py:obj:`~.CUDA_ERROR_SYSTEM_NOT_READY`
-
-    See Also
-    --------
-    :py:obj:`~.cuMulticastCreate`, :py:obj:`~.cuMulticastAddDevice`, :py:obj:`~.cuMemCreate`
-    """
-    cdef cydriver.CUdeviceptr cymemptr
-    if memptr is None:
-        cymemptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(memptr, (CUdeviceptr,)):
-        pmemptr = int(memptr)
-        cymemptr = <cydriver.CUdeviceptr><void_ptr>pmemptr
-    else:
-        pmemptr = int(CUdeviceptr(memptr))
-        cymemptr = <cydriver.CUdeviceptr><void_ptr>pmemptr
-    cdef cydriver.CUmemGenericAllocationHandle cymcHandle
-    if mcHandle is None:
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>0
-    elif isinstance(mcHandle, (CUmemGenericAllocationHandle,)):
-        pmcHandle = int(mcHandle)
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmcHandle
-    else:
-        pmcHandle = int(CUmemGenericAllocationHandle(mcHandle))
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmcHandle
-    err = cydriver.cuMulticastBindAddr(cymcHandle, mcOffset, cymemptr, size, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMulticastUnbind' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMulticastUnbind(mcHandle, dev, size_t mcOffset, size_t size):
-    """ Unbind any memory allocations bound to a multicast object at a given offset and upto a given size.
-
-    Unbinds any memory allocations hosted on `dev` and bound to a multicast
-    object at `mcOffset` and upto a given `size`. The intended `size` of
-    the unbind and the offset in the multicast range ( `mcOffset` ) must be
-    a multiple of the value returned by
-    :py:obj:`~.cuMulticastGetGranularity` flag
-    :py:obj:`~.CU_MULTICAST_GRANULARITY_MINIMUM`. The `size` + `mcOffset`
-    cannot be larger than the total size of the multicast object.
-
-    Parameters
-    ----------
-    mcHandle : :py:obj:`~.CUmemGenericAllocationHandle`
-        Handle representing a multicast object.
-    dev : :py:obj:`~.CUdevice`
-        Device that hosts the memory allocation.
-    mcOffset : size_t
-        Offset into the multicast object.
-    size : size_t
-        Desired size to unbind.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuMulticastBindMem`, :py:obj:`~.cuMulticastBindAddr`
-
-    Notes
-    -----
-    Warning: The `mcOffset` and the `size` must match the corresponding values specified during the bind call. Any other values may result in undefined behavior.
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef cydriver.CUmemGenericAllocationHandle cymcHandle
-    if mcHandle is None:
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>0
-    elif isinstance(mcHandle, (CUmemGenericAllocationHandle,)):
-        pmcHandle = int(mcHandle)
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmcHandle
-    else:
-        pmcHandle = int(CUmemGenericAllocationHandle(mcHandle))
-        cymcHandle = <cydriver.CUmemGenericAllocationHandle><void_ptr>pmcHandle
-    err = cydriver.cuMulticastUnbind(cymcHandle, cydev, mcOffset, size)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMulticastGetGranularity' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMulticastGetGranularity(prop : Optional[CUmulticastObjectProp], option not None : CUmulticastGranularity_flags):
-    """ Calculates either the minimal or recommended granularity for multicast object.
-
-    Calculates either the minimal or recommended granularity for a given
-    set of multicast object properties and returns it in granularity. This
-    granularity can be used as a multiple for size, bind offsets and
-    address mappings of the multicast object.
-
-    Parameters
-    ----------
-    prop : :py:obj:`~.CUmulticastObjectProp`
-        Properties of the multicast object.
-    option : :py:obj:`~.CUmulticastGranularity_flags`
-        Determines which granularity to return.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    granularity : int
-        Returned granularity.
-
-    See Also
-    --------
-    :py:obj:`~.cuMulticastCreate`, :py:obj:`~.cuMulticastBindMem`, :py:obj:`~.cuMulticastBindAddr`, :py:obj:`~.cuMulticastUnbind`
-    """
-    cdef size_t granularity = 0
-    cdef cydriver.CUmulticastObjectProp* cyprop_ptr = prop._ptr if prop != None else NULL
-    cdef cydriver.CUmulticastGranularity_flags cyoption = option.value
-    err = cydriver.cuMulticastGetGranularity(&granularity, cyprop_ptr, cyoption)
-    return (CUresult(err), granularity)
-{{endif}}
-
-{{if 'cuPointerGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuPointerGetAttribute(attribute not None : CUpointer_attribute, ptr):
-    """ Returns information about a pointer.
-
-    The supported attributes are:
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_CONTEXT`:
-
-    - Returns in `*data` the :py:obj:`~.CUcontext` in which `ptr` was
-      allocated or registered. The type of `data` must be
-      :py:obj:`~.CUcontext` *.
-
-    - If `ptr` was not allocated by, mapped by, or registered with a
-      :py:obj:`~.CUcontext` which uses unified virtual addressing then
-      :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_MEMORY_TYPE`:
-
-    - Returns in `*data` the physical memory type of the memory that `ptr`
-      addresses as a :py:obj:`~.CUmemorytype` enumerated value. The type of
-      `data` must be unsigned int.
-
-    - If `ptr` addresses device memory then `*data` is set to
-      :py:obj:`~.CU_MEMORYTYPE_DEVICE`. The particular :py:obj:`~.CUdevice`
-      on which the memory resides is the :py:obj:`~.CUdevice` of the
-      :py:obj:`~.CUcontext` returned by the
-      :py:obj:`~.CU_POINTER_ATTRIBUTE_CONTEXT` attribute of `ptr`.
-
-    - If `ptr` addresses host memory then `*data` is set to
-      :py:obj:`~.CU_MEMORYTYPE_HOST`.
-
-    - If `ptr` was not allocated by, mapped by, or registered with a
-      :py:obj:`~.CUcontext` which uses unified virtual addressing then
-      :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    - If the current :py:obj:`~.CUcontext` does not support unified virtual
-      addressing then :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT` is returned.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_DEVICE_POINTER`:
-
-    - Returns in `*data` the device pointer value through which `ptr` may
-      be accessed by kernels running in the current :py:obj:`~.CUcontext`.
-      The type of `data` must be CUdeviceptr *.
-
-    - If there exists no device pointer value through which kernels running
-      in the current :py:obj:`~.CUcontext` may access `ptr` then
-      :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    - If there is no current :py:obj:`~.CUcontext` then
-      :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT` is returned.
-
-    - Except in the exceptional disjoint addressing cases discussed below,
-      the value returned in `*data` will equal the input value `ptr`.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_HOST_POINTER`:
-
-    - Returns in `*data` the host pointer value through which `ptr` may be
-      accessed by by the host program. The type of `data` must be void **.
-      If there exists no host pointer value through which the host program
-      may directly access `ptr` then :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-      is returned.
-
-    - Except in the exceptional disjoint addressing cases discussed below,
-      the value returned in `*data` will equal the input value `ptr`.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_P2P_TOKENS`:
-
-    - Returns in `*data` two tokens for use with the nv-p2p.h Linux kernel
-      interface. `data` must be a struct of type
-      CUDA_POINTER_ATTRIBUTE_P2P_TOKENS.
-
-    - `ptr` must be a pointer to memory obtained from
-      :py:obj:`~.py`:obj:`~.cuMemAlloc()`. Note that p2pToken and
-      vaSpaceToken are only valid for the lifetime of the source
-      allocation. A subsequent allocation at the same address may return
-      completely different tokens. Querying this attribute has a side
-      effect of setting the attribute
-      :py:obj:`~.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS` for the region of memory
-      that `ptr` points to.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS`:
-
-    - A boolean attribute which when set, ensures that synchronous memory
-      operations initiated on the region of memory that `ptr` points to
-      will always synchronize. See further documentation in the section
-      titled "API synchronization behavior" to learn more about cases when
-      synchronous memory operations can exhibit asynchronous behavior.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_BUFFER_ID`:
-
-    - Returns in `*data` a buffer ID which is guaranteed to be unique
-      within the process. `data` must point to an unsigned long long.
-
-    - `ptr` must be a pointer to memory obtained from a CUDA memory
-      allocation API. Every memory allocation from any of the CUDA memory
-      allocation APIs will have a unique ID over a process lifetime.
-      Subsequent allocations do not reuse IDs from previous freed
-      allocations. IDs are only unique within a single process.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_IS_MANAGED`:
-
-    - Returns in `*data` a boolean that indicates whether the pointer
-      points to managed memory or not.
-
-    - If `ptr` is not a valid CUDA pointer then
-      :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL`:
-
-    - Returns in `*data` an integer representing a device ordinal of a
-      device against which the memory was allocated or registered.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE`:
-
-    - Returns in `*data` a boolean that indicates if this pointer maps to
-      an allocation that is suitable for :py:obj:`~.cudaIpcGetMemHandle`.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_RANGE_START_ADDR`:
-
-    - Returns in `*data` the starting address for the allocation referenced
-      by the device pointer `ptr`. Note that this is not necessarily the
-      address of the mapped region, but the address of the mappable address
-      range `ptr` references (e.g. from :py:obj:`~.cuMemAddressReserve`).
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_RANGE_SIZE`:
-
-    - Returns in `*data` the size for the allocation referenced by the
-      device pointer `ptr`. Note that this is not necessarily the size of
-      the mapped region, but the size of the mappable address range `ptr`
-      references (e.g. from :py:obj:`~.cuMemAddressReserve`). To retrieve
-      the size of the mapped region, see :py:obj:`~.cuMemGetAddressRange`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_MAPPED`:
-
-    - Returns in `*data` a boolean that indicates if this pointer is in a
-      valid address range that is mapped to a backing allocation.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES`:
-
-    - Returns a bitmask of the allowed handle types for an allocation that
-      may be passed to :py:obj:`~.cuMemExportToShareableHandle`.
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE`:
-
-    - Returns in `*data` the handle to the mempool that the allocation was
-      obtained from.
-
-    Note that for most allocations in the unified virtual address space the
-    host and device pointer for accessing the allocation will be the same.
-    The exceptions to this are
-
-    - user memory registered using :py:obj:`~.cuMemHostRegister`
-
-    - host memory allocated using :py:obj:`~.cuMemHostAlloc` with the
-      :py:obj:`~.CU_MEMHOSTALLOC_WRITECOMBINED` flag For these types of
-      allocation there will exist separate, disjoint host and device
-      addresses for accessing the allocation. In particular
-
-    - The host address will correspond to an invalid unmapped device
-      address (which will result in an exception if accessed from the
-      device)
-
-    - The device address will correspond to an invalid unmapped host
-      address (which will result in an exception if accessed from the
-      host). For these types of allocations, querying
-      :py:obj:`~.CU_POINTER_ATTRIBUTE_HOST_POINTER` and
-      :py:obj:`~.CU_POINTER_ATTRIBUTE_DEVICE_POINTER` may be used to
-      retrieve the host and device addresses from either address.
-
-    Parameters
-    ----------
-    attribute : :py:obj:`~.CUpointer_attribute`
-        Pointer attribute to query
-    ptr : :py:obj:`~.CUdeviceptr`
-        Pointer
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    data : Any
-        Returned pointer attribute value
-
-    See Also
-    --------
-    :py:obj:`~.cuPointerSetAttribute`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cuMemHostUnregister`, :py:obj:`~.cudaPointerGetAttributes`
-    """
-    cdef cydriver.CUdeviceptr cyptr
-    if ptr is None:
-        cyptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(ptr, (CUdeviceptr,)):
-        pptr = int(ptr)
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    else:
-        pptr = int(CUdeviceptr(ptr))
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    cdef utils.HelperCUpointer_attribute cydata = utils.HelperCUpointer_attribute(attribute, 0, is_getter=True)
-    cdef void* cydata_ptr = <void*><void_ptr>cydata.cptr
-    cdef cydriver.CUpointer_attribute cyattribute = attribute.value
-    err = cydriver.cuPointerGetAttribute(cydata_ptr, cyattribute, cyptr)
-    return (CUresult(err), cydata.pyObj())
-{{endif}}
-
-{{if 'cuMemPrefetchAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPrefetchAsync(devPtr, size_t count, dstDevice, hStream):
-    """ Prefetches memory to the specified destination device.
-
-    Note there is a later version of this API,
-    :py:obj:`~.cuMemPrefetchAsync_v2`. It will supplant this version in
-    13.0, which is retained for minor version compatibility.
-
-    Prefetches memory to the specified destination device. `devPtr` is the
-    base device pointer of the memory to be prefetched and `dstDevice` is
-    the destination device. `count` specifies the number of bytes to copy.
-    `hStream` is the stream in which the operation is enqueued. The memory
-    range must refer to managed memory allocated via
-    :py:obj:`~.cuMemAllocManaged` or declared via managed variables or it
-    may also refer to system-allocated memory on systems with non-zero
-    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
-
-    Passing in CU_DEVICE_CPU for `dstDevice` will prefetch the data to host
-    memory. If `dstDevice` is a GPU, then the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS` must be non-
-    zero. Additionally, `hStream` must be associated with a device that has
-    a non-zero value for the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`.
-
-    The start address and end address of the memory range will be rounded
-    down and rounded up respectively to be aligned to CPU page size before
-    the prefetch operation is enqueued in the stream.
-
-    If no physical memory has been allocated for this region, then this
-    memory region will be populated and mapped on the destination device.
-    If there's insufficient memory to prefetch the desired region, the
-    Unified Memory driver may evict pages from other
-    :py:obj:`~.cuMemAllocManaged` allocations to host memory in order to
-    make room. Device memory allocated using :py:obj:`~.cuMemAlloc` or
-    :py:obj:`~.cuArrayCreate` will not be evicted.
-
-    By default, any mappings to the previous location of the migrated pages
-    are removed and mappings for the new location are only setup on
-    `dstDevice`. The exact behavior however also depends on the settings
-    applied to this memory range via :py:obj:`~.cuMemAdvise` as described
-    below:
-
-    If :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY` was set on any subset of
-    this memory range, then that subset will create a read-only copy of the
-    pages on `dstDevice`.
-
-    If :py:obj:`~.CU_MEM_ADVISE_SET_PREFERRED_LOCATION` was called on any
-    subset of this memory range, then the pages will be migrated to
-    `dstDevice` even if `dstDevice` is not the preferred location of any
-    pages in the memory range.
-
-    If :py:obj:`~.CU_MEM_ADVISE_SET_ACCESSED_BY` was called on any subset
-    of this memory range, then mappings to those pages from all the
-    appropriate processors are updated to refer to the new location if
-    establishing such a mapping is possible. Otherwise, those mappings are
-    cleared.
-
-    Note that this API is not required for functionality and only serves to
-    improve performance by allowing the application to migrate data to a
-    suitable location before it is accessed. Memory accesses to this range
-    are always coherent and are allowed even when the data is actively
-    being migrated.
-
-    Note that this function is asynchronous with respect to the host and
-    all work on other devices.
-
-    Parameters
-    ----------
-    devPtr : :py:obj:`~.CUdeviceptr`
-        Pointer to be prefetched
-    count : size_t
-        Size in bytes
-    dstDevice : :py:obj:`~.CUdevice`
-        Destination device to prefetch to
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to enqueue prefetch operation
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemcpy`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpyAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cuMemAdvise`, :py:obj:`~.cuMemPrefetchAsync` :py:obj:`~.cudaMemPrefetchAsync_v2`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdevice cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdevice>0
-    elif isinstance(dstDevice, (CUdevice,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdevice>pdstDevice
-    else:
-        pdstDevice = int(CUdevice(dstDevice))
-        cydstDevice = <cydriver.CUdevice>pdstDevice
-    cdef cydriver.CUdeviceptr cydevPtr
-    if devPtr is None:
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(devPtr, (CUdeviceptr,)):
-        pdevPtr = int(devPtr)
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    else:
-        pdevPtr = int(CUdeviceptr(devPtr))
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    err = cydriver.cuMemPrefetchAsync(cydevPtr, count, cydstDevice, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemPrefetchAsync_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemPrefetchAsync_v2(devPtr, size_t count, location not None : CUmemLocation, unsigned int flags, hStream):
-    """ Prefetches memory to the specified destination location.
-
-    Prefetches memory to the specified destination location. `devPtr` is
-    the base device pointer of the memory to be prefetched and `location`
-    specifies the destination location. `count` specifies the number of
-    bytes to copy. `hStream` is the stream in which the operation is
-    enqueued. The memory range must refer to managed memory allocated via
-    :py:obj:`~.cuMemAllocManaged` or declared via managed variables.
-
-    Specifying :py:obj:`~.CU_MEM_LOCATION_TYPE_DEVICE` for
-    :py:obj:`~.CUmemLocation.type` will prefetch memory to GPU specified by
-    device ordinal :py:obj:`~.CUmemLocation.id` which must have non-zero
-    value for the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`.
-    Additionally, `hStream` must be associated with a device that has a
-    non-zero value for the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`. Specifying
-    :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST` as :py:obj:`~.CUmemLocation.type`
-    will prefetch data to host memory. Applications can request prefetching
-    memory to a specific host NUMA node by specifying
-    :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` for
-    :py:obj:`~.CUmemLocation.type` and a valid host NUMA node id in
-    :py:obj:`~.CUmemLocation.id` Users can also request prefetching memory
-    to the host NUMA node closest to the current thread's CPU by specifying
-    :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT` for
-    :py:obj:`~.CUmemLocation.type`. Note when
-    :py:obj:`~.CUmemLocation.type` is etiher
-    :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST` OR
-    :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT`,
-    :py:obj:`~.CUmemLocation.id` will be ignored.
-
-    The start address and end address of the memory range will be rounded
-    down and rounded up respectively to be aligned to CPU page size before
-    the prefetch operation is enqueued in the stream.
-
-    If no physical memory has been allocated for this region, then this
-    memory region will be populated and mapped on the destination device.
-    If there's insufficient memory to prefetch the desired region, the
-    Unified Memory driver may evict pages from other
-    :py:obj:`~.cuMemAllocManaged` allocations to host memory in order to
-    make room. Device memory allocated using :py:obj:`~.cuMemAlloc` or
-    :py:obj:`~.cuArrayCreate` will not be evicted.
-
-    By default, any mappings to the previous location of the migrated pages
-    are removed and mappings for the new location are only setup on the
-    destination location. The exact behavior however also depends on the
-    settings applied to this memory range via :py:obj:`~.cuMemAdvise` as
-    described below:
-
-    If :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY` was set on any subset of
-    this memory range, then that subset will create a read-only copy of the
-    pages on destination location. If however the destination location is a
-    host NUMA node, then any pages of that subset that are already in
-    another host NUMA node will be transferred to the destination.
-
-    If :py:obj:`~.CU_MEM_ADVISE_SET_PREFERRED_LOCATION` was called on any
-    subset of this memory range, then the pages will be migrated to
-    `location` even if `location` is not the preferred location of any
-    pages in the memory range.
-
-    If :py:obj:`~.CU_MEM_ADVISE_SET_ACCESSED_BY` was called on any subset
-    of this memory range, then mappings to those pages from all the
-    appropriate processors are updated to refer to the new location if
-    establishing such a mapping is possible. Otherwise, those mappings are
-    cleared.
-
-    Note that this API is not required for functionality and only serves to
-    improve performance by allowing the application to migrate data to a
-    suitable location before it is accessed. Memory accesses to this range
-    are always coherent and are allowed even when the data is actively
-    being migrated.
-
-    Note that this function is asynchronous with respect to the host and
-    all work on other devices.
-
-    Parameters
-    ----------
-    devPtr : :py:obj:`~.CUdeviceptr`
-        Pointer to be prefetched
-    count : size_t
-        Size in bytes
-    dstDevice : :py:obj:`~.CUmemLocation`
-        Destination device to prefetch to
-    flags : unsigned int
-        flags for future use, must be zero now.
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to enqueue prefetch operation
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemcpy`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpyAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cuMemAdvise`, :py:obj:`~.cuMemPrefetchAsync` :py:obj:`~.cudaMemPrefetchAsync_v2`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUdeviceptr cydevPtr
-    if devPtr is None:
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(devPtr, (CUdeviceptr,)):
-        pdevPtr = int(devPtr)
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    else:
-        pdevPtr = int(CUdeviceptr(devPtr))
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    err = cydriver.cuMemPrefetchAsync_v2(cydevPtr, count, location._ptr[0], flags, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemAdvise' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemAdvise(devPtr, size_t count, advice not None : CUmem_advise, device):
-    """ Advise about the usage of a given memory range.
-
-    Note there is a later version of this API, :py:obj:`~.cuMemAdvise_v2`.
-    It will supplant this version in 13.0, which is retained for minor
-    version compatibility.
-
-    Advise the Unified Memory subsystem about the usage pattern for the
-    memory range starting at `devPtr` with a size of `count` bytes. The
-    start address and end address of the memory range will be rounded down
-    and rounded up respectively to be aligned to CPU page size before the
-    advice is applied. The memory range must refer to managed memory
-    allocated via :py:obj:`~.cuMemAllocManaged` or declared via managed
-    variables. The memory range could also refer to system-allocated
-    pageable memory provided it represents a valid, host-accessible region
-    of memory and all additional constraints imposed by `advice` as
-    outlined below are also satisfied. Specifying an invalid system-
-    allocated pageable memory range results in an error being returned.
-
-    The `advice` parameter can take the following values:
-
-    - :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY`: This implies that the data
-      is mostly going to be read from and only occasionally written to. Any
-      read accesses from any processor to this region will create a read-
-      only copy of at least the accessed pages in that processor's memory.
-      Additionally, if :py:obj:`~.cuMemPrefetchAsync` is called on this
-      region, it will create a read-only copy of the data on the
-      destination processor. If any processor writes to this region, all
-      copies of the corresponding page will be invalidated except for the
-      one where the write occurred. The `device` argument is ignored for
-      this advice. Note that for a page to be read-duplicated, the
-      accessing processor must either be the CPU or a GPU that has a non-
-      zero value for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`. Also, if a
-      context is created on a device that does not have the device
-      attribute :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`
-      set, then read-duplication will not occur until all such contexts are
-      destroyed. If the memory region refers to valid system-allocated
-      pageable memory, then the accessing device must have a non-zero value
-      for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS` for a read-
-      only copy to be created on that device. Note however that if the
-      accessing device also has a non-zero value for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`,
-      then setting this advice will not create a read-only copy when that
-      device accesses this memory region.
-
-    - :py:obj:`~.CU_MEM_ADVISE_UNSET_READ_MOSTLY`: Undoes the effect of
-      :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY` and also prevents the
-      Unified Memory driver from attempting heuristic read-duplication on
-      the memory range. Any read-duplicated copies of the data will be
-      collapsed into a single copy. The location for the collapsed copy
-      will be the preferred location if the page has a preferred location
-      and one of the read-duplicated copies was resident at that location.
-      Otherwise, the location chosen is arbitrary.
-
-    - :py:obj:`~.CU_MEM_ADVISE_SET_PREFERRED_LOCATION`: This advice sets
-      the preferred location for the data to be the memory belonging to
-      `device`. Passing in CU_DEVICE_CPU for `device` sets the preferred
-      location as host memory. If `device` is a GPU, then it must have a
-      non-zero value for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`. Setting
-      the preferred location does not cause data to migrate to that
-      location immediately. Instead, it guides the migration policy when a
-      fault occurs on that memory region. If the data is already in its
-      preferred location and the faulting processor can establish a mapping
-      without requiring the data to be migrated, then data migration will
-      be avoided. On the other hand, if the data is not in its preferred
-      location or if a direct mapping cannot be established, then it will
-      be migrated to the processor accessing it. It is important to note
-      that setting the preferred location does not prevent data prefetching
-      done using :py:obj:`~.cuMemPrefetchAsync`. Having a preferred
-      location can override the page thrash detection and resolution logic
-      in the Unified Memory driver. Normally, if a page is detected to be
-      constantly thrashing between for example host and device memory, the
-      page may eventually be pinned to host memory by the Unified Memory
-      driver. But if the preferred location is set as device memory, then
-      the page will continue to thrash indefinitely. If
-      :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY` is also set on this memory
-      region or any subset of it, then the policies associated with that
-      advice will override the policies of this advice, unless read
-      accesses from `device` will not result in a read-only copy being
-      created on that device as outlined in description for the advice
-      :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY`. If the memory region
-      refers to valid system-allocated pageable memory, then `device` must
-      have a non-zero value for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS`.
-
-    - :py:obj:`~.CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION`: Undoes the effect
-      of :py:obj:`~.CU_MEM_ADVISE_SET_PREFERRED_LOCATION` and changes the
-      preferred location to none.
-
-    - :py:obj:`~.CU_MEM_ADVISE_SET_ACCESSED_BY`: This advice implies that
-      the data will be accessed by `device`. Passing in
-      :py:obj:`~.CU_DEVICE_CPU` for `device` will set the advice for the
-      CPU. If `device` is a GPU, then the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS` must be
-      non-zero. This advice does not cause data migration and has no impact
-      on the location of the data per se. Instead, it causes the data to
-      always be mapped in the specified processor's page tables, as long as
-      the location of the data permits a mapping to be established. If the
-      data gets migrated for any reason, the mappings are updated
-      accordingly. This advice is recommended in scenarios where data
-      locality is not important, but avoiding faults is. Consider for
-      example a system containing multiple GPUs with peer-to-peer access
-      enabled, where the data located on one GPU is occasionally accessed
-      by peer GPUs. In such scenarios, migrating data over to the other
-      GPUs is not as important because the accesses are infrequent and the
-      overhead of migration may be too high. But preventing faults can
-      still help improve performance, and so having a mapping set up in
-      advance is useful. Note that on CPU access of this data, the data may
-      be migrated to host memory because the CPU typically cannot access
-      device memory directly. Any GPU that had the
-      :py:obj:`~.CU_MEM_ADVISE_SET_ACCESSED_BY` flag set for this data will
-      now have its mapping updated to point to the page in host memory. If
-      :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY` is also set on this memory
-      region or any subset of it, then the policies associated with that
-      advice will override the policies of this advice. Additionally, if
-      the preferred location of this memory region or any subset of it is
-      also `device`, then the policies associated with
-      :py:obj:`~.CU_MEM_ADVISE_SET_PREFERRED_LOCATION` will override the
-      policies of this advice. If the memory region refers to valid system-
-      allocated pageable memory, then `device` must have a non-zero value
-      for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS`. Additionally,
-      if `device` has a non-zero value for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`,
-      then this call has no effect.
-
-    - :py:obj:`~.CU_MEM_ADVISE_UNSET_ACCESSED_BY`: Undoes the effect of
-      :py:obj:`~.CU_MEM_ADVISE_SET_ACCESSED_BY`. Any mappings to the data
-      from `device` may be removed at any time causing accesses to result
-      in non-fatal page faults. If the memory region refers to valid
-      system-allocated pageable memory, then `device` must have a non-zero
-      value for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS`. Additionally,
-      if `device` has a non-zero value for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`,
-      then this call has no effect.
-
-    Parameters
-    ----------
-    devPtr : :py:obj:`~.CUdeviceptr`
-        Pointer to memory to set the advice for
-    count : size_t
-        Size in bytes of the memory range
-    advice : :py:obj:`~.CUmem_advise`
-        Advice to be applied for the specified memory range
-    device : :py:obj:`~.CUdevice`
-        Device to apply the advice for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemcpy`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpyAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cuMemPrefetchAsync`, :py:obj:`~.cuMemAdvise_v2` :py:obj:`~.cudaMemAdvise`
-    """
-    cdef cydriver.CUdevice cydevice
-    if device is None:
-        cydevice = <cydriver.CUdevice>0
-    elif isinstance(device, (CUdevice,)):
-        pdevice = int(device)
-        cydevice = <cydriver.CUdevice>pdevice
-    else:
-        pdevice = int(CUdevice(device))
-        cydevice = <cydriver.CUdevice>pdevice
-    cdef cydriver.CUdeviceptr cydevPtr
-    if devPtr is None:
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(devPtr, (CUdeviceptr,)):
-        pdevPtr = int(devPtr)
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    else:
-        pdevPtr = int(CUdeviceptr(devPtr))
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    cdef cydriver.CUmem_advise cyadvice = advice.value
-    err = cydriver.cuMemAdvise(cydevPtr, count, cyadvice, cydevice)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemAdvise_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemAdvise_v2(devPtr, size_t count, advice not None : CUmem_advise, location not None : CUmemLocation):
-    """ Advise about the usage of a given memory range.
-
-    Advise the Unified Memory subsystem about the usage pattern for the
-    memory range starting at `devPtr` with a size of `count` bytes. The
-    start address and end address of the memory range will be rounded down
-    and rounded up respectively to be aligned to CPU page size before the
-    advice is applied. The memory range must refer to managed memory
-    allocated via :py:obj:`~.cuMemAllocManaged` or declared via managed
-    variables. The memory range could also refer to system-allocated
-    pageable memory provided it represents a valid, host-accessible region
-    of memory and all additional constraints imposed by `advice` as
-    outlined below are also satisfied. Specifying an invalid system-
-    allocated pageable memory range results in an error being returned.
-
-    The `advice` parameter can take the following values:
-
-    - :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY`: This implies that the data
-      is mostly going to be read from and only occasionally written to. Any
-      read accesses from any processor to this region will create a read-
-      only copy of at least the accessed pages in that processor's memory.
-      Additionally, if :py:obj:`~.cuMemPrefetchAsync` or
-      :py:obj:`~.cuMemPrefetchAsync_v2` is called on this region, it will
-      create a read-only copy of the data on the destination processor. If
-      the target location for :py:obj:`~.cuMemPrefetchAsync_v2` is a host
-      NUMA node and a read-only copy already exists on another host NUMA
-      node, that copy will be migrated to the targeted host NUMA node. If
-      any processor writes to this region, all copies of the corresponding
-      page will be invalidated except for the one where the write occurred.
-      If the writing processor is the CPU and the preferred location of the
-      page is a host NUMA node, then the page will also be migrated to that
-      host NUMA node. The `location` argument is ignored for this advice.
-      Note that for a page to be read-duplicated, the accessing processor
-      must either be the CPU or a GPU that has a non-zero value for the
-      device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`. Also, if a
-      context is created on a device that does not have the device
-      attribute :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`
-      set, then read-duplication will not occur until all such contexts are
-      destroyed. If the memory region refers to valid system-allocated
-      pageable memory, then the accessing device must have a non-zero value
-      for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS` for a read-
-      only copy to be created on that device. Note however that if the
-      accessing device also has a non-zero value for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`,
-      then setting this advice will not create a read-only copy when that
-      device accesses this memory region.
-
-    - :py:obj:`~.CU_MEM_ADVISE_UNSET_READ_MOSTLY`: Undoes the effect of
-      :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY` and also prevents the
-      Unified Memory driver from attempting heuristic read-duplication on
-      the memory range. Any read-duplicated copies of the data will be
-      collapsed into a single copy. The location for the collapsed copy
-      will be the preferred location if the page has a preferred location
-      and one of the read-duplicated copies was resident at that location.
-      Otherwise, the location chosen is arbitrary. Note: The `location`
-      argument is ignored for this advice.
-
-    - :py:obj:`~.CU_MEM_ADVISE_SET_PREFERRED_LOCATION`: This advice sets
-      the preferred location for the data to be the memory belonging to
-      `location`. When :py:obj:`~.CUmemLocation.type` is
-      :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST`, :py:obj:`~.CUmemLocation.id`
-      is ignored and the preferred location is set to be host memory. To
-      set the preferred location to a specific host NUMA node, applications
-      must set :py:obj:`~.CUmemLocation.type` to
-      :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` and
-      :py:obj:`~.CUmemLocation.id` must specify the NUMA ID of the host
-      NUMA node. If :py:obj:`~.CUmemLocation.type` is set to
-      :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT`,
-      :py:obj:`~.CUmemLocation.id` will be ignored and the host NUMA
-      node closest to the calling thread's CPU will be used as the
-      preferred location. If :py:obj:`~.CUmemLocation.type` is a
-      :py:obj:`~.CU_MEM_LOCATION_TYPE_DEVICE`, then
-      :py:obj:`~.CUmemLocation.id` must be a valid device ordinal and the
-      device must have a non-zero value for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`. Setting
-      the preferred location does not cause data to migrate to that
-      location immediately. Instead, it guides the migration policy when a
-      fault occurs on that memory region. If the data is already in its
-      preferred location and the faulting processor can establish a mapping
-      without requiring the data to be migrated, then data migration will
-      be avoided. On the other hand, if the data is not in its preferred
-      location or if a direct mapping cannot be established, then it will
-      be migrated to the processor accessing it. It is important to note
-      that setting the preferred location does not prevent data prefetching
-      done using :py:obj:`~.cuMemPrefetchAsync`. Having a preferred
-      location can override the page thrash detection and resolution logic
-      in the Unified Memory driver. Normally, if a page is detected to be
-      constantly thrashing between for example host and device memory, the
-      page may eventually be pinned to host memory by the Unified Memory
-      driver. But if the preferred location is set as device memory, then
-      the page will continue to thrash indefinitely. If
-      :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY` is also set on this memory
-      region or any subset of it, then the policies associated with that
-      advice will override the policies of this advice, unless read
-      accesses from `location` will not result in a read-only copy being
-      created on that processor as outlined in the description for the advice
-      :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY`. If the memory region
-      refers to valid system-allocated pageable memory, and
-      :py:obj:`~.CUmemLocation.type` is CU_MEM_LOCATION_TYPE_DEVICE then
-      :py:obj:`~.CUmemLocation.id` must be a valid device that has a non-
-      zero value for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS`.
-
-    - :py:obj:`~.CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION`: Undoes the effect
-      of :py:obj:`~.CU_MEM_ADVISE_SET_PREFERRED_LOCATION` and changes the
-      preferred location to none. The `location` argument is ignored for
-      this advice.
-
-    - :py:obj:`~.CU_MEM_ADVISE_SET_ACCESSED_BY`: This advice implies that
-      the data will be accessed by processor `location`. The
-      :py:obj:`~.CUmemLocation.type` must be either
-      :py:obj:`~.CU_MEM_LOCATION_TYPE_DEVICE` with
-      :py:obj:`~.CUmemLocation.id` representing a valid device ordinal or
-      :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST` and
-      :py:obj:`~.CUmemLocation.id` will be ignored. All other location
-      types are invalid. If :py:obj:`~.CUmemLocation.id` is a GPU, then the
-      device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS` must be
-      non-zero. This advice does not cause data migration and has no impact
-      on the location of the data per se. Instead, it causes the data to
-      always be mapped in the specified processor's page tables, as long as
-      the location of the data permits a mapping to be established. If the
-      data gets migrated for any reason, the mappings are updated
-      accordingly. This advice is recommended in scenarios where data
-      locality is not important, but avoiding faults is. Consider for
-      example a system containing multiple GPUs with peer-to-peer access
-      enabled, where the data located on one GPU is occasionally accessed
-      by peer GPUs. In such scenarios, migrating data over to the other
-      GPUs is not as important because the accesses are infrequent and the
-      overhead of migration may be too high. But preventing faults can
-      still help improve performance, and so having a mapping set up in
-      advance is useful. Note that on CPU access of this data, the data may
-      be migrated to host memory because the CPU typically cannot access
-      device memory directly. Any GPU that had the
-      :py:obj:`~.CU_MEM_ADVISE_SET_ACCESSED_BY` flag set for this data will
-      now have its mapping updated to point to the page in host memory. If
-      :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY` is also set on this memory
-      region or any subset of it, then the policies associated with that
-      advice will override the policies of this advice. Additionally, if
-      the preferred location of this memory region or any subset of it is
-      also `location`, then the policies associated with
-      :py:obj:`~.CU_MEM_ADVISE_SET_PREFERRED_LOCATION` will override the
-      policies of this advice. If the memory region refers to valid system-
-      allocated pageable memory, and :py:obj:`~.CUmemLocation.type` is
-      :py:obj:`~.CU_MEM_LOCATION_TYPE_DEVICE` then device in
-      :py:obj:`~.CUmemLocation.id` must have a non-zero value for the
-      device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS`. Additionally,
-      if :py:obj:`~.CUmemLocation.id` has a non-zero value for the device
-      attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`,
-      then this call has no effect.
-
-    - :py:obj:`~.CU_MEM_ADVISE_UNSET_ACCESSED_BY`: Undoes the effect of
-      :py:obj:`~.CU_MEM_ADVISE_SET_ACCESSED_BY`. Any mappings to the data
-      from `location` may be removed at any time causing accesses to result
-      in non-fatal page faults. If the memory region refers to valid
-      system-allocated pageable memory, and :py:obj:`~.CUmemLocation.type`
-      is :py:obj:`~.CU_MEM_LOCATION_TYPE_DEVICE` then device in
-      :py:obj:`~.CUmemLocation.id` must have a non-zero value for the
-      device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS`. Additionally,
-      if :py:obj:`~.CUmemLocation.id` has a non-zero value for the device
-      attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`,
-      then this call has no effect.
-
-    Parameters
-    ----------
-    devPtr : :py:obj:`~.CUdeviceptr`
-        Pointer to memory to set the advice for
-    count : size_t
-        Size in bytes of the memory range
-    advice : :py:obj:`~.CUmem_advise`
-        Advice to be applied for the specified memory range
-    location : :py:obj:`~.CUmemLocation`
-        location to apply the advice for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemcpy`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpyAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cuMemPrefetchAsync`, :py:obj:`~.cuMemAdvise`, :py:obj:`~.cudaMemAdvise`
-    """
-    cdef cydriver.CUdeviceptr cydevPtr
-    if devPtr is None:
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(devPtr, (CUdeviceptr,)):
-        pdevPtr = int(devPtr)
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    else:
-        pdevPtr = int(CUdeviceptr(devPtr))
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    cdef cydriver.CUmem_advise cyadvice = advice.value
-    err = cydriver.cuMemAdvise_v2(cydevPtr, count, cyadvice, location._ptr[0])
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuMemRangeGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemRangeGetAttribute(size_t dataSize, attribute not None : CUmem_range_attribute, devPtr, size_t count):
-    """ Query an attribute of a given memory range.
-
-    Query an attribute about the memory range starting at `devPtr` with a
-    size of `count` bytes. The memory range must refer to managed memory
-    allocated via :py:obj:`~.cuMemAllocManaged` or declared via managed
-    variables.
-
-    The `attribute` parameter can take the following values:
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY`: If this attribute is
-      specified, `data` will be interpreted as a 32-bit integer, and
-      `dataSize` must be 4. The result returned will be 1 if all pages in
-      the given memory range have read-duplication enabled, or 0 otherwise.
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION`: If this
-      attribute is specified, `data` will be interpreted as a 32-bit
-      integer, and `dataSize` must be 4. The result returned will be a GPU
-      device id if all pages in the memory range have that GPU as their
-      preferred location, or it will be CU_DEVICE_CPU if all pages in the
-      memory range have the CPU as their preferred location, or it will be
-      CU_DEVICE_INVALID if either all the pages don't have the same
-      preferred location or some of the pages don't have a preferred
-      location at all. Note that the actual location of the pages in the
-      memory range at the time of the query may be different from the
-      preferred location.
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY`: If this attribute is
-      specified, `data` will be interpreted as an array of 32-bit integers,
-      and `dataSize` must be a non-zero multiple of 4. The result returned
-      will be a list of device ids that had
-      :py:obj:`~.CU_MEM_ADVISE_SET_ACCESSED_BY` set for that entire memory
-      range. If any device does not have that advice set for the entire
-      memory range, that device will not be included. If `data` is larger
-      than the number of devices that have that advice set for that memory
-      range, CU_DEVICE_INVALID will be returned in all the extra space
-      provided. For ex., if `dataSize` is 12 (i.e. `data` has 3 elements)
-      and only device 0 has the advice set, then the result returned will
-      be { 0, CU_DEVICE_INVALID, CU_DEVICE_INVALID }. If `data` is smaller
-      than the number of devices that have that advice set, then only as
-      many devices will be returned as can fit in the array. There is no
-      guarantee on which specific devices will be returned, however.
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION`: If this
-      attribute is specified, `data` will be interpreted as a 32-bit
-      integer, and `dataSize` must be 4. The result returned will be the
-      last location to which all pages in the memory range were prefetched
-      explicitly via :py:obj:`~.cuMemPrefetchAsync`. This will either be a
-      GPU id or CU_DEVICE_CPU depending on whether the last location for
-      prefetch was a GPU or the CPU respectively. If any page in the memory
-      range was never explicitly prefetched or if all pages were not
-      prefetched to the same location, CU_DEVICE_INVALID will be returned.
-      Note that this simply returns the last location that the application
-      requested to prefetch the memory range to. It gives no indication as
-      to whether the prefetch operation to that location has completed or
-      even begun.
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE`: If this
-      attribute is specified, `data` will be interpreted as a
-      :py:obj:`~.CUmemLocationType`, and `dataSize` must be
-      sizeof(CUmemLocationType). The :py:obj:`~.CUmemLocationType` returned
-      will be :py:obj:`~.CU_MEM_LOCATION_TYPE_DEVICE` if all pages in the
-      memory range have the same GPU as their preferred location, or
-      :py:obj:`~.CUmemLocationType` will be
-      :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST` if all pages in the memory
-      range have the CPU as their preferred location, or it will be
-      :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` if all the pages in the
-      memory range have the same host NUMA node ID as their preferred
-      location or it will be :py:obj:`~.CU_MEM_LOCATION_TYPE_INVALID` if
-      either all the pages don't have the same preferred location or some
-      of the pages don't have a preferred location at all. Note that the
-      actual location type of the pages in the memory range at the time of
-      the query may be different from the preferred location type.
-
-      - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID`: If this
-        attribute is specified, `data` will be interpreted as a 32-bit
-        integer, and `dataSize` must be 4. If the
-        :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE` query
-        for the same address range returns
-        :py:obj:`~.CU_MEM_LOCATION_TYPE_DEVICE`, it will be a valid device
-        ordinal or if it returns
-        :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA`, it will be a valid host
-        NUMA node ID or if it returns any other location type, the id
-        should be ignored.
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE`: If
-      this attribute is specified, `data` will be interpreted as a
-      :py:obj:`~.CUmemLocationType`, and `dataSize` must be
-      sizeof(CUmemLocationType). The result returned will be the last
-      location to which all pages in the memory range were prefetched
-      explicitly via :py:obj:`~.cuMemPrefetchAsync`. The
-      :py:obj:`~.CUmemLocationType` returned will be
-      :py:obj:`~.CU_MEM_LOCATION_TYPE_DEVICE` if the last prefetch location
-      was a GPU or :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST` if it was the CPU
-      or :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` if the last prefetch
-      location was a specific host NUMA node. If any page in the memory
-      range was never explicitly prefetched or if all pages were not
-      prefetched to the same location, :py:obj:`~.CUmemLocationType` will
-      be :py:obj:`~.CU_MEM_LOCATION_TYPE_INVALID`. Note that this simply
-      returns the last location type that the application requested to
-      prefetch the memory range to. It gives no indication as to whether
-      the prefetch operation to that location has completed or even begun.
-
-      - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID`: If
-        this attribute is specified, `data` will be interpreted as a 32-bit
-        integer, and `dataSize` must be 4. If the
-        :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE`
-        query for the same address range returns
-        :py:obj:`~.CU_MEM_LOCATION_TYPE_DEVICE`, it will be a valid device
-        ordinal or if it returns
-        :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA`, it will be a valid host
-        NUMA node ID or if it returns any other location type, the id
-        should be ignored.
-
-    Parameters
-    ----------
-    dataSize : size_t
-        Size in bytes of the `data` result buffer (see the per-attribute
-        requirements above)
-    attribute : :py:obj:`~.CUmem_range_attribute`
-        The attribute to query
-    devPtr : :py:obj:`~.CUdeviceptr`
-        Start of the range to query
-    count : size_t
-        Size of the range to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    data : Any
-        A pointer to a memory location where the result of each attribute
-        query will be written to.
-
-    See Also
-    --------
-    :py:obj:`~.cuMemRangeGetAttributes`, :py:obj:`~.cuMemPrefetchAsync`, :py:obj:`~.cuMemAdvise`, :py:obj:`~.cudaMemRangeGetAttribute`
-    """
-    # Normalise devPtr to a raw driver pointer: None becomes a null pointer,
-    # and anything that is not already a CUdeviceptr is wrapped in one first.
-    cdef cydriver.CUdeviceptr cydevPtr
-    if devPtr is None:
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(devPtr, (CUdeviceptr,)):
-        pdevPtr = int(devPtr)
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    else:
-        pdevPtr = int(CUdeviceptr(devPtr))
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    # The helper is built from the attribute and dataSize; its cptr is what
-    # the driver call below receives as the `data` argument.
-    cdef utils.HelperCUmem_range_attribute cydata = utils.HelperCUmem_range_attribute(attribute, dataSize)
-    cdef void* cydata_ptr = <void*><void_ptr>cydata.cptr
-    cdef cydriver.CUmem_range_attribute cyattribute = attribute.value
-    err = cydriver.cuMemRangeGetAttribute(cydata_ptr, dataSize, cyattribute, cydevPtr, count)
-    # err is not inspected here: the status and the helper's pyObj() value are
-    # returned together, so callers must check the CUresult themselves.
-    return (CUresult(err), cydata.pyObj())
-{{endif}}
-
-{{if 'cuMemRangeGetAttributes' in found_functions}}
-
-@cython.embedsignature(True)
-def cuMemRangeGetAttributes(dataSizes : Tuple[int] | List[int], attributes : Optional[Tuple[CUmem_range_attribute] | List[CUmem_range_attribute]], size_t numAttributes, devPtr, size_t count):
-    """ Query attributes of a given memory range.
-
-    Query attributes of the memory range starting at `devPtr` with a size
-    of `count` bytes. The memory range must refer to managed memory
-    allocated via :py:obj:`~.cuMemAllocManaged` or declared via managed
-    variables. The `attributes` array will be interpreted to have
-    `numAttributes` entries. The `dataSizes` array will also be interpreted
-    to have `numAttributes` entries. The results of the query will be
-    stored in `data`.
-
-    The list of supported attributes are given below. Please refer to
-    :py:obj:`~.cuMemRangeGetAttribute` for attribute descriptions and
-    restrictions.
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY`
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION`
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY`
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION`
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE`
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID`
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE`
-
-    - :py:obj:`~.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID`
-
-    Parameters
-    ----------
-    dataSizes : List[int]
-        Array containing the sizes of each result
-    attributes : List[:py:obj:`~.CUmem_range_attribute`]
-        An array of attributes to query (numAttributes and the number of
-        attributes in this array should match)
-    numAttributes : size_t
-        Number of attributes to query
-    devPtr : :py:obj:`~.CUdeviceptr`
-        Start of the range to query
-    count : size_t
-        Size of the range to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    data : List[Any]
-        A two-dimensional array containing pointers to memory locations
-        where the result of each attribute query will be written to.
-
-    Raises
-    ------
-    TypeError
-        If any element of `attributes` is not a
-        :py:obj:`~.CUmem_range_attribute`, or any element of `dataSizes`
-        is not an int.
-    RuntimeError
-        If `numAttributes` is larger than the length of `dataSizes` or of
-        `attributes`.
-
-    See Also
-    --------
-    :py:obj:`~.cuMemRangeGetAttribute`, :py:obj:`~.cuMemAdvise`, :py:obj:`~.cuMemPrefetchAsync`, :py:obj:`~.cudaMemRangeGetAttributes`
-    """
-    # Normalise devPtr to a raw driver pointer: None becomes a null pointer,
-    # and anything that is not already a CUdeviceptr is wrapped in one first.
-    cdef cydriver.CUdeviceptr cydevPtr
-    if devPtr is None:
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(devPtr, (CUdeviceptr,)):
-        pdevPtr = int(devPtr)
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    else:
-        pdevPtr = int(CUdeviceptr(devPtr))
-        cydevPtr = <cydriver.CUdeviceptr><void_ptr>pdevPtr
-    # A missing attribute list is treated as empty; dataSizes gets no such
-    # default and must be an iterable.
-    attributes = [] if attributes is None else attributes
-    if not all(isinstance(_x, (CUmem_range_attribute)) for _x in attributes):
-        raise TypeError("Argument 'attributes' is not instance of type (expected Tuple[cydriver.CUmem_range_attribute] or List[cydriver.CUmem_range_attribute]")
-    if not all(isinstance(_x, (int)) for _x in dataSizes):
-        raise TypeError("Argument 'dataSizes' is not instance of type (expected Tuple[int] or List[int]")
-    # One helper per (attribute, size) pair. zip() stops at the shorter input,
-    # so mismatched lengths yield fewer helpers than the longer list has entries.
-    pylist = [utils.HelperCUmem_range_attribute(pyattributes, pydataSizes) for (pyattributes, pydataSizes) in zip(attributes, dataSizes)]
-    cdef utils.InputVoidPtrPtrHelper voidStarHelperdata = utils.InputVoidPtrPtrHelper(pylist)
-    cdef void** cyvoidStarHelper_ptr = <void**><void_ptr>voidStarHelperdata.cptr
-    cdef vector[size_t] cydataSizes = dataSizes
-    cdef vector[cydriver.CUmem_range_attribute] cyattributes = [pyattributes.value for pyattributes in (attributes)]
-    # These checks run after the helpers are built but before the driver call,
-    # so the driver is never given a numAttributes larger than either vector.
-    if numAttributes > <size_t>len(dataSizes): raise RuntimeError("List is too small: " + str(len(dataSizes)) + " < " + str(numAttributes))
-    if numAttributes > <size_t>len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes))
-    err = cydriver.cuMemRangeGetAttributes(cyvoidStarHelper_ptr, cydataSizes.data(), cyattributes.data(), numAttributes, cydevPtr, count)
-    # err is not inspected here: one pyObj() value per helper is returned
-    # alongside the status, whatever that status is.
-    return (CUresult(err), [obj.pyObj() for obj in pylist])
-{{endif}}
-
-{{if 'cuPointerSetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuPointerSetAttribute(value, attribute not None : CUpointer_attribute, ptr):
-    """ Set an attribute on a previously allocated memory region.
-
-    The supported attributes are:
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS`: a boolean attribute
-      that is either set (1) or unset (0). While it is set, the memory
-      region that `ptr` points to is guaranteed to always synchronize
-      memory operations that are synchronous. If previously initiated
-      synchronous memory operations are still pending when the attribute
-      is set, the function does not return until they have completed. The
-      section titled "API synchronization behavior" describes the cases in
-      which synchronous memory operations can exhibit asynchronous
-      behavior. `value` is treated as a pointer to the unsigned integer
-      this attribute is to be set to.
-
-    Parameters
-    ----------
-    value : Any
-        Pointer to memory containing the value to be set
-    attribute : :py:obj:`~.CUpointer_attribute`
-        Pointer attribute to set
-    ptr : :py:obj:`~.CUdeviceptr`
-        Pointer to a memory region allocated using CUDA memory allocation
-        APIs
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-
-    See Also
-    --------
-    :py:obj:`~.cuPointerGetAttribute`, :py:obj:`~.cuPointerGetAttributes`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cuMemHostUnregister`
-    """
-    cdef cydriver.CUdeviceptr cyptr
-    if ptr is None:
-        # No pointer supplied: hand the driver a null device pointer.
-        cyptr = <cydriver.CUdeviceptr><void_ptr>0
-    else:
-        # Anything that is not already a CUdeviceptr is wrapped in one first.
-        devptr_obj = ptr if isinstance(ptr, (CUdeviceptr,)) else CUdeviceptr(ptr)
-        raw_address = int(devptr_obj)
-        cyptr = <cydriver.CUdeviceptr><void_ptr>raw_address
-    # Setter-mode helper built from the caller's value; its cptr is passed to the driver.
-    cdef utils.HelperCUpointer_attribute value_helper = utils.HelperCUpointer_attribute(attribute, value, is_getter=False)
-    cdef void* value_ptr = <void*><void_ptr>value_helper.cptr
-    cdef cydriver.CUpointer_attribute attribute_code = attribute.value
-    status = cydriver.cuPointerSetAttribute(value_ptr, attribute_code, cyptr)
-    return (CUresult(status),)
-{{endif}}
-
-{{if 'cuPointerGetAttributes' in found_functions}}
-
-@cython.embedsignature(True)
-def cuPointerGetAttributes(unsigned int numAttributes, attributes : Optional[Tuple[CUpointer_attribute] | List[CUpointer_attribute]], ptr):
-    """ Returns information about a pointer.
-
-    The supported attributes are (refer to
-    :py:obj:`~.cuPointerGetAttribute` for attribute descriptions and
-    restrictions):
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_CONTEXT`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_MEMORY_TYPE`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_DEVICE_POINTER`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_HOST_POINTER`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_BUFFER_ID`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_IS_MANAGED`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_RANGE_START_ADDR`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_RANGE_SIZE`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_MAPPED`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES`
-
-    - :py:obj:`~.CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE`
-
-    Unlike :py:obj:`~.cuPointerGetAttribute`, this function will not return
-    an error when the `ptr` encountered is not a valid CUDA pointer.
-    Instead, the attributes are assigned default NULL values and
-    CUDA_SUCCESS is returned.
-
-    If `ptr` was not allocated by, mapped by, or registered with a
-    :py:obj:`~.CUcontext` which uses UVA (Unified Virtual Addressing),
-    :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT` is returned.
-
-    Parameters
-    ----------
-    numAttributes : unsigned int
-        Number of attributes to query
-    attributes : List[:py:obj:`~.CUpointer_attribute`]
-        An array of attributes to query (numAttributes and the number of
-        attributes in this array should match)
-    ptr : :py:obj:`~.CUdeviceptr`
-        Pointer to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    data : List[Any]
-        A two-dimensional array containing pointers to memory locations
-        where the result of each attribute query will be written to.
-
-    Raises
-    ------
-    TypeError
-        If any element of `attributes` is not a
-        :py:obj:`~.CUpointer_attribute`.
-    RuntimeError
-        If `numAttributes` is larger than the length of `attributes`.
-
-    See Also
-    --------
-    :py:obj:`~.cuPointerGetAttribute`, :py:obj:`~.cuPointerSetAttribute`, :py:obj:`~.cudaPointerGetAttributes`
-    """
-    # Normalise ptr to a raw driver pointer: None becomes a null pointer,
-    # and anything that is not already a CUdeviceptr is wrapped in one first.
-    cdef cydriver.CUdeviceptr cyptr
-    if ptr is None:
-        cyptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(ptr, (CUdeviceptr,)):
-        pptr = int(ptr)
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    else:
-        pptr = int(CUdeviceptr(ptr))
-        cyptr = <cydriver.CUdeviceptr><void_ptr>pptr
-    # A missing attribute list is treated as empty.
-    attributes = [] if attributes is None else attributes
-    if not all(isinstance(_x, (CUpointer_attribute)) for _x in attributes):
-        raise TypeError("Argument 'attributes' is not instance of type (expected Tuple[cydriver.CUpointer_attribute] or List[cydriver.CUpointer_attribute]")
-    # Checked before any helper is built, so the driver is never given a
-    # numAttributes larger than the attribute vector.
-    if numAttributes > len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes))
-    cdef vector[cydriver.CUpointer_attribute] cyattributes = [pyattributes.value for pyattributes in (attributes)]
-    # One getter-mode helper per attribute; the 0 passed as the value
-    # argument is a placeholder here.
-    pylist = [utils.HelperCUpointer_attribute(pyattributes, 0, is_getter=True) for pyattributes in attributes]
-    cdef utils.InputVoidPtrPtrHelper voidStarHelperdata = utils.InputVoidPtrPtrHelper(pylist)
-    cdef void** cyvoidStarHelper_ptr = <void**><void_ptr>voidStarHelperdata.cptr
-    err = cydriver.cuPointerGetAttributes(numAttributes, cyattributes.data(), cyvoidStarHelper_ptr, cyptr)
-    # err is not inspected here: one pyObj() value per helper is returned
-    # alongside the status, whatever that status is.
-    return (CUresult(err), [obj.pyObj() for obj in pylist])
-{{endif}}
-
-{{if 'cuStreamCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamCreate(unsigned int Flags):
-    """ Create a stream.
-
-    A new stream is created and its handle is returned as `phStream`.
-    The behavior of the stream is controlled by the `Flags` argument.
-
-    `Flags` accepts the following values:
-
-    - :py:obj:`~.CU_STREAM_DEFAULT`: Default stream creation flag.
-
-    - :py:obj:`~.CU_STREAM_NON_BLOCKING`: Work running in the created
-      stream may run concurrently with work in stream 0 (the NULL stream),
-      and the created stream should perform no implicit synchronization
-      with stream 0.
-
-    Parameters
-    ----------
-    Flags : unsigned int
-        Parameters for stream creation
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    phStream : :py:obj:`~.CUstream`
-        Returned newly created stream
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`
-    """
-    # The wrapper object is allocated first; the driver writes the new handle
-    # through its _ptr, and the same object is handed back to the caller.
-    cdef CUstream new_stream = CUstream()
-    status = cydriver.cuStreamCreate(<cydriver.CUstream*>new_stream._ptr, Flags)
-    return (CUresult(status), new_stream)
-{{endif}}
-
-{{if 'cuStreamCreateWithPriority' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamCreateWithPriority(unsigned int flags, int priority):
-    """ Create a stream with the given priority.
-
-    Creates a stream with the specified priority and returns a handle in
-    `phStream`. This affects the scheduling priority of work in the stream.
-    Priorities provide a hint to preferentially run work with higher
-    priority when possible, but do not preempt already-running work or
-    provide any other functional guarantee on execution order.
-
-    `priority` follows a convention where lower numbers represent higher
-    priorities. '0' represents default priority. The range of meaningful
-    numerical priorities can be queried using
-    :py:obj:`~.cuCtxGetStreamPriorityRange`. If the specified priority is
-    outside the numerical range returned by
-    :py:obj:`~.cuCtxGetStreamPriorityRange`, it will automatically be
-    clamped to the lowest or the highest number in the range.
-
-    Parameters
-    ----------
-    flags : unsigned int
-        Flags for stream creation. See :py:obj:`~.cuStreamCreate` for a
-        list of valid flags
-    priority : int
-        Stream priority. Lower numbers represent higher priorities. See
-        :py:obj:`~.cuCtxGetStreamPriorityRange` for more information about
-        meaningful stream priorities that can be passed.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    phStream : :py:obj:`~.CUstream`
-        Returned newly created stream
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreateWithPriority`
-
-    Notes
-    -----
-    Stream priorities are supported only on GPUs with compute capability 3.5 or higher.
-
-    In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations.
-    """
-    cdef CUstream phStream = CUstream()
-    err = cydriver.cuStreamCreateWithPriority(<cydriver.CUstream*>phStream._ptr, flags, priority)
-    return (CUresult(err), phStream)
-{{endif}}
-
-{{if 'cuStreamGetPriority' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamGetPriority(hStream):
-    """ Query the priority of a given stream.
-
-    Query the priority of a stream created using
-    :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority` or
-    :py:obj:`~.cuGreenCtxStreamCreate` and return the priority in
-    `priority`. Note that if the stream was created with a priority outside
-    the numerical range returned by
-    :py:obj:`~.cuCtxGetStreamPriorityRange`, this function returns the
-    clamped priority. See :py:obj:`~.cuStreamCreateWithPriority` for
-    details about priority clamping.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Handle to the stream to be queried
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    priority : int
-        Pointer to a signed integer in which the stream's priority is
-        returned
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cudaStreamGetPriority`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef int priority = 0
-    err = cydriver.cuStreamGetPriority(cyhStream, &priority)
-    return (CUresult(err), priority)
-{{endif}}
-
-{{if 'cuStreamGetFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamGetFlags(hStream):
-    """ Query the flags of a given stream.
-
-    Query the flags of a stream created using :py:obj:`~.cuStreamCreate`,
-    :py:obj:`~.cuStreamCreateWithPriority` or
-    :py:obj:`~.cuGreenCtxStreamCreate` and return the flags in `flags`.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Handle to the stream to be queried
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    flags : unsigned int
-        Pointer to an unsigned integer in which the stream's flags are
-        returned The value returned in `flags` is a logical 'OR' of all
-        flags that were used while creating this stream. See
-        :py:obj:`~.cuStreamCreate` for the list of valid flags
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef unsigned int flags = 0
-    err = cydriver.cuStreamGetFlags(cyhStream, &flags)
-    return (CUresult(err), flags)
-{{endif}}
-
-{{if 'cuStreamGetId' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamGetId(hStream):
-    """ Returns the unique Id associated with the stream handle supplied.
-
-    Returns in `streamId` the unique Id which is associated with the given
-    stream handle. The Id is unique for the life of the program.
-
-    The stream handle `hStream` can refer to any of the following:
-
-    - a stream created via any of the CUDA driver APIs such as
-      :py:obj:`~.cuStreamCreate` and
-      :py:obj:`~.cuStreamCreateWithPriority`, or their runtime API
-      equivalents such as :py:obj:`~.cudaStreamCreate`,
-      :py:obj:`~.cudaStreamCreateWithFlags` and
-      :py:obj:`~.cudaStreamCreateWithPriority`. Passing an invalid handle
-      will result in undefined behavior.
-
-    - any of the special streams such as the NULL stream,
-      :py:obj:`~.CU_STREAM_LEGACY` and :py:obj:`~.CU_STREAM_PER_THREAD`.
-      The runtime API equivalents of these are also accepted, which are
-      NULL, :py:obj:`~.cudaStreamLegacy` and
-      :py:obj:`~.cudaStreamPerThread` respectively.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Handle to the stream to be queried
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-    streamId : unsigned long long
-        Pointer to store the Id of the stream
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cudaStreamGetId`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef unsigned long long streamId = 0
-    err = cydriver.cuStreamGetId(cyhStream, &streamId)
-    return (CUresult(err), streamId)
-{{endif}}
-
-{{if 'cuStreamGetCtx' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamGetCtx(hStream):
-    """ Query the context associated with a stream.
-
-    Returns the CUDA context that the stream is associated with.
-
-    Note there is a later version of this API,
-    :py:obj:`~.cuStreamGetCtx_v2`. It will supplant this version in CUDA
-    13.0. It is recommended to use :py:obj:`~.cuStreamGetCtx_v2` till then
-    as this version will return :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` for
-    streams created via the API :py:obj:`~.cuGreenCtxStreamCreate`.
-
-    The stream handle `hStream` can refer to any of the following:
-
-    - a stream created via any of the CUDA driver APIs such as
-      :py:obj:`~.cuStreamCreate` and
-      :py:obj:`~.cuStreamCreateWithPriority`, or their runtime API
-      equivalents such as :py:obj:`~.cudaStreamCreate`,
-      :py:obj:`~.cudaStreamCreateWithFlags` and
-      :py:obj:`~.cudaStreamCreateWithPriority`. The returned context is the
-      context that was active in the calling thread when the stream was
-      created. Passing an invalid handle will result in undefined behavior.
-
-    - any of the special streams such as the NULL stream,
-      :py:obj:`~.CU_STREAM_LEGACY` and :py:obj:`~.CU_STREAM_PER_THREAD`.
-      The runtime API equivalents of these are also accepted, which are
-      NULL, :py:obj:`~.cudaStreamLegacy` and
-      :py:obj:`~.cudaStreamPerThread` respectively. Specifying any of the
-      special handles will return the context current to the calling
-      thread. If no context is current to the calling thread,
-      :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT` is returned.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Handle to the stream to be queried
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    pctx : :py:obj:`~.CUcontext`
-        Returned context associated with the stream
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cuStreamGetCtx_v2`, :py:obj:`~.cudaStreamCreateWithFlags`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef CUcontext pctx = CUcontext()
-    err = cydriver.cuStreamGetCtx(cyhStream, <cydriver.CUcontext*>pctx._ptr)
-    return (CUresult(err), pctx)
-{{endif}}
-
-{{if 'cuStreamGetCtx_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamGetCtx_v2(hStream):
-    """ Query the contexts associated with a stream.
-
-    Returns the contexts that the stream is associated with.
-
-    If the stream is associated with a green context, the API returns the
-    green context in `pGreenCtx` and the primary context of the associated
-    device in `pCtx`.
-
-    If the stream is associated with a regular context, the API returns the
-    regular context in `pCtx` and NULL in `pGreenCtx`.
-
-    The stream handle `hStream` can refer to any of the following:
-
-    - a stream created via any of the CUDA driver APIs such as
-      :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority`
-      and :py:obj:`~.cuGreenCtxStreamCreate`, or their runtime API
-      equivalents such as :py:obj:`~.cudaStreamCreate`,
-      :py:obj:`~.cudaStreamCreateWithFlags` and
-      :py:obj:`~.cudaStreamCreateWithPriority`. Passing an invalid handle
-      will result in undefined behavior.
-
-    - any of the special streams such as the NULL stream,
-      :py:obj:`~.CU_STREAM_LEGACY` and :py:obj:`~.CU_STREAM_PER_THREAD`.
-      The runtime API equivalents of these are also accepted, which are
-      NULL, :py:obj:`~.cudaStreamLegacy` and
-      :py:obj:`~.cudaStreamPerThread` respectively. If any of the special
-      handles are specified, the API will operate on the context current to
-      the calling thread. If a green context (that was converted via
-      :py:obj:`~.cuCtxFromGreenCtx()` before setting it current) is current
-      to the calling thread, the API will return the green context in
-      `pGreenCtx` and the primary context of the associated device in
-      `pCtx`. If a regular context is current, the API returns the regular
-      context in `pCtx` and NULL in `pGreenCtx`. Note that specifying
-      :py:obj:`~.CU_STREAM_PER_THREAD` or :py:obj:`~.cudaStreamPerThread`
-      will return :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` if a green context
-      is current to the calling thread. If no context is current to the
-      calling thread, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT` is returned.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Handle to the stream to be queried
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-    pCtx : :py:obj:`~.CUcontext`
-        Returned regular context associated with the stream
-    pGreenCtx : :py:obj:`~.CUgreenCtx`
-        Returned green context if the stream is associated with a green
-        context or NULL if not
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate` :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`,
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef CUcontext pCtx = CUcontext()
-    cdef CUgreenCtx pGreenCtx = CUgreenCtx()
-    err = cydriver.cuStreamGetCtx_v2(cyhStream, <cydriver.CUcontext*>pCtx._ptr, <cydriver.CUgreenCtx*>pGreenCtx._ptr)
-    return (CUresult(err), pCtx, pGreenCtx)
-{{endif}}
-
-{{if 'cuStreamWaitEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamWaitEvent(hStream, hEvent, unsigned int Flags):
-    """ Make a compute stream wait on an event.
-
-    Makes all future work submitted to `hStream` wait for all work captured
-    in `hEvent`. See :py:obj:`~.cuEventRecord()` for details on what is
-    captured by an event. The synchronization will be performed efficiently
-    on the device when applicable. `hEvent` may be from a different context
-    or device than `hStream`.
-
-    flags include:
-
-    - :py:obj:`~.CU_EVENT_WAIT_DEFAULT`: Default event creation flag.
-
-    - :py:obj:`~.CU_EVENT_WAIT_EXTERNAL`: Event is captured in the graph as
-      an external event node when performing stream capture. This flag is
-      invalid outside of stream capture.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to wait
-    hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to wait on (may not be NULL)
-    Flags : unsigned int
-        See :py:obj:`~.CUevent_capture_flags`
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuEventRecord`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cudaStreamWaitEvent`
-    """
-    cdef cydriver.CUevent cyhEvent
-    if hEvent is None:
-        cyhEvent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hEvent, (CUevent,)):
-        phEvent = int(hEvent)
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    else:
-        phEvent = int(CUevent(hEvent))
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    err = cydriver.cuStreamWaitEvent(cyhStream, cyhEvent, Flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamAddCallback' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamAddCallback(hStream, callback, userData, unsigned int flags):
-    """ Add a callback to a compute stream.
-
-    Adds a callback to be called on the host after all currently enqueued
-    items in the stream have completed. For each cuStreamAddCallback call,
-    the callback will be executed exactly once. The callback will block
-    later work in the stream until it is finished.
-
-    The callback may be passed :py:obj:`~.CUDA_SUCCESS` or an error code.
-    In the event of a device error, all subsequently executed callbacks
-    will receive an appropriate :py:obj:`~.CUresult`.
-
-    Callbacks must not make any CUDA API calls. Attempting to use a CUDA
-    API will result in :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`. Callbacks must
-    not perform any synchronization that may depend on outstanding device
-    work or other callbacks that are not mandated to run earlier. Callbacks
-    without a mandated order (in independent streams) execute in undefined
-    order and may be serialized.
-
-    For the purposes of Unified Memory, callback execution makes a number
-    of guarantees:
-
-    - The callback stream is considered idle for the duration of the
-      callback. Thus, for example, a callback may always use memory
-      attached to the callback stream.
-
-    - The start of execution of a callback has the same effect as
-      synchronizing an event recorded in the same stream immediately prior
-      to the callback. It thus synchronizes streams which have been
-      "joined" prior to the callback.
-
-    - Adding device work to any stream does not have the effect of making
-      the stream active until all preceding host functions and stream
-      callbacks have executed. Thus, for example, a callback might use
-      global attached memory even if work has been added to another stream,
-      if the work has been ordered behind the callback with an event.
-
-    - Completion of a callback does not cause a stream to become active
-      except as described above. The callback stream will remain idle if no
-      device work follows the callback, and will remain idle across
-      consecutive callbacks without device work in between. Thus, for
-      example, stream synchronization can be done by signaling from a
-      callback at the end of the stream.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to add callback to
-    callback : :py:obj:`~.CUstreamCallback`
-        The function to call once preceding stream operations are complete
-    userData : Any
-        User specified data to be passed to the callback function
-    flags : unsigned int
-        Reserved for future use, must be 0
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuMemAllocManaged`, :py:obj:`~.cuStreamAttachMemAsync`, :py:obj:`~.cuLaunchHostFunc`, :py:obj:`~.cudaStreamAddCallback`
-
-    Notes
-    -----
-    This function is slated for eventual deprecation and removal. If you do not require the callback to execute in case of a device error, consider using :py:obj:`~.cuLaunchHostFunc`. Additionally, this function is not supported with :py:obj:`~.cuStreamBeginCapture` and :py:obj:`~.cuStreamEndCapture`, unlike :py:obj:`~.cuLaunchHostFunc`.
-    """
-    cdef cydriver.CUstreamCallback cycallback
-    if callback is None:
-        cycallback = <cydriver.CUstreamCallback><void_ptr>0
-    elif isinstance(callback, (CUstreamCallback,)):
-        pcallback = int(callback)
-        cycallback = <cydriver.CUstreamCallback><void_ptr>pcallback
-    else:
-        pcallback = int(CUstreamCallback(callback))
-        cycallback = <cydriver.CUstreamCallback><void_ptr>pcallback
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cyuserData = utils.HelperInputVoidPtr(userData)
-    cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
-    err = cydriver.cuStreamAddCallback(cyhStream, cycallback, cyuserData_ptr, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamBeginCapture_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamBeginCapture(hStream, mode not None : CUstreamCaptureMode):
-    """ Begins graph capture on a stream.
-
-    Begin graph capture on `hStream`. When a stream is in capture mode, all
-    operations pushed into the stream will not be executed, but will
-    instead be captured into a graph, which will be returned via
-    :py:obj:`~.cuStreamEndCapture`. Capture may not be initiated if
-    `stream` is CU_STREAM_LEGACY. Capture must be ended on the same stream
-    in which it was initiated, and it may only be initiated if the stream
-    is not already in capture mode. The capture mode may be queried via
-    :py:obj:`~.cuStreamIsCapturing`. A unique id representing the capture
-    sequence may be queried via :py:obj:`~.cuStreamGetCaptureInfo`.
-
-    If `mode` is not :py:obj:`~.CU_STREAM_CAPTURE_MODE_RELAXED`,
-    :py:obj:`~.cuStreamEndCapture` must be called on this stream from the
-    same thread.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to initiate capture
-    mode : :py:obj:`~.CUstreamCaptureMode`
-        Controls the interaction of this capture sequence with other API
-        calls that are potentially unsafe. For more details see
-        :py:obj:`~.cuThreadExchangeStreamCaptureMode`.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamIsCapturing`, :py:obj:`~.cuStreamEndCapture`, :py:obj:`~.cuThreadExchangeStreamCaptureMode`
-
-    Notes
-    -----
-    Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUstreamCaptureMode cymode = mode.value
-    err = cydriver.cuStreamBeginCapture(cyhStream, cymode)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamBeginCaptureToGraph' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamBeginCaptureToGraph(hStream, hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], dependencyData : Optional[Tuple[CUgraphEdgeData] | List[CUgraphEdgeData]], size_t numDependencies, mode not None : CUstreamCaptureMode):
-    """ Begins graph capture on a stream to an existing graph.
-
-    Begin graph capture on `hStream`, placing new nodes into an existing
-    graph. When a stream is in capture mode, all operations pushed into the
-    stream will not be executed, but will instead be captured into
-    `hGraph`. The graph will not be instantiable until the user calls
-    :py:obj:`~.cuStreamEndCapture`.
-
-    Capture may not be initiated if `stream` is CU_STREAM_LEGACY. Capture
-    must be ended on the same stream in which it was initiated, and it may
-    only be initiated if the stream is not already in capture mode. The
-    capture mode may be queried via :py:obj:`~.cuStreamIsCapturing`. A
-    unique id representing the capture sequence may be queried via
-    :py:obj:`~.cuStreamGetCaptureInfo`.
-
-    If `mode` is not :py:obj:`~.CU_STREAM_CAPTURE_MODE_RELAXED`,
-    :py:obj:`~.cuStreamEndCapture` must be called on this stream from the
-    same thread.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to initiate capture.
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to capture into.
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the first node captured in the stream. Can be NULL
-        if numDependencies is 0.
-    dependencyData : List[:py:obj:`~.CUgraphEdgeData`]
-        Optional array of data associated with each dependency.
-    numDependencies : size_t
-        Number of dependencies.
-    mode : :py:obj:`~.CUstreamCaptureMode`
-        Controls the interaction of this capture sequence with other API
-        calls that are potentially unsafe. For more details see
-        :py:obj:`~.cuThreadExchangeStreamCaptureMode`.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamIsCapturing`, :py:obj:`~.cuStreamEndCapture`, :py:obj:`~.cuThreadExchangeStreamCaptureMode`, :py:obj:`~.cuGraphAddNode`,
-
-    Notes
-    -----
-    Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
-    """
-    dependencyData = [] if dependencyData is None else dependencyData
-    if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData):
-        raise TypeError("Argument 'dependencyData' is not instance of type (expected Tuple[cydriver.CUgraphEdgeData,] or List[cydriver.CUgraphEdgeData,]")
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    cdef cydriver.CUgraphEdgeData* cydependencyData = NULL
-    if len(dependencyData) > 0:
-        cydependencyData = <cydriver.CUgraphEdgeData*> calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData))
-        if cydependencyData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData)))
-        for idx in range(len(dependencyData)):
-            string.memcpy(&cydependencyData[idx], (<CUgraphEdgeData>dependencyData[idx])._ptr, sizeof(cydriver.CUgraphEdgeData))
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    if numDependencies > <size_t>len(dependencyData): raise RuntimeError("List is too small: " + str(len(dependencyData)) + " < " + str(numDependencies))
-    cdef cydriver.CUstreamCaptureMode cymode = mode.value
-    err = cydriver.cuStreamBeginCaptureToGraph(cyhStream, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, (<CUgraphEdgeData>dependencyData[0])._ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, cymode)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    if cydependencyData is not NULL:
-        free(cydependencyData)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuThreadExchangeStreamCaptureMode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuThreadExchangeStreamCaptureMode(mode not None : CUstreamCaptureMode):
-    """ Swaps the stream capture interaction mode for a thread.
-
-    Sets the calling thread's stream capture interaction mode to the value
-    contained in `*mode`, and overwrites `*mode` with the previous mode for
-    the thread. To facilitate deterministic behavior across function or
-    module boundaries, callers are encouraged to use this API in a push-pop
-    fashion:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    During stream capture (see :py:obj:`~.cuStreamBeginCapture`), some
-    actions, such as a call to :py:obj:`~.cudaMalloc`, may be unsafe. In
-    the case of :py:obj:`~.cudaMalloc`, the operation is not enqueued
-    asynchronously to a stream, and is not observed by stream capture.
-    Therefore, if the sequence of operations captured via
-    :py:obj:`~.cuStreamBeginCapture` depended on the allocation being
-    replayed whenever the graph is launched, the captured graph would be
-    invalid.
-
-    Therefore, stream capture places restrictions on API calls that can be
-    made within or concurrently to a
-    :py:obj:`~.cuStreamBeginCapture`-:py:obj:`~.cuStreamEndCapture`
-    sequence. This behavior can be controlled via this API and flags to
-    :py:obj:`~.cuStreamBeginCapture`.
-
-    A thread's mode is one of the following:
-
-    - `CU_STREAM_CAPTURE_MODE_GLOBAL:` This is the default mode. If the
-      local thread has an ongoing capture sequence that was not initiated
-      with `CU_STREAM_CAPTURE_MODE_RELAXED` at `cuStreamBeginCapture`, or
-      if any other thread has a concurrent capture sequence initiated with
-      `CU_STREAM_CAPTURE_MODE_GLOBAL`, this thread is prohibited from
-      potentially unsafe API calls.
-
-    - `CU_STREAM_CAPTURE_MODE_THREAD_LOCAL:` If the local thread has an
-      ongoing capture sequence not initiated with
-      `CU_STREAM_CAPTURE_MODE_RELAXED`, it is prohibited from potentially
-      unsafe API calls. Concurrent capture sequences in other threads are
-      ignored.
-
-    - `CU_STREAM_CAPTURE_MODE_RELAXED:` The local thread is not prohibited
-      from potentially unsafe API calls. Note that the thread is still
-      prohibited from API calls which necessarily conflict with stream
-      capture, for example, attempting :py:obj:`~.cuEventQuery` on an event
-      that was last recorded inside a capture sequence.
-
-    Parameters
-    ----------
-    mode : :py:obj:`~.CUstreamCaptureMode`
-        Pointer to mode value to swap with the current mode
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    mode : :py:obj:`~.CUstreamCaptureMode`
-        Pointer to mode value to swap with the current mode
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamBeginCapture`
-    """
-    cdef cydriver.CUstreamCaptureMode cymode = mode.value
-    err = cydriver.cuThreadExchangeStreamCaptureMode(&cymode)
-    return (CUresult(err), CUstreamCaptureMode(cymode))
-{{endif}}
-
-{{if 'cuStreamEndCapture' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamEndCapture(hStream):
-    """ Ends capture on a stream, returning the captured graph.
-
-    End capture on `hStream`, returning the captured graph via `phGraph`.
-    Capture must have been initiated on `hStream` via a call to
-    :py:obj:`~.cuStreamBeginCapture`. If capture was invalidated, due to a
-    violation of the rules of stream capture, then a NULL graph will be
-    returned.
-
-    If the `mode` argument to :py:obj:`~.cuStreamBeginCapture` was not
-    :py:obj:`~.CU_STREAM_CAPTURE_MODE_RELAXED`, this call must be from the
-    same thread as :py:obj:`~.cuStreamBeginCapture`.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD`
-    phGraph : :py:obj:`~.CUgraph`
-        The captured graph
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamIsCapturing`, :py:obj:`~.cuGraphDestroy`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef CUgraph phGraph = CUgraph()
-    err = cydriver.cuStreamEndCapture(cyhStream, <cydriver.CUgraph*>phGraph._ptr)
-    return (CUresult(err), phGraph)
-{{endif}}
-
-{{if 'cuStreamIsCapturing' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamIsCapturing(hStream):
-    """ Returns a stream's capture status.
-
-    Return the capture status of `hStream` via `captureStatus`. After a
-    successful call, `*captureStatus` will contain one of the following:
-
-    - :py:obj:`~.CU_STREAM_CAPTURE_STATUS_NONE`: The stream is not
-      capturing.
-
-    - :py:obj:`~.CU_STREAM_CAPTURE_STATUS_ACTIVE`: The stream is capturing.
-
-    - :py:obj:`~.CU_STREAM_CAPTURE_STATUS_INVALIDATED`: The stream was
-      capturing but an error has invalidated the capture sequence. The
-      capture sequence must be terminated with
-      :py:obj:`~.cuStreamEndCapture` on the stream where it was initiated
-      in order to continue using `hStream`.
-
-    Note that, if this is called on :py:obj:`~.CU_STREAM_LEGACY` (the "null
-    stream") while a blocking stream in the same context is capturing, it
-    will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_IMPLICIT` and
-    `*captureStatus` is unspecified after the call. The blocking stream
-    capture is not invalidated.
-
-    When a blocking stream is capturing, the legacy stream is in an
-    unusable state until the blocking stream capture is terminated. The
-    legacy stream is not supported for stream capture, but attempted use
-    would have an implicit dependency on the capturing stream(s).
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_IMPLICIT`
-    captureStatus : :py:obj:`~.CUstreamCaptureStatus`
-        Returns the stream's capture status
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamEndCapture`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUstreamCaptureStatus captureStatus
-    err = cydriver.cuStreamIsCapturing(cyhStream, &captureStatus)
-    return (CUresult(err), CUstreamCaptureStatus(captureStatus))
-{{endif}}
-
-{{if 'cuStreamGetCaptureInfo_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamGetCaptureInfo(hStream):
-    """ Query a stream's capture state.
-
-    Query stream state related to stream capture.
-
-    If called on :py:obj:`~.CU_STREAM_LEGACY` (the "null stream") while a
-    stream not created with :py:obj:`~.CU_STREAM_NON_BLOCKING` is
-    capturing, returns :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_IMPLICIT`.
-
-    Valid data (other than capture status) is returned only if both of the
-    following are true:
-
-    - the call returns CUDA_SUCCESS
-
-    - the returned capture status is
-      :py:obj:`~.CU_STREAM_CAPTURE_STATUS_ACTIVE`
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_IMPLICIT`
-    captureStatus_out : :py:obj:`~.CUstreamCaptureStatus`
-        Location to return the capture status of the stream; required
-    id_out : :py:obj:`~.cuuint64_t`
-        Optional location to return an id for the capture sequence, which
-        is unique over the lifetime of the process
-    graph_out : :py:obj:`~.CUgraph`
-        Optional location to return the graph being captured into. All
-        operations other than destroy and node removal are permitted on the
-        graph while the capture sequence is in progress. This API does not
-        transfer ownership of the graph, which is transferred or destroyed
-        at :py:obj:`~.cuStreamEndCapture`. Note that the graph handle may
-        be invalidated before end of capture for certain errors. Nodes that
-        are or become unreachable from the original stream at
-        :py:obj:`~.cuStreamEndCapture` due to direct actions on the graph
-        do not trigger :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNJOINED`.
-    dependencies_out : List[:py:obj:`~.CUgraphNode`]
-        Optional location to store a pointer to an array of nodes. The next
-        node to be captured in the stream will depend on this set of nodes,
-        absent operations such as event wait which modify this set. The
-        array pointer is valid until the next API call which operates on
-        the stream or until the capture is terminated. The node handles may
-        be copied out and are valid until they or the graph is destroyed.
-        The driver-owned array may also be passed directly to APIs that
-        operate on the graph (not the stream) without copying.
-    numDependencies_out : int
-        Optional location to store the size of the array returned in
-        dependencies_out.
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamGetCaptureInfo_v3` :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamIsCapturing`, :py:obj:`~.cuStreamUpdateCaptureDependencies`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUstreamCaptureStatus captureStatus_out
-    cdef cuuint64_t id_out = cuuint64_t()
-    cdef CUgraph graph_out = CUgraph()
-    cdef const cydriver.CUgraphNode* cydependencies_out = NULL
-    pydependencies_out = []
-    cdef size_t numDependencies_out = 0
-    err = cydriver.cuStreamGetCaptureInfo(cyhStream, &captureStatus_out, <cydriver.cuuint64_t*>id_out._ptr, <cydriver.CUgraph*>graph_out._ptr, &cydependencies_out, &numDependencies_out)
-    if CUresult(err) == CUresult(0):
-        pydependencies_out = [CUgraphNode(init_value=<void_ptr>cydependencies_out[idx]) for idx in range(numDependencies_out)]
-    return (CUresult(err), CUstreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, numDependencies_out)
-{{endif}}
-
-{{if 'cuStreamGetCaptureInfo_v3' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamGetCaptureInfo_v3(hStream):
-    """ Query a stream's capture state (12.3+)
-
-    Query stream state related to stream capture.
-
-    If called on :py:obj:`~.CU_STREAM_LEGACY` (the "null stream") while a
-    stream not created with :py:obj:`~.CU_STREAM_NON_BLOCKING` is
-    capturing, returns :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_IMPLICIT`.
-
-    Valid data (other than capture status) is returned only if both of the
-    following are true:
-
-    - the call returns CUDA_SUCCESS
-
-    - the returned capture status is
-      :py:obj:`~.CU_STREAM_CAPTURE_STATUS_ACTIVE`
-
-    If `edgeData_out` is non-NULL then `dependencies_out` must be as well.
-    If `dependencies_out` is non-NULL and `edgeData_out` is NULL, but there
-    is non-zero edge data for one or more of the current stream
-    dependencies, the call will return :py:obj:`~.CUDA_ERROR_LOSSY_QUERY`.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_IMPLICIT`, :py:obj:`~.CUDA_ERROR_LOSSY_QUERY`
-    captureStatus_out : :py:obj:`~.CUstreamCaptureStatus`
-        Location to return the capture status of the stream; required
-    id_out : :py:obj:`~.cuuint64_t`
-        Optional location to return an id for the capture sequence, which
-        is unique over the lifetime of the process
-    graph_out : :py:obj:`~.CUgraph`
-        Optional location to return the graph being captured into. All
-        operations other than destroy and node removal are permitted on the
-        graph while the capture sequence is in progress. This API does not
-        transfer ownership of the graph, which is transferred or destroyed
-        at :py:obj:`~.cuStreamEndCapture`. Note that the graph handle may
-        be invalidated before end of capture for certain errors. Nodes that
-        are or become unreachable from the original stream at
-        :py:obj:`~.cuStreamEndCapture` due to direct actions on the graph
-        do not trigger :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNJOINED`.
-    dependencies_out : List[:py:obj:`~.CUgraphNode`]
-        Optional location to store a pointer to an array of nodes. The next
-        node to be captured in the stream will depend on this set of nodes,
-        absent operations such as event wait which modify this set. The
-        array pointer is valid until the next API call which operates on
-        the stream or until the capture is terminated. The node handles may
-        be copied out and are valid until they or the graph is destroyed.
-        The driver-owned array may also be passed directly to APIs that
-        operate on the graph (not the stream) without copying.
-    edgeData_out : List[:py:obj:`~.CUgraphEdgeData`]
-        Optional location to store a pointer to an array of graph edge
-        data. This array parallels `dependencies_out`; the next node to be
-        added has an edge to `dependencies_out`[i] with annotation
-        `edgeData_out`[i] for each `i`. The array pointer is valid until
-        the next API call which operates on the stream or until the capture
-        is terminated.
-    numDependencies_out : int
-        Optional location to store the size of the array returned in
-        dependencies_out.
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamGetCaptureInfo` :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamIsCapturing`, :py:obj:`~.cuStreamUpdateCaptureDependencies`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUstreamCaptureStatus captureStatus_out
-    cdef cuuint64_t id_out = cuuint64_t()
-    cdef CUgraph graph_out = CUgraph()
-    cdef const cydriver.CUgraphNode* cydependencies_out = NULL
-    pydependencies_out = []
-    cdef const cydriver.CUgraphEdgeData* cyedgeData_out = NULL
-    pyedgeData_out = []
-    cdef size_t numDependencies_out = 0
-    err = cydriver.cuStreamGetCaptureInfo_v3(cyhStream, &captureStatus_out, <cydriver.cuuint64_t*>id_out._ptr, <cydriver.CUgraph*>graph_out._ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out)
-    if CUresult(err) == CUresult(0):
-        pydependencies_out = [CUgraphNode(init_value=<void_ptr>cydependencies_out[idx]) for idx in range(numDependencies_out)]
-    if CUresult(err) == CUresult(0):
-        pyedgeData_out = [CUgraphEdgeData(_ptr=<void_ptr>&cyedgeData_out[idx]) for idx in range(numDependencies_out)]
-    return (CUresult(err), CUstreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, pyedgeData_out, numDependencies_out)
-{{endif}}
-
-{{if 'cuStreamUpdateCaptureDependencies' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamUpdateCaptureDependencies(hStream, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, unsigned int flags):
-    """ Update the set of dependencies in a capturing stream (11.3+)
-
-    Modifies the dependency set of a capturing stream. The dependency set
-    is the set of nodes that the next captured node in the stream will
-    depend on.
-
-    Valid flags are :py:obj:`~.CU_STREAM_ADD_CAPTURE_DEPENDENCIES` and
-    :py:obj:`~.CU_STREAM_SET_CAPTURE_DEPENDENCIES`. These control whether
-    the set passed to the API is added to the existing set or replaces it.
-    A flags value of 0 defaults to
-    :py:obj:`~.CU_STREAM_ADD_CAPTURE_DEPENDENCIES`.
-
-    Nodes that are removed from the dependency set via this API do not
-    result in :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNJOINED` if they are
-    unreachable from the stream at :py:obj:`~.cuStreamEndCapture`.
-
-    Returns :py:obj:`~.CUDA_ERROR_ILLEGAL_STATE` if the stream is not
-    capturing.
-
-    This API is new in CUDA 11.3. Developers requiring compatibility across
-    minor versions to CUDA 11.0 should not use this API or provide a
-    fallback.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to update
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        The set of dependencies to add
-    numDependencies : size_t
-        The size of the dependencies array
-    flags : unsigned int
-        See above
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_ILLEGAL_STATE`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamGetCaptureInfo`,
-    """
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    err = cydriver.cuStreamUpdateCaptureDependencies(cyhStream, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, flags)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamUpdateCaptureDependencies_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamUpdateCaptureDependencies_v2(hStream, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], dependencyData : Optional[Tuple[CUgraphEdgeData] | List[CUgraphEdgeData]], size_t numDependencies, unsigned int flags):
-    """ Update the set of dependencies in a capturing stream (12.3+)
-
-    Modifies the dependency set of a capturing stream. The dependency set
-    is the set of nodes that the next captured node in the stream will
-    depend on along with the edge data for those dependencies.
-
-    Valid flags are :py:obj:`~.CU_STREAM_ADD_CAPTURE_DEPENDENCIES` and
-    :py:obj:`~.CU_STREAM_SET_CAPTURE_DEPENDENCIES`. These control whether
-    the set passed to the API is added to the existing set or replaces it.
-    A flags value of 0 defaults to
-    :py:obj:`~.CU_STREAM_ADD_CAPTURE_DEPENDENCIES`.
-
-    Nodes that are removed from the dependency set via this API do not
-    result in :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNJOINED` if they are
-    unreachable from the stream at :py:obj:`~.cuStreamEndCapture`.
-
-    Returns :py:obj:`~.CUDA_ERROR_ILLEGAL_STATE` if the stream is not
-    capturing.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to update
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        The set of dependencies to add
-    dependencyData : List[:py:obj:`~.CUgraphEdgeData`]
-        Optional array of data associated with each dependency.
-    numDependencies : size_t
-        The size of the dependencies array
-    flags : unsigned int
-        See above
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_ILLEGAL_STATE`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamGetCaptureInfo`,
-    """
-    dependencyData = [] if dependencyData is None else dependencyData
-    if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData):
-        raise TypeError("Argument 'dependencyData' is not instance of type (expected Tuple[cydriver.CUgraphEdgeData,] or List[cydriver.CUgraphEdgeData,]")
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    cdef cydriver.CUgraphEdgeData* cydependencyData = NULL
-    if len(dependencyData) > 0:
-        cydependencyData = <cydriver.CUgraphEdgeData*> calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData))
-        if cydependencyData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData)))
-        for idx in range(len(dependencyData)):
-            string.memcpy(&cydependencyData[idx], (<CUgraphEdgeData>dependencyData[idx])._ptr, sizeof(cydriver.CUgraphEdgeData))
-    err = cydriver.cuStreamUpdateCaptureDependencies_v2(cyhStream, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, (<CUgraphEdgeData>dependencyData[0])._ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, flags)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    if cydependencyData is not NULL:
-        free(cydependencyData)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamAttachMemAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamAttachMemAsync(hStream, dptr, size_t length, unsigned int flags):
-    """ Attach memory to a stream asynchronously.
-
-    Enqueues an operation in `hStream` to specify stream association of
-    `length` bytes of memory starting from `dptr`. This function is a
-    stream-ordered operation, meaning that it is dependent on, and will
-    only take effect when, previous work in stream has completed. Any
-    previous association is automatically replaced.
-
-    `dptr` must point to one of the following types of memories:
-
-    - managed memory declared using the managed keyword or allocated with
-      :py:obj:`~.cuMemAllocManaged`.
-
-    - a valid host-accessible region of system-allocated pageable memory.
-      This type of memory may only be specified if the device associated
-      with the stream reports a non-zero value for the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS`.
-
-    For managed allocations, `length` must be either zero or the entire
-    allocation's size. Both indicate that the entire allocation's stream
-    association is being changed. Currently, it is not possible to change
-    stream association for a portion of a managed allocation.
-
-    For pageable host allocations, `length` must be non-zero.
-
-    The stream association is specified using `flags` which must be one of
-    :py:obj:`~.CUmemAttach_flags`. If the :py:obj:`~.CU_MEM_ATTACH_GLOBAL`
-    flag is specified, the memory can be accessed by any stream on any
-    device. If the :py:obj:`~.CU_MEM_ATTACH_HOST` flag is specified, the
-    program makes a guarantee that it won't access the memory on the device
-    from any stream on a device that has a zero value for the device
-    attribute :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`. If
-    the :py:obj:`~.CU_MEM_ATTACH_SINGLE` flag is specified and `hStream` is
-    associated with a device that has a zero value for the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`, the program
-    makes a guarantee that it will only access the memory on the device
-    from `hStream`. It is illegal to attach singly to the NULL stream,
-    because the NULL stream is a virtual global stream and not a specific
-    stream. An error will be returned in this case.
-
-    When memory is associated with a single stream, the Unified Memory
-    system will allow CPU access to this memory region so long as all
-    operations in `hStream` have completed, regardless of whether other
-    streams are active. In effect, this constrains exclusive ownership of
-    the managed memory region by an active GPU to per-stream activity
-    instead of whole-GPU activity.
-
-    Accessing memory on the device from streams that are not associated
-    with it will produce undefined results. No error checking is performed
-    by the Unified Memory system to ensure that kernels launched into other
-    streams do not access this region.
-
-    It is a program's responsibility to order calls to
-    :py:obj:`~.cuStreamAttachMemAsync` via events, synchronization or other
-    means to ensure legal access to memory at all times. Data visibility
-    and coherency will be changed appropriately for all kernels which
-    follow a stream-association change.
-
-    If `hStream` is destroyed while data is associated with it, the
-    association is removed and the association reverts to the default
-    visibility of the allocation as specified at
-    :py:obj:`~.cuMemAllocManaged`. For managed variables, the default
-    association is always :py:obj:`~.CU_MEM_ATTACH_GLOBAL`. Note that
-    destroying a stream is an asynchronous operation, and as a result, the
-    change to default association won't happen until all work in the stream
-    has completed.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to enqueue the attach operation
-    dptr : :py:obj:`~.CUdeviceptr`
-        Pointer to memory (must be a pointer to managed memory or to a
-        valid host-accessible region of system-allocated pageable memory)
-    length : size_t
-        Length of memory
-    flags : unsigned int
-        Must be one of :py:obj:`~.CUmemAttach_flags`
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuMemAllocManaged`, :py:obj:`~.cudaStreamAttachMemAsync`
-    """
-    cdef cydriver.CUdeviceptr cydptr
-    if dptr is None:
-        cydptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dptr, (CUdeviceptr,)):
-        pdptr = int(dptr)
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    else:
-        pdptr = int(CUdeviceptr(dptr))
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    err = cydriver.cuStreamAttachMemAsync(cyhStream, cydptr, length, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamQuery' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamQuery(hStream):
-    """ Determine status of a compute stream.
-
-    Returns :py:obj:`~.CUDA_SUCCESS` if all operations in the stream
-    specified by `hStream` have completed, or
-    :py:obj:`~.CUDA_ERROR_NOT_READY` if not.
-
-    For the purposes of Unified Memory, a return value of
-    :py:obj:`~.CUDA_SUCCESS` is equivalent to having called
-    :py:obj:`~.cuStreamSynchronize()`.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to query status of
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_READY`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamQuery`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    err = cydriver.cuStreamQuery(cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamSynchronize' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamSynchronize(hStream):
-    """ Wait until a stream's tasks are completed.
-
-    Waits until the device has completed all operations in the stream
-    specified by `hStream`. If the context was created with the
-    :py:obj:`~.CU_CTX_SCHED_BLOCKING_SYNC` flag, the CPU thread will block
-    until the stream is finished with all of its tasks.
-
-    \note_null_stream
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to wait for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamSynchronize`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    err = cydriver.cuStreamSynchronize(cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamDestroy_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamDestroy(hStream):
-    """ Destroys a stream.
-
-    Destroys the stream specified by `hStream`.
-
-    In case the device is still doing work in the stream `hStream` when
-    :py:obj:`~.cuStreamDestroy()` is called, the function will return
-    immediately and the resources associated with `hStream` will be
-    released automatically once the device has completed all work in
-    `hStream`.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to destroy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    err = cydriver.cuStreamDestroy(cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamCopyAttributes' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamCopyAttributes(dst, src):
-    """ Copies attributes from source stream to destination stream.
-
-    Copies attributes from source stream `src` to destination stream `dst`.
-    Both streams must have the same context.
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Destination stream
-    src : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Source stream For list of attributes see :py:obj:`~.CUstreamAttrID`
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.CUaccessPolicyWindow`
-    """
-    cdef cydriver.CUstream cysrc
-    if src is None:
-        cysrc = <cydriver.CUstream><void_ptr>0
-    elif isinstance(src, (CUstream,)):
-        psrc = int(src)
-        cysrc = <cydriver.CUstream><void_ptr>psrc
-    else:
-        psrc = int(CUstream(src))
-        cysrc = <cydriver.CUstream><void_ptr>psrc
-    cdef cydriver.CUstream cydst
-    if dst is None:
-        cydst = <cydriver.CUstream><void_ptr>0
-    elif isinstance(dst, (CUstream,)):
-        pdst = int(dst)
-        cydst = <cydriver.CUstream><void_ptr>pdst
-    else:
-        pdst = int(CUstream(dst))
-        cydst = <cydriver.CUstream><void_ptr>pdst
-    err = cydriver.cuStreamCopyAttributes(cydst, cysrc)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamGetAttribute(hStream, attr not None : CUstreamAttrID):
-    """ Queries stream attribute.
-
-    Queries attribute `attr` from `hStream` and stores it in corresponding
-    member of `value_out`.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-
-    attr : :py:obj:`~.CUstreamAttrID`
-
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-    value_out : :py:obj:`~.CUstreamAttrValue`
-
-
-    See Also
-    --------
-    :py:obj:`~.CUaccessPolicyWindow`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUstreamAttrID cyattr = attr.value
-    cdef CUstreamAttrValue value_out = CUstreamAttrValue()
-    err = cydriver.cuStreamGetAttribute(cyhStream, cyattr, <cydriver.CUstreamAttrValue*>value_out._ptr)
-    return (CUresult(err), value_out)
-{{endif}}
-
-{{if 'cuStreamSetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamSetAttribute(hStream, attr not None : CUstreamAttrID, value : Optional[CUstreamAttrValue]):
-    """ Sets stream attribute.
-
-    Sets attribute `attr` on `hStream` from corresponding attribute of
-    `value`. The updated attribute will be applied to subsequent work
-    submitted to the stream. It will not affect previously submitted work.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-
-    attr : :py:obj:`~.CUstreamAttrID`
-
-    value : :py:obj:`~.CUstreamAttrValue`
-
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.CUaccessPolicyWindow`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUstreamAttrID cyattr = attr.value
-    cdef cydriver.CUstreamAttrValue* cyvalue_ptr = value._ptr if value != None else NULL
-    err = cydriver.cuStreamSetAttribute(cyhStream, cyattr, cyvalue_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuEventCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuEventCreate(unsigned int Flags):
-    """ Creates an event.
-
-    Creates an event *phEvent for the current context with the flags
-    specified via `Flags`. Valid flags include:
-
-    - :py:obj:`~.CU_EVENT_DEFAULT`: Default event creation flag.
-
-    - :py:obj:`~.CU_EVENT_BLOCKING_SYNC`: Specifies that the created event
-      should use blocking synchronization. A CPU thread that uses
-      :py:obj:`~.cuEventSynchronize()` to wait on an event created with
-      this flag will block until the event has actually been recorded.
-
-    - :py:obj:`~.CU_EVENT_DISABLE_TIMING`: Specifies that the created event
-      does not need to record timing data. Events created with this flag
-      specified and the :py:obj:`~.CU_EVENT_BLOCKING_SYNC` flag not
-      specified will provide the best performance when used with
-      :py:obj:`~.cuStreamWaitEvent()` and :py:obj:`~.cuEventQuery()`.
-
-    - :py:obj:`~.CU_EVENT_INTERPROCESS`: Specifies that the created event
-      may be used as an interprocess event by
-      :py:obj:`~.cuIpcGetEventHandle()`. :py:obj:`~.CU_EVENT_INTERPROCESS`
-      must be specified along with :py:obj:`~.CU_EVENT_DISABLE_TIMING`.
-
-    Parameters
-    ----------
-    Flags : unsigned int
-        Event creation flags
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    phEvent : :py:obj:`~.CUevent`
-        Returns newly created event
-
-    See Also
-    --------
-    :py:obj:`~.cuEventRecord`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventElapsedTime`, :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventCreateWithFlags`
-    """
-    cdef CUevent phEvent = CUevent()
-    err = cydriver.cuEventCreate(<cydriver.CUevent*>phEvent._ptr, Flags)
-    return (CUresult(err), phEvent)
-{{endif}}
-
-{{if 'cuEventRecord' in found_functions}}
-
-@cython.embedsignature(True)
-def cuEventRecord(hEvent, hStream):
-    """ Records an event.
-
-    Captures in `hEvent` the contents of `hStream` at the time of this
-    call. `hEvent` and `hStream` must be from the same context otherwise
-    :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` is returned. Calls such as
-    :py:obj:`~.cuEventQuery()` or :py:obj:`~.cuStreamWaitEvent()` will then
-    examine or wait for completion of the work that was captured. Uses of
-    `hStream` after this call do not modify `hEvent`. See note on default
-    stream behavior for what is captured in the default case.
-
-    :py:obj:`~.cuEventRecord()` can be called multiple times on the same
-    event and will overwrite the previously captured state. Other APIs such
-    as :py:obj:`~.cuStreamWaitEvent()` use the most recently captured state
-    at the time of the API call, and are not affected by later calls to
-    :py:obj:`~.cuEventRecord()`. Before the first call to
-    :py:obj:`~.cuEventRecord()`, an event represents an empty set of work,
-    so for example :py:obj:`~.cuEventQuery()` would return
-    :py:obj:`~.CUDA_SUCCESS`.
-
-    Parameters
-    ----------
-    hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to record
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to record event for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventElapsedTime`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cuEventRecordWithFlags`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUevent cyhEvent
-    if hEvent is None:
-        cyhEvent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hEvent, (CUevent,)):
-        phEvent = int(hEvent)
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    else:
-        phEvent = int(CUevent(hEvent))
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    err = cydriver.cuEventRecord(cyhEvent, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuEventRecordWithFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cuEventRecordWithFlags(hEvent, hStream, unsigned int flags):
-    """ Records an event.
-
-    Captures in `hEvent` the contents of `hStream` at the time of this
-    call. `hEvent` and `hStream` must be from the same context otherwise
-    :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` is returned. Calls such as
-    :py:obj:`~.cuEventQuery()` or :py:obj:`~.cuStreamWaitEvent()` will then
-    examine or wait for completion of the work that was captured. Uses of
-    `hStream` after this call do not modify `hEvent`. See note on default
-    stream behavior for what is captured in the default case.
-
-    :py:obj:`~.cuEventRecordWithFlags()` can be called multiple times on
-    the same event and will overwrite the previously captured state. Other
-    APIs such as :py:obj:`~.cuStreamWaitEvent()` use the most recently
-    captured state at the time of the API call, and are not affected by
-    later calls to :py:obj:`~.cuEventRecordWithFlags()`. Before the first
-    call to :py:obj:`~.cuEventRecordWithFlags()`, an event represents an
-    empty set of work, so for example :py:obj:`~.cuEventQuery()` would
-    return :py:obj:`~.CUDA_SUCCESS`.
-
-    flags include:
-
-    - :py:obj:`~.CU_EVENT_RECORD_DEFAULT`: Default event creation flag.
-
-    - :py:obj:`~.CU_EVENT_RECORD_EXTERNAL`: Event is captured in the graph
-      as an external event node when performing stream capture. This flag
-      is invalid outside of stream capture.
-
-    Parameters
-    ----------
-    hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to record
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to record event for
-    flags : unsigned int
-        See :py:obj:`~.CUevent_capture_flags`
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventElapsedTime`, :py:obj:`~.cuEventRecord`, :py:obj:`~.cudaEventRecord`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUevent cyhEvent
-    if hEvent is None:
-        cyhEvent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hEvent, (CUevent,)):
-        phEvent = int(hEvent)
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    else:
-        phEvent = int(CUevent(hEvent))
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    err = cydriver.cuEventRecordWithFlags(cyhEvent, cyhStream, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuEventQuery' in found_functions}}
-
-@cython.embedsignature(True)
-def cuEventQuery(hEvent):
-    """ Queries an event's status.
-
-    Queries the status of all work currently captured by `hEvent`. See
-    :py:obj:`~.cuEventRecord()` for details on what is captured by an
-    event.
-
-    Returns :py:obj:`~.CUDA_SUCCESS` if all captured work has been
-    completed, or :py:obj:`~.CUDA_ERROR_NOT_READY` if any captured work is
-    incomplete.
-
-    For the purposes of Unified Memory, a return value of
-    :py:obj:`~.CUDA_SUCCESS` is equivalent to having called
-    :py:obj:`~.cuEventSynchronize()`.
-
-    Parameters
-    ----------
-    hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_READY`
-
-    See Also
-    --------
-    :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventRecord`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventElapsedTime`, :py:obj:`~.cudaEventQuery`
-    """
-    cdef cydriver.CUevent cyhEvent
-    if hEvent is None:
-        cyhEvent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hEvent, (CUevent,)):
-        phEvent = int(hEvent)
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    else:
-        phEvent = int(CUevent(hEvent))
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    err = cydriver.cuEventQuery(cyhEvent)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuEventSynchronize' in found_functions}}
-
-@cython.embedsignature(True)
-def cuEventSynchronize(hEvent):
-    """ Waits for an event to complete.
-
-    Waits until the completion of all work currently captured in `hEvent`.
-    See :py:obj:`~.cuEventRecord()` for details on what is captured by an
-    event.
-
-    Waiting for an event that was created with the
-    :py:obj:`~.CU_EVENT_BLOCKING_SYNC` flag will cause the calling CPU
-    thread to block until the event has been completed by the device. If
-    the :py:obj:`~.CU_EVENT_BLOCKING_SYNC` flag has not been set, then the
-    CPU thread will busy-wait until the event has been completed by the
-    device.
-
-    Parameters
-    ----------
-    hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to wait for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventRecord`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventElapsedTime`, :py:obj:`~.cudaEventSynchronize`
-    """
-    cdef cydriver.CUevent cyhEvent
-    if hEvent is None:
-        cyhEvent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hEvent, (CUevent,)):
-        phEvent = int(hEvent)
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    else:
-        phEvent = int(CUevent(hEvent))
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    err = cydriver.cuEventSynchronize(cyhEvent)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuEventDestroy_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuEventDestroy(hEvent):
-    """ Destroys an event.
-
-    Destroys the event specified by `hEvent`.
-
-    An event may be destroyed before it is complete (i.e., while
-    :py:obj:`~.cuEventQuery()` would return
-    :py:obj:`~.CUDA_ERROR_NOT_READY`). In this case, the call does not
-    block on completion of the event, and any associated resources will
-    automatically be released asynchronously at completion.
-
-    Parameters
-    ----------
-    hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to destroy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventRecord`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventElapsedTime`, :py:obj:`~.cudaEventDestroy`
-    """
-    cdef cydriver.CUevent cyhEvent
-    if hEvent is None:
-        cyhEvent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hEvent, (CUevent,)):
-        phEvent = int(hEvent)
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    else:
-        phEvent = int(CUevent(hEvent))
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    err = cydriver.cuEventDestroy(cyhEvent)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuEventElapsedTime' in found_functions}}
-
-@cython.embedsignature(True)
-def cuEventElapsedTime(hStart, hEnd):
-    """ Computes the elapsed time between two events.
-
-    Computes the elapsed time between two events (in milliseconds with a
-    resolution of around 0.5 microseconds).
-
-    If either event was last recorded in a non-NULL stream, the resulting
-    time may be greater than expected (even if both used the same stream
-    handle). This happens because the :py:obj:`~.cuEventRecord()` operation
-    takes place asynchronously and there is no guarantee that the measured
-    latency is actually just between the two events. Any number of other
-    different stream operations could execute in between the two measured
-    events, thus altering the timing in a significant way.
-
-    If :py:obj:`~.cuEventRecord()` has not been called on either event then
-    :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` is returned. If
-    :py:obj:`~.cuEventRecord()` has been called on both events but one or
-    both of them has not yet been completed (that is,
-    :py:obj:`~.cuEventQuery()` would return
-    :py:obj:`~.CUDA_ERROR_NOT_READY` on at least one of the events),
-    :py:obj:`~.CUDA_ERROR_NOT_READY` is returned. If either event was
-    created with the :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag, then this
-    function will return :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`.
-
-    Parameters
-    ----------
-    hStart : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Starting event
-    hEnd : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Ending event
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_READY`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    pMilliseconds : float
-        Time between `hStart` and `hEnd` in ms
-
-    See Also
-    --------
-    :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventRecord`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cudaEventElapsedTime`
-    """
-    cdef cydriver.CUevent cyhEnd
-    if hEnd is None:
-        cyhEnd = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hEnd, (CUevent,)):
-        phEnd = int(hEnd)
-        cyhEnd = <cydriver.CUevent><void_ptr>phEnd
-    else:
-        phEnd = int(CUevent(hEnd))
-        cyhEnd = <cydriver.CUevent><void_ptr>phEnd
-    cdef cydriver.CUevent cyhStart
-    if hStart is None:
-        cyhStart = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hStart, (CUevent,)):
-        phStart = int(hStart)
-        cyhStart = <cydriver.CUevent><void_ptr>phStart
-    else:
-        phStart = int(CUevent(hStart))
-        cyhStart = <cydriver.CUevent><void_ptr>phStart
-    cdef float pMilliseconds = 0
-    err = cydriver.cuEventElapsedTime(&pMilliseconds, cyhStart, cyhEnd)
-    return (CUresult(err), pMilliseconds)
-{{endif}}
-
-{{if 'cuImportExternalMemory' in found_functions}}
-
-@cython.embedsignature(True)
-def cuImportExternalMemory(memHandleDesc : Optional[CUDA_EXTERNAL_MEMORY_HANDLE_DESC]):
-    """ Imports an external memory object.
-
-    Imports an externally allocated memory object and returns a handle to
-    that in `extMem_out`.
-
-    The properties of the handle being imported must be described in
-    `memHandleDesc`. The :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`
-    structure is defined as follows:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.type` specifies the
-    type of handle being imported. :py:obj:`~.CUexternalMemoryHandleType`
-    is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD`, then
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::fd must be a
-    valid file descriptor referencing a memory object. Ownership of the
-    file descriptor is transferred to the CUDA driver when the handle is
-    imported successfully. Performing any operations on the file descriptor
-    after it is imported results in undefined behavior.
-
-    If :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32`, then exactly
-    one of
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::handle and
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::name must
-    not be NULL. If
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::handle is
-    not NULL, then it must represent a valid shared NT handle that
-    references a memory object. Ownership of this handle is not transferred
-    to CUDA after the import operation, so the application must release the
-    handle using the appropriate system call. If
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::name is
-    not NULL, then it must point to a NULL-terminated array of UTF-16
-    characters that refers to a memory object.
-
-    If :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT`, then
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::handle
-    must be non-NULL and
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::name must
-    be NULL. The handle specified must be a globally shared KMT handle.
-    This handle does not hold a reference to the underlying object, and
-    thus will be invalid when all references to the memory object are
-    destroyed.
-
-    If :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP`, then exactly one
-    of :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::handle
-    and :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::name
-    must not be NULL. If
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::handle is
-    not NULL, then it must represent a valid shared NT handle that is
-    returned by ID3D12Device::CreateSharedHandle when referring to a
-    ID3D12Heap object. This handle holds a reference to the underlying
-    object. If
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::name is
-    not NULL, then it must point to a NULL-terminated array of UTF-16
-    characters that refers to a ID3D12Heap object.
-
-    If :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE`, then exactly
-    one of
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::handle and
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::name must
-    not be NULL. If
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::handle is
-    not NULL, then it must represent a valid shared NT handle that is
-    returned by ID3D12Device::CreateSharedHandle when referring to a
-    ID3D12Resource object. This handle holds a reference to the underlying
-    object. If
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::name is
-    not NULL, then it must point to a NULL-terminated array of UTF-16
-    characters that refers to a ID3D12Resource object.
-
-    If :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE`, then
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::handle
-    must represent a valid shared NT handle that is returned by
-    IDXGIResource1::CreateSharedHandle when referring to a ID3D11Resource
-    object. If
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::name is
-    not NULL, then it must point to a NULL-terminated array of UTF-16
-    characters that refers to a ID3D11Resource object.
-
-    If :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT`, then
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::handle
-    must represent a valid shared KMT handle that is returned by
-    IDXGIResource::GetSharedHandle when referring to a ID3D11Resource
-    object and
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::win32::name must
-    be NULL.
-
-    If :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF`, then
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`::handle::nvSciBufObject
-    must be non-NULL and reference a valid NvSciBuf object. If the NvSciBuf
-    object imported into CUDA is also mapped by other drivers, then the
-    application must use :py:obj:`~.cuWaitExternalSemaphoresAsync` or
-    :py:obj:`~.cuSignalExternalSemaphoresAsync` as appropriate barriers to
-    maintain coherence between CUDA and the other drivers. See
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC` and
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC` for
-    memory synchronization.
-
-    The size of the memory object must be specified in
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.size`.
-
-    Specifying the flag :py:obj:`~.CUDA_EXTERNAL_MEMORY_DEDICATED` in
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.flags` indicates that the
-    resource is a dedicated resource. The definition of what a dedicated
-    resource is outside the scope of this extension. This flag must be set
-    if :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC.type` is one of the
-    following: :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE`
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE`
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT`
-
-    Parameters
-    ----------
-    memHandleDesc : :py:obj:`~.CUDA_EXTERNAL_MEMORY_HANDLE_DESC`
-        Memory import handle descriptor
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OPERATING_SYSTEM`
-    extMem_out : :py:obj:`~.CUexternalMemory`
-        Returned handle to an external memory object
-
-    See Also
-    --------
-    :py:obj:`~.cuDestroyExternalMemory`, :py:obj:`~.cuExternalMemoryGetMappedBuffer`, :py:obj:`~.cuExternalMemoryGetMappedMipmappedArray`
-
-    Notes
-    -----
-    If the Vulkan memory imported into CUDA is mapped on the CPU then the application must use vkInvalidateMappedMemoryRanges/vkFlushMappedMemoryRanges as well as appropriate Vulkan pipeline barriers to maintain coherence between CPU and GPU. For more information on these APIs, please refer to "Synchronization
-    and Cache Control" chapter from Vulkan specification.
-    """
-    cdef CUexternalMemory extMem_out = CUexternalMemory()
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC* cymemHandleDesc_ptr = memHandleDesc._ptr if memHandleDesc != None else NULL
-    err = cydriver.cuImportExternalMemory(<cydriver.CUexternalMemory*>extMem_out._ptr, cymemHandleDesc_ptr)
-    return (CUresult(err), extMem_out)
-{{endif}}
-
-{{if 'cuExternalMemoryGetMappedBuffer' in found_functions}}
-
-@cython.embedsignature(True)
-def cuExternalMemoryGetMappedBuffer(extMem, bufferDesc : Optional[CUDA_EXTERNAL_MEMORY_BUFFER_DESC]):
-    """ Maps a buffer onto an imported memory object.
-
-    Maps a buffer onto an imported memory object and returns a device
-    pointer in `devPtr`.
-
-    The properties of the buffer being mapped must be described in
-    `bufferDesc`. The :py:obj:`~.CUDA_EXTERNAL_MEMORY_BUFFER_DESC`
-    structure is defined as follows:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.CUDA_EXTERNAL_MEMORY_BUFFER_DESC.offset` is the offset
-    in the memory object where the buffer's base address is.
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_BUFFER_DESC.size` is the size of the
-    buffer. :py:obj:`~.CUDA_EXTERNAL_MEMORY_BUFFER_DESC.flags` must be
-    zero.
-
-    The offset and size have to be suitably aligned to match the
-    requirements of the external API. Mapping two buffers whose ranges
-    overlap may or may not result in the same virtual address being
-    returned for the overlapped portion. In such cases, the application
-    must ensure that all accesses to that region from the GPU are volatile.
-    Otherwise writes made via one address are not guaranteed to be visible
-    via the other address, even if they're issued by the same thread. It is
-    recommended that applications map the combined range instead of mapping
-    separate buffers and then apply the appropriate offsets to the returned
-    pointer to derive the individual buffers.
-
-    The returned pointer `devPtr` must be freed using
-    :py:obj:`~.cuMemFree`.
-
-    Parameters
-    ----------
-    extMem : :py:obj:`~.CUexternalMemory`
-        Handle to external memory object
-    bufferDesc : :py:obj:`~.CUDA_EXTERNAL_MEMORY_BUFFER_DESC`
-        Buffer descriptor
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-    devPtr : :py:obj:`~.CUdeviceptr`
-        Returned device pointer to buffer
-
-    See Also
-    --------
-    :py:obj:`~.cuImportExternalMemory`, :py:obj:`~.cuDestroyExternalMemory`, :py:obj:`~.cuExternalMemoryGetMappedMipmappedArray`
-    """
-    cdef cydriver.CUexternalMemory cyextMem
-    if extMem is None:
-        cyextMem = <cydriver.CUexternalMemory><void_ptr>0
-    elif isinstance(extMem, (CUexternalMemory,)):
-        pextMem = int(extMem)
-        cyextMem = <cydriver.CUexternalMemory><void_ptr>pextMem
-    else:
-        pextMem = int(CUexternalMemory(extMem))
-        cyextMem = <cydriver.CUexternalMemory><void_ptr>pextMem
-    cdef CUdeviceptr devPtr = CUdeviceptr()
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC* cybufferDesc_ptr = bufferDesc._ptr if bufferDesc != None else NULL
-    err = cydriver.cuExternalMemoryGetMappedBuffer(<cydriver.CUdeviceptr*>devPtr._ptr, cyextMem, cybufferDesc_ptr)
-    return (CUresult(err), devPtr)
-{{endif}}
-
-{{if 'cuExternalMemoryGetMappedMipmappedArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cuExternalMemoryGetMappedMipmappedArray(extMem, mipmapDesc : Optional[CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC]):
-    """ Maps a CUDA mipmapped array onto an external memory object.
-
-    Maps a CUDA mipmapped array onto an external object and returns a
-    handle to it in `mipmap`.
-
-    The properties of the CUDA mipmapped array being mapped must be
-    described in `mipmapDesc`. The structure
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC` is defined as
-    follows:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC.offset` is
-    the offset in the memory object where the base level of the mipmap
-    chain is.
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC.arrayDesc`
-    describes the format, dimensions and type of the base level of the
-    mipmap chain. For further details on these parameters, please refer to
-    the documentation for :py:obj:`~.cuMipmappedArrayCreate`. Note that if
-    the mipmapped array is bound as a color target in the graphics API,
-    then the flag :py:obj:`~.CUDA_ARRAY3D_COLOR_ATTACHMENT` must be
-    specified in
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC`::arrayDesc::Flags.
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC.numLevels`
-    specifies the total number of levels in the mipmap chain.
-
-    If `extMem` was imported from a handle of type
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF`, then
-    :py:obj:`~.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC.numLevels` must be
-    equal to 1.
-
-    The returned CUDA mipmapped array must be freed using
-    :py:obj:`~.cuMipmappedArrayDestroy`.
-
-    Parameters
-    ----------
-    extMem : :py:obj:`~.CUexternalMemory`
-        Handle to external memory object
-    mipmapDesc : :py:obj:`~.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC`
-        CUDA array descriptor
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-    mipmap : :py:obj:`~.CUmipmappedArray`
-        Returned CUDA mipmapped array
-
-    See Also
-    --------
-    :py:obj:`~.cuImportExternalMemory`, :py:obj:`~.cuDestroyExternalMemory`, :py:obj:`~.cuExternalMemoryGetMappedBuffer`
-    """
-    cdef cydriver.CUexternalMemory cyextMem
-    if extMem is None:
-        cyextMem = <cydriver.CUexternalMemory><void_ptr>0
-    elif isinstance(extMem, (CUexternalMemory,)):
-        pextMem = int(extMem)
-        cyextMem = <cydriver.CUexternalMemory><void_ptr>pextMem
-    else:
-        pextMem = int(CUexternalMemory(extMem))
-        cyextMem = <cydriver.CUexternalMemory><void_ptr>pextMem
-    cdef CUmipmappedArray mipmap = CUmipmappedArray()
-    cdef cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC* cymipmapDesc_ptr = mipmapDesc._ptr if mipmapDesc != None else NULL
-    err = cydriver.cuExternalMemoryGetMappedMipmappedArray(<cydriver.CUmipmappedArray*>mipmap._ptr, cyextMem, cymipmapDesc_ptr)
-    return (CUresult(err), mipmap)
-{{endif}}
-
-{{if 'cuDestroyExternalMemory' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDestroyExternalMemory(extMem):
-    """ Destroys an external memory object.
-
-    Destroys the specified external memory object. Any existing buffers and
-    CUDA mipmapped arrays mapped onto this object must no longer be used
-    and must be explicitly freed using :py:obj:`~.cuMemFree` and
-    :py:obj:`~.cuMipmappedArrayDestroy` respectively.
-
-    Parameters
-    ----------
-    extMem : :py:obj:`~.CUexternalMemory`
-        External memory object to be destroyed
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuImportExternalMemory`, :py:obj:`~.cuExternalMemoryGetMappedBuffer`, :py:obj:`~.cuExternalMemoryGetMappedMipmappedArray`
-    """
-    cdef cydriver.CUexternalMemory cyextMem
-    if extMem is None:
-        cyextMem = <cydriver.CUexternalMemory><void_ptr>0
-    elif isinstance(extMem, (CUexternalMemory,)):
-        pextMem = int(extMem)
-        cyextMem = <cydriver.CUexternalMemory><void_ptr>pextMem
-    else:
-        pextMem = int(CUexternalMemory(extMem))
-        cyextMem = <cydriver.CUexternalMemory><void_ptr>pextMem
-    err = cydriver.cuDestroyExternalMemory(cyextMem)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuImportExternalSemaphore' in found_functions}}
-
-@cython.embedsignature(True)
-def cuImportExternalSemaphore(semHandleDesc : Optional[CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC]):
-    """ Imports an external semaphore.
-
-    Imports an externally allocated synchronization object and returns a
-    handle to that in `extSem_out`.
-
-    The properties of the handle being imported must be described in
-    `semHandleDesc`. The :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC` is
-    defined as follows:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC.type` specifies
-    the type of handle being imported.
-    :py:obj:`~.CUexternalSemaphoreHandleType` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD`, then
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::fd must be a
-    valid file descriptor referencing a synchronization object. Ownership
-    of the file descriptor is transferred to the CUDA driver when the
-    handle is imported successfully. Performing any operations on the file
-    descriptor after it is imported results in undefined behavior.
-
-    If :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32`, then
-    exactly one of
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::handle
-    and
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::name
-    must not be NULL. If
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::handle
-    is not NULL, then it must represent a valid shared NT handle that
-    references a synchronization object. Ownership of this handle is not
-    transferred to CUDA after the import operation, so the application must
-    release the handle using the appropriate system call. If
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::name is
-    not NULL, then it must name a valid synchronization object.
-
-    If :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT`, then
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::handle
-    must be non-NULL and
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::name
-    must be NULL. The handle specified must be a globally shared KMT
-    handle. This handle does not hold a reference to the underlying object,
-    and thus will be invalid when all references to the synchronization
-    object are destroyed.
-
-    If :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE`, then exactly
-    one of
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::handle
-    and
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::name
-    must not be NULL. If
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::handle
-    is not NULL, then it must represent a valid shared NT handle that is
-    returned by ID3D12Device::CreateSharedHandle when referring to a
-    ID3D12Fence object. This handle holds a reference to the underlying
-    object. If
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::name is
-    not NULL, then it must name a valid synchronization object that refers
-    to a valid ID3D12Fence object.
-
-    If :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE`, then
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::handle
-    represents a valid shared NT handle that is returned by
-    ID3D11Fence::CreateSharedHandle. If
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::name is
-    not NULL, then it must name a valid synchronization object that refers
-    to a valid ID3D11Fence object.
-
-    If :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC`, then
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::nvSciSyncObj
-    represents a valid NvSciSyncObj.
-
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX`, then
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::handle
-    represents a valid shared NT handle that is returned by
-    IDXGIResource1::CreateSharedHandle when referring to a IDXGIKeyedMutex
-    object. If
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::name is
-    not NULL, then it must name a valid synchronization object that refers
-    to a valid IDXGIKeyedMutex object.
-
-    If :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT`,
-    then
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::handle
-    represents a valid shared KMT handle that is returned by
-    IDXGIResource::GetSharedHandle when referring to a IDXGIKeyedMutex
-    object and
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::name
-    must be NULL.
-
-    If :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD`,
-    then :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::fd must
-    be a valid file descriptor referencing a synchronization object.
-    Ownership of the file descriptor is transferred to the CUDA driver when
-    the handle is imported successfully. Performing any operations on the
-    file descriptor after it is imported results in undefined behavior.
-
-    If :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC.type` is
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32`,
-    then exactly one of
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::handle
-    and
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::name
-    must not be NULL. If
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::handle
-    is not NULL, then it must represent a valid shared NT handle that
-    references a synchronization object. Ownership of this handle is not
-    transferred to CUDA after the import operation, so the application must
-    release the handle using the appropriate system call. If
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`::handle::win32::name is
-    not NULL, then it must name a valid synchronization object.
-
-    Parameters
-    ----------
-    semHandleDesc : :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC`
-        Semaphore import handle descriptor
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OPERATING_SYSTEM`
-    extSem_out : :py:obj:`~.CUexternalSemaphore`
-        Returned handle to an external semaphore
-
-    See Also
-    --------
-    :py:obj:`~.cuDestroyExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`
-    """
-    cdef CUexternalSemaphore extSem_out = CUexternalSemaphore()
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC* cysemHandleDesc_ptr = semHandleDesc._ptr if semHandleDesc != None else NULL
-    err = cydriver.cuImportExternalSemaphore(<cydriver.CUexternalSemaphore*>extSem_out._ptr, cysemHandleDesc_ptr)
-    return (CUresult(err), extSem_out)
-{{endif}}
-
-{{if 'cuSignalExternalSemaphoresAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuSignalExternalSemaphoresAsync(extSemArray : Optional[Tuple[CUexternalSemaphore] | List[CUexternalSemaphore]], paramsArray : Optional[Tuple[CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS] | List[CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS]], unsigned int numExtSems, stream):
-    """ Signals a set of external semaphore objects.
-
-    Enqueues a signal operation on a set of externally allocated semaphore
-    object in the specified stream. The operations will be executed when
-    all prior operations in the stream complete.
-
-    The exact semantics of signaling a semaphore depends on the type of the
-    object.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT` then
-    signaling the semaphore will set it to the signaled state.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32`
-    then the semaphore will be set to the value specified in
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS`::params::fence::value.
-
-    If the semaphore object is of the type
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC` this API sets
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS`::params::nvSciSync::fence
-    to a value that can be used by subsequent waiters of the same NvSciSync
-    object to order operations with those currently submitted in `stream`.
-    Such an update will overwrite previous contents of
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS`::params::nvSciSync::fence.
-    By default, signaling such an external semaphore object causes
-    appropriate memory synchronization operations to be performed over all
-    external memory objects that are imported as
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF`. This ensures that
-    any subsequent accesses made by other importers of the same set of
-    NvSciBuf memory object(s) are coherent. These operations can be skipped
-    by specifying the flag
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC`, which
-    can be used as a performance optimization when data coherency is not
-    required. But specifying this flag in scenarios where data coherency is
-    required results in undefined behavior. Also, for semaphore object of
-    the type :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC`, if
-    the NvSciSyncAttrList used to create the NvSciSyncObj had not set the
-    flags in :py:obj:`~.cuDeviceGetNvSciSyncAttributes` to
-    CUDA_NVSCISYNC_ATTR_SIGNAL, this API will return
-    CUDA_ERROR_NOT_SUPPORTED. NvSciSyncFence associated with semaphore
-    object of the type
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC` can be
-    deterministic. For this the NvSciSyncAttrList used to create the
-    semaphore object must have value of
-    NvSciSyncAttrKey_RequireDeterministicFences key set to true.
-    Deterministic fences allow users to enqueue a wait over the semaphore
-    object even before corresponding signal is enqueued. For such a
-    semaphore object, CUDA guarantees that each signal operation will
-    increment the fence value by '1'. Users are expected to track count of
-    signals enqueued on the semaphore object and insert waits accordingly.
-    When such a semaphore object is signaled from multiple streams, due to
-    concurrent stream execution, it is possible that the order in which the
-    semaphore gets signaled is indeterministic. This could lead to waiters
-    of the semaphore getting unblocked incorrectly. Users are expected to
-    handle such situations, either by not using the same semaphore object
-    with deterministic fence support enabled in different streams or by
-    adding explicit dependency amongst such streams so that the semaphore
-    is signaled in order.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT`
-    then the keyed mutex will be released with the key specified in
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_PARAMS`::params::keyedmutex::key.
-
-    Parameters
-    ----------
-    extSemArray : List[:py:obj:`~.CUexternalSemaphore`]
-        Set of external semaphores to be signaled
-    paramsArray : List[:py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS`]
-        Array of semaphore parameters
-    numExtSems : unsigned int
-        Number of semaphores to signal
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to enqueue the signal operations in
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuDestroyExternalSemaphore`, :py:obj:`~.cuWaitExternalSemaphoresAsync`
-    """
-    cdef cydriver.CUstream cystream
-    if stream is None:
-        cystream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(stream, (CUstream,)):
-        pstream = int(stream)
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    else:
-        pstream = int(CUstream(stream))
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    paramsArray = [] if paramsArray is None else paramsArray
-    if not all(isinstance(_x, (CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,)) for _x in paramsArray):
-        raise TypeError("Argument 'paramsArray' is not instance of type (expected Tuple[cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,] or List[cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,]")
-    extSemArray = [] if extSemArray is None else extSemArray
-    if not all(isinstance(_x, (CUexternalSemaphore,)) for _x in extSemArray):
-        raise TypeError("Argument 'extSemArray' is not instance of type (expected Tuple[cydriver.CUexternalSemaphore,] or List[cydriver.CUexternalSemaphore,]")
-    cdef cydriver.CUexternalSemaphore* cyextSemArray = NULL
-    if len(extSemArray) > 0:
-        cyextSemArray = <cydriver.CUexternalSemaphore*> calloc(len(extSemArray), sizeof(cydriver.CUexternalSemaphore))
-        if cyextSemArray is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore)))
-        else:
-            for idx in range(len(extSemArray)):
-                cyextSemArray[idx] = <cydriver.CUexternalSemaphore>(<CUexternalSemaphore>extSemArray[idx])._ptr[0]
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* cyparamsArray = NULL
-    if len(paramsArray) > 0:
-        cyparamsArray = <cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS*> calloc(len(paramsArray), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS))
-        if cyparamsArray is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS)))
-        for idx in range(len(paramsArray)):
-            string.memcpy(&cyparamsArray[idx], (<CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS>paramsArray[idx])._ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS))
-    if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems))
-    if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems))
-    err = cydriver.cuSignalExternalSemaphoresAsync(<cydriver.CUexternalSemaphore*>(<CUexternalSemaphore>extSemArray[0])._ptr if len(extSemArray) == 1 else cyextSemArray, (<CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS>paramsArray[0])._ptr if len(paramsArray) == 1 else cyparamsArray, numExtSems, cystream)
-    if cyextSemArray is not NULL:
-        free(cyextSemArray)
-    if cyparamsArray is not NULL:
-        free(cyparamsArray)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuWaitExternalSemaphoresAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuWaitExternalSemaphoresAsync(extSemArray : Optional[Tuple[CUexternalSemaphore] | List[CUexternalSemaphore]], paramsArray : Optional[Tuple[CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS] | List[CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS]], unsigned int numExtSems, stream):
-    """ Waits on a set of external semaphore objects.
-
-    Enqueues a wait operation on a set of externally allocated semaphore
-    object in the specified stream. The operations will be executed when
-    all prior operations in the stream complete.
-
-    The exact semantics of waiting on a semaphore depends on the type of
-    the object.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT` then
-    waiting on the semaphore will wait until the semaphore reaches the
-    signaled state. The semaphore will then be reset to the unsignaled
-    state. Therefore for every signal operation, there can only be one wait
-    operation.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32`
-    then waiting on the semaphore will wait until the value of the
-    semaphore is greater than or equal to
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS`::params::fence::value.
-
-    If the semaphore object is of the type
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC` then, waiting
-    on the semaphore will wait until the
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS`::params::nvSciSync::fence
-    is signaled by the signaler of the NvSciSyncObj that was associated
-    with this semaphore object. By default, waiting on such an external
-    semaphore object causes appropriate memory synchronization operations
-    to be performed over all external memory objects that are imported as
-    :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF`. This ensures that
-    any subsequent accesses made by other importers of the same set of
-    NvSciBuf memory object(s) are coherent. These operations can be skipped
-    by specifying the flag
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC`, which
-    can be used as a performance optimization when data coherency is not
-    required. But specifying this flag in scenarios where data coherency is
-    required results in undefined behavior. Also, for semaphore object of
-    the type :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC`, if
-    the NvSciSyncAttrList used to create the NvSciSyncObj had not set the
-    flags in :py:obj:`~.cuDeviceGetNvSciSyncAttributes` to
-    CUDA_NVSCISYNC_ATTR_WAIT, this API will return
-    CUDA_ERROR_NOT_SUPPORTED.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX`,
-    :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT`
-    then the keyed mutex will be acquired when it is released with the key
-    specified in
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS`::params::keyedmutex::key
-    or until the timeout specified by
-    :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS`::params::keyedmutex::timeoutMs
-    has lapsed. The timeout interval can either be a finite value specified
-    in milliseconds or an infinite value. In case an infinite value is
-    specified the timeout never elapses. The windows INFINITE macro must be
-    used to specify infinite timeout.
-
-    Parameters
-    ----------
-    extSemArray : List[:py:obj:`~.CUexternalSemaphore`]
-        External semaphores to be waited on
-    paramsArray : List[:py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS`]
-        Array of semaphore parameters
-    numExtSems : unsigned int
-        Number of semaphores to wait on
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to enqueue the wait operations in
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_TIMEOUT`
-
-    See Also
-    --------
-    :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuDestroyExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`
-    """
-    cdef cydriver.CUstream cystream
-    if stream is None:
-        cystream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(stream, (CUstream,)):
-        pstream = int(stream)
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    else:
-        pstream = int(CUstream(stream))
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    paramsArray = [] if paramsArray is None else paramsArray
-    if not all(isinstance(_x, (CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,)) for _x in paramsArray):
-        raise TypeError("Argument 'paramsArray' is not instance of type (expected Tuple[cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,] or List[cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,]")
-    extSemArray = [] if extSemArray is None else extSemArray
-    if not all(isinstance(_x, (CUexternalSemaphore,)) for _x in extSemArray):
-        raise TypeError("Argument 'extSemArray' is not instance of type (expected Tuple[cydriver.CUexternalSemaphore,] or List[cydriver.CUexternalSemaphore,]")
-    cdef cydriver.CUexternalSemaphore* cyextSemArray = NULL
-    if len(extSemArray) > 0:
-        cyextSemArray = <cydriver.CUexternalSemaphore*> calloc(len(extSemArray), sizeof(cydriver.CUexternalSemaphore))
-        if cyextSemArray is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore)))
-        else:
-            for idx in range(len(extSemArray)):
-                cyextSemArray[idx] = <cydriver.CUexternalSemaphore>(<CUexternalSemaphore>extSemArray[idx])._ptr[0]
-    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* cyparamsArray = NULL
-    if len(paramsArray) > 0:
-        cyparamsArray = <cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS*> calloc(len(paramsArray), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS))
-        if cyparamsArray is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS)))
-        for idx in range(len(paramsArray)):
-            string.memcpy(&cyparamsArray[idx], (<CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS>paramsArray[idx])._ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS))
-    if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems))
-    if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems))
-    err = cydriver.cuWaitExternalSemaphoresAsync(<cydriver.CUexternalSemaphore*>(<CUexternalSemaphore>extSemArray[0])._ptr if len(extSemArray) == 1 else cyextSemArray, (<CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS>paramsArray[0])._ptr if len(paramsArray) == 1 else cyparamsArray, numExtSems, cystream)
-    if cyextSemArray is not NULL:
-        free(cyextSemArray)
-    if cyparamsArray is not NULL:
-        free(cyparamsArray)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuDestroyExternalSemaphore' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDestroyExternalSemaphore(extSem):
-    """ Destroys an external semaphore.
-
-    Destroys an external semaphore object and releases any references to
-    the underlying resource. Any outstanding signals or waits must have
-    completed before the semaphore is destroyed.
-
-    Parameters
-    ----------
-    extSem : :py:obj:`~.CUexternalSemaphore`
-        External semaphore to be destroyed
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`
-    """
-    cdef cydriver.CUexternalSemaphore cyextSem
-    if extSem is None:
-        cyextSem = <cydriver.CUexternalSemaphore><void_ptr>0
-    elif isinstance(extSem, (CUexternalSemaphore,)):
-        pextSem = int(extSem)
-        cyextSem = <cydriver.CUexternalSemaphore><void_ptr>pextSem
-    else:
-        pextSem = int(CUexternalSemaphore(extSem))
-        cyextSem = <cydriver.CUexternalSemaphore><void_ptr>pextSem
-    err = cydriver.cuDestroyExternalSemaphore(cyextSem)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamWaitValue32_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamWaitValue32(stream, addr, value, unsigned int flags):
-    """ Wait on a memory location.
-
-    Enqueues a synchronization of the stream on the given memory location.
-    Work ordered after the operation will block until the given condition
-    on the memory is satisfied. By default, the condition is to wait for
-    (int32_t)(*addr - value) >= 0, a cyclic greater-or-equal. Other
-    condition types can be specified via `flags`.
-
-    If the memory was registered via :py:obj:`~.cuMemHostRegister()`, the
-    device pointer should be obtained with
-    :py:obj:`~.cuMemHostGetDevicePointer()`. This function cannot be used
-    with managed memory (:py:obj:`~.cuMemAllocManaged`).
-
-    Support for CU_STREAM_WAIT_VALUE_NOR can be queried with
-    :py:obj:`~.cuDeviceGetAttribute()` and
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2`.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to synchronize on the memory location.
-    addr : :py:obj:`~.CUdeviceptr`
-        The memory location to wait on.
-    value : Any
-        The value to compare with the memory location.
-    flags : unsigned int
-        See :py:obj:`~.CUstreamWaitValue_flags`.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamWaitValue64`, :py:obj:`~.cuStreamWriteValue32`, :py:obj:`~.cuStreamWriteValue64`, :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cuStreamWaitEvent`
-
-    Notes
-    -----
-    Warning: Improper use of this API may deadlock the application. Synchronization ordering established through this API is not visible to CUDA. CUDA tasks that are (even indirectly) ordered by this API should also have that order expressed with CUDA-visible dependencies such as events. This ensures that the scheduler does not serialize them in an improper order.
-    """
-    cdef cydriver.cuuint32_t cyvalue
-    if value is None:
-        cyvalue = <cydriver.cuuint32_t><void_ptr>0
-    elif isinstance(value, (cuuint32_t,)):
-        pvalue = int(value)
-        cyvalue = <cydriver.cuuint32_t><void_ptr>pvalue
-    else:
-        pvalue = int(cuuint32_t(value))
-        cyvalue = <cydriver.cuuint32_t><void_ptr>pvalue
-    cdef cydriver.CUdeviceptr cyaddr
-    if addr is None:
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(addr, (CUdeviceptr,)):
-        paddr = int(addr)
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>paddr
-    else:
-        paddr = int(CUdeviceptr(addr))
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>paddr
-    cdef cydriver.CUstream cystream
-    if stream is None:
-        cystream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(stream, (CUstream,)):
-        pstream = int(stream)
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    else:
-        pstream = int(CUstream(stream))
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    err = cydriver.cuStreamWaitValue32(cystream, cyaddr, cyvalue, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamWaitValue64_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamWaitValue64(stream, addr, value, unsigned int flags):
-    """ Wait on a memory location.
-
-    Enqueues a synchronization of the stream on the given memory location.
-    Work ordered after the operation will block until the given condition
-    on the memory is satisfied. By default, the condition is to wait for
-    (int64_t)(*addr - value) >= 0, a cyclic greater-or-equal. Other
-    condition types can be specified via `flags`.
-
-    If the memory was registered via :py:obj:`~.cuMemHostRegister()`, the
-    device pointer should be obtained with
-    :py:obj:`~.cuMemHostGetDevicePointer()`.
-
-    Support for this can be queried with :py:obj:`~.cuDeviceGetAttribute()`
-    and :py:obj:`~.CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS`.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to synchronize on the memory location.
-    addr : :py:obj:`~.CUdeviceptr`
-        The memory location to wait on.
-    value : Any
-        The value to compare with the memory location.
-    flags : unsigned int
-        See :py:obj:`~.CUstreamWaitValue_flags`.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamWaitValue32`, :py:obj:`~.cuStreamWriteValue32`, :py:obj:`~.cuStreamWriteValue64`, :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cuStreamWaitEvent`
-
-    Notes
-    -----
-    Warning: Improper use of this API may deadlock the application. Synchronization ordering established through this API is not visible to CUDA. CUDA tasks that are (even indirectly) ordered by this API should also have that order expressed with CUDA-visible dependencies such as events. This ensures that the scheduler does not serialize them in an improper order.
-    """
-    cdef cydriver.cuuint64_t cyvalue
-    if value is None:
-        cyvalue = <cydriver.cuuint64_t><void_ptr>0
-    elif isinstance(value, (cuuint64_t,)):
-        pvalue = int(value)
-        cyvalue = <cydriver.cuuint64_t><void_ptr>pvalue
-    else:
-        pvalue = int(cuuint64_t(value))
-        cyvalue = <cydriver.cuuint64_t><void_ptr>pvalue
-    cdef cydriver.CUdeviceptr cyaddr
-    if addr is None:
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(addr, (CUdeviceptr,)):
-        paddr = int(addr)
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>paddr
-    else:
-        paddr = int(CUdeviceptr(addr))
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>paddr
-    cdef cydriver.CUstream cystream
-    if stream is None:
-        cystream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(stream, (CUstream,)):
-        pstream = int(stream)
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    else:
-        pstream = int(CUstream(stream))
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    err = cydriver.cuStreamWaitValue64(cystream, cyaddr, cyvalue, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamWriteValue32_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamWriteValue32(stream, addr, value, unsigned int flags):
-    """ Write a value to memory.
-
-    Write a value to memory.
-
-    If the memory was registered via :py:obj:`~.cuMemHostRegister()`, the
-    device pointer should be obtained with
-    :py:obj:`~.cuMemHostGetDevicePointer()`. This function cannot be used
-    with managed memory (:py:obj:`~.cuMemAllocManaged`).
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to do the write in.
-    addr : :py:obj:`~.CUdeviceptr`
-        The device address to write to.
-    value : Any
-        The value to write.
-    flags : unsigned int
-        See :py:obj:`~.CUstreamWriteValue_flags`.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamWriteValue64`, :py:obj:`~.cuStreamWaitValue32`, :py:obj:`~.cuStreamWaitValue64`, :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cuEventRecord`
-    """
-    cdef cydriver.cuuint32_t cyvalue
-    if value is None:
-        cyvalue = <cydriver.cuuint32_t><void_ptr>0
-    elif isinstance(value, (cuuint32_t,)):
-        pvalue = int(value)
-        cyvalue = <cydriver.cuuint32_t><void_ptr>pvalue
-    else:
-        pvalue = int(cuuint32_t(value))
-        cyvalue = <cydriver.cuuint32_t><void_ptr>pvalue
-    cdef cydriver.CUdeviceptr cyaddr
-    if addr is None:
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(addr, (CUdeviceptr,)):
-        paddr = int(addr)
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>paddr
-    else:
-        paddr = int(CUdeviceptr(addr))
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>paddr
-    cdef cydriver.CUstream cystream
-    if stream is None:
-        cystream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(stream, (CUstream,)):
-        pstream = int(stream)
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    else:
-        pstream = int(CUstream(stream))
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    err = cydriver.cuStreamWriteValue32(cystream, cyaddr, cyvalue, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamWriteValue64_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamWriteValue64(stream, addr, value, unsigned int flags):
-    """ Write a value to memory.
-
-    Write a value to memory.
-
-    If the memory was registered via :py:obj:`~.cuMemHostRegister()`, the
-    device pointer should be obtained with
-    :py:obj:`~.cuMemHostGetDevicePointer()`.
-
-    Support for this can be queried with :py:obj:`~.cuDeviceGetAttribute()`
-    and :py:obj:`~.CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS`.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to do the write in.
-    addr : :py:obj:`~.CUdeviceptr`
-        The device address to write to.
-    value : Any
-        The value to write.
-    flags : unsigned int
-        See :py:obj:`~.CUstreamWriteValue_flags`.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamWriteValue32`, :py:obj:`~.cuStreamWaitValue32`, :py:obj:`~.cuStreamWaitValue64`, :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cuEventRecord`
-    """
-    cdef cydriver.cuuint64_t cyvalue
-    if value is None:
-        cyvalue = <cydriver.cuuint64_t><void_ptr>0
-    elif isinstance(value, (cuuint64_t,)):
-        pvalue = int(value)
-        cyvalue = <cydriver.cuuint64_t><void_ptr>pvalue
-    else:
-        pvalue = int(cuuint64_t(value))
-        cyvalue = <cydriver.cuuint64_t><void_ptr>pvalue
-    cdef cydriver.CUdeviceptr cyaddr
-    if addr is None:
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(addr, (CUdeviceptr,)):
-        paddr = int(addr)
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>paddr
-    else:
-        paddr = int(CUdeviceptr(addr))
-        cyaddr = <cydriver.CUdeviceptr><void_ptr>paddr
-    cdef cydriver.CUstream cystream
-    if stream is None:
-        cystream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(stream, (CUstream,)):
-        pstream = int(stream)
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    else:
-        pstream = int(CUstream(stream))
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    err = cydriver.cuStreamWriteValue64(cystream, cyaddr, cyvalue, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamBatchMemOp_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamBatchMemOp(stream, unsigned int count, paramArray : Optional[Tuple[CUstreamBatchMemOpParams] | List[CUstreamBatchMemOpParams]], unsigned int flags):
-    """ Batch operations to synchronize the stream via memory operations.
-
-    This is a batch version of :py:obj:`~.cuStreamWaitValue32()` and
-    :py:obj:`~.cuStreamWriteValue32()`. Batching operations may avoid some
-    performance overhead in both the API call and the device execution
-    versus adding them to the stream in separate API calls. The operations
-    are enqueued in the order they appear in the array.
-
-    See :py:obj:`~.CUstreamBatchMemOpType` for the full set of supported
-    operations, and :py:obj:`~.cuStreamWaitValue32()`,
-    :py:obj:`~.cuStreamWaitValue64()`, :py:obj:`~.cuStreamWriteValue32()`,
-    and :py:obj:`~.cuStreamWriteValue64()` for details of specific
-    operations.
-
-    See related APIs for details on querying support for specific
-    operations.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to enqueue the operations in.
-    count : unsigned int
-        The number of operations in the array. Must be less than 256.
-    paramArray : List[:py:obj:`~.CUstreamBatchMemOpParams`]
-        The types and parameters of the individual operations.
-    flags : unsigned int
-        Reserved for future expansion; must be 0.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamWaitValue32`, :py:obj:`~.cuStreamWaitValue64`, :py:obj:`~.cuStreamWriteValue32`, :py:obj:`~.cuStreamWriteValue64`, :py:obj:`~.cuMemHostRegister`
-
-    Notes
-    -----
-    Warning: Improper use of this API may deadlock the application. Synchronization ordering established through this API is not visible to CUDA. CUDA tasks that are (even indirectly) ordered by this API should also have that order expressed with CUDA-visible dependencies such as events. This ensures that the scheduler does not serialize them in an improper order. For more information, see the Stream Memory Operations section in the programming guide(https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html).
-    """
-    paramArray = [] if paramArray is None else paramArray
-    if not all(isinstance(_x, (CUstreamBatchMemOpParams,)) for _x in paramArray):
-        raise TypeError("Argument 'paramArray' is not instance of type (expected Tuple[cydriver.CUstreamBatchMemOpParams,] or List[cydriver.CUstreamBatchMemOpParams,]")
-    cdef cydriver.CUstream cystream
-    if stream is None:
-        cystream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(stream, (CUstream,)):
-        pstream = int(stream)
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    else:
-        pstream = int(CUstream(stream))
-        cystream = <cydriver.CUstream><void_ptr>pstream
-    if count > len(paramArray): raise RuntimeError("List is too small: " + str(len(paramArray)) + " < " + str(count))
-    cdef cydriver.CUstreamBatchMemOpParams* cyparamArray = NULL
-    if len(paramArray) > 0:
-        cyparamArray = <cydriver.CUstreamBatchMemOpParams*> calloc(len(paramArray), sizeof(cydriver.CUstreamBatchMemOpParams))
-        if cyparamArray is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramArray)) + 'x' + str(sizeof(cydriver.CUstreamBatchMemOpParams)))
-        for idx in range(len(paramArray)):
-            string.memcpy(&cyparamArray[idx], (<CUstreamBatchMemOpParams>paramArray[idx])._ptr, sizeof(cydriver.CUstreamBatchMemOpParams))
-    err = cydriver.cuStreamBatchMemOp(cystream, count, (<CUstreamBatchMemOpParams>paramArray[0])._ptr if len(paramArray) == 1 else cyparamArray, flags)
-    if cyparamArray is not NULL:
-        free(cyparamArray)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuFuncGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFuncGetAttribute(attrib not None : CUfunction_attribute, hfunc):
-    """ Returns information about a function.
-
-    Returns in `*pi` the integer value of the attribute `attrib` on the
-    kernel given by `hfunc`. The supported attributes are:
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK`: The maximum
-      number of threads per block, beyond which a launch of the function
-      would fail. This number depends on both the function and the device
-      on which the function is currently loaded.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES`: The size in bytes of
-      statically-allocated shared memory per block required by this
-      function. This does not include dynamically-allocated shared memory
-      requested by the user at runtime.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES`: The size in bytes of
-      user-allocated constant memory required by this function.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES`: The size in bytes of
-      local memory used by each thread of this function.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_NUM_REGS`: The number of registers used
-      by each thread of this function.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_PTX_VERSION`: The PTX virtual
-      architecture version for which the function was compiled. This value
-      is the major PTX version * 10
-
-      - the minor PTX version, so a PTX version 1.3 function would return
-        the value 13. Note that this may return the undefined value of 0
-        for cubins compiled prior to CUDA 3.0.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_BINARY_VERSION`: The binary architecture
-      version for which the function was compiled. This value is the major
-      binary version * 10 + the minor binary version, so a binary version
-      1.3 function would return the value 13. Note that this will return a
-      value of 10 for legacy cubins that do not have a properly-encoded
-      binary architecture version.
-
-    - :py:obj:`~.CU_FUNC_CACHE_MODE_CA`: The attribute to indicate whether
-      the function has been compiled with user specified option "-Xptxas
-      --dlcm=ca" set .
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`: The
-      maximum size in bytes of dynamically-allocated shared memory.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`:
-      Preferred shared memory-L1 cache split ratio in percent of total
-      shared memory.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET`: If this
-      attribute is set, the kernel must launch with a valid cluster size
-      specified.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH`: The required
-      cluster width in blocks.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT`: The required
-      cluster height in blocks.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH`: The required
-      cluster depth in blocks.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED`:
-      Indicates whether the function can be launched with non-portable
-      cluster size. 1 is allowed, 0 is disallowed. A non-portable cluster
-      size may only function on the specific SKUs the program is tested on.
-      The launch might fail if the program is run on a different hardware
-      platform. CUDA API provides cudaOccupancyMaxActiveClusters to assist
-      with checking whether the desired size can be launched on the current
-      device. A portable cluster size is guaranteed to be functional on all
-      compute capabilities higher than the target compute capability. The
-      portable cluster size for sm_90 is 8 blocks per cluster. This value
-      may increase for future compute capabilities. The specific hardware
-      unit may support higher cluster sizes that’s not guaranteed to be
-      portable.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`:
-      The block scheduling policy of a function. The value type is
-      CUclusterSchedulingPolicy.
-
-    With a few execeptions, function attributes may also be queried on
-    unloaded function handles returned from
-    :py:obj:`~.cuModuleEnumerateFunctions`.
-    :py:obj:`~.CUDA_ERROR_FUNCTION_NOT_LOADED` is returned if the attribute
-    requires a fully loaded function but the function is not loaded. The
-    loading state of a function may be queried using
-    :py:obj:`~.cuFuncIsloaded`. :py:obj:`~.cuFuncLoad` may be called to
-    explicitly load a function before querying the following attributes
-    that require the function to be loaded:
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK`
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES`
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`
-
-    Parameters
-    ----------
-    attrib : :py:obj:`~.CUfunction_attribute`
-        Attribute requested
-    hfunc : :py:obj:`~.CUfunction`
-        Function to query attribute of
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_FUNCTION_NOT_LOADED`
-    pi : int
-        Returned attribute value
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cudaFuncGetAttributes`, :py:obj:`~.cudaFuncSetAttribute`, :py:obj:`~.cuFuncIsLoaded`, :py:obj:`~.cuFuncLoad`, :py:obj:`~.cuKernelGetAttribute`
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    cdef int pi = 0
-    cdef cydriver.CUfunction_attribute cyattrib = attrib.value
-    err = cydriver.cuFuncGetAttribute(&pi, cyattrib, cyhfunc)
-    return (CUresult(err), pi)
-{{endif}}
-
-{{if 'cuFuncSetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFuncSetAttribute(hfunc, attrib not None : CUfunction_attribute, int value):
-    """ Sets information about a function.
-
-    This call sets the value of a specified attribute `attrib` on the
-    kernel given by `hfunc` to an integer value specified by `val` This
-    function returns CUDA_SUCCESS if the new value of the attribute could
-    be successfully set. If the set fails, this call will return an error.
-    Not all attributes can have values set. Attempting to set a value on a
-    read-only attribute will result in an error (CUDA_ERROR_INVALID_VALUE)
-
-    Supported attributes for the cuFuncSetAttribute call are:
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES`: This
-      maximum size in bytes of dynamically-allocated shared memory. The
-      value should contain the requested maximum size of dynamically-
-      allocated shared memory. The sum of this value and the function
-      attribute :py:obj:`~.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES` cannot
-      exceed the device attribute
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN`.
-      The maximal size of requestable dynamic shared memory may differ by
-      GPU architecture.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`: On
-      devices where the L1 cache and shared memory use the same hardware
-      resources, this sets the shared memory carveout preference, in
-      percent of the total shared memory. See
-      :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR`
-      This is only a hint, and the driver can choose a different ratio if
-      required to execute the function.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH`: The required
-      cluster width in blocks. The width, height, and depth values must
-      either all be 0 or all be positive. The validity of the cluster
-      dimensions is checked at launch time. If the value is set during
-      compile time, it cannot be set at runtime. Setting it at runtime will
-      return CUDA_ERROR_NOT_PERMITTED.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT`: The required
-      cluster height in blocks. The width, height, and depth values must
-      either all be 0 or all be positive. The validity of the cluster
-      dimensions is checked at launch time. If the value is set during
-      compile time, it cannot be set at runtime. Setting it at runtime will
-      return CUDA_ERROR_NOT_PERMITTED.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH`: The required
-      cluster depth in blocks. The width, height, and depth values must
-      either all be 0 or all be positive. The validity of the cluster
-      dimensions is checked at launch time. If the value is set during
-      compile time, it cannot be set at runtime. Setting it at runtime will
-      return CUDA_ERROR_NOT_PERMITTED.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED`:
-      Indicates whether the function can be launched with non-portable
-      cluster size. 1 is allowed, 0 is disallowed.
-
-    - :py:obj:`~.CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`:
-      The block scheduling policy of a function. The value type is
-      CUclusterSchedulingPolicy.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        Function to query attribute of
-    attrib : :py:obj:`~.CUfunction_attribute`
-        Attribute requested
-    value : int
-        The value to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cudaFuncGetAttributes`, :py:obj:`~.cudaFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    cdef cydriver.CUfunction_attribute cyattrib = attrib.value
-    err = cydriver.cuFuncSetAttribute(cyhfunc, cyattrib, value)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuFuncSetCacheConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFuncSetCacheConfig(hfunc, config not None : CUfunc_cache):
-    """ Sets the preferred cache configuration for a device function.
-
-    On devices where the L1 cache and shared memory use the same hardware
-    resources, this sets through `config` the preferred cache configuration
-    for the device function `hfunc`. This is only a preference. The driver
-    will use the requested configuration if possible, but it is free to
-    choose a different configuration if required to execute `hfunc`. Any
-    context-wide preference set via :py:obj:`~.cuCtxSetCacheConfig()` will
-    be overridden by this per-function setting unless the per-function
-    setting is :py:obj:`~.CU_FUNC_CACHE_PREFER_NONE`. In that case, the
-    current context-wide setting will be used.
-
-    This setting does nothing on devices where the size of the L1 cache and
-    shared memory are fixed.
-
-    Launching a kernel with a different preference than the most recent
-    preference setting may insert a device-side synchronization point.
-
-    The supported cache configurations are:
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_NONE`: no preference for shared
-      memory or L1 (default)
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_SHARED`: prefer larger shared memory
-      and smaller L1 cache
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_L1`: prefer larger L1 cache and
-      smaller shared memory
-
-    - :py:obj:`~.CU_FUNC_CACHE_PREFER_EQUAL`: prefer equal sized L1 cache
-      and shared memory
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        Kernel to configure cache for
-    config : :py:obj:`~.CUfunc_cache`
-        Requested cache configuration
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuKernelSetCacheConfig`
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    cdef cydriver.CUfunc_cache cyconfig = config.value
-    err = cydriver.cuFuncSetCacheConfig(cyhfunc, cyconfig)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuFuncGetModule' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFuncGetModule(hfunc):
-    """ Returns a module handle.
-
-    Returns in `*hmod` the handle of the module that function `hfunc` is
-    located in. The lifetime of the module corresponds to the lifetime of
-    the context it was loaded in or until the module is explicitly
-    unloaded.
-
-    The CUDA runtime manages its own modules loaded into the primary
-    context. If the handle returned by this API refers to a module loaded
-    by the CUDA runtime, calling :py:obj:`~.cuModuleUnload()` on that
-    module will result in undefined behavior.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        Function to retrieve module for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-    hmod : :py:obj:`~.CUmodule`
-        Returned module handle
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    cdef CUmodule hmod = CUmodule()
-    err = cydriver.cuFuncGetModule(<cydriver.CUmodule*>hmod._ptr, cyhfunc)
-    return (CUresult(err), hmod)
-{{endif}}
-
-{{if 'cuFuncGetName' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFuncGetName(hfunc):
-    """ Returns the function name for a :py:obj:`~.CUfunction` handle.
-
-    Returns in `**name` the function name associated with the function
-    handle `hfunc` . The function name is returned as a null-terminated
-    string. The returned name is only valid when the function handle is
-    valid. If the module is unloaded or reloaded, one must call the API
-    again to get the updated name. This API may return a mangled name if
-    the function is not declared as having C linkage. If either `**name` or
-    `hfunc` is NULL, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        The function handle to retrieve the name for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    name : bytes
-        The returned name of the function
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    cdef const char* name = NULL
-    err = cydriver.cuFuncGetName(&name, cyhfunc)
-    return (CUresult(err), <bytes>name)
-{{endif}}
-
-{{if 'cuFuncGetParamInfo' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFuncGetParamInfo(func, size_t paramIndex):
-    """ Returns the offset and size of a kernel parameter in the device-side parameter layout.
-
-    Queries the kernel parameter at `paramIndex` into `func's` list of
-    parameters, and returns in `paramOffset` and `paramSize` the offset and
-    size, respectively, where the parameter will reside in the device-side
-    parameter layout. This information can be used to update kernel node
-    parameters from the device via
-    :py:obj:`~.cudaGraphKernelNodeSetParam()` and
-    :py:obj:`~.cudaGraphKernelNodeUpdatesApply()`. `paramIndex` must be
-    less than the number of parameters that `func` takes. `paramSize` can
-    be set to NULL if only the parameter offset is desired.
-
-    Parameters
-    ----------
-    func : :py:obj:`~.CUfunction`
-        The function to query
-    paramIndex : size_t
-        The parameter index to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    paramOffset : int
-        Returns the offset into the device-side parameter layout at which
-        the parameter resides
-    paramSize : int
-        Optionally returns the size of the parameter in the device-side
-        parameter layout
-
-    See Also
-    --------
-    :py:obj:`~.cuKernelGetParamInfo`
-    """
-    cdef cydriver.CUfunction cyfunc
-    if func is None:
-        cyfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(func, (CUfunction,)):
-        pfunc = int(func)
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    else:
-        pfunc = int(CUfunction(func))
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    cdef size_t paramOffset = 0
-    cdef size_t paramSize = 0
-    err = cydriver.cuFuncGetParamInfo(cyfunc, paramIndex, &paramOffset, &paramSize)
-    return (CUresult(err), paramOffset, paramSize)
-{{endif}}
-
-{{if 'cuFuncIsLoaded' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFuncIsLoaded(function):
-    """ Returns if the function is loaded.
-
-    Returns in `state` the loading state of `function`.
-
-    Parameters
-    ----------
-    function : :py:obj:`~.CUfunction`
-        the function to check
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    state : :py:obj:`~.CUfunctionLoadingState`
-        returned loading state
-
-    See Also
-    --------
-    :py:obj:`~.cuFuncLoad`, :py:obj:`~.cuModuleEnumerateFunctions`
-    """
-    cdef cydriver.CUfunction cyfunction
-    if function is None:
-        cyfunction = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(function, (CUfunction,)):
-        pfunction = int(function)
-        cyfunction = <cydriver.CUfunction><void_ptr>pfunction
-    else:
-        pfunction = int(CUfunction(function))
-        cyfunction = <cydriver.CUfunction><void_ptr>pfunction
-    cdef cydriver.CUfunctionLoadingState state
-    err = cydriver.cuFuncIsLoaded(&state, cyfunction)
-    return (CUresult(err), CUfunctionLoadingState(state))
-{{endif}}
-
-{{if 'cuFuncLoad' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFuncLoad(function):
-    """ Loads a function.
-
-    Finalizes function loading for `function`. Calling this API with a
-    fully loaded function has no effect.
-
-    Parameters
-    ----------
-    function : :py:obj:`~.CUfunction`
-        the function to load
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleEnumerateFunctions`, :py:obj:`~.cuFuncIsLoaded`
-    """
-    cdef cydriver.CUfunction cyfunction
-    if function is None:
-        cyfunction = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(function, (CUfunction,)):
-        pfunction = int(function)
-        cyfunction = <cydriver.CUfunction><void_ptr>pfunction
-    else:
-        pfunction = int(CUfunction(function))
-        cyfunction = <cydriver.CUfunction><void_ptr>pfunction
-    err = cydriver.cuFuncLoad(cyfunction)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuLaunchKernel' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLaunchKernel(f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hStream, kernelParams, void_ptr extra):
-    """ Launches a CUDA function :py:obj:`~.CUfunction` or a CUDA kernel :py:obj:`~.CUkernel`.
-
-    Invokes the function :py:obj:`~.CUfunction` or the kernel
-    :py:obj:`~.CUkernel` `f` on a `gridDimX` x `gridDimY` x `gridDimZ` grid
-    of blocks. Each block contains `blockDimX` x `blockDimY` x `blockDimZ`
-    threads.
-
-    `sharedMemBytes` sets the amount of dynamic shared memory that will be
-    available to each thread block.
-
-    Kernel parameters to `f` can be specified in one of two ways:
-
-    1) Kernel parameters can be specified via `kernelParams`. If `f` has N
-    parameters, then `kernelParams` needs to be an array of N pointers.
-    Each of `kernelParams`[0] through `kernelParams`[N-1] must point to a
-    region of memory from which the actual kernel parameter will be copied.
-    The number of kernel parameters and their offsets and sizes do not need
-    to be specified as that information is retrieved directly from the
-    kernel's image.
-
-    2) Kernel parameters can also be packaged by the application into a
-    single buffer that is passed in via the `extra` parameter. This places
-    the burden on the application of knowing each kernel parameter's size
-    and alignment/padding within the buffer. Here is an example of using
-    the `extra` parameter in this manner:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    The `extra` parameter exists to allow :py:obj:`~.cuLaunchKernel` to
-    take additional less commonly used arguments. `extra` specifies a list
-    of names of extra settings and their corresponding values. Each extra
-    setting name is immediately followed by the corresponding value. The
-    list must be terminated with either NULL or
-    :py:obj:`~.CU_LAUNCH_PARAM_END`.
-
-    - :py:obj:`~.CU_LAUNCH_PARAM_END`, which indicates the end of the
-      `extra` array;
-
-    - :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`, which specifies that the
-      next value in `extra` will be a pointer to a buffer containing all
-      the kernel parameters for launching kernel `f`;
-
-    - :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_SIZE`, which specifies that the
-      next value in `extra` will be a pointer to a size_t containing the
-      size of the buffer specified with
-      :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`;
-
-    The error :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be returned if
-    kernel parameters are specified with both `kernelParams` and `extra`
-    (i.e. both `kernelParams` and `extra` are non-NULL).
-
-    Calling :py:obj:`~.cuLaunchKernel()` invalidates the persistent
-    function state set through the following deprecated APIs:
-    :py:obj:`~.cuFuncSetBlockShape()`, :py:obj:`~.cuFuncSetSharedSize()`,
-    :py:obj:`~.cuParamSetSize()`, :py:obj:`~.cuParamSeti()`,
-    :py:obj:`~.cuParamSetf()`, :py:obj:`~.cuParamSetv()`.
-
-    Note that to use :py:obj:`~.cuLaunchKernel()`, the kernel `f` must
-    either have been compiled with toolchain version 3.2 or later so that
-    it will contain kernel parameter information, or have no kernel
-    parameters. If either of these conditions is not met, then
-    :py:obj:`~.cuLaunchKernel()` will return
-    :py:obj:`~.CUDA_ERROR_INVALID_IMAGE`.
-
-    Note that the API can also be used to launch context-less kernel
-    :py:obj:`~.CUkernel` by querying the handle using
-    :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by
-    casting to :py:obj:`~.CUfunction`. Here, the context to launch the
-    kernel on will either be taken from the specified stream `hStream` or
-    the current context in case of NULL stream.
-
-    Parameters
-    ----------
-    f : :py:obj:`~.CUfunction`
-        Function :py:obj:`~.CUfunction` or Kernel :py:obj:`~.CUkernel` to
-        launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : List[Any]
-        Extra options
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_IMAGE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_LAUNCH_FAILED`, :py:obj:`~.CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES`, :py:obj:`~.CUDA_ERROR_LAUNCH_TIMEOUT`, :py:obj:`~.CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cuLibraryGetKernel`, :py:obj:`~.cuKernelSetCacheConfig`, :py:obj:`~.cuKernelGetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUfunction cyf
-    if f is None:
-        cyf = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(f, (CUfunction,)):
-        pf = int(f)
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    else:
-        pf = int(CUfunction(f))
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    cykernelParams = utils.HelperKernelParams(kernelParams)
-    err = cydriver.cuLaunchKernel(cyf, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, cyhStream, <void**><void_ptr>cykernelParams.ckernelParams, <void**>extra)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuLaunchKernelEx' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLaunchKernelEx(config : Optional[CUlaunchConfig], f, kernelParams, void_ptr extra):
-    """ Launches a CUDA function :py:obj:`~.CUfunction` or a CUDA kernel :py:obj:`~.CUkernel` with launch-time configuration.
-
-    Invokes the function :py:obj:`~.CUfunction` or the kernel
-    :py:obj:`~.CUkernel` `f` with the specified launch-time configuration
-    `config`.
-
-    The :py:obj:`~.CUlaunchConfig` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.CUlaunchConfig.gridDimX` is the width of the grid in
-      blocks.
-
-    - :py:obj:`~.CUlaunchConfig.gridDimY` is the height of the grid in
-      blocks.
-
-    - :py:obj:`~.CUlaunchConfig.gridDimZ` is the depth of the grid in
-      blocks.
-
-    - :py:obj:`~.CUlaunchConfig.blockDimX` is the X dimension of each
-      thread block.
-
-    - :py:obj:`~.CUlaunchConfig.blockDimX` is the Y dimension of each
-      thread block.
-
-    - :py:obj:`~.CUlaunchConfig.blockDimZ` is the Z dimension of each
-      thread block.
-
-    - :py:obj:`~.CUlaunchConfig.sharedMemBytes` is the dynamic shared-
-      memory size per thread block in bytes.
-
-    - :py:obj:`~.CUlaunchConfig.hStream` is the handle to the stream to
-      perform the launch in. The CUDA context associated with this stream
-      must match that associated with function f.
-
-    - :py:obj:`~.CUlaunchConfig.attrs` is an array of
-      :py:obj:`~.CUlaunchConfig.numAttrs` continguous
-      :py:obj:`~.CUlaunchAttribute` elements. The value of this pointer is
-      not considered if :py:obj:`~.CUlaunchConfig.numAttrs` is zero.
-      However, in that case, it is recommended to set the pointer to NULL.
-
-    - :py:obj:`~.CUlaunchConfig.numAttrs` is the number of attributes
-      populating the first :py:obj:`~.CUlaunchConfig.numAttrs` positions of
-      the :py:obj:`~.CUlaunchConfig.attrs` array.
-
-    Launch-time configuration is specified by adding entries to
-    :py:obj:`~.CUlaunchConfig.attrs`. Each entry is an attribute ID and a
-    corresponding attribute value.
-
-    The :py:obj:`~.CUlaunchAttribute` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.CUlaunchAttribute.id` is a unique enum identifying the
-      attribute.
-
-    - :py:obj:`~.CUlaunchAttribute.value` is a union that hold the
-      attribute value.
-
-    An example of using the `config` parameter:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    The :py:obj:`~.CUlaunchAttributeID` enum is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    and the corresponding :py:obj:`~.CUlaunchAttributeValue` union as :
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Setting :py:obj:`~.CU_LAUNCH_ATTRIBUTE_COOPERATIVE` to a non-zero value
-    causes the kernel launch to be a cooperative launch, with exactly the
-    same usage and semantics of :py:obj:`~.cuLaunchCooperativeKernel`.
-
-    Setting
-    :py:obj:`~.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION` to a
-    non-zero values causes the kernel to use programmatic means to resolve
-    its stream dependency -- enabling the CUDA runtime to opportunistically
-    allow the grid's execution to overlap with the previous kernel in the
-    stream, if that kernel requests the overlap.
-
-    :py:obj:`~.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT` records an event
-    along with the kernel launch. Event recorded through this launch
-    attribute is guaranteed to only trigger after all block in the
-    associated kernel trigger the event. A block can trigger the event
-    through PTX launchdep.release or CUDA builtin function
-    cudaTriggerProgrammaticLaunchCompletion(). A trigger can also be
-    inserted at the beginning of each block's execution if
-    triggerAtBlockStart is set to non-0. Note that dependents (including
-    the CPU thread calling :py:obj:`~.cuEventSynchronize()`) are not
-    guaranteed to observe the release precisely when it is released. For
-    example, :py:obj:`~.cuEventSynchronize()` may only observe the event
-    trigger long after the associated kernel has completed. This recording
-    type is primarily meant for establishing programmatic dependency
-    between device tasks. The event supplied must not be an interprocess or
-    interop event. The event must disable timing (i.e. created with
-    :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag set).
-
-    :py:obj:`~.CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT` records an
-    event along with the kernel launch. Nominally, the event is triggered
-    once all blocks of the kernel have begun execution. Currently this is a
-    best effort. If a kernel B has a launch completion dependency on a
-    kernel A, B may wait until A is complete. Alternatively, blocks of B
-    may begin before all blocks of A have begun, for example:
-
-    - If B can claim execution resources unavaiable to A, for example if
-      they run on different GPUs.
-
-    - If B is a higher priority than A.
-
-    Exercise caution if such an ordering inversion could lead to deadlock.
-    The event supplied must not be an interprocess or interop event. The
-    event must disable timing (i.e. must be created with the
-    :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag set).
-
-    Setting :py:obj:`~.CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE` to
-    1 on a captured launch causes the resulting kernel node to be device-
-    updatable. This attribute is specific to graphs, and passing it to a
-    launch in a non-capturing stream results in an error. Passing a value
-    other than 0 or 1 is not allowed.
-
-    On success, a handle will be returned via
-    :py:obj:`~.CUlaunchAttributeValue`::deviceUpdatableKernelNode::devNode
-    which can be passed to the various device-side update functions to
-    update the node's kernel parameters from within another kernel. For
-    more information on the types of device updates that can be made, as
-    well as the relevant limitations thereof, see
-    :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
-
-    Kernel nodes which are device-updatable have additional restrictions
-    compared to regular kernel nodes. Firstly, device-updatable nodes
-    cannot be removed from their graph via :py:obj:`~.cuGraphDestroyNode`.
-    Additionally, once opted-in to this functionality, a node cannot opt
-    out, and any attempt to set the attribute to 0 will result in an error.
-    Graphs containing one or more device-updatable node also do not allow
-    multiple instantiation.
-
-    The effect of other attributes is consistent with their effect when set
-    via persistent APIs.
-
-    See :py:obj:`~.cuStreamSetAttribute` for
-
-    - :py:obj:`~.CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW`
-
-    - :py:obj:`~.CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY`
-
-    See :py:obj:`~.cuFuncSetAttribute` for
-
-    - :py:obj:`~.CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION`
-
-    - :py:obj:`~.CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`
-
-    Kernel parameters to `f` can be specified in the same ways that they
-    can be using :py:obj:`~.cuLaunchKernel`.
-
-    Note that the API can also be used to launch context-less kernel
-    :py:obj:`~.CUkernel` by querying the handle using
-    :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by
-    casting to :py:obj:`~.CUfunction`. Here, the context to launch the
-    kernel on will either be taken from the specified stream
-    :py:obj:`~.CUlaunchConfig.hStream` or the current context in case of
-    NULL stream.
-
-    Parameters
-    ----------
-    config : :py:obj:`~.CUlaunchConfig`
-        Config to launch
-    f : :py:obj:`~.CUfunction`
-        Function :py:obj:`~.CUfunction` or Kernel :py:obj:`~.CUkernel` to
-        launch
-    kernelParams : Any
-        Array of pointers to kernel parameters
-    extra : List[Any]
-        Extra options
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_IMAGE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_LAUNCH_FAILED`, :py:obj:`~.CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES`, :py:obj:`~.CUDA_ERROR_LAUNCH_TIMEOUT`, :py:obj:`~.CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING`, :py:obj:`~.CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaLaunchKernelEx`, :py:obj:`~.cuLibraryGetKernel`, :py:obj:`~.cuKernelSetCacheConfig`, :py:obj:`~.cuKernelGetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-    """
-    cdef cydriver.CUfunction cyf
-    if f is None:
-        cyf = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(f, (CUfunction,)):
-        pf = int(f)
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    else:
-        pf = int(CUfunction(f))
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    cdef cydriver.CUlaunchConfig* cyconfig_ptr = config._ptr if config != None else NULL
-    cykernelParams = utils.HelperKernelParams(kernelParams)
-    err = cydriver.cuLaunchKernelEx(cyconfig_ptr, cyf, <void**><void_ptr>cykernelParams.ckernelParams, <void**>extra)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuLaunchCooperativeKernel' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLaunchCooperativeKernel(f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, hStream, kernelParams):
-    """ Launches a CUDA function :py:obj:`~.CUfunction` or a CUDA kernel :py:obj:`~.CUkernel` where thread blocks can cooperate and synchronize as they execute.
-
-    Invokes the function :py:obj:`~.CUfunction` or the kernel
-    :py:obj:`~.CUkernel` `f` on a `gridDimX` x `gridDimY` x `gridDimZ` grid
-    of blocks. Each block contains `blockDimX` x `blockDimY` x `blockDimZ`
-    threads.
-
-    `sharedMemBytes` sets the amount of dynamic shared memory that will be
-    available to each thread block.
-
-    The device on which this kernel is invoked must have a non-zero value
-    for the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH`.
-
-    The total number of blocks launched cannot exceed the maximum number of
-    blocks per multiprocessor as returned by
-    :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessor` (or
-    :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`) times
-    the number of multiprocessors as specified by the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT`.
-
-    The kernel cannot make use of CUDA dynamic parallelism.
-
-    Kernel parameters must be specified via `kernelParams`. If `f` has N
-    parameters, then `kernelParams` needs to be an array of N pointers.
-    Each of `kernelParams`[0] through `kernelParams`[N-1] must point to a
-    region of memory from which the actual kernel parameter will be copied.
-    The number of kernel parameters and their offsets and sizes do not need
-    to be specified as that information is retrieved directly from the
-    kernel's image.
-
-    Calling :py:obj:`~.cuLaunchCooperativeKernel()` sets persistent
-    function state that is the same as function state set through
-    :py:obj:`~.cuLaunchKernel` API
-
-    When the kernel `f` is launched via
-    :py:obj:`~.cuLaunchCooperativeKernel()`, the previous block shape,
-    shared size and parameter info associated with `f` is overwritten.
-
-    Note that to use :py:obj:`~.cuLaunchCooperativeKernel()`, the kernel
-    `f` must either have been compiled with toolchain version 3.2 or later
-    so that it will contain kernel parameter information, or have no kernel
-    parameters. If either of these conditions is not met, then
-    :py:obj:`~.cuLaunchCooperativeKernel()` will return
-    :py:obj:`~.CUDA_ERROR_INVALID_IMAGE`.
-
-    Note that the API can also be used to launch context-less kernel
-    :py:obj:`~.CUkernel` by querying the handle using
-    :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by
-    casting to :py:obj:`~.CUfunction`. Here, the context to launch the
-    kernel on will either be taken from the specified stream `hStream` or
-    the current context in case of NULL stream.
-
-    Parameters
-    ----------
-    f : :py:obj:`~.CUfunction`
-        Function :py:obj:`~.CUfunction` or Kernel :py:obj:`~.CUkernel` to
-        launch
-    gridDimX : unsigned int
-        Width of grid in blocks
-    gridDimY : unsigned int
-        Height of grid in blocks
-    gridDimZ : unsigned int
-        Depth of grid in blocks
-    blockDimX : unsigned int
-        X dimension of each thread block
-    blockDimY : unsigned int
-        Y dimension of each thread block
-    blockDimZ : unsigned int
-        Z dimension of each thread block
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-    kernelParams : Any
-        Array of pointers to kernel parameters
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_IMAGE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_LAUNCH_FAILED`, :py:obj:`~.CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES`, :py:obj:`~.CUDA_ERROR_LAUNCH_TIMEOUT`, :py:obj:`~.CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING`, :py:obj:`~.CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`, :py:obj:`~.CUDA_ERROR_NOT_FOUND`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuLaunchCooperativeKernelMultiDevice`, :py:obj:`~.cudaLaunchCooperativeKernel`, :py:obj:`~.cuLibraryGetKernel`, :py:obj:`~.cuKernelSetCacheConfig`, :py:obj:`~.cuKernelGetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUfunction cyf
-    if f is None:
-        cyf = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(f, (CUfunction,)):
-        pf = int(f)
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    else:
-        pf = int(CUfunction(f))
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    cykernelParams = utils.HelperKernelParams(kernelParams)
-    err = cydriver.cuLaunchCooperativeKernel(cyf, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, cyhStream, <void**><void_ptr>cykernelParams.ckernelParams)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuLaunchCooperativeKernelMultiDevice' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLaunchCooperativeKernelMultiDevice(launchParamsList : Optional[Tuple[CUDA_LAUNCH_PARAMS] | List[CUDA_LAUNCH_PARAMS]], unsigned int numDevices, unsigned int flags):
-    """ Launches CUDA functions on multiple devices where thread blocks can cooperate and synchronize as they execute.
-
-    [Deprecated]
-
-    Invokes kernels as specified in the `launchParamsList` array where each
-    element of the array specifies all the parameters required to perform a
-    single kernel launch. These kernels can cooperate and synchronize as
-    they execute. The size of the array is specified by `numDevices`.
-
-    No two kernels can be launched on the same device. All the devices
-    targeted by this multi-device launch must be identical. All devices
-    must have a non-zero value for the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH`.
-
-    All kernels launched must be identical with respect to the compiled
-    code. Note that any device, constant or managed variables present in
-    the module that owns the kernel launched on each device, are
-    independently instantiated on every device. It is the application's
-    responsibility to ensure these variables are initialized and used
-    appropriately.
-
-    The size of the grids as specified in blocks, the size of the blocks
-    themselves and the amount of shared memory used by each thread block
-    must also match across all launched kernels.
-
-    The streams used to launch these kernels must have been created via
-    either :py:obj:`~.cuStreamCreate` or
-    :py:obj:`~.cuStreamCreateWithPriority`. The NULL stream or
-    :py:obj:`~.CU_STREAM_LEGACY` or :py:obj:`~.CU_STREAM_PER_THREAD` cannot
-    be used.
-
-    The total number of blocks launched per kernel cannot exceed the
-    maximum number of blocks per multiprocessor as returned by
-    :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessor` (or
-    :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`) times
-    the number of multiprocessors as specified by the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT`. Since the total
-    number of blocks launched per device has to match across all devices,
-    the maximum number of blocks that can be launched per device will be
-    limited by the device with the least number of multiprocessors.
-
-    The kernels cannot make use of CUDA dynamic parallelism.
-
-    The :py:obj:`~.CUDA_LAUNCH_PARAMS` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.CUDA_LAUNCH_PARAMS.function` specifies the kernel to be
-      launched. All functions must be identical with respect to the
-      compiled code. Note that you can also specify context-less kernel
-      :py:obj:`~.CUkernel` by querying the handle using
-      :py:obj:`~.cuLibraryGetKernel()` and then casting to
-      :py:obj:`~.CUfunction`. In this case, the context to launch the
-      kernel on be taken from the specified stream
-      :py:obj:`~.CUDA_LAUNCH_PARAMS.hStream`.
-
-    - :py:obj:`~.CUDA_LAUNCH_PARAMS.gridDimX` is the width of the grid in
-      blocks. This must match across all kernels launched.
-
-    - :py:obj:`~.CUDA_LAUNCH_PARAMS.gridDimY` is the height of the grid in
-      blocks. This must match across all kernels launched.
-
-    - :py:obj:`~.CUDA_LAUNCH_PARAMS.gridDimZ` is the depth of the grid in
-      blocks. This must match across all kernels launched.
-
-    - :py:obj:`~.CUDA_LAUNCH_PARAMS.blockDimX` is the X dimension of each
-      thread block. This must match across all kernels launched.
-
-    - :py:obj:`~.CUDA_LAUNCH_PARAMS.blockDimX` is the Y dimension of each
-      thread block. This must match across all kernels launched.
-
-    - :py:obj:`~.CUDA_LAUNCH_PARAMS.blockDimZ` is the Z dimension of each
-      thread block. This must match across all kernels launched.
-
-    - :py:obj:`~.CUDA_LAUNCH_PARAMS.sharedMemBytes` is the dynamic shared-
-      memory size per thread block in bytes. This must match across all
-      kernels launched.
-
-    - :py:obj:`~.CUDA_LAUNCH_PARAMS.hStream` is the handle to the stream to
-      perform the launch in. This cannot be the NULL stream or
-      :py:obj:`~.CU_STREAM_LEGACY` or :py:obj:`~.CU_STREAM_PER_THREAD`. The
-      CUDA context associated with this stream must match that associated
-      with :py:obj:`~.CUDA_LAUNCH_PARAMS.function`.
-
-    - :py:obj:`~.CUDA_LAUNCH_PARAMS.kernelParams` is an array of pointers
-      to kernel parameters. If :py:obj:`~.CUDA_LAUNCH_PARAMS.function` has
-      N parameters, then :py:obj:`~.CUDA_LAUNCH_PARAMS.kernelParams` needs
-      to be an array of N pointers. Each of
-      :py:obj:`~.CUDA_LAUNCH_PARAMS.kernelParams`[0] through
-      :py:obj:`~.CUDA_LAUNCH_PARAMS.kernelParams`[N-1] must point to a
-      region of memory from which the actual kernel parameter will be
-      copied. The number of kernel parameters and their offsets and sizes
-      do not need to be specified as that information is retrieved directly
-      from the kernel's image.
-
-    By default, the kernel won't begin execution on any GPU until all prior
-    work in all the specified streams has completed. This behavior can be
-    overridden by specifying the flag
-    :py:obj:`~.CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC`.
-    When this flag is specified, each kernel will only wait for prior work
-    in the stream corresponding to that GPU to complete before it begins
-    execution.
-
-    Similarly, by default, any subsequent work pushed in any of the
-    specified streams will not begin execution until the kernels on all
-    GPUs have completed. This behavior can be overridden by specifying the
-    flag
-    :py:obj:`~.CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC`.
-    When this flag is specified, any subsequent work pushed in any of the
-    specified streams will only wait for the kernel launched on the GPU
-    corresponding to that stream to complete before it begins execution.
-
-    Calling :py:obj:`~.cuLaunchCooperativeKernelMultiDevice()` sets
-    persistent function state that is the same as function state set
-    through :py:obj:`~.cuLaunchKernel` API when called individually for
-    each element in `launchParamsList`.
-
-    When kernels are launched via
-    :py:obj:`~.cuLaunchCooperativeKernelMultiDevice()`, the previous block
-    shape, shared size and parameter info associated with each
-    :py:obj:`~.CUDA_LAUNCH_PARAMS.function` in `launchParamsList` is
-    overwritten.
-
-    Note that to use :py:obj:`~.cuLaunchCooperativeKernelMultiDevice()`,
-    the kernels must either have been compiled with toolchain version 3.2
-    or later so that it will contain kernel parameter information, or have
-    no kernel parameters. If either of these conditions is not met, then
-    :py:obj:`~.cuLaunchCooperativeKernelMultiDevice()` will return
-    :py:obj:`~.CUDA_ERROR_INVALID_IMAGE`.
-
-    Parameters
-    ----------
-    launchParamsList : List[:py:obj:`~.CUDA_LAUNCH_PARAMS`]
-        List of launch parameters, one per device
-    numDevices : unsigned int
-        Size of the `launchParamsList` array
-    flags : unsigned int
-        Flags to control launch behavior
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_IMAGE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_LAUNCH_FAILED`, :py:obj:`~.CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES`, :py:obj:`~.CUDA_ERROR_LAUNCH_TIMEOUT`, :py:obj:`~.CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING`, :py:obj:`~.CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuLaunchCooperativeKernel`, :py:obj:`~.cudaLaunchCooperativeKernelMultiDevice`
-    """
-    launchParamsList = [] if launchParamsList is None else launchParamsList
-    if not all(isinstance(_x, (CUDA_LAUNCH_PARAMS,)) for _x in launchParamsList):
-        raise TypeError("Argument 'launchParamsList' is not instance of type (expected Tuple[cydriver.CUDA_LAUNCH_PARAMS,] or List[cydriver.CUDA_LAUNCH_PARAMS,]")
-    cdef cydriver.CUDA_LAUNCH_PARAMS* cylaunchParamsList = NULL
-    if len(launchParamsList) > 0:
-        cylaunchParamsList = <cydriver.CUDA_LAUNCH_PARAMS*> calloc(len(launchParamsList), sizeof(cydriver.CUDA_LAUNCH_PARAMS))
-        if cylaunchParamsList is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(launchParamsList)) + 'x' + str(sizeof(cydriver.CUDA_LAUNCH_PARAMS)))
-        for idx in range(len(launchParamsList)):
-            string.memcpy(&cylaunchParamsList[idx], (<CUDA_LAUNCH_PARAMS>launchParamsList[idx])._ptr, sizeof(cydriver.CUDA_LAUNCH_PARAMS))
-    if numDevices > len(launchParamsList): raise RuntimeError("List is too small: " + str(len(launchParamsList)) + " < " + str(numDevices))
-    err = cydriver.cuLaunchCooperativeKernelMultiDevice((<CUDA_LAUNCH_PARAMS>launchParamsList[0])._ptr if len(launchParamsList) == 1 else cylaunchParamsList, numDevices, flags)
-    if cylaunchParamsList is not NULL:
-        free(cylaunchParamsList)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuLaunchHostFunc' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLaunchHostFunc(hStream, fn, userData):
-    """ Enqueues a host function call in a stream.
-
-    Enqueues a host function to run in a stream. The function will be
-    called after currently enqueued work and will block work added after
-    it.
-
-    The host function must not make any CUDA API calls. Attempting to use a
-    CUDA API may result in :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, but this
-    is not required. The host function must not perform any synchronization
-    that may depend on outstanding CUDA work not mandated to run earlier.
-    Host functions without a mandated order (such as in independent
-    streams) execute in undefined order and may be serialized.
-
-    For the purposes of Unified Memory, execution makes a number of
-    guarantees:
-
-    - The stream is considered idle for the duration of the function's
-      execution. Thus, for example, the function may always use memory
-      attached to the stream it was enqueued in.
-
-    - The start of execution of the function has the same effect as
-      synchronizing an event recorded in the same stream immediately prior
-      to the function. It thus synchronizes streams which have been
-      "joined" prior to the function.
-
-    - Adding device work to any stream does not have the effect of making
-      the stream active until all preceding host functions and stream
-      callbacks have executed. Thus, for example, a function might use
-      global attached memory even if work has been added to another stream,
-      if the work has been ordered behind the function call with an event.
-
-    - Completion of the function does not cause a stream to become active
-      except as described above. The stream will remain idle if no device
-      work follows the function, and will remain idle across consecutive
-      host functions or stream callbacks without device work in between.
-      Thus, for example, stream synchronization can be done by signaling
-      from a host function at the end of the stream.
-
-    Note that, in contrast to :py:obj:`~.cuStreamAddCallback`, the function
-    will not be called in the event of an error in the CUDA context.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to enqueue function call in
-    fn : :py:obj:`~.CUhostFn`
-        The function to call once preceding stream operations are complete
-    userData : Any
-        User-specified data to be passed to the function
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuMemAllocManaged`, :py:obj:`~.cuStreamAttachMemAsync`, :py:obj:`~.cuStreamAddCallback`
-    """
-    cdef cydriver.CUhostFn cyfn
-    if fn is None:
-        cyfn = <cydriver.CUhostFn><void_ptr>0
-    elif isinstance(fn, (CUhostFn,)):
-        pfn = int(fn)
-        cyfn = <cydriver.CUhostFn><void_ptr>pfn
-    else:
-        pfn = int(CUhostFn(fn))
-        cyfn = <cydriver.CUhostFn><void_ptr>pfn
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cyuserData = utils.HelperInputVoidPtr(userData)
-    cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
-    err = cydriver.cuLaunchHostFunc(cyhStream, cyfn, cyuserData_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuFuncSetBlockShape' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFuncSetBlockShape(hfunc, int x, int y, int z):
-    """ Sets the block-dimensions for the function.
-
-    [Deprecated]
-
-    Specifies the `x`, `y`, and `z` dimensions of the thread blocks that
-    are created when the kernel given by `hfunc` is launched.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        Kernel to specify dimensions of
-    x : int
-        X dimension
-    y : int
-        Y dimension
-    z : int
-        Z dimension
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuFuncSetSharedSize`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuParamSetSize`, :py:obj:`~.cuParamSeti`, :py:obj:`~.cuParamSetf`, :py:obj:`~.cuParamSetv`, :py:obj:`~.cuLaunch`, :py:obj:`~.cuLaunchGrid`, :py:obj:`~.cuLaunchGridAsync`, :py:obj:`~.cuLaunchKernel`
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    err = cydriver.cuFuncSetBlockShape(cyhfunc, x, y, z)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuFuncSetSharedSize' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFuncSetSharedSize(hfunc, unsigned int numbytes):
-    """ Sets the dynamic shared-memory size for the function.
-
-    [Deprecated]
-
-    Sets through `numbytes` the amount of dynamic shared memory that will
-    be available to each thread block when the kernel given by `hfunc` is
-    launched.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        Kernel to specify dynamic shared-memory size for
-    numbytes : unsigned int
-        Dynamic shared-memory size per thread in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuFuncSetBlockShape`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuParamSetSize`, :py:obj:`~.cuParamSeti`, :py:obj:`~.cuParamSetf`, :py:obj:`~.cuParamSetv`, :py:obj:`~.cuLaunch`, :py:obj:`~.cuLaunchGrid`, :py:obj:`~.cuLaunchGridAsync`, :py:obj:`~.cuLaunchKernel`
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    err = cydriver.cuFuncSetSharedSize(cyhfunc, numbytes)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuParamSetSize' in found_functions}}
-
-@cython.embedsignature(True)
-def cuParamSetSize(hfunc, unsigned int numbytes):
-    """ Sets the parameter size for the function.
-
-    [Deprecated]
-
-    Sets through `numbytes` the total size in bytes needed by the function
-    parameters of the kernel corresponding to `hfunc`.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        Kernel to set parameter size for
-    numbytes : unsigned int
-        Size of parameter list in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuFuncSetBlockShape`, :py:obj:`~.cuFuncSetSharedSize`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuParamSetf`, :py:obj:`~.cuParamSeti`, :py:obj:`~.cuParamSetv`, :py:obj:`~.cuLaunch`, :py:obj:`~.cuLaunchGrid`, :py:obj:`~.cuLaunchGridAsync`, :py:obj:`~.cuLaunchKernel`
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    err = cydriver.cuParamSetSize(cyhfunc, numbytes)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuParamSeti' in found_functions}}
-
-@cython.embedsignature(True)
-def cuParamSeti(hfunc, int offset, unsigned int value):
-    """ Adds an integer parameter to the function's argument list.
-
-    [Deprecated]
-
-    Sets an integer parameter that will be specified the next time the
-    kernel corresponding to `hfunc` will be invoked. `offset` is a byte
-    offset.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        Kernel to add parameter to
-    offset : int
-        Offset to add parameter to argument list
-    value : unsigned int
-        Value of parameter
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuFuncSetBlockShape`, :py:obj:`~.cuFuncSetSharedSize`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuParamSetSize`, :py:obj:`~.cuParamSetf`, :py:obj:`~.cuParamSetv`, :py:obj:`~.cuLaunch`, :py:obj:`~.cuLaunchGrid`, :py:obj:`~.cuLaunchGridAsync`, :py:obj:`~.cuLaunchKernel`
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    err = cydriver.cuParamSeti(cyhfunc, offset, value)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuParamSetf' in found_functions}}
-
-@cython.embedsignature(True)
-def cuParamSetf(hfunc, int offset, float value):
-    """ Adds a floating-point parameter to the function's argument list.
-
-    [Deprecated]
-
-    Sets a floating-point parameter that will be specified the next time
-    the kernel corresponding to `hfunc` will be invoked. `offset` is a byte
-    offset.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        Kernel to add parameter to
-    offset : int
-        Offset to add parameter to argument list
-    value : float
-        Value of parameter
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuFuncSetBlockShape`, :py:obj:`~.cuFuncSetSharedSize`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuParamSetSize`, :py:obj:`~.cuParamSeti`, :py:obj:`~.cuParamSetv`, :py:obj:`~.cuLaunch`, :py:obj:`~.cuLaunchGrid`, :py:obj:`~.cuLaunchGridAsync`, :py:obj:`~.cuLaunchKernel`
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    err = cydriver.cuParamSetf(cyhfunc, offset, value)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuParamSetv' in found_functions}}
-
-@cython.embedsignature(True)
-def cuParamSetv(hfunc, int offset, ptr, unsigned int numbytes):
-    """ Adds arbitrary data to the function's argument list.
-
-    [Deprecated]
-
-    Copies an arbitrary amount of data (specified in `numbytes`) from `ptr`
-    into the parameter space of the kernel corresponding to `hfunc`.
-    `offset` is a byte offset.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        Kernel to add data to
-    offset : int
-        Offset to add data to argument list
-    ptr : Any
-        Pointer to arbitrary data
-    numbytes : unsigned int
-        Size of data to copy in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuFuncSetBlockShape`, :py:obj:`~.cuFuncSetSharedSize`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuParamSetSize`, :py:obj:`~.cuParamSetf`, :py:obj:`~.cuParamSeti`, :py:obj:`~.cuLaunch`, :py:obj:`~.cuLaunchGrid`, :py:obj:`~.cuLaunchGridAsync`, :py:obj:`~.cuLaunchKernel`
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    cyptr = utils.HelperInputVoidPtr(ptr)
-    cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
-    err = cydriver.cuParamSetv(cyhfunc, offset, cyptr_ptr, numbytes)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuLaunch' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLaunch(f):
-    """ Launches a CUDA function.
-
-    [Deprecated]
-
-    Invokes the kernel `f` on a 1 x 1 x 1 grid of blocks. The block
-    contains the number of threads specified by a previous call to
-    :py:obj:`~.cuFuncSetBlockShape()`.
-
-    The block shape, dynamic shared memory size, and parameter information
-    must be set using :py:obj:`~.cuFuncSetBlockShape()`,
-    :py:obj:`~.cuFuncSetSharedSize()`, :py:obj:`~.cuParamSetSize()`,
-    :py:obj:`~.cuParamSeti()`, :py:obj:`~.cuParamSetf()`, and
-    :py:obj:`~.cuParamSetv()` prior to calling this function.
-
-    Launching a function via :py:obj:`~.cuLaunchKernel()` invalidates the
-    function's block shape, dynamic shared memory size, and parameter
-    information. After launching via cuLaunchKernel, this state must be re-
-    initialized prior to calling this function. Failure to do so results in
-    undefined behavior.
-
-    Parameters
-    ----------
-    f : :py:obj:`~.CUfunction`
-        Kernel to launch
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_LAUNCH_FAILED`, :py:obj:`~.CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES`, :py:obj:`~.CUDA_ERROR_LAUNCH_TIMEOUT`, :py:obj:`~.CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`
-
-    See Also
-    --------
-    :py:obj:`~.cuFuncSetBlockShape`, :py:obj:`~.cuFuncSetSharedSize`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuParamSetSize`, :py:obj:`~.cuParamSetf`, :py:obj:`~.cuParamSeti`, :py:obj:`~.cuParamSetv`, :py:obj:`~.cuLaunchGrid`, :py:obj:`~.cuLaunchGridAsync`, :py:obj:`~.cuLaunchKernel`
-    """
-    cdef cydriver.CUfunction cyf
-    if f is None:
-        cyf = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(f, (CUfunction,)):
-        pf = int(f)
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    else:
-        pf = int(CUfunction(f))
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    err = cydriver.cuLaunch(cyf)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuLaunchGrid' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLaunchGrid(f, int grid_width, int grid_height):
-    """ Launches a CUDA function.
-
-    [Deprecated]
-
-    Invokes the kernel `f` on a `grid_width` x `grid_height` grid of
-    blocks. Each block contains the number of threads specified by a
-    previous call to :py:obj:`~.cuFuncSetBlockShape()`.
-
-    The block shape, dynamic shared memory size, and parameter information
-    must be set using :py:obj:`~.cuFuncSetBlockShape()`,
-    :py:obj:`~.cuFuncSetSharedSize()`, :py:obj:`~.cuParamSetSize()`,
-    :py:obj:`~.cuParamSeti()`, :py:obj:`~.cuParamSetf()`, and
-    :py:obj:`~.cuParamSetv()` prior to calling this function.
-
-    Launching a function via :py:obj:`~.cuLaunchKernel()` invalidates the
-    function's block shape, dynamic shared memory size, and parameter
-    information. After launching via cuLaunchKernel, this state must be re-
-    initialized prior to calling this function. Failure to do so results in
-    undefined behavior.
-
-    Parameters
-    ----------
-    f : :py:obj:`~.CUfunction`
-        Kernel to launch
-    grid_width : int
-        Width of grid in blocks
-    grid_height : int
-        Height of grid in blocks
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_LAUNCH_FAILED`, :py:obj:`~.CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES`, :py:obj:`~.CUDA_ERROR_LAUNCH_TIMEOUT`, :py:obj:`~.CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`
-
-    See Also
-    --------
-    :py:obj:`~.cuFuncSetBlockShape`, :py:obj:`~.cuFuncSetSharedSize`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuParamSetSize`, :py:obj:`~.cuParamSetf`, :py:obj:`~.cuParamSeti`, :py:obj:`~.cuParamSetv`, :py:obj:`~.cuLaunch`, :py:obj:`~.cuLaunchGridAsync`, :py:obj:`~.cuLaunchKernel`
-    """
-    cdef cydriver.CUfunction cyf
-    if f is None:
-        cyf = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(f, (CUfunction,)):
-        pf = int(f)
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    else:
-        pf = int(CUfunction(f))
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    err = cydriver.cuLaunchGrid(cyf, grid_width, grid_height)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuLaunchGridAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cuLaunchGridAsync(f, int grid_width, int grid_height, hStream):
-    """ Launches a CUDA function.
-
-    [Deprecated]
-
-    Invokes the kernel `f` on a `grid_width` x `grid_height` grid of
-    blocks. Each block contains the number of threads specified by a
-    previous call to :py:obj:`~.cuFuncSetBlockShape()`.
-
-    The block shape, dynamic shared memory size, and parameter information
-    must be set using :py:obj:`~.cuFuncSetBlockShape()`,
-    :py:obj:`~.cuFuncSetSharedSize()`, :py:obj:`~.cuParamSetSize()`,
-    :py:obj:`~.cuParamSeti()`, :py:obj:`~.cuParamSetf()`, and
-    :py:obj:`~.cuParamSetv()` prior to calling this function.
-
-    Launching a function via :py:obj:`~.cuLaunchKernel()` invalidates the
-    function's block shape, dynamic shared memory size, and parameter
-    information. After launching via cuLaunchKernel, this state must be re-
-    initialized prior to calling this function. Failure to do so results in
-    undefined behavior.
-
-    \note_null_stream
-
-    Parameters
-    ----------
-    f : :py:obj:`~.CUfunction`
-        Kernel to launch
-    grid_width : int
-        Width of grid in blocks
-    grid_height : int
-        Height of grid in blocks
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_LAUNCH_FAILED`, :py:obj:`~.CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES`, :py:obj:`~.CUDA_ERROR_LAUNCH_TIMEOUT`, :py:obj:`~.CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING`, :py:obj:`~.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`
-
-    See Also
-    --------
-    :py:obj:`~.cuFuncSetBlockShape`, :py:obj:`~.cuFuncSetSharedSize`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuParamSetSize`, :py:obj:`~.cuParamSetf`, :py:obj:`~.cuParamSeti`, :py:obj:`~.cuParamSetv`, :py:obj:`~.cuLaunch`, :py:obj:`~.cuLaunchGrid`, :py:obj:`~.cuLaunchKernel`
-
-    Notes
-    -----
-    In certain cases where cubins are created with no ABI (i.e., using `ptxas` `None` `no`), this function may serialize kernel launches. The CUDA driver retains asynchronous behavior by growing the per-thread stack as needed per launch and not shrinking it afterwards.
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUfunction cyf
-    if f is None:
-        cyf = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(f, (CUfunction,)):
-        pf = int(f)
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    else:
-        pf = int(CUfunction(f))
-        cyf = <cydriver.CUfunction><void_ptr>pf
-    err = cydriver.cuLaunchGridAsync(cyf, grid_width, grid_height, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuParamSetTexRef' in found_functions}}
-
-@cython.embedsignature(True)
-def cuParamSetTexRef(hfunc, int texunit, hTexRef):
-    """ Adds a texture-reference to the function's argument list.
-
-    [Deprecated]
-
-    Makes the CUDA array or linear memory bound to the texture reference
-    `hTexRef` available to a device program as a texture. In this version
-    of CUDA, the texture-reference must be obtained via
-    :py:obj:`~.cuModuleGetTexRef()` and the `texunit` parameter must be set
-    to :py:obj:`~.CU_PARAM_TR_DEFAULT`.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        Kernel to add texture-reference to
-    texunit : int
-        Texture unit (must be :py:obj:`~.CU_PARAM_TR_DEFAULT`)
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture-reference to add to argument list
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    err = cydriver.cuParamSetTexRef(cyhfunc, texunit, cyhTexRef)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuFuncSetSharedMemConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cuFuncSetSharedMemConfig(hfunc, config not None : CUsharedconfig):
-    """ Sets the shared memory configuration for a device function.
-
-    [Deprecated]
-
-    On devices with configurable shared memory banks, this function will
-    force all subsequent launches of the specified device function to have
-    the given shared memory bank size configuration. On any given launch of
-    the function, the shared memory configuration of the device will be
-    temporarily changed if needed to suit the function's preferred
-    configuration. Changes in shared memory configuration between
-    subsequent launches of functions, may introduce a device side
-    synchronization point.
-
-    Any per-function setting of shared memory bank size set via
-    :py:obj:`~.cuFuncSetSharedMemConfig` will override the context wide
-    setting set with :py:obj:`~.cuCtxSetSharedMemConfig`.
-
-    Changing the shared memory bank size will not increase shared memory
-    usage or affect occupancy of kernels, but may have major effects on
-    performance. Larger bank sizes will allow for greater potential
-    bandwidth to shared memory, but will change what kinds of accesses to
-    shared memory will result in bank conflicts.
-
-    This function will do nothing on devices with fixed shared memory bank
-    size.
-
-    The supported bank configurations are:
-
-    - :py:obj:`~.CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE`: use the context's
-      shared memory configuration when launching this function.
-
-    - :py:obj:`~.CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE`: set shared
-      memory bank width to be natively four bytes when launching this
-      function.
-
-    - :py:obj:`~.CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE`: set shared
-      memory bank width to be natively eight bytes when launching this
-      function.
-
-    Parameters
-    ----------
-    hfunc : :py:obj:`~.CUfunction`
-        kernel to be given a shared memory config
-    config : :py:obj:`~.CUsharedconfig`
-        requested shared memory configuration
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxGetSharedMemConfig`, :py:obj:`~.cuCtxSetSharedMemConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cudaFuncSetSharedMemConfig`
-    """
-    cdef cydriver.CUfunction cyhfunc
-    if hfunc is None:
-        cyhfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(hfunc, (CUfunction,)):
-        phfunc = int(hfunc)
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    else:
-        phfunc = int(CUfunction(hfunc))
-        cyhfunc = <cydriver.CUfunction><void_ptr>phfunc
-    cdef cydriver.CUsharedconfig cyconfig = config.value
-    err = cydriver.cuFuncSetSharedMemConfig(cyhfunc, cyconfig)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphCreate(unsigned int flags):
-    """ Creates a graph.
-
-    Creates an empty graph, which is returned via `phGraph`.
-
-    Parameters
-    ----------
-    flags : unsigned int
-        Graph creation flags, must be 0
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    phGraph : :py:obj:`~.CUgraph`
-        Returns newly created graph
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`, :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphDestroy`, :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphClone`
-    """
-    cdef CUgraph phGraph = CUgraph()
-    err = cydriver.cuGraphCreate(<cydriver.CUgraph*>phGraph._ptr, flags)
-    return (CUresult(err), phGraph)
-{{endif}}
-
-{{if 'cuGraphAddKernelNode_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddKernelNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, nodeParams : Optional[CUDA_KERNEL_NODE_PARAMS]):
-    """ Creates a kernel execution node and adds it to a graph.
-
-    Creates a new kernel execution node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies` and
-    arguments specified in `nodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `dependencies` may not have any duplicate entries. A
-    handle to the new node will be returned in `phGraphNode`.
-
-    The CUDA_KERNEL_NODE_PARAMS structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    When the graph is launched, the node will invoke kernel `func` on a
-    (`gridDimX` x `gridDimY` x `gridDimZ`) grid of blocks. Each block
-    contains (`blockDimX` x `blockDimY` x `blockDimZ`) threads.
-
-    `sharedMemBytes` sets the amount of dynamic shared memory that will be
-    available to each thread block.
-
-    Kernel parameters to `func` can be specified in one of two ways:
-
-    1) Kernel parameters can be specified via `kernelParams`. If the kernel
-    has N parameters, then `kernelParams` needs to be an array of N
-    pointers. Each pointer, from `kernelParams`[0] to `kernelParams`[N-1],
-    points to the region of memory from which the actual parameter will be
-    copied. The number of kernel parameters and their offsets and sizes do
-    not need to be specified as that information is retrieved directly from
-    the kernel's image.
-
-    2) Kernel parameters for non-cooperative kernels can also be packaged
-    by the application into a single buffer that is passed in via `extra`.
-    This places the burden on the application of knowing each kernel
-    parameter's size and alignment/padding within the buffer. The `extra`
-    parameter exists to allow this function to take additional less
-    commonly used arguments. `extra` specifies a list of names of extra
-    settings and their corresponding values. Each extra setting name is
-    immediately followed by the corresponding value. The list must be
-    terminated with either NULL or CU_LAUNCH_PARAM_END.
-
-    - :py:obj:`~.CU_LAUNCH_PARAM_END`, which indicates the end of the
-      `extra` array;
-
-    - :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`, which specifies that the
-      next value in `extra` will be a pointer to a buffer containing all
-      the kernel parameters for launching kernel `func`;
-
-    - :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_SIZE`, which specifies that the
-      next value in `extra` will be a pointer to a size_t containing the
-      size of the buffer specified with
-      :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`;
-
-    The error :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be returned if
-    kernel parameters are specified with both `kernelParams` and `extra`
-    (i.e. both `kernelParams` and `extra` are non-NULL).
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be returned if `extra` is
-    used for a cooperative kernel.
-
-    The `kernelParams` or `extra` array, as well as the argument values it
-    points to, are copied during this call.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.CUDA_KERNEL_NODE_PARAMS`
-        Parameters for the GPU execution node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cuLaunchCooperativeKernel`, :py:obj:`~.cuGraphKernelNodeGetParams`, :py:obj:`~.cuGraphKernelNodeSetParams`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`
-
-    Notes
-    -----
-    Kernels launched using graphs must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
-    """
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphAddKernelNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphKernelNodeGetParams_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphKernelNodeGetParams(hNode):
-    """ Returns a kernel node's parameters.
-
-    Returns the parameters of kernel node `hNode` in `nodeParams`. The
-    `kernelParams` or `extra` array returned in `nodeParams`, as well as
-    the argument values it points to, are owned by the node. This memory
-    remains valid until the node is destroyed or its parameters are
-    modified, and should not be modified directly. Use
-    :py:obj:`~.cuGraphKernelNodeSetParams` to update the parameters of this
-    node.
-
-    The params will contain either `kernelParams` or `extra`, according to
-    which of these was most recently set on the node.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    nodeParams : :py:obj:`~.CUDA_KERNEL_NODE_PARAMS`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphKernelNodeSetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUDA_KERNEL_NODE_PARAMS nodeParams = CUDA_KERNEL_NODE_PARAMS()
-    err = cydriver.cuGraphKernelNodeGetParams(cyhNode, <cydriver.CUDA_KERNEL_NODE_PARAMS*>nodeParams._ptr)
-    return (CUresult(err), nodeParams)
-{{endif}}
-
-{{if 'cuGraphKernelNodeSetParams_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphKernelNodeSetParams(hNode, nodeParams : Optional[CUDA_KERNEL_NODE_PARAMS]):
-    """ Sets a kernel node's parameters.
-
-    Sets the parameters of kernel node `hNode` to `nodeParams`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    nodeParams : :py:obj:`~.CUDA_KERNEL_NODE_PARAMS`
-        Parameters to copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphKernelNodeGetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphKernelNodeSetParams(cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphAddMemcpyNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddMemcpyNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, copyParams : Optional[CUDA_MEMCPY3D], ctx):
-    """ Creates a memcpy node and adds it to a graph.
-
-    Creates a new memcpy node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies`. It is
-    possible for `numDependencies` to be 0, in which case the node will be
-    placed at the root of the graph. `dependencies` may not have any
-    duplicate entries. A handle to the new node will be returned in
-    `phGraphNode`.
-
-    When the graph is launched, the node will perform the memcpy described
-    by `copyParams`. See :py:obj:`~.cuMemcpy3D()` for a description of the
-    structure and its restrictions.
-
-    Memcpy nodes have some additional restrictions with regards to managed
-    memory, if the system contains at least one device which has a zero
-    value for the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`. If one or
-    more of the operands refer to managed memory, then using the memory
-    type :py:obj:`~.CU_MEMORYTYPE_UNIFIED` is disallowed for those
-    operand(s). The managed memory will be treated as residing on either
-    the host or the device, depending on which memory type is specified.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    copyParams : :py:obj:`~.CUDA_MEMCPY3D`
-        Parameters for the memory copy
-    ctx : :py:obj:`~.CUcontext`
-        Context on which to run the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuGraphMemcpyNodeGetParams`, :py:obj:`~.cuGraphMemcpyNodeSetParams`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemsetNode`
-    """
-    cdef cydriver.CUcontext cyctx
-    if ctx is None:
-        cyctx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(ctx, (CUcontext,)):
-        pctx = int(ctx)
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    else:
-        pctx = int(CUcontext(ctx))
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    cdef cydriver.CUDA_MEMCPY3D* cycopyParams_ptr = copyParams._ptr if copyParams != None else NULL
-    err = cydriver.cuGraphAddMemcpyNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cycopyParams_ptr, cyctx)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphMemcpyNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphMemcpyNodeGetParams(hNode):
-    """ Returns a memcpy node's parameters.
-
-    Returns the parameters of memcpy node `hNode` in `nodeParams`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    nodeParams : :py:obj:`~.CUDA_MEMCPY3D`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphMemcpyNodeSetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUDA_MEMCPY3D nodeParams = CUDA_MEMCPY3D()
-    err = cydriver.cuGraphMemcpyNodeGetParams(cyhNode, <cydriver.CUDA_MEMCPY3D*>nodeParams._ptr)
-    return (CUresult(err), nodeParams)
-{{endif}}
-
-{{if 'cuGraphMemcpyNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphMemcpyNodeSetParams(hNode, nodeParams : Optional[CUDA_MEMCPY3D]):
-    """ Sets a memcpy node's parameters.
-
-    Sets the parameters of memcpy node `hNode` to `nodeParams`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    nodeParams : :py:obj:`~.CUDA_MEMCPY3D`
-        Parameters to copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphMemcpyNodeGetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUDA_MEMCPY3D* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphMemcpyNodeSetParams(cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphAddMemsetNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddMemsetNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, memsetParams : Optional[CUDA_MEMSET_NODE_PARAMS], ctx):
-    """ Creates a memset node and adds it to a graph.
-
-    Creates a new memset node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies`. It is
-    possible for `numDependencies` to be 0, in which case the node will be
-    placed at the root of the graph. `dependencies` may not have any
-    duplicate entries. A handle to the new node will be returned in
-    `phGraphNode`.
-
-    The element size must be 1, 2, or 4 bytes. When the graph is launched,
-    the node will perform the memset described by `memsetParams`.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    memsetParams : :py:obj:`~.CUDA_MEMSET_NODE_PARAMS`
-        Parameters for the memory set
-    ctx : :py:obj:`~.CUcontext`
-        Context on which to run the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuGraphMemsetNodeGetParams`, :py:obj:`~.cuGraphMemsetNodeSetParams`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode`
-    """
-    cdef cydriver.CUcontext cyctx
-    if ctx is None:
-        cyctx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(ctx, (CUcontext,)):
-        pctx = int(ctx)
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    else:
-        pctx = int(CUcontext(ctx))
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cymemsetParams_ptr = memsetParams._ptr if memsetParams != None else NULL
-    err = cydriver.cuGraphAddMemsetNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cymemsetParams_ptr, cyctx)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphMemsetNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphMemsetNodeGetParams(hNode):
-    """ Returns a memset node's parameters.
-
-    Returns the parameters of memset node `hNode` in `nodeParams`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    nodeParams : :py:obj:`~.CUDA_MEMSET_NODE_PARAMS`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuGraphAddMemsetNode`, :py:obj:`~.cuGraphMemsetNodeSetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUDA_MEMSET_NODE_PARAMS nodeParams = CUDA_MEMSET_NODE_PARAMS()
-    err = cydriver.cuGraphMemsetNodeGetParams(cyhNode, <cydriver.CUDA_MEMSET_NODE_PARAMS*>nodeParams._ptr)
-    return (CUresult(err), nodeParams)
-{{endif}}
-
-{{if 'cuGraphMemsetNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphMemsetNodeSetParams(hNode, nodeParams : Optional[CUDA_MEMSET_NODE_PARAMS]):
-    """ Sets a memset node's parameters.
-
-    Sets the parameters of memset node `hNode` to `nodeParams`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    nodeParams : :py:obj:`~.CUDA_MEMSET_NODE_PARAMS`
-        Parameters to copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuGraphAddMemsetNode`, :py:obj:`~.cuGraphMemsetNodeGetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphMemsetNodeSetParams(cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphAddHostNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddHostNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, nodeParams : Optional[CUDA_HOST_NODE_PARAMS]):
-    """ Creates a host execution node and adds it to a graph.
-
-    Creates a new CPU execution node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies` and
-    arguments specified in `nodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `dependencies` may not have any duplicate entries. A
-    handle to the new node will be returned in `phGraphNode`.
-
-    When the graph is launched, the node will invoke the specified CPU
-    function. Host nodes are not supported under MPS with pre-Volta GPUs.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.CUDA_HOST_NODE_PARAMS`
-        Parameters for the host node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuLaunchHostFunc`, :py:obj:`~.cuGraphHostNodeGetParams`, :py:obj:`~.cuGraphHostNodeSetParams`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`
-    """
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphAddHostNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphHostNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphHostNodeGetParams(hNode):
-    """ Returns a host node's parameters.
-
-    Returns the parameters of host node `hNode` in `nodeParams`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    nodeParams : :py:obj:`~.CUDA_HOST_NODE_PARAMS`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cuLaunchHostFunc`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphHostNodeSetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUDA_HOST_NODE_PARAMS nodeParams = CUDA_HOST_NODE_PARAMS()
-    err = cydriver.cuGraphHostNodeGetParams(cyhNode, <cydriver.CUDA_HOST_NODE_PARAMS*>nodeParams._ptr)
-    return (CUresult(err), nodeParams)
-{{endif}}
-
-{{if 'cuGraphHostNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphHostNodeSetParams(hNode, nodeParams : Optional[CUDA_HOST_NODE_PARAMS]):
-    """ Sets a host node's parameters.
-
-    Sets the parameters of host node `hNode` to `nodeParams`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    nodeParams : :py:obj:`~.CUDA_HOST_NODE_PARAMS`
-        Parameters to copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuLaunchHostFunc`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphHostNodeGetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphHostNodeSetParams(cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphAddChildGraphNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddChildGraphNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, childGraph):
-    """ Creates a child graph node and adds it to a graph.
-
-    Creates a new node which executes an embedded graph, and adds it to
-    `hGraph` with `numDependencies` dependencies specified via
-    `dependencies`. It is possible for `numDependencies` to be 0, in which
-    case the node will be placed at the root of the graph. `dependencies`
-    may not have any duplicate entries. A handle to the new node will be
-    returned in `phGraphNode`.
-
-    If `hGraph` contains allocation or free nodes, this call will return an
-    error.
-
-    The node executes an embedded child graph. The child graph is cloned in
-    this call.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    childGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph to clone into this node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphChildGraphNodeGetGraph`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`, :py:obj:`~.cuGraphClone`
-    """
-    cdef cydriver.CUgraph cychildGraph
-    if childGraph is None:
-        cychildGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(childGraph, (CUgraph,)):
-        pchildGraph = int(childGraph)
-        cychildGraph = <cydriver.CUgraph><void_ptr>pchildGraph
-    else:
-        pchildGraph = int(CUgraph(childGraph))
-        cychildGraph = <cydriver.CUgraph><void_ptr>pchildGraph
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    err = cydriver.cuGraphAddChildGraphNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cychildGraph)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphChildGraphNodeGetGraph' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphChildGraphNodeGetGraph(hNode):
-    """ Gets a handle to the embedded graph of a child graph node.
-
-    Gets a handle to the embedded graph in a child graph node. This call
-    does not clone the graph. Changes to the graph will be reflected in the
-    node, and the node retains ownership of the graph.
-
-    Allocation and free nodes cannot be added to the returned graph.
-    Attempting to do so will return an error.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the embedded graph for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    phGraph : :py:obj:`~.CUgraph`
-        Location to store a handle to the graph
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphNodeFindInClone`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUgraph phGraph = CUgraph()
-    err = cydriver.cuGraphChildGraphNodeGetGraph(cyhNode, <cydriver.CUgraph*>phGraph._ptr)
-    return (CUresult(err), phGraph)
-{{endif}}
-
-{{if 'cuGraphAddEmptyNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddEmptyNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies):
-    """ Creates an empty node and adds it to a graph.
-
-    Creates a new node which performs no operation, and adds it to `hGraph`
-    with `numDependencies` dependencies specified via `dependencies`. It is
-    possible for `numDependencies` to be 0, in which case the node will be
-    placed at the root of the graph. `dependencies` may not have any
-    duplicate entries. A handle to the new node will be returned in
-    `phGraphNode`.
-
-    An empty node performs no operation during execution, but can be used
-    for transitive ordering. For example, a phased execution graph with 2
-    groups of n nodes with a barrier between them can be represented using
-    an empty node and 2*n dependency edges, rather than no empty node and
-    n^2 dependency edges.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`
-    """
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    err = cydriver.cuGraphAddEmptyNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphAddEventRecordNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddEventRecordNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, event):
-    """ Creates an event record node and adds it to a graph.
-
-    Creates a new event record node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies` and event
-    specified in `event`. It is possible for `numDependencies` to be 0, in
-    which case the node will be placed at the root of the graph.
-    `dependencies` may not have any duplicate entries. A handle to the new
-    node will be returned in `phGraphNode`.
-
-    Each launch of the graph will record `event` to capture execution of
-    the node's dependencies.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event for the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`
-    """
-    cdef cydriver.CUevent cyevent
-    if event is None:
-        cyevent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(event, (CUevent,)):
-        pevent = int(event)
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    else:
-        pevent = int(CUevent(event))
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    err = cydriver.cuGraphAddEventRecordNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cyevent)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphEventRecordNodeGetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphEventRecordNodeGetEvent(hNode):
-    """ Returns the event associated with an event record node.
-
-    Returns the event of event record node `hNode` in `event_out`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the event for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    event_out : :py:obj:`~.CUevent`
-        Pointer to return the event
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphEventRecordNodeSetEvent`, :py:obj:`~.cuGraphEventWaitNodeGetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUevent event_out = CUevent()
-    err = cydriver.cuGraphEventRecordNodeGetEvent(cyhNode, <cydriver.CUevent*>event_out._ptr)
-    return (CUresult(err), event_out)
-{{endif}}
-
-{{if 'cuGraphEventRecordNodeSetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphEventRecordNodeSetEvent(hNode, event):
-    """ Sets an event record node's event.
-
-    Sets the event of event record node `hNode` to `event`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the event for
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to use
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphEventRecordNodeGetEvent`, :py:obj:`~.cuGraphEventWaitNodeSetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`
-    """
-    cdef cydriver.CUevent cyevent
-    if event is None:
-        cyevent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(event, (CUevent,)):
-        pevent = int(event)
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    else:
-        pevent = int(CUevent(event))
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    err = cydriver.cuGraphEventRecordNodeSetEvent(cyhNode, cyevent)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphAddEventWaitNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddEventWaitNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, event):
-    """ Creates an event wait node and adds it to a graph.
-
-    Creates a new event wait node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies` and event
-    specified in `event`. It is possible for `numDependencies` to be 0, in
-    which case the node will be placed at the root of the graph.
-    `dependencies` may not have any duplicate entries. A handle to the new
-    node will be returned in `phGraphNode`.
-
-    The graph node will wait for all work captured in `event`. See
-    :py:obj:`~.cuEventRecord()` for details on what is captured by an
-    event. `event` may be from a different context or device than the
-    launch stream.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event for the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`
-    """
-    cdef cydriver.CUevent cyevent
-    if event is None:
-        cyevent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(event, (CUevent,)):
-        pevent = int(event)
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    else:
-        pevent = int(CUevent(event))
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    err = cydriver.cuGraphAddEventWaitNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cyevent)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphEventWaitNodeGetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphEventWaitNodeGetEvent(hNode):
-    """ Returns the event associated with an event wait node.
-
-    Returns the event of event wait node `hNode` in `event_out`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the event for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    event_out : :py:obj:`~.CUevent`
-        Pointer to return the event
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphEventWaitNodeSetEvent`, :py:obj:`~.cuGraphEventRecordNodeGetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUevent event_out = CUevent()
-    err = cydriver.cuGraphEventWaitNodeGetEvent(cyhNode, <cydriver.CUevent*>event_out._ptr)
-    return (CUresult(err), event_out)
-{{endif}}
-
-{{if 'cuGraphEventWaitNodeSetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphEventWaitNodeSetEvent(hNode, event):
-    """ Sets an event wait node's event.
-
-    Sets the event of event wait node `hNode` to `event`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the event for
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to use
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphEventWaitNodeGetEvent`, :py:obj:`~.cuGraphEventRecordNodeSetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`
-    """
-    cdef cydriver.CUevent cyevent
-    if event is None:
-        cyevent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(event, (CUevent,)):
-        pevent = int(event)
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    else:
-        pevent = int(CUevent(event))
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    err = cydriver.cuGraphEventWaitNodeSetEvent(cyhNode, cyevent)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphAddExternalSemaphoresSignalNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddExternalSemaphoresSignalNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, nodeParams : Optional[CUDA_EXT_SEM_SIGNAL_NODE_PARAMS]):
-    """ Creates an external semaphore signal node and adds it to a graph.
-
-    Creates a new external semaphore signal node and adds it to `hGraph`
-    with `numDependencies` dependencies specified via `dependencies` and
-    arguments specified in `nodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `dependencies` may not have any duplicate entries. A
-    handle to the new node will be returned in `phGraphNode`.
-
-    Performs a signal operation on a set of externally allocated semaphore
-    objects when the node is launched. The operation(s) will occur after
-    all of the node's dependencies have completed.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS`
-        Parameters for the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphExternalSemaphoresSignalNodeGetParams`, :py:obj:`~.cuGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`
-    """
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphAddExternalSemaphoresSignalNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExternalSemaphoresSignalNodeGetParams(hNode):
-    """ Returns an external semaphore signal node's parameters.
-
-    Returns the parameters of an external semaphore signal node `hNode` in
-    `params_out`. The `extSemArray` and `paramsArray` returned in
-    `params_out`, are owned by the node. This memory remains valid until
-    the node is destroyed or its parameters are modified, and should not be
-    modified directly. Use
-    :py:obj:`~.cuGraphExternalSemaphoresSignalNodeSetParams` to update the
-    parameters of this node.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    params_out : :py:obj:`~.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUDA_EXT_SEM_SIGNAL_NODE_PARAMS params_out = CUDA_EXT_SEM_SIGNAL_NODE_PARAMS()
-    err = cydriver.cuGraphExternalSemaphoresSignalNodeGetParams(cyhNode, <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*>params_out._ptr)
-    return (CUresult(err), params_out)
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams : Optional[CUDA_EXT_SEM_SIGNAL_NODE_PARAMS]):
-    """ Sets an external semaphore signal node's parameters.
-
-    Sets the parameters of an external semaphore signal node `hNode` to
-    `nodeParams`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    nodeParams : :py:obj:`~.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS`
-        Parameters to copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphExternalSemaphoresSignalNodeSetParams(cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphAddExternalSemaphoresWaitNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddExternalSemaphoresWaitNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, nodeParams : Optional[CUDA_EXT_SEM_WAIT_NODE_PARAMS]):
-    """ Creates an external semaphore wait node and adds it to a graph.
-
-    Creates a new external semaphore wait node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies` and
-    arguments specified in `nodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `dependencies` may not have any duplicate entries. A
-    handle to the new node will be returned in `phGraphNode`.
-
-    Performs a wait operation on a set of externally allocated semaphore
-    objects when the node is launched. The node's dependencies will not be
-    launched until the wait operation has completed.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.CUDA_EXT_SEM_WAIT_NODE_PARAMS`
-        Parameters for the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphExternalSemaphoresWaitNodeGetParams`, :py:obj:`~.cuGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`
-    """
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphAddExternalSemaphoresWaitNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExternalSemaphoresWaitNodeGetParams(hNode):
-    """ Returns an external semaphore wait node's parameters.
-
-    Returns the parameters of an external semaphore wait node `hNode` in
-    `params_out`. The `extSemArray` and `paramsArray` returned in
-    `params_out`, are owned by the node. This memory remains valid until
-    the node is destroyed or its parameters are modified, and should not be
-    modified directly. Use
-    :py:obj:`~.cuGraphExternalSemaphoresSignalNodeSetParams` to update the
-    parameters of this node.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    params_out : :py:obj:`~.CUDA_EXT_SEM_WAIT_NODE_PARAMS`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUDA_EXT_SEM_WAIT_NODE_PARAMS params_out = CUDA_EXT_SEM_WAIT_NODE_PARAMS()
-    err = cydriver.cuGraphExternalSemaphoresWaitNodeGetParams(cyhNode, <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS*>params_out._ptr)
-    return (CUresult(err), params_out)
-{{endif}}
-
-{{if 'cuGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams : Optional[CUDA_EXT_SEM_WAIT_NODE_PARAMS]):
-    """ Sets an external semaphore wait node's parameters.
-
-    Sets the parameters of an external semaphore wait node `hNode` to
-    `nodeParams`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    nodeParams : :py:obj:`~.CUDA_EXT_SEM_WAIT_NODE_PARAMS`
-        Parameters to copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphExternalSemaphoresWaitNodeSetParams(cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphAddBatchMemOpNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddBatchMemOpNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, nodeParams : Optional[CUDA_BATCH_MEM_OP_NODE_PARAMS]):
-    """ Creates a batch memory operation node and adds it to a graph.
-
-    Creates a new batch memory operation node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies` and
-    arguments specified in `nodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `dependencies` may not have any duplicate entries. A
-    handle to the new node will be returned in `phGraphNode`.
-
-    When the node is added, the paramArray inside `nodeParams` is copied
-    and therefore it can be freed after the call returns.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.CUDA_BATCH_MEM_OP_NODE_PARAMS`
-        Parameters for the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuStreamWaitValue32`, :py:obj:`~.cuStreamWriteValue32`, :py:obj:`~.cuStreamWaitValue64`, :py:obj:`~.cuStreamWriteValue64`, :py:obj:`~.cuGraphBatchMemOpNodeGetParams`, :py:obj:`~.cuGraphBatchMemOpNodeSetParams`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`
-
-    Notes
-    -----
-    Warning: Improper use of this API may deadlock the application. Synchronization ordering established through this API is not visible to CUDA. CUDA tasks that are (even indirectly) ordered by this API should also have that order expressed with CUDA-visible dependencies such as events. This ensures that the scheduler does not serialize them in an improper order. For more information, see the Stream Memory Operations section in the programming guide(https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html).
-    """
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphAddBatchMemOpNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphBatchMemOpNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphBatchMemOpNodeGetParams(hNode):
-    """ Returns a batch mem op node's parameters.
-
-    Returns the parameters of batch mem op node `hNode` in
-    `nodeParams_out`. The `paramArray` returned in `nodeParams_out` is
-    owned by the node. This memory remains valid until the node is
-    destroyed or its parameters are modified, and should not be modified
-    directly. Use :py:obj:`~.cuGraphBatchMemOpNodeSetParams` to update the
-    parameters of this node.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    nodeParams_out : :py:obj:`~.CUDA_BATCH_MEM_OP_NODE_PARAMS`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuGraphAddBatchMemOpNode`, :py:obj:`~.cuGraphBatchMemOpNodeSetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUDA_BATCH_MEM_OP_NODE_PARAMS nodeParams_out = CUDA_BATCH_MEM_OP_NODE_PARAMS()
-    err = cydriver.cuGraphBatchMemOpNodeGetParams(cyhNode, <cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS*>nodeParams_out._ptr)
-    return (CUresult(err), nodeParams_out)
-{{endif}}
-
-{{if 'cuGraphBatchMemOpNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphBatchMemOpNodeSetParams(hNode, nodeParams : Optional[CUDA_BATCH_MEM_OP_NODE_PARAMS]):
-    """ Sets a batch mem op node's parameters.
-
-    Sets the parameters of batch mem op node `hNode` to `nodeParams`.
-
-    The paramArray inside `nodeParams` is copied and therefore it can be
-    freed after the call returns.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    nodeParams : :py:obj:`~.CUDA_BATCH_MEM_OP_NODE_PARAMS`
-        Parameters to copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuGraphAddBatchMemOpNode`, :py:obj:`~.cuGraphBatchMemOpNodeGetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphBatchMemOpNodeSetParams(cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecBatchMemOpNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecBatchMemOpNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUDA_BATCH_MEM_OP_NODE_PARAMS]):
-    """ Sets the parameters for a batch mem op node in the given graphExec.
-
-    Sets the parameters of a batch mem op node in an executable graph
-    `hGraphExec`. The node is identified by the corresponding node `hNode`
-    in the non-executable graph, from which the executable graph was
-    instantiated.
-
-    The following fields on operations may be modified on an executable
-    graph:
-
-    op.waitValue.address op.waitValue.value[64] op.waitValue.flags bits
-    corresponding to wait type (i.e. CU_STREAM_WAIT_VALUE_FLUSH bit cannot
-    be modified) op.writeValue.address op.writeValue.value[64]
-
-    Other fields, such as the context, count or type of operations, and
-    other types of operations such as membars, may not be modified.
-
-    `hNode` must not have been removed from the original graph.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    The paramArray inside `nodeParams` is copied and therefore it can be
-    freed after the call returns.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Batch mem op node from the graph from which graphExec was
-        instantiated
-    nodeParams : :py:obj:`~.CUDA_BATCH_MEM_OP_NODE_PARAMS`
-        Updated Parameters to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuGraphAddBatchMemOpNode`, :py:obj:`~.cuGraphBatchMemOpNodeGetParams`, :py:obj:`~.cuGraphBatchMemOpNodeSetParams`, :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphExecBatchMemOpNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphAddMemAllocNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddMemAllocNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, nodeParams : Optional[CUDA_MEM_ALLOC_NODE_PARAMS]):
-    """ Creates an allocation node and adds it to a graph.
-
-    Creates a new allocation node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies` and
-    arguments specified in `nodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `dependencies` may not have any duplicate entries. A
-    handle to the new node will be returned in `phGraphNode`.
-
-    When :py:obj:`~.cuGraphAddMemAllocNode` creates an allocation node, it
-    returns the address of the allocation in `nodeParams.dptr`. The
-    allocation's address remains fixed across instantiations and launches.
-
-    If the allocation is freed in the same graph, by creating a free node
-    using :py:obj:`~.cuGraphAddMemFreeNode`, the allocation can be accessed
-    by nodes ordered after the allocation node but before the free node.
-    These allocations cannot be freed outside the owning graph, and they
-    can only be freed once in the owning graph.
-
-    If the allocation is not freed in the same graph, then it can be
-    accessed not only by nodes in the graph which are ordered after the
-    allocation node, but also by stream operations ordered after the
-    graph's execution but before the allocation is freed.
-
-    Allocations which are not freed in the same graph can be freed by:
-
-    - passing the allocation to :py:obj:`~.cuMemFreeAsync` or
-      :py:obj:`~.cuMemFree`;
-
-    - launching a graph with a free node for that allocation; or
-
-    - specifying
-      :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH` during
-      instantiation, which makes each launch behave as though it called
-      :py:obj:`~.cuMemFreeAsync` for every unfreed allocation.
-
-    It is not possible to free an allocation in both the owning graph and
-    another graph. If the allocation is freed in the same graph, a free
-    node cannot be added to another graph. If the allocation is freed in
-    another graph, a free node can no longer be added to the owning graph.
-
-    The following restrictions apply to graphs which contain allocation
-    and/or memory free nodes:
-
-    - Nodes and edges of the graph cannot be deleted.
-
-    - The graph cannot be used in a child node.
-
-    - Only one instantiation of the graph may exist at any point in time.
-
-    - The graph cannot be cloned.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.CUDA_MEM_ALLOC_NODE_PARAMS`
-        Parameters for the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphAddMemFreeNode`, :py:obj:`~.cuGraphMemAllocNodeGetParams`, :py:obj:`~.cuDeviceGraphMemTrim`, :py:obj:`~.cuDeviceGetGraphMemAttribute`, :py:obj:`~.cuDeviceSetGraphMemAttribute`, :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`
-    """
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    cdef cydriver.CUDA_MEM_ALLOC_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphAddMemAllocNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphMemAllocNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphMemAllocNodeGetParams(hNode):
-    """ Returns a memory alloc node's parameters.
-
-    Returns the parameters of a memory alloc node `hNode` in `params_out`.
-    The `poolProps` and `accessDescs` returned in `params_out`, are owned
-    by the node. This memory remains valid until the node is destroyed. The
-    returned parameters must not be modified.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    params_out : :py:obj:`~.CUDA_MEM_ALLOC_NODE_PARAMS`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddMemAllocNode`, :py:obj:`~.cuGraphMemFreeNodeGetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUDA_MEM_ALLOC_NODE_PARAMS params_out = CUDA_MEM_ALLOC_NODE_PARAMS()
-    err = cydriver.cuGraphMemAllocNodeGetParams(cyhNode, <cydriver.CUDA_MEM_ALLOC_NODE_PARAMS*>params_out._ptr)
-    return (CUresult(err), params_out)
-{{endif}}
-
-{{if 'cuGraphAddMemFreeNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddMemFreeNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, dptr):
-    """ Creates a memory free node and adds it to a graph.
-
-    Creates a new memory free node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies` and
-    arguments specified in `nodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `dependencies` may not have any duplicate entries. A
-    handle to the new node will be returned in `phGraphNode`.
-
-    :py:obj:`~.cuGraphAddMemFreeNode` will return
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` if the user attempts to free:
-
-    - an allocation twice in the same graph.
-
-    - an address that was not returned by an allocation node.
-
-    - an invalid address.
-
-    The following restrictions apply to graphs which contain allocation
-    and/or memory free nodes:
-
-    - Nodes and edges of the graph cannot be deleted.
-
-    - The graph cannot be used in a child node.
-
-    - Only one instantiation of the graph may exist at any point in time.
-
-    - The graph cannot be cloned.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    dptr : :py:obj:`~.CUdeviceptr`
-        Address of memory to free
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphAddMemAllocNode`, :py:obj:`~.cuGraphMemFreeNodeGetParams`, :py:obj:`~.cuDeviceGraphMemTrim`, :py:obj:`~.cuDeviceGetGraphMemAttribute`, :py:obj:`~.cuDeviceSetGraphMemAttribute`, :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`
-    """
-    cdef cydriver.CUdeviceptr cydptr
-    if dptr is None:
-        cydptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dptr, (CUdeviceptr,)):
-        pdptr = int(dptr)
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    else:
-        pdptr = int(CUdeviceptr(dptr))
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    err = cydriver.cuGraphAddMemFreeNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cydptr)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphMemFreeNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphMemFreeNodeGetParams(hNode):
-    """ Returns a memory free node's parameters.
-
-    Returns the address of a memory free node `hNode` in `dptr_out`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    dptr_out : :py:obj:`~.CUdeviceptr`
-        Pointer to return the device address
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddMemFreeNode`, :py:obj:`~.cuGraphMemAllocNodeGetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef CUdeviceptr dptr_out = CUdeviceptr()
-    err = cydriver.cuGraphMemFreeNodeGetParams(cyhNode, <cydriver.CUdeviceptr*>dptr_out._ptr)
-    return (CUresult(err), dptr_out)
-{{endif}}
-
-{{if 'cuDeviceGraphMemTrim' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGraphMemTrim(device):
-    """ Free unused memory that was cached on the specified device for use with graphs back to the OS.
-
-    Blocks which are not in use by a graph that is either currently
-    executing or scheduled to execute are freed back to the operating
-    system.
-
-    Parameters
-    ----------
-    device : :py:obj:`~.CUdevice`
-        The device for which cached memory should be freed.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddMemAllocNode`, :py:obj:`~.cuGraphAddMemFreeNode`, :py:obj:`~.cuDeviceSetGraphMemAttribute`, :py:obj:`~.cuDeviceGetGraphMemAttribute`
-    """
-    cdef cydriver.CUdevice cydevice
-    if device is None:
-        cydevice = <cydriver.CUdevice>0
-    elif isinstance(device, (CUdevice,)):
-        pdevice = int(device)
-        cydevice = <cydriver.CUdevice>pdevice
-    else:
-        pdevice = int(CUdevice(device))
-        cydevice = <cydriver.CUdevice>pdevice
-    err = cydriver.cuDeviceGraphMemTrim(cydevice)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuDeviceGetGraphMemAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetGraphMemAttribute(device, attr not None : CUgraphMem_attribute):
-    """ Query asynchronous allocation attributes related to graphs.
-
-    Valid attributes are:
-
-    - :py:obj:`~.CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT`: Amount of memory, in
-      bytes, currently associated with graphs
-
-    - :py:obj:`~.CU_GRAPH_MEM_ATTR_USED_MEM_HIGH`: High watermark of
-      memory, in bytes, associated with graphs since the last time it was
-      reset. High watermark can only be reset to zero.
-
-    - :py:obj:`~.CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT`: Amount of memory,
-      in bytes, currently allocated for use by the CUDA graphs asynchronous
-      allocator.
-
-    - :py:obj:`~.CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH`: High watermark of
-      memory, in bytes, currently allocated for use by the CUDA graphs
-      asynchronous allocator.
-
-    Parameters
-    ----------
-    device : :py:obj:`~.CUdevice`
-        Specifies the scope of the query
-    attr : :py:obj:`~.CUgraphMem_attribute`
-        attribute to get
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    value : Any
-        retrieved value
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceSetGraphMemAttribute`, :py:obj:`~.cuGraphAddMemAllocNode`, :py:obj:`~.cuGraphAddMemFreeNode`
-    """
-    cdef cydriver.CUdevice cydevice
-    if device is None:
-        cydevice = <cydriver.CUdevice>0
-    elif isinstance(device, (CUdevice,)):
-        pdevice = int(device)
-        cydevice = <cydriver.CUdevice>pdevice
-    else:
-        pdevice = int(CUdevice(device))
-        cydevice = <cydriver.CUdevice>pdevice
-    cdef cydriver.CUgraphMem_attribute cyattr = attr.value
-    cdef utils.HelperCUgraphMem_attribute cyvalue = utils.HelperCUgraphMem_attribute(attr, 0, is_getter=True)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    err = cydriver.cuDeviceGetGraphMemAttribute(cydevice, cyattr, cyvalue_ptr)
-    return (CUresult(err), cyvalue.pyObj())
-{{endif}}
-
-{{if 'cuDeviceSetGraphMemAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceSetGraphMemAttribute(device, attr not None : CUgraphMem_attribute, value):
-    """ Set asynchronous allocation attributes related to graphs.
-
-    Valid attributes are:
-
-    - :py:obj:`~.CU_GRAPH_MEM_ATTR_USED_MEM_HIGH`: High watermark of
-      memory, in bytes, associated with graphs since the last time it was
-      reset. High watermark can only be reset to zero.
-
-    - :py:obj:`~.CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH`: High watermark of
-      memory, in bytes, currently allocated for use by the CUDA graphs
-      asynchronous allocator.
-
-    Parameters
-    ----------
-    device : :py:obj:`~.CUdevice`
-        Specifies the scope of the query
-    attr : :py:obj:`~.CUgraphMem_attribute`
-        attribute to get
-    value : Any
-        pointer to value to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetGraphMemAttribute`, :py:obj:`~.cuGraphAddMemAllocNode`, :py:obj:`~.cuGraphAddMemFreeNode`
-    """
-    cdef cydriver.CUdevice cydevice
-    if device is None:
-        cydevice = <cydriver.CUdevice>0
-    elif isinstance(device, (CUdevice,)):
-        pdevice = int(device)
-        cydevice = <cydriver.CUdevice>pdevice
-    else:
-        pdevice = int(CUdevice(device))
-        cydevice = <cydriver.CUdevice>pdevice
-    cdef cydriver.CUgraphMem_attribute cyattr = attr.value
-    cdef utils.HelperCUgraphMem_attribute cyvalue = utils.HelperCUgraphMem_attribute(attr, value, is_getter=False)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    err = cydriver.cuDeviceSetGraphMemAttribute(cydevice, cyattr, cyvalue_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphClone' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphClone(originalGraph):
-    """ Clones a graph.
-
-    This function creates a copy of `originalGraph` and returns it in
-    `phGraphClone`. All parameters are copied into the cloned graph. The
-    original graph may be modified after this call without affecting the
-    clone.
-
-    Child graph nodes in the original graph are recursively copied into the
-    clone.
-
-    Parameters
-    ----------
-    originalGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to clone
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    phGraphClone : :py:obj:`~.CUgraph`
-        Returns newly created cloned graph
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphNodeFindInClone`
-    """
-    cdef cydriver.CUgraph cyoriginalGraph
-    if originalGraph is None:
-        cyoriginalGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(originalGraph, (CUgraph,)):
-        poriginalGraph = int(originalGraph)
-        cyoriginalGraph = <cydriver.CUgraph><void_ptr>poriginalGraph
-    else:
-        poriginalGraph = int(CUgraph(originalGraph))
-        cyoriginalGraph = <cydriver.CUgraph><void_ptr>poriginalGraph
-    cdef CUgraph phGraphClone = CUgraph()
-    err = cydriver.cuGraphClone(<cydriver.CUgraph*>phGraphClone._ptr, cyoriginalGraph)
-    return (CUresult(err), phGraphClone)
-{{endif}}
-
-{{if 'cuGraphNodeFindInClone' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphNodeFindInClone(hOriginalNode, hClonedGraph):
-    """ Finds a cloned version of a node.
-
-    This function returns the node in `hClonedGraph` corresponding to
-    `hOriginalNode` in the original graph.
-
-    `hClonedGraph` must have been cloned from `hOriginalGraph` via
-    :py:obj:`~.cuGraphClone`. `hOriginalNode` must have been in
-    `hOriginalGraph` at the time of the call to :py:obj:`~.cuGraphClone`,
-    and the corresponding cloned node in `hClonedGraph` must not have been
-    removed. The cloned node is then returned via `phClonedNode`.
-
-    Parameters
-    ----------
-    hOriginalNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Handle to the original node
-    hClonedGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Cloned graph to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    phNode : :py:obj:`~.CUgraphNode`
-        Returns handle to the cloned node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphClone`
-    """
-    cdef cydriver.CUgraph cyhClonedGraph
-    if hClonedGraph is None:
-        cyhClonedGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hClonedGraph, (CUgraph,)):
-        phClonedGraph = int(hClonedGraph)
-        cyhClonedGraph = <cydriver.CUgraph><void_ptr>phClonedGraph
-    else:
-        phClonedGraph = int(CUgraph(hClonedGraph))
-        cyhClonedGraph = <cydriver.CUgraph><void_ptr>phClonedGraph
-    cdef cydriver.CUgraphNode cyhOriginalNode
-    if hOriginalNode is None:
-        cyhOriginalNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hOriginalNode, (CUgraphNode,)):
-        phOriginalNode = int(hOriginalNode)
-        cyhOriginalNode = <cydriver.CUgraphNode><void_ptr>phOriginalNode
-    else:
-        phOriginalNode = int(CUgraphNode(hOriginalNode))
-        cyhOriginalNode = <cydriver.CUgraphNode><void_ptr>phOriginalNode
-    cdef CUgraphNode phNode = CUgraphNode()
-    err = cydriver.cuGraphNodeFindInClone(<cydriver.CUgraphNode*>phNode._ptr, cyhOriginalNode, cyhClonedGraph)
-    return (CUresult(err), phNode)
-{{endif}}
-
-{{if 'cuGraphNodeGetType' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphNodeGetType(hNode):
-    """ Returns a node's type.
-
-    Returns the node type of `hNode` in `typename`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    typename : :py:obj:`~.CUgraphNodeType`
-        Pointer to return the node type
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphChildGraphNodeGetGraph`, :py:obj:`~.cuGraphKernelNodeGetParams`, :py:obj:`~.cuGraphKernelNodeSetParams`, :py:obj:`~.cuGraphHostNodeGetParams`, :py:obj:`~.cuGraphHostNodeSetParams`, :py:obj:`~.cuGraphMemcpyNodeGetParams`, :py:obj:`~.cuGraphMemcpyNodeSetParams`, :py:obj:`~.cuGraphMemsetNodeGetParams`, :py:obj:`~.cuGraphMemsetNodeSetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphNodeType typename
-    err = cydriver.cuGraphNodeGetType(cyhNode, &typename)
-    return (CUresult(err), CUgraphNodeType(typename))
-{{endif}}
-
-{{if 'cuGraphGetNodes' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphGetNodes(hGraph, size_t numNodes = 0):
-    """ Returns a graph's nodes.
-
-    Returns a list of `hGraph's` nodes. `nodes` may be NULL, in which case
-    this function will return the number of nodes in `numNodes`. Otherwise,
-    `numNodes` entries will be filled in. If `numNodes` is higher than the
-    actual number of nodes, the remaining entries in `nodes` will be set to
-    NULL, and the number of nodes actually obtained will be returned in
-    `numNodes`.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to query
-    numNodes : int
-        See description
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    nodes : List[:py:obj:`~.CUgraphNode`]
-        Pointer to return the nodes
-    numNodes : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetType`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes`
-    """
-    cdef size_t _graph_length = numNodes
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef cydriver.CUgraphNode* cynodes = NULL
-    pynodes = []
-    if _graph_length != 0:
-        cynodes = <cydriver.CUgraphNode*>calloc(_graph_length, sizeof(cydriver.CUgraphNode))
-        if cynodes is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-    err = cydriver.cuGraphGetNodes(cyhGraph, cynodes, &numNodes)
-    if CUresult(err) == CUresult(0):
-        pynodes = [CUgraphNode(init_value=<void_ptr>cynodes[idx]) for idx in range(_graph_length)]
-    if cynodes is not NULL:
-        free(cynodes)
-    return (CUresult(err), pynodes, numNodes)
-{{endif}}
-
-{{if 'cuGraphGetRootNodes' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphGetRootNodes(hGraph, size_t numRootNodes = 0):
-    """ Returns a graph's root nodes.
-
-    Returns a list of `hGraph's` root nodes. `rootNodes` may be NULL, in
-    which case this function will return the number of root nodes in
-    `numRootNodes`. Otherwise, `numRootNodes` entries will be filled in. If
-    `numRootNodes` is higher than the actual number of root nodes, the
-    remaining entries in `rootNodes` will be set to NULL, and the number of
-    nodes actually obtained will be returned in `numRootNodes`.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to query
-    numRootNodes : int
-        See description
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    rootNodes : List[:py:obj:`~.CUgraphNode`]
-        Pointer to return the root nodes
-    numRootNodes : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetType`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes`
-    """
-    cdef size_t _graph_length = numRootNodes
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef cydriver.CUgraphNode* cyrootNodes = NULL
-    pyrootNodes = []
-    if _graph_length != 0:
-        cyrootNodes = <cydriver.CUgraphNode*>calloc(_graph_length, sizeof(cydriver.CUgraphNode))
-        if cyrootNodes is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-    err = cydriver.cuGraphGetRootNodes(cyhGraph, cyrootNodes, &numRootNodes)
-    if CUresult(err) == CUresult(0):
-        pyrootNodes = [CUgraphNode(init_value=<void_ptr>cyrootNodes[idx]) for idx in range(_graph_length)]
-    if cyrootNodes is not NULL:
-        free(cyrootNodes)
-    return (CUresult(err), pyrootNodes, numRootNodes)
-{{endif}}
-
-{{if 'cuGraphGetEdges' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphGetEdges(hGraph, size_t numEdges = 0):
-    """ Returns a graph's dependency edges.
-
-    Returns a list of `hGraph's` dependency edges. Edges are returned via
-    corresponding indices in `from` and `to`; that is, the node in `to`[i]
-    has a dependency on the node in `from`[i]. `from` and `to` may both be
-    NULL, in which case this function only returns the number of edges in
-    `numEdges`. Otherwise, `numEdges` entries will be filled in. If
-    `numEdges` is higher than the actual number of edges, the remaining
-    entries in `from` and `to` will be set to NULL, and the number of edges
-    actually returned will be written to `numEdges`.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to get the edges from
-    numEdges : int
-        See description
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    from : List[:py:obj:`~.CUgraphNode`]
-        Location to return edge endpoints
-    to : List[:py:obj:`~.CUgraphNode`]
-        Location to return edge endpoints
-    numEdges : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphRemoveDependencies`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes`
-    """
-    cdef size_t _graph_length = numEdges
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef cydriver.CUgraphNode* cyfrom_ = NULL
-    pyfrom_ = []
-    if _graph_length != 0:
-        cyfrom_ = <cydriver.CUgraphNode*>calloc(_graph_length, sizeof(cydriver.CUgraphNode))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-    cdef cydriver.CUgraphNode* cyto = NULL
-    pyto = []
-    if _graph_length != 0:
-        cyto = <cydriver.CUgraphNode*>calloc(_graph_length, sizeof(cydriver.CUgraphNode))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-    err = cydriver.cuGraphGetEdges(cyhGraph, cyfrom_, cyto, &numEdges)
-    if CUresult(err) == CUresult(0):
-        pyfrom_ = [CUgraphNode(init_value=<void_ptr>cyfrom_[idx]) for idx in range(_graph_length)]
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if CUresult(err) == CUresult(0):
-        pyto = [CUgraphNode(init_value=<void_ptr>cyto[idx]) for idx in range(_graph_length)]
-    if cyto is not NULL:
-        free(cyto)
-    return (CUresult(err), pyfrom_, pyto, numEdges)
-{{endif}}
-
-{{if 'cuGraphGetEdges_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphGetEdges_v2(hGraph, size_t numEdges = 0):
-    """ Returns a graph's dependency edges (12.3+)
-
-    Returns a list of `hGraph's` dependency edges. Edges are returned via
-    corresponding indices in `from`, `to` and `edgeData`; that is, the node
-    in `to`[i] has a dependency on the node in `from`[i] with data
-    `edgeData`[i]. `from` and `to` may both be NULL, in which case this
-    function only returns the number of edges in `numEdges`. Otherwise,
-    `numEdges` entries will be filled in. If `numEdges` is higher than the
-    actual number of edges, the remaining entries in `from` and `to` will
-    be set to NULL, and the number of edges actually returned will be
-    written to `numEdges`. `edgeData` may alone be NULL, in which case the
-    edges must all have default (zeroed) edge data. Attempting a lossy
-    query via NULL `edgeData` will result in
-    :py:obj:`~.CUDA_ERROR_LOSSY_QUERY`. If `edgeData` is non-NULL then
-    `from` and `to` must be as well.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to get the edges from
-    numEdges : int
-        See description
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_LOSSY_QUERY`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    from : List[:py:obj:`~.CUgraphNode`]
-        Location to return edge endpoints
-    to : List[:py:obj:`~.CUgraphNode`]
-        Location to return edge endpoints
-    edgeData : List[:py:obj:`~.CUgraphEdgeData`]
-        Optional location to return edge data
-    numEdges : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphRemoveDependencies`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes`
-    """
-    cdef size_t _graph_length = numEdges
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef cydriver.CUgraphNode* cyfrom_ = NULL
-    pyfrom_ = []
-    if _graph_length != 0:
-        cyfrom_ = <cydriver.CUgraphNode*>calloc(_graph_length, sizeof(cydriver.CUgraphNode))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-    cdef cydriver.CUgraphNode* cyto = NULL
-    pyto = []
-    if _graph_length != 0:
-        cyto = <cydriver.CUgraphNode*>calloc(_graph_length, sizeof(cydriver.CUgraphNode))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-    cdef cydriver.CUgraphEdgeData* cyedgeData = NULL
-    pyedgeData = []
-    if _graph_length != 0:
-        cyedgeData = <cydriver.CUgraphEdgeData*>calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData))
-        if cyedgeData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData)))
-    err = cydriver.cuGraphGetEdges_v2(cyhGraph, cyfrom_, cyto, cyedgeData, &numEdges)
-    if CUresult(err) == CUresult(0):
-        pyfrom_ = [CUgraphNode(init_value=<void_ptr>cyfrom_[idx]) for idx in range(_graph_length)]
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if CUresult(err) == CUresult(0):
-        pyto = [CUgraphNode(init_value=<void_ptr>cyto[idx]) for idx in range(_graph_length)]
-    if cyto is not NULL:
-        free(cyto)
-    if CUresult(err) == CUresult(0):
-        pyedgeData = [CUgraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
-    if cyedgeData is not NULL:
-        free(cyedgeData)
-    return (CUresult(err), pyfrom_, pyto, pyedgeData, numEdges)
-{{endif}}
-
-{{if 'cuGraphNodeGetDependencies' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphNodeGetDependencies(hNode, size_t numDependencies = 0):
-    """ Returns a node's dependencies.
-
-    Returns a list of `node's` dependencies. `dependencies` may be NULL, in
-    which case this function will return the number of dependencies in
-    `numDependencies`. Otherwise, `numDependencies` entries will be filled
-    in. If `numDependencies` is higher than the actual number of
-    dependencies, the remaining entries in `dependencies` will be set to
-    NULL, and the number of nodes actually obtained will be returned in
-    `numDependencies`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to query
-    numDependencies : int
-        See description
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Pointer to return the dependencies
-    numDependencies : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeGetDependentNodes`, :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphRemoveDependencies`
-    """
-    cdef size_t _graph_length = numDependencies
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    pydependencies = []
-    if _graph_length != 0:
-        cydependencies = <cydriver.CUgraphNode*>calloc(_graph_length, sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-    err = cydriver.cuGraphNodeGetDependencies(cyhNode, cydependencies, &numDependencies)
-    if CUresult(err) == CUresult(0):
-        pydependencies = [CUgraphNode(init_value=<void_ptr>cydependencies[idx]) for idx in range(_graph_length)]
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), pydependencies, numDependencies)
-{{endif}}
-
-{{if 'cuGraphNodeGetDependencies_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphNodeGetDependencies_v2(hNode, size_t numDependencies = 0):
-    """ Returns a node's dependencies (12.3+)
-
-    Returns a list of `node's` dependencies. `dependencies` may be NULL, in
-    which case this function will return the number of dependencies in
-    `numDependencies`. Otherwise, `numDependencies` entries will be filled
-    in. If `numDependencies` is higher than the actual number of
-    dependencies, the remaining entries in `dependencies` will be set to
-    NULL, and the number of nodes actually obtained will be returned in
-    `numDependencies`.
-
-    Note that if an edge has non-zero (non-default) edge data and
-    `edgeData` is NULL, this API will return
-    :py:obj:`~.CUDA_ERROR_LOSSY_QUERY`. If `edgeData` is non-NULL, then
-    `dependencies` must be as well.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to query
-    numDependencies : int
-        See description
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_LOSSY_QUERY`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Pointer to return the dependencies
-    edgeData : List[:py:obj:`~.CUgraphEdgeData`]
-        Optional array to return edge data for each dependency
-    numDependencies : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeGetDependentNodes`, :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphRemoveDependencies`
-    """
-    cdef size_t _graph_length = numDependencies
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    pydependencies = []
-    if _graph_length != 0:
-        cydependencies = <cydriver.CUgraphNode*>calloc(_graph_length, sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-    cdef cydriver.CUgraphEdgeData* cyedgeData = NULL
-    pyedgeData = []
-    if _graph_length != 0:
-        cyedgeData = <cydriver.CUgraphEdgeData*>calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData))
-        if cyedgeData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData)))
-    err = cydriver.cuGraphNodeGetDependencies_v2(cyhNode, cydependencies, cyedgeData, &numDependencies)
-    if CUresult(err) == CUresult(0):
-        pydependencies = [CUgraphNode(init_value=<void_ptr>cydependencies[idx]) for idx in range(_graph_length)]
-    if cydependencies is not NULL:
-        free(cydependencies)
-    if CUresult(err) == CUresult(0):
-        pyedgeData = [CUgraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
-    if cyedgeData is not NULL:
-        free(cyedgeData)
-    return (CUresult(err), pydependencies, pyedgeData, numDependencies)
-{{endif}}
-
-{{if 'cuGraphNodeGetDependentNodes' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphNodeGetDependentNodes(hNode, size_t numDependentNodes = 0):
-    """ Returns a node's dependent nodes.
-
-    Returns a list of `node's` dependent nodes. `dependentNodes` may be
-    NULL, in which case this function will return the number of dependent
-    nodes in `numDependentNodes`. Otherwise, `numDependentNodes` entries
-    will be filled in. If `numDependentNodes` is higher than the actual
-    number of dependent nodes, the remaining entries in `dependentNodes`
-    will be set to NULL, and the number of nodes actually obtained will be
-    returned in `numDependentNodes`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to query
-    numDependentNodes : int
-        See description
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    dependentNodes : List[:py:obj:`~.CUgraphNode`]
-        Pointer to return the dependent nodes
-    numDependentNodes : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphRemoveDependencies`
-    """
-    cdef size_t _graph_length = numDependentNodes
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphNode* cydependentNodes = NULL
-    pydependentNodes = []
-    if _graph_length != 0:
-        cydependentNodes = <cydriver.CUgraphNode*>calloc(_graph_length, sizeof(cydriver.CUgraphNode))
-        if cydependentNodes is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-    err = cydriver.cuGraphNodeGetDependentNodes(cyhNode, cydependentNodes, &numDependentNodes)
-    if CUresult(err) == CUresult(0):
-        pydependentNodes = [CUgraphNode(init_value=<void_ptr>cydependentNodes[idx]) for idx in range(_graph_length)]
-    if cydependentNodes is not NULL:
-        free(cydependentNodes)
-    return (CUresult(err), pydependentNodes, numDependentNodes)
-{{endif}}
-
-{{if 'cuGraphNodeGetDependentNodes_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphNodeGetDependentNodes_v2(hNode, size_t numDependentNodes = 0):
-    """ Returns a node's dependent nodes (12.3+)
-
-    Returns a list of `node's` dependent nodes. `dependentNodes` may be
-    NULL, in which case this function will return the number of dependent
-    nodes in `numDependentNodes`. Otherwise, `numDependentNodes` entries
-    will be filled in. If `numDependentNodes` is higher than the actual
-    number of dependent nodes, the remaining entries in `dependentNodes`
-    will be set to NULL, and the number of nodes actually obtained will be
-    returned in `numDependentNodes`.
-
-    Note that if an edge has non-zero (non-default) edge data and
-    `edgeData` is NULL, this API will return
-    :py:obj:`~.CUDA_ERROR_LOSSY_QUERY`. If `edgeData` is non-NULL, then
-    `dependentNodes` must be as well.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to query
-    numDependentNodes : int
-        See description
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_LOSSY_QUERY`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    dependentNodes : List[:py:obj:`~.CUgraphNode`]
-        Pointer to return the dependent nodes
-    edgeData : List[:py:obj:`~.CUgraphEdgeData`]
-        Optional pointer to return edge data for dependent nodes
-    numDependentNodes : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphRemoveDependencies`
-    """
-    cdef size_t _graph_length = numDependentNodes
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphNode* cydependentNodes = NULL
-    pydependentNodes = []
-    if _graph_length != 0:
-        cydependentNodes = <cydriver.CUgraphNode*>calloc(_graph_length, sizeof(cydriver.CUgraphNode))
-        if cydependentNodes is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-    cdef cydriver.CUgraphEdgeData* cyedgeData = NULL
-    pyedgeData = []
-    if _graph_length != 0:
-        cyedgeData = <cydriver.CUgraphEdgeData*>calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData))
-        if cyedgeData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData)))
-    err = cydriver.cuGraphNodeGetDependentNodes_v2(cyhNode, cydependentNodes, cyedgeData, &numDependentNodes)
-    if CUresult(err) == CUresult(0):
-        pydependentNodes = [CUgraphNode(init_value=<void_ptr>cydependentNodes[idx]) for idx in range(_graph_length)]
-    if cydependentNodes is not NULL:
-        free(cydependentNodes)
-    if CUresult(err) == CUresult(0):
-        pyedgeData = [CUgraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
-    if cyedgeData is not NULL:
-        free(cyedgeData)
-    return (CUresult(err), pydependentNodes, pyedgeData, numDependentNodes)
-{{endif}}
-
-{{if 'cuGraphAddDependencies' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddDependencies(hGraph, from_ : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], to : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies):
-    """ Adds dependency edges to a graph.
-
-    The number of dependencies to be added is defined by `numDependencies`
-    Elements in `from` and `to` at corresponding indices define a
-    dependency. Each node in `from` and `to` must belong to `hGraph`.
-
-    If `numDependencies` is 0, elements in `from` and `to` will be ignored.
-    Specifying an existing dependency will return an error.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which dependencies are added
-    from : List[:py:obj:`~.CUgraphNode`]
-        Array of nodes that provide the dependencies
-    to : List[:py:obj:`~.CUgraphNode`]
-        Array of dependent nodes
-    numDependencies : size_t
-        Number of dependencies to be added
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphRemoveDependencies`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes`
-    """
-    to = [] if to is None else to
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in to):
-        raise TypeError("Argument 'to' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    from_ = [] if from_ is None else from_
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in from_):
-        raise TypeError("Argument 'from_' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef cydriver.CUgraphNode* cyfrom_ = NULL
-    if len(from_) > 0:
-        cyfrom_ = <cydriver.CUgraphNode*> calloc(len(from_), sizeof(cydriver.CUgraphNode))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(from_)):
-                cyfrom_[idx] = <cydriver.CUgraphNode>(<CUgraphNode>from_[idx])._ptr[0]
-    cdef cydriver.CUgraphNode* cyto = NULL
-    if len(to) > 0:
-        cyto = <cydriver.CUgraphNode*> calloc(len(to), sizeof(cydriver.CUgraphNode))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(to)):
-                cyto[idx] = <cydriver.CUgraphNode>(<CUgraphNode>to[idx])._ptr[0]
-    err = cydriver.cuGraphAddDependencies(cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>from_[0])._ptr if len(from_) == 1 else cyfrom_, <cydriver.CUgraphNode*>(<CUgraphNode>to[0])._ptr if len(to) == 1 else cyto, numDependencies)
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if cyto is not NULL:
-        free(cyto)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphAddDependencies_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddDependencies_v2(hGraph, from_ : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], to : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], edgeData : Optional[Tuple[CUgraphEdgeData] | List[CUgraphEdgeData]], size_t numDependencies):
-    """ Adds dependency edges to a graph (12.3+)
-
-    The number of dependencies to be added is defined by `numDependencies`
-    Elements in `from` and `to` at corresponding indices define a
-    dependency. Each node in `from` and `to` must belong to `hGraph`.
-
-    If `numDependencies` is 0, elements in `from` and `to` will be ignored.
-    Specifying an existing dependency will return an error.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which dependencies are added
-    from : List[:py:obj:`~.CUgraphNode`]
-        Array of nodes that provide the dependencies
-    to : List[:py:obj:`~.CUgraphNode`]
-        Array of dependent nodes
-    edgeData : List[:py:obj:`~.CUgraphEdgeData`]
-        Optional array of edge data. If NULL, default (zeroed) edge data is
-        assumed.
-    numDependencies : size_t
-        Number of dependencies to be added
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphRemoveDependencies`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes`
-    """
-    edgeData = [] if edgeData is None else edgeData
-    if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in edgeData):
-        raise TypeError("Argument 'edgeData' is not instance of type (expected Tuple[cydriver.CUgraphEdgeData,] or List[cydriver.CUgraphEdgeData,]")
-    to = [] if to is None else to
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in to):
-        raise TypeError("Argument 'to' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    from_ = [] if from_ is None else from_
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in from_):
-        raise TypeError("Argument 'from_' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef cydriver.CUgraphNode* cyfrom_ = NULL
-    if len(from_) > 0:
-        cyfrom_ = <cydriver.CUgraphNode*> calloc(len(from_), sizeof(cydriver.CUgraphNode))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(from_)):
-                cyfrom_[idx] = <cydriver.CUgraphNode>(<CUgraphNode>from_[idx])._ptr[0]
-    cdef cydriver.CUgraphNode* cyto = NULL
-    if len(to) > 0:
-        cyto = <cydriver.CUgraphNode*> calloc(len(to), sizeof(cydriver.CUgraphNode))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(to)):
-                cyto[idx] = <cydriver.CUgraphNode>(<CUgraphNode>to[idx])._ptr[0]
-    cdef cydriver.CUgraphEdgeData* cyedgeData = NULL
-    if len(edgeData) > 0:
-        cyedgeData = <cydriver.CUgraphEdgeData*> calloc(len(edgeData), sizeof(cydriver.CUgraphEdgeData))
-        if cyedgeData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData)))
-        for idx in range(len(edgeData)):
-            string.memcpy(&cyedgeData[idx], (<CUgraphEdgeData>edgeData[idx])._ptr, sizeof(cydriver.CUgraphEdgeData))
-    err = cydriver.cuGraphAddDependencies_v2(cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>from_[0])._ptr if len(from_) == 1 else cyfrom_, <cydriver.CUgraphNode*>(<CUgraphNode>to[0])._ptr if len(to) == 1 else cyto, (<CUgraphEdgeData>edgeData[0])._ptr if len(edgeData) == 1 else cyedgeData, numDependencies)
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if cyto is not NULL:
-        free(cyto)
-    if cyedgeData is not NULL:
-        free(cyedgeData)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphRemoveDependencies' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphRemoveDependencies(hGraph, from_ : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], to : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies):
-    """ Removes dependency edges from a graph.
-
-    The number of `dependencies` to be removed is defined by
-    `numDependencies`. Elements in `from` and `to` at corresponding indices
-    define a dependency. Each node in `from` and `to` must belong to
-    `hGraph`.
-
-    If `numDependencies` is 0, elements in `from` and `to` will be ignored.
-    Specifying a non-existing dependency will return an error.
-
-    Dependencies cannot be removed from graphs which contain allocation or
-    free nodes. Any attempt to do so will return an error.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph from which to remove dependencies
-    from : List[:py:obj:`~.CUgraphNode`]
-        Array of nodes that provide the dependencies
-    to : List[:py:obj:`~.CUgraphNode`]
-        Array of dependent nodes
-    numDependencies : size_t
-        Number of dependencies to be removed
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes`
-    """
-    to = [] if to is None else to
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in to):
-        raise TypeError("Argument 'to' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    from_ = [] if from_ is None else from_
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in from_):
-        raise TypeError("Argument 'from_' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef cydriver.CUgraphNode* cyfrom_ = NULL
-    if len(from_) > 0:
-        cyfrom_ = <cydriver.CUgraphNode*> calloc(len(from_), sizeof(cydriver.CUgraphNode))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(from_)):
-                cyfrom_[idx] = <cydriver.CUgraphNode>(<CUgraphNode>from_[idx])._ptr[0]
-    cdef cydriver.CUgraphNode* cyto = NULL
-    if len(to) > 0:
-        cyto = <cydriver.CUgraphNode*> calloc(len(to), sizeof(cydriver.CUgraphNode))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(to)):
-                cyto[idx] = <cydriver.CUgraphNode>(<CUgraphNode>to[idx])._ptr[0]
-    err = cydriver.cuGraphRemoveDependencies(cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>from_[0])._ptr if len(from_) == 1 else cyfrom_, <cydriver.CUgraphNode*>(<CUgraphNode>to[0])._ptr if len(to) == 1 else cyto, numDependencies)
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if cyto is not NULL:
-        free(cyto)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphRemoveDependencies_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphRemoveDependencies_v2(hGraph, from_ : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], to : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], edgeData : Optional[Tuple[CUgraphEdgeData] | List[CUgraphEdgeData]], size_t numDependencies):
-    """ Removes dependency edges from a graph (12.3+)
-
-    The number of `dependencies` to be removed is defined by
-    `numDependencies`. Elements in `from` and `to` at corresponding indices
-    define a dependency. Each node in `from` and `to` must belong to
-    `hGraph`.
-
-    If `numDependencies` is 0, elements in `from` and `to` will be ignored.
-    Specifying an edge that does not exist in the graph, with data matching
-    `edgeData`, results in an error. `edgeData` is nullable, which is
-    equivalent to passing default (zeroed) data for each edge.
-
-    Dependencies cannot be removed from graphs which contain allocation or
-    free nodes. Any attempt to do so will return an error.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph from which to remove dependencies
-    from : List[:py:obj:`~.CUgraphNode`]
-        Array of nodes that provide the dependencies
-    to : List[:py:obj:`~.CUgraphNode`]
-        Array of dependent nodes
-    edgeData : List[:py:obj:`~.CUgraphEdgeData`]
-        Optional array of edge data. If NULL, edge data is assumed to be
-        default (zeroed).
-    numDependencies : size_t
-        Number of dependencies to be removed
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes`
-    """
-    edgeData = [] if edgeData is None else edgeData
-    if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in edgeData):
-        raise TypeError("Argument 'edgeData' is not instance of type (expected Tuple[cydriver.CUgraphEdgeData,] or List[cydriver.CUgraphEdgeData,]")
-    to = [] if to is None else to
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in to):
-        raise TypeError("Argument 'to' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    from_ = [] if from_ is None else from_
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in from_):
-        raise TypeError("Argument 'from_' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef cydriver.CUgraphNode* cyfrom_ = NULL
-    if len(from_) > 0:
-        cyfrom_ = <cydriver.CUgraphNode*> calloc(len(from_), sizeof(cydriver.CUgraphNode))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(from_)):
-                cyfrom_[idx] = <cydriver.CUgraphNode>(<CUgraphNode>from_[idx])._ptr[0]
-    cdef cydriver.CUgraphNode* cyto = NULL
-    if len(to) > 0:
-        cyto = <cydriver.CUgraphNode*> calloc(len(to), sizeof(cydriver.CUgraphNode))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(to)):
-                cyto[idx] = <cydriver.CUgraphNode>(<CUgraphNode>to[idx])._ptr[0]
-    cdef cydriver.CUgraphEdgeData* cyedgeData = NULL
-    if len(edgeData) > 0:
-        cyedgeData = <cydriver.CUgraphEdgeData*> calloc(len(edgeData), sizeof(cydriver.CUgraphEdgeData))
-        if cyedgeData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData)))
-        for idx in range(len(edgeData)):
-            string.memcpy(&cyedgeData[idx], (<CUgraphEdgeData>edgeData[idx])._ptr, sizeof(cydriver.CUgraphEdgeData))
-    err = cydriver.cuGraphRemoveDependencies_v2(cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>from_[0])._ptr if len(from_) == 1 else cyfrom_, <cydriver.CUgraphNode*>(<CUgraphNode>to[0])._ptr if len(to) == 1 else cyto, (<CUgraphEdgeData>edgeData[0])._ptr if len(edgeData) == 1 else cyedgeData, numDependencies)
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if cyto is not NULL:
-        free(cyto)
-    if cyedgeData is not NULL:
-        free(cyedgeData)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphDestroyNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphDestroyNode(hNode):
-    """ Remove a node from the graph.
-
-    Removes `hNode` from its graph. This operation also severs any
-    dependencies of other nodes on `hNode` and vice versa.
-
-    Nodes which belong to a graph which contains allocation or free nodes
-    cannot be destroyed. Any attempt to do so will return an error.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to remove
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    err = cydriver.cuGraphDestroyNode(cyhNode)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphInstantiateWithFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphInstantiate(hGraph, unsigned long long flags):
-    """ Creates an executable graph from a graph.
-
-    Instantiates `hGraph` as an executable graph. The graph is validated
-    for any structural constraints or intra-node constraints which were not
-    previously validated. If instantiation is successful, a handle to the
-    instantiated graph is returned in `phGraphExec`.
-
-    The `flags` parameter controls the behavior of instantiation and
-    subsequent graph launches. Valid flags are:
-
-    - :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH`, which
-      configures a graph containing memory allocation nodes to
-      automatically free any unfreed memory allocations before the graph is
-      relaunched.
-
-    - :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH`, which
-      configures the graph for launch from the device. If this flag is
-      passed, the executable graph handle returned can be used to launch
-      the graph from both the host and device. This flag can only be used
-      on platforms which support unified addressing. This flag cannot be
-      used in conjunction with
-      :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH`.
-
-    - :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY`, which
-      causes the graph to use the priorities from the per-node attributes
-      rather than the priority of the launch stream during execution. Note
-      that priorities are only available on kernel nodes, and are copied
-      from stream priority during stream capture.
-
-    If `hGraph` contains any allocation or free nodes, there can be at most
-    one executable graph in existence for that graph at a time. An attempt
-    to instantiate a second executable graph before destroying the first
-    with :py:obj:`~.cuGraphExecDestroy` will result in an error. The same
-    also applies if `hGraph` contains any device-updatable kernel nodes.
-
-    If `hGraph` contains kernels which call device-side cudaGraphLaunch()
-    from multiple contexts, this will result in an error.
-
-    Graphs instantiated for launch on the device have additional
-    restrictions which do not apply to host graphs:
-
-    - The graph's nodes must reside on a single context.
-
-    - The graph can only contain kernel nodes, memcpy nodes, memset nodes,
-      and child graph nodes.
-
-    - The graph cannot be empty and must contain at least one kernel,
-      memcpy, or memset node. Operation-specific restrictions are outlined
-      below.
-
-    - Kernel nodes:
-
-      - Use of CUDA Dynamic Parallelism is not permitted.
-
-      - Cooperative launches are permitted as long as MPS is not in use.
-
-    - Memcpy nodes:
-
-      - Only copies involving device memory and/or pinned device-mapped
-        host memory are permitted.
-
-      - Copies involving CUDA arrays are not permitted.
-
-      - Both operands must be accessible from the current context, and the
-        current context must match the context of other nodes in the graph.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to instantiate
-    flags : unsigned long long
-        Flags to control instantiation. See
-        :py:obj:`~.CUgraphInstantiate_flags`.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phGraphExec : :py:obj:`~.CUgraphExec`
-        Returns instantiated graph
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphUpload`, :py:obj:`~.cuGraphLaunch`, :py:obj:`~.cuGraphExecDestroy`
-    """
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphExec phGraphExec = CUgraphExec()
-    err = cydriver.cuGraphInstantiate(<cydriver.CUgraphExec*>phGraphExec._ptr, cyhGraph, flags)
-    return (CUresult(err), phGraphExec)
-{{endif}}
-
-{{if 'cuGraphInstantiateWithParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphInstantiateWithParams(hGraph, instantiateParams : Optional[CUDA_GRAPH_INSTANTIATE_PARAMS]):
-    """ Creates an executable graph from a graph.
-
-    Instantiates `hGraph` as an executable graph according to the
-    `instantiateParams` structure. The graph is validated for any
-    structural constraints or intra-node constraints which were not
-    previously validated. If instantiation is successful, a handle to the
-    instantiated graph is returned in `phGraphExec`.
-
-    `instantiateParams` controls the behavior of instantiation and
-    subsequent graph launches, as well as returning more detailed
-    information in the event of an error.
-    :py:obj:`~.CUDA_GRAPH_INSTANTIATE_PARAMS` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    The `flags` field controls the behavior of instantiation and subsequent
-    graph launches. Valid flags are:
-
-    - :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH`, which
-      configures a graph containing memory allocation nodes to
-      automatically free any unfreed memory allocations before the graph is
-      relaunched.
-
-    - :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD`, which will perform an
-      upload of the graph into `hUploadStream` once the graph has been
-      instantiated.
-
-    - :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH`, which
-      configures the graph for launch from the device. If this flag is
-      passed, the executable graph handle returned can be used to launch
-      the graph from both the host and device. This flag can only be used
-      on platforms which support unified addressing. This flag cannot be
-      used in conjunction with
-      :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH`.
-
-    - :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY`, which
-      causes the graph to use the priorities from the per-node attributes
-      rather than the priority of the launch stream during execution. Note
-      that priorities are only available on kernel nodes, and are copied
-      from stream priority during stream capture.
-
-    If `hGraph` contains any allocation or free nodes, there can be at most
-    one executable graph in existence for that graph at a time. An attempt
-    to instantiate a second executable graph before destroying the first
-    with :py:obj:`~.cuGraphExecDestroy` will result in an error. The same
-    also applies if `hGraph` contains any device-updatable kernel nodes.
-
-    If `hGraph` contains kernels which call device-side cudaGraphLaunch()
-    from multiple contexts, this will result in an error.
-
-    Graphs instantiated for launch on the device have additional
-    restrictions which do not apply to host graphs:
-
-    - The graph's nodes must reside on a single context.
-
-    - The graph can only contain kernel nodes, memcpy nodes, memset nodes,
-      and child graph nodes.
-
-    - The graph cannot be empty and must contain at least one kernel,
-      memcpy, or memset node. Operation-specific restrictions are outlined
-      below.
-
-    - Kernel nodes:
-
-      - Use of CUDA Dynamic Parallelism is not permitted.
-
-      - Cooperative launches are permitted as long as MPS is not in use.
-
-    - Memcpy nodes:
-
-      - Only copies involving device memory and/or pinned device-mapped
-        host memory are permitted.
-
-      - Copies involving CUDA arrays are not permitted.
-
-      - Both operands must be accessible from the current context, and the
-        current context must match the context of other nodes in the graph.
-
-    In the event of an error, the `result_out` and `hErrNode_out` fields
-    will contain more information about the nature of the error. Possible
-    error reporting includes:
-
-    - :py:obj:`~.CUDA_GRAPH_INSTANTIATE_ERROR`, if passed an invalid value
-      or if an unexpected error occurred which is described by the return
-      value of the function. `hErrNode_out` will be set to NULL.
-
-    - :py:obj:`~.CUDA_GRAPH_INSTANTIATE_INVALID_STRUCTURE`, if the graph
-      structure is invalid. `hErrNode_out` will be set to one of the
-      offending nodes.
-
-    - :py:obj:`~.CUDA_GRAPH_INSTANTIATE_NODE_OPERATION_NOT_SUPPORTED`, if
-      the graph is instantiated for device launch but contains a node of an
-      unsupported node type, or a node which performs unsupported
-      operations, such as use of CUDA dynamic parallelism within a kernel
-      node. `hErrNode_out` will be set to this node.
-
-    - :py:obj:`~.CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED`, if
-      the graph is instantiated for device launch but a node’s context
-      differs from that of another node. This error can also be returned if
-      a graph is not instantiated for device launch and it contains kernels
-      which call device-side cudaGraphLaunch() from multiple contexts.
-      `hErrNode_out` will be set to this node.
-
-    If instantiation is successful, `result_out` will be set to
-    :py:obj:`~.CUDA_GRAPH_INSTANTIATE_SUCCESS`, and `hErrNode_out` will be
-    set to NULL.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to instantiate
-    instantiateParams : :py:obj:`~.CUDA_GRAPH_INSTANTIATE_PARAMS`
-        Instantiation parameters
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    phGraphExec : :py:obj:`~.CUgraphExec`
-        Returns instantiated graph
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphExecDestroy`
-    """
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphExec phGraphExec = CUgraphExec()
-    cdef cydriver.CUDA_GRAPH_INSTANTIATE_PARAMS* cyinstantiateParams_ptr = instantiateParams._ptr if instantiateParams != None else NULL
-    err = cydriver.cuGraphInstantiateWithParams(<cydriver.CUgraphExec*>phGraphExec._ptr, cyhGraph, cyinstantiateParams_ptr)
-    return (CUresult(err), phGraphExec)
-{{endif}}
-
-{{if 'cuGraphExecGetFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecGetFlags(hGraphExec):
-    """ Query the instantiation flags of an executable graph.
-
-    Returns the flags that were passed to instantiation for the given
-    executable graph. :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD` will
-    not be returned by this API as it does not affect the resulting
-    executable graph.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph to query
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    flags : :py:obj:`~.cuuint64_t`
-        Returns the instantiation flags
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphInstantiateWithParams`
-    """
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    cdef cuuint64_t flags = cuuint64_t()
-    err = cydriver.cuGraphExecGetFlags(cyhGraphExec, <cydriver.cuuint64_t*>flags._ptr)
-    return (CUresult(err), flags)
-{{endif}}
-
-{{if 'cuGraphExecKernelNodeSetParams_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecKernelNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUDA_KERNEL_NODE_PARAMS]):
-    """ Sets the parameters for a kernel node in the given graphExec.
-
-    Sets the parameters of a kernel node in an executable graph
-    `hGraphExec`. The node is identified by the corresponding node `hNode`
-    in the non-executable graph, from which the executable graph was
-    instantiated.
-
-    `hNode` must not have been removed from the original graph. All
-    `nodeParams` fields may change, but the following restrictions apply to
-    `func` updates:
-
-    - The owning context of the function cannot change.
-
-    - A node whose function originally did not use CUDA dynamic parallelism
-      cannot be updated to a function which uses CDP
-
-    - A node whose function originally did not make device-side update
-      calls cannot be updated to a function which makes device-side update
-      calls.
-
-    - If `hGraphExec` was not instantiated for device launch, a node whose
-      function originally did not use device-side cudaGraphLaunch() cannot
-      be updated to a function which uses device-side cudaGraphLaunch()
-      unless the node resides on the same context as nodes which contained
-      such calls at instantiate-time. If no such calls were present at
-      instantiation, these updates cannot be performed at all.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    If `hNode` is a device-updatable kernel node, the next upload/launch of
-    `hGraphExec` will overwrite any previous device-side updates.
-    Additionally, applying host updates to a device-updatable kernel node
-    while it is being updated from the device will result in undefined
-    behavior.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        kernel node from the graph from which graphExec was instantiated
-    nodeParams : :py:obj:`~.CUDA_KERNEL_NODE_PARAMS`
-        Updated Parameters to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphExecKernelNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecMemcpyNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecMemcpyNodeSetParams(hGraphExec, hNode, copyParams : Optional[CUDA_MEMCPY3D], ctx):
-    """ Sets the parameters for a memcpy node in the given graphExec.
-
-    Updates the work represented by `hNode` in `hGraphExec` as though
-    `hNode` had contained `copyParams` at instantiation. hNode must remain
-    in the graph which was used to instantiate `hGraphExec`. Changed edges
-    to and from hNode are ignored.
-
-    The source and destination memory in `copyParams` must be allocated
-    from the same contexts as the original source and destination memory.
-    Both the instantiation-time memory operands and the memory operands in
-    `copyParams` must be 1-dimensional. Zero-length operations are not
-    supported.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. hNode is also not modified by this call.
-
-    Returns CUDA_ERROR_INVALID_VALUE if the memory operands' mappings
-    changed or either the original or new memory operands are
-    multidimensional.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Memcpy node from the graph which was used to instantiate graphExec
-    copyParams : :py:obj:`~.CUDA_MEMCPY3D`
-        The updated parameters to set
-    ctx : :py:obj:`~.CUcontext`
-        Context on which to run the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUcontext cyctx
-    if ctx is None:
-        cyctx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(ctx, (CUcontext,)):
-        pctx = int(ctx)
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    else:
-        pctx = int(CUcontext(ctx))
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    cdef cydriver.CUDA_MEMCPY3D* cycopyParams_ptr = copyParams._ptr if copyParams != None else NULL
-    err = cydriver.cuGraphExecMemcpyNodeSetParams(cyhGraphExec, cyhNode, cycopyParams_ptr, cyctx)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecMemsetNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecMemsetNodeSetParams(hGraphExec, hNode, memsetParams : Optional[CUDA_MEMSET_NODE_PARAMS], ctx):
-    """ Sets the parameters for a memset node in the given graphExec.
-
-    Updates the work represented by `hNode` in `hGraphExec` as though
-    `hNode` had contained `memsetParams` at instantiation. hNode must
-    remain in the graph which was used to instantiate `hGraphExec`. Changed
-    edges to and from hNode are ignored.
-
-    Zero sized operations are not supported.
-
-    The new destination pointer in memsetParams must be to the same kind of
-    allocation as the original destination pointer and have the same
-    context association and device mapping as the original destination
-    pointer.
-
-    Both the value and pointer address may be updated.   Changing other
-    aspects of the memset (width, height, element size or pitch) may cause
-    the update to be rejected. Specifically, for 2d memsets, all dimension
-    changes are rejected. For 1d memsets, changes in height are explicitly
-    rejected and other changes are oportunistically allowed if the
-    resulting work maps onto the work resources already allocated for the
-    node.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. hNode is also not modified by this call.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Memset node from the graph which was used to instantiate graphExec
-    memsetParams : :py:obj:`~.CUDA_MEMSET_NODE_PARAMS`
-        The updated parameters to set
-    ctx : :py:obj:`~.CUcontext`
-        Context on which to run the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddMemsetNode`, :py:obj:`~.cuGraphMemsetNodeSetParams`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUcontext cyctx
-    if ctx is None:
-        cyctx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(ctx, (CUcontext,)):
-        pctx = int(ctx)
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    else:
-        pctx = int(CUcontext(ctx))
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cymemsetParams_ptr = memsetParams._ptr if memsetParams != None else NULL
-    err = cydriver.cuGraphExecMemsetNodeSetParams(cyhGraphExec, cyhNode, cymemsetParams_ptr, cyctx)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecHostNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecHostNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUDA_HOST_NODE_PARAMS]):
-    """ Sets the parameters for a host node in the given graphExec.
-
-    Updates the work represented by `hNode` in `hGraphExec` as though
-    `hNode` had contained `nodeParams` at instantiation. hNode must remain
-    in the graph which was used to instantiate `hGraphExec`. Changed edges
-    to and from hNode are ignored.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. hNode is also not modified by this call.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Host node from the graph which was used to instantiate graphExec
-    nodeParams : :py:obj:`~.CUDA_HOST_NODE_PARAMS`
-        The updated parameters to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphHostNodeSetParams`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphExecHostNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecChildGraphNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecChildGraphNodeSetParams(hGraphExec, hNode, childGraph):
-    """ Updates node parameters in the child graph node in the given graphExec.
-
-    Updates the work represented by `hNode` in `hGraphExec` as though the
-    nodes contained in `hNode's` graph had the parameters contained in
-    `childGraph's` nodes at instantiation. `hNode` must remain in the graph
-    which was used to instantiate `hGraphExec`. Changed edges to and from
-    `hNode` are ignored.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    The topology of `childGraph`, as well as the node insertion order, must
-    match that of the graph contained in `hNode`. See
-    :py:obj:`~.cuGraphExecUpdate()` for a list of restrictions on what can
-    be updated in an instantiated graph. The update is recursive, so child
-    graph nodes contained within the top level child graph will also be
-    updated.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Host node from the graph which was used to instantiate graphExec
-    childGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph supplying the updated parameters
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphChildGraphNodeGetGraph`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUgraph cychildGraph
-    if childGraph is None:
-        cychildGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(childGraph, (CUgraph,)):
-        pchildGraph = int(childGraph)
-        cychildGraph = <cydriver.CUgraph><void_ptr>pchildGraph
-    else:
-        pchildGraph = int(CUgraph(childGraph))
-        cychildGraph = <cydriver.CUgraph><void_ptr>pchildGraph
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    err = cydriver.cuGraphExecChildGraphNodeSetParams(cyhGraphExec, cyhNode, cychildGraph)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecEventRecordNodeSetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event):
-    """ Sets the event for an event record node in the given graphExec.
-
-    Sets the event of an event record node in an executable graph
-    `hGraphExec`. The node is identified by the corresponding node `hNode`
-    in the non-executable graph, from which the executable graph was
-    instantiated.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        event record node from the graph from which graphExec was
-        instantiated
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Updated event to use
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphEventRecordNodeGetEvent`, :py:obj:`~.cuGraphEventWaitNodeSetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUevent cyevent
-    if event is None:
-        cyevent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(event, (CUevent,)):
-        pevent = int(event)
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    else:
-        pevent = int(CUevent(event))
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    err = cydriver.cuGraphExecEventRecordNodeSetEvent(cyhGraphExec, cyhNode, cyevent)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecEventWaitNodeSetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event):
-    """ Sets the event for an event wait node in the given graphExec.
-
-    Sets the event of an event wait node in an executable graph
-    `hGraphExec`. The node is identified by the corresponding node `hNode`
-    in the non-executable graph, from which the executable graph was
-    instantiated.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        event wait node from the graph from which graphExec was
-        instantiated
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Updated event to use
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphEventWaitNodeGetEvent`, :py:obj:`~.cuGraphEventRecordNodeSetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUevent cyevent
-    if event is None:
-        cyevent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(event, (CUevent,)):
-        pevent = int(event)
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    else:
-        pevent = int(CUevent(event))
-        cyevent = <cydriver.CUevent><void_ptr>pevent
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    err = cydriver.cuGraphExecEventWaitNodeSetEvent(cyhGraphExec, cyhNode, cyevent)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUDA_EXT_SEM_SIGNAL_NODE_PARAMS]):
-    """ Sets the parameters for an external semaphore signal node in the given graphExec.
-
-    Sets the parameters of an external semaphore signal node in an
-    executable graph `hGraphExec`. The node is identified by the
-    corresponding node `hNode` in the non-executable graph, from which the
-    executable graph was instantiated.
-
-    `hNode` must not have been removed from the original graph.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    Changing `nodeParams->numExtSems` is not supported.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        semaphore signal node from the graph from which graphExec was
-        instantiated
-    nodeParams : :py:obj:`~.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS`
-        Updated Parameters to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphExecExternalSemaphoresSignalNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUDA_EXT_SEM_WAIT_NODE_PARAMS]):
-    """ Sets the parameters for an external semaphore wait node in the given graphExec.
-
-    Sets the parameters of an external semaphore wait node in an executable
-    graph `hGraphExec`. The node is identified by the corresponding node
-    `hNode` in the non-executable graph, from which the executable graph
-    was instantiated.
-
-    `hNode` must not have been removed from the original graph.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    Changing `nodeParams->numExtSems` is not supported.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        semaphore wait node from the graph from which graphExec was
-        instantiated
-    nodeParams : :py:obj:`~.CUDA_EXT_SEM_WAIT_NODE_PARAMS`
-        Updated Parameters to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphExecExternalSemaphoresWaitNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphNodeSetEnabled' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphNodeSetEnabled(hGraphExec, hNode, unsigned int isEnabled):
-    """ Enables or disables the specified node in the given graphExec.
-
-    Sets `hNode` to be either enabled or disabled. Disabled nodes are
-    functionally equivalent to empty nodes until they are reenabled.
-    Existing node parameters are not affected by disabling/enabling the
-    node.
-
-    The node is identified by the corresponding node `hNode` in the non-
-    executable graph, from which the executable graph was instantiated.
-
-    `hNode` must not have been removed from the original graph.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    If `hNode` is a device-updatable kernel node, the next upload/launch of
-    `hGraphExec` will overwrite any previous device-side updates.
-    Additionally, applying host updates to a device-updatable kernel node
-    while it is being updated from the device will result in undefined
-    behavior.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node from the graph from which graphExec was instantiated
-    isEnabled : unsigned int
-        Node is enabled if != 0, otherwise the node is disabled
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeGetEnabled`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` :py:obj:`~.cuGraphLaunch`
-
-    Notes
-    -----
-    Currently only kernel, memset and memcpy nodes are supported.
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    err = cydriver.cuGraphNodeSetEnabled(cyhGraphExec, cyhNode, isEnabled)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphNodeGetEnabled' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphNodeGetEnabled(hGraphExec, hNode):
-    """ Query whether a node in the given graphExec is enabled.
-
-    Sets isEnabled to 1 if `hNode` is enabled, or 0 if `hNode` is disabled.
-
-    The node is identified by the corresponding node `hNode` in the non-
-    executable graph, from which the executable graph was instantiated.
-
-    `hNode` must not have been removed from the original graph.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node from the graph from which graphExec was instantiated
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`,
-    isEnabled : unsigned int
-        Location to return the enabled status of the node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphNodeSetEnabled`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` :py:obj:`~.cuGraphLaunch`
-
-    Notes
-    -----
-    Currently only kernel, memset and memcpy nodes are supported.
-
-    This function will not reflect device-side updates for device-updatable kernel nodes.
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    cdef unsigned int isEnabled = 0
-    err = cydriver.cuGraphNodeGetEnabled(cyhGraphExec, cyhNode, &isEnabled)
-    return (CUresult(err), isEnabled)
-{{endif}}
-
-{{if 'cuGraphUpload' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphUpload(hGraphExec, hStream):
-    """ Uploads an executable graph in a stream.
-
-    Uploads `hGraphExec` to the device in `hStream` without executing it.
-    Uploads of the same `hGraphExec` will be serialized. Each upload is
-    ordered behind both any previous work in `hStream` and any previous
-    launches of `hGraphExec`. Uses memory cached by `stream` to back the
-    allocations owned by `hGraphExec`.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        Executable graph to upload
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to upload the graph
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphLaunch`, :py:obj:`~.cuGraphExecDestroy`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    err = cydriver.cuGraphUpload(cyhGraphExec, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphLaunch' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphLaunch(hGraphExec, hStream):
-    """ Launches an executable graph in a stream.
-
-    Executes `hGraphExec` in `hStream`. Only one instance of `hGraphExec`
-    may be executing at a time. Each launch is ordered behind both any
-    previous work in `hStream` and any previous launches of `hGraphExec`.
-    To execute a graph concurrently, it must be instantiated multiple times
-    into multiple executable graphs.
-
-    If any allocations created by `hGraphExec` remain unfreed (from a
-    previous launch) and `hGraphExec` was not instantiated with
-    :py:obj:`~.CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH`, the launch
-    will fail with :py:obj:`~.CUDA_ERROR_INVALID_VALUE`.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        Executable graph to launch
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to launch the graph
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphUpload`, :py:obj:`~.cuGraphExecDestroy`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    err = cydriver.cuGraphLaunch(cyhGraphExec, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecDestroy(hGraphExec):
-    """ Destroys an executable graph.
-
-    Destroys the executable graph specified by `hGraphExec`, as well as all
-    of its executable nodes. If the executable graph is in-flight, it will
-    not be terminated, but rather freed asynchronously on completion.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        Executable graph to destroy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphUpload`, :py:obj:`~.cuGraphLaunch`
-    """
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    err = cydriver.cuGraphExecDestroy(cyhGraphExec)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphDestroy(hGraph):
-    """ Destroys a graph.
-
-    Destroys the graph specified by `hGraph`, as well as all of its nodes.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to destroy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphCreate`
-    """
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    err = cydriver.cuGraphDestroy(cyhGraph)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecUpdate_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecUpdate(hGraphExec, hGraph):
-    """ Check whether an executable graph can be updated with a graph and perform the update if possible.
-
-    Updates the node parameters in the instantiated graph specified by
-    `hGraphExec` with the node parameters in a topologically identical
-    graph specified by `hGraph`.
-
-    Limitations:
-
-    - Kernel nodes:
-
-      - The owning context of the function cannot change.
-
-      - A node whose function originally did not use CUDA dynamic
-        parallelism cannot be updated to a function which uses CDP.
-
-      - A node whose function originally did not make device-side update
-        calls cannot be updated to a function which makes device-side
-        update calls.
-
-      - A cooperative node cannot be updated to a non-cooperative node, and
-        vice-versa.
-
-      - If the graph was instantiated with
-        CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY, the priority
-        attribute cannot change. Equality is checked on the originally
-        requested priority values, before they are clamped to the device's
-        supported range.
-
-      - If `hGraphExec` was not instantiated for device launch, a node
-        whose function originally did not use device-side cudaGraphLaunch()
-        cannot be updated to a function which uses device-side
-        cudaGraphLaunch() unless the node resides on the same context as
-        nodes which contained such calls at instantiate-time. If no such
-        calls were present at instantiation, these updates cannot be
-        performed at all.
-
-      - Neither `hGraph` nor `hGraphExec` may contain device-updatable
-        kernel nodes.
-
-    - Memset and memcpy nodes:
-
-      - The CUDA device(s) to which the operand(s) was allocated/mapped
-        cannot change.
-
-      - The source/destination memory must be allocated from the same
-        contexts as the original source/destination memory.
-
-      - For 2d memsets, only address and assinged value may be updated.
-
-      - For 1d memsets, updating dimensions is also allowed, but may fail
-        if the resulting operation doesn't map onto the work resources
-        already allocated for the node.
-
-    - Additional memcpy node restrictions:
-
-      - Changing either the source or destination memory type(i.e.
-        CU_MEMORYTYPE_DEVICE, CU_MEMORYTYPE_ARRAY, etc.) is not supported.
-
-    - External semaphore wait nodes and record nodes:
-
-      - Changing the number of semaphores is not supported.
-
-    - Conditional nodes:
-
-      - Changing node parameters is not supported.
-
-      - Changeing parameters of nodes within the conditional body graph is
-        subject to the rules above.
-
-      - Conditional handle flags and default values are updated as part of
-        the graph update.
-
-    Note: The API may add further restrictions in future releases. The
-    return code should always be checked.
-
-    cuGraphExecUpdate sets the result member of `resultInfo` to
-    CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED under the following
-    conditions:
-
-    - The count of nodes directly in `hGraphExec` and `hGraph` differ, in
-      which case resultInfo->errorNode is set to NULL.
-
-    - `hGraph` has more exit nodes than `hGraph`, in which case
-      resultInfo->errorNode is set to one of the exit nodes in hGraph.
-
-    - A node in `hGraph` has a different number of dependencies than the
-      node from `hGraphExec` it is paired with, in which case
-      resultInfo->errorNode is set to the node from `hGraph`.
-
-    - A node in `hGraph` has a dependency that does not match with the
-      corresponding dependency of the paired node from `hGraphExec`.
-      resultInfo->errorNode will be set to the node from `hGraph`.
-      resultInfo->errorFromNode will be set to the mismatched dependency.
-      The dependencies are paired based on edge order and a dependency does
-      not match when the nodes are already paired based on other edges
-      examined in the graph.
-
-    cuGraphExecUpdate sets the result member of `resultInfo` to:
-
-    - CU_GRAPH_EXEC_UPDATE_ERROR if passed an invalid value.
-
-    - CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED if the graph topology
-      changed
-
-    - CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED if the type of a node
-      changed, in which case `hErrorNode_out` is set to the node from
-      `hGraph`.
-
-    - CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE if the
-      function changed in an unsupported way(see note above), in which case
-      `hErrorNode_out` is set to the node from `hGraph`
-
-    - CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED if any parameters to a
-      node changed in a way that is not supported, in which case
-      `hErrorNode_out` is set to the node from `hGraph`.
-
-    - CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED if any attributes of a
-      node changed in a way that is not supported, in which case
-      `hErrorNode_out` is set to the node from `hGraph`.
-
-    - CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED if something about a node is
-      unsupported, like the node's type or configuration, in which case
-      `hErrorNode_out` is set to the node from `hGraph`
-
-    If the update fails for a reason not listed above, the result member of
-    `resultInfo` will be set to CU_GRAPH_EXEC_UPDATE_ERROR. If the update
-    succeeds, the result member will be set to
-    CU_GRAPH_EXEC_UPDATE_SUCCESS.
-
-    cuGraphExecUpdate returns CUDA_SUCCESS when the updated was performed
-    successfully. It returns CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE if the
-    graph update was not performed because it included changes which
-    violated constraints specific to instantiated graph update.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The instantiated graph to be updated
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph containing the updated parameters
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE`,
-    resultInfo : :py:obj:`~.CUgraphExecUpdateResultInfo`
-        the error info structure
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    cdef CUgraphExecUpdateResultInfo resultInfo = CUgraphExecUpdateResultInfo()
-    err = cydriver.cuGraphExecUpdate(cyhGraphExec, cyhGraph, <cydriver.CUgraphExecUpdateResultInfo*>resultInfo._ptr)
-    return (CUresult(err), resultInfo)
-{{endif}}
-
-{{if 'cuGraphKernelNodeCopyAttributes' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphKernelNodeCopyAttributes(dst, src):
-    """ Copies attributes from source node to destination node.
-
-    Copies attributes from source node `src` to destination node `dst`.
-    Both node must have the same context.
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Destination node
-    src : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Source node For list of attributes see
-        :py:obj:`~.CUkernelNodeAttrID`
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.CUaccessPolicyWindow`
-    """
-    cdef cydriver.CUgraphNode cysrc
-    if src is None:
-        cysrc = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(src, (CUgraphNode,)):
-        psrc = int(src)
-        cysrc = <cydriver.CUgraphNode><void_ptr>psrc
-    else:
-        psrc = int(CUgraphNode(src))
-        cysrc = <cydriver.CUgraphNode><void_ptr>psrc
-    cdef cydriver.CUgraphNode cydst
-    if dst is None:
-        cydst = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(dst, (CUgraphNode,)):
-        pdst = int(dst)
-        cydst = <cydriver.CUgraphNode><void_ptr>pdst
-    else:
-        pdst = int(CUgraphNode(dst))
-        cydst = <cydriver.CUgraphNode><void_ptr>pdst
-    err = cydriver.cuGraphKernelNodeCopyAttributes(cydst, cysrc)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphKernelNodeGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphKernelNodeGetAttribute(hNode, attr not None : CUkernelNodeAttrID):
-    """ Queries node attribute.
-
-    Queries attribute `attr` from node `hNode` and stores it in
-    corresponding member of `value_out`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-
-    attr : :py:obj:`~.CUkernelNodeAttrID`
-
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-    value_out : :py:obj:`~.CUkernelNodeAttrValue`
-
-
-    See Also
-    --------
-    :py:obj:`~.CUaccessPolicyWindow`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUkernelNodeAttrID cyattr = attr.value
-    cdef CUkernelNodeAttrValue value_out = CUkernelNodeAttrValue()
-    err = cydriver.cuGraphKernelNodeGetAttribute(cyhNode, cyattr, <cydriver.CUkernelNodeAttrValue*>value_out._ptr)
-    return (CUresult(err), value_out)
-{{endif}}
-
-{{if 'cuGraphKernelNodeSetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphKernelNodeSetAttribute(hNode, attr not None : CUkernelNodeAttrID, value : Optional[CUkernelNodeAttrValue]):
-    """ Sets node attribute.
-
-    Sets attribute `attr` on node `hNode` from corresponding attribute of
-    `value`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-
-    attr : :py:obj:`~.CUkernelNodeAttrID`
-
-    value : :py:obj:`~.CUkernelNodeAttrValue`
-
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`
-
-    See Also
-    --------
-    :py:obj:`~.CUaccessPolicyWindow`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUkernelNodeAttrID cyattr = attr.value
-    cdef cydriver.CUkernelNodeAttrValue* cyvalue_ptr = value._ptr if value != None else NULL
-    err = cydriver.cuGraphKernelNodeSetAttribute(cyhNode, cyattr, cyvalue_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphDebugDotPrint' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphDebugDotPrint(hGraph, char* path, unsigned int flags):
-    """ Write a DOT file describing graph structure.
-
-    Using the provided `hGraph`, write to `path` a DOT formatted
-    description of the graph. By default this includes the graph topology,
-    node types, node id, kernel names and memcpy direction. `flags` can be
-    specified to write more detailed information about each node type such
-    as parameter values, kernel attributes, node and function handles.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph to create a DOT file from
-    path : bytes
-        The path to write the DOT file to
-    flags : unsigned int
-        Flags from CUgraphDebugDot_flags for specifying which additional
-        node information to write
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OPERATING_SYSTEM`
-    """
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    err = cydriver.cuGraphDebugDotPrint(cyhGraph, path, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuUserObjectCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuUserObjectCreate(ptr, destroy, unsigned int initialRefcount, unsigned int flags):
-    """ Create a user object.
-
-    Create a user object with the specified destructor callback and initial
-    reference count. The initial references are owned by the caller.
-
-    Destructor callbacks cannot make CUDA API calls and should avoid
-    blocking behavior, as they are executed by a shared internal thread.
-    Another thread may be signaled to perform such actions, if it does not
-    block forward progress of tasks scheduled through CUDA.
-
-    See CUDA User Objects in the CUDA C++ Programming Guide for more
-    information on user objects.
-
-    Parameters
-    ----------
-    ptr : Any
-        The pointer to pass to the destroy function
-    destroy : :py:obj:`~.CUhostFn`
-        Callback to free the user object when it is no longer in use
-    initialRefcount : unsigned int
-        The initial refcount to create the object with, typically 1. The
-        initial references are owned by the calling thread.
-    flags : unsigned int
-        Currently it is required to pass
-        :py:obj:`~.CU_USER_OBJECT_NO_DESTRUCTOR_SYNC`, which is the only
-        defined flag. This indicates that the destroy callback cannot be
-        waited on by any CUDA API. Users requiring synchronization of the
-        callback should signal its completion manually.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    object_out : :py:obj:`~.CUuserObject`
-        Location to return the user object handle
-
-    See Also
-    --------
-    :py:obj:`~.cuUserObjectRetain`, :py:obj:`~.cuUserObjectRelease`, :py:obj:`~.cuGraphRetainUserObject`, :py:obj:`~.cuGraphReleaseUserObject`, :py:obj:`~.cuGraphCreate`
-    """
-    cdef cydriver.CUhostFn cydestroy
-    if destroy is None:
-        cydestroy = <cydriver.CUhostFn><void_ptr>0
-    elif isinstance(destroy, (CUhostFn,)):
-        pdestroy = int(destroy)
-        cydestroy = <cydriver.CUhostFn><void_ptr>pdestroy
-    else:
-        pdestroy = int(CUhostFn(destroy))
-        cydestroy = <cydriver.CUhostFn><void_ptr>pdestroy
-    cdef CUuserObject object_out = CUuserObject()
-    cyptr = utils.HelperInputVoidPtr(ptr)
-    cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
-    err = cydriver.cuUserObjectCreate(<cydriver.CUuserObject*>object_out._ptr, cyptr_ptr, cydestroy, initialRefcount, flags)
-    return (CUresult(err), object_out)
-{{endif}}
-
-{{if 'cuUserObjectRetain' in found_functions}}
-
-@cython.embedsignature(True)
-def cuUserObjectRetain(object, unsigned int count):
-    """ Retain a reference to a user object.
-
-    Retains new references to a user object. The new references are owned
-    by the caller.
-
-    See CUDA User Objects in the CUDA C++ Programming Guide for more
-    information on user objects.
-
-    Parameters
-    ----------
-    object : :py:obj:`~.CUuserObject`
-        The object to retain
-    count : unsigned int
-        The number of references to retain, typically 1. Must be nonzero
-        and not larger than INT_MAX.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuUserObjectCreate`, :py:obj:`~.cuUserObjectRelease`, :py:obj:`~.cuGraphRetainUserObject`, :py:obj:`~.cuGraphReleaseUserObject`, :py:obj:`~.cuGraphCreate`
-    """
-    cdef cydriver.CUuserObject cyobject
-    if object is None:
-        cyobject = <cydriver.CUuserObject><void_ptr>0
-    elif isinstance(object, (CUuserObject,)):
-        pobject = int(object)
-        cyobject = <cydriver.CUuserObject><void_ptr>pobject
-    else:
-        pobject = int(CUuserObject(object))
-        cyobject = <cydriver.CUuserObject><void_ptr>pobject
-    err = cydriver.cuUserObjectRetain(cyobject, count)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuUserObjectRelease' in found_functions}}
-
-@cython.embedsignature(True)
-def cuUserObjectRelease(object, unsigned int count):
-    """ Release a reference to a user object.
-
-    Releases user object references owned by the caller. The object's
-    destructor is invoked if the reference count reaches zero.
-
-    It is undefined behavior to release references not owned by the caller,
-    or to use a user object handle after all references are released.
-
-    See CUDA User Objects in the CUDA C++ Programming Guide for more
-    information on user objects.
-
-    Parameters
-    ----------
-    object : :py:obj:`~.CUuserObject`
-        The object to release
-    count : unsigned int
-        The number of references to release, typically 1. Must be nonzero
-        and not larger than INT_MAX.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuUserObjectCreate`, :py:obj:`~.cuUserObjectRetain`, :py:obj:`~.cuGraphRetainUserObject`, :py:obj:`~.cuGraphReleaseUserObject`, :py:obj:`~.cuGraphCreate`
-    """
-    cdef cydriver.CUuserObject cyobject
-    if object is None:
-        cyobject = <cydriver.CUuserObject><void_ptr>0
-    elif isinstance(object, (CUuserObject,)):
-        pobject = int(object)
-        cyobject = <cydriver.CUuserObject><void_ptr>pobject
-    else:
-        pobject = int(CUuserObject(object))
-        cyobject = <cydriver.CUuserObject><void_ptr>pobject
-    err = cydriver.cuUserObjectRelease(cyobject, count)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphRetainUserObject' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphRetainUserObject(graph, object, unsigned int count, unsigned int flags):
-    """ Retain a reference to a user object from a graph.
-
-    Creates or moves user object references that will be owned by a CUDA
-    graph.
-
-    See CUDA User Objects in the CUDA C++ Programming Guide for more
-    information on user objects.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph to associate the reference with
-    object : :py:obj:`~.CUuserObject`
-        The user object to retain a reference for
-    count : unsigned int
-        The number of references to add to the graph, typically 1. Must be
-        nonzero and not larger than INT_MAX.
-    flags : unsigned int
-        The optional flag :py:obj:`~.CU_GRAPH_USER_OBJECT_MOVE` transfers
-        references from the calling thread, rather than create new
-        references. Pass 0 to create new references.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuUserObjectCreate`, :py:obj:`~.cuUserObjectRetain`, :py:obj:`~.cuUserObjectRelease`, :py:obj:`~.cuGraphReleaseUserObject`, :py:obj:`~.cuGraphCreate`
-    """
-    cdef cydriver.CUuserObject cyobject
-    if object is None:
-        cyobject = <cydriver.CUuserObject><void_ptr>0
-    elif isinstance(object, (CUuserObject,)):
-        pobject = int(object)
-        cyobject = <cydriver.CUuserObject><void_ptr>pobject
-    else:
-        pobject = int(CUuserObject(object))
-        cyobject = <cydriver.CUuserObject><void_ptr>pobject
-    cdef cydriver.CUgraph cygraph
-    if graph is None:
-        cygraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(graph, (CUgraph,)):
-        pgraph = int(graph)
-        cygraph = <cydriver.CUgraph><void_ptr>pgraph
-    else:
-        pgraph = int(CUgraph(graph))
-        cygraph = <cydriver.CUgraph><void_ptr>pgraph
-    err = cydriver.cuGraphRetainUserObject(cygraph, cyobject, count, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphReleaseUserObject' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphReleaseUserObject(graph, object, unsigned int count):
-    """ Release a user object reference from a graph.
-
-    Releases user object references owned by a graph.
-
-    See CUDA User Objects in the CUDA C++ Programming Guide for more
-    information on user objects.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph that will release the reference
-    object : :py:obj:`~.CUuserObject`
-        The user object to release a reference for
-    count : unsigned int
-        The number of references to release, typically 1. Must be nonzero
-        and not larger than INT_MAX.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuUserObjectCreate`, :py:obj:`~.cuUserObjectRetain`, :py:obj:`~.cuUserObjectRelease`, :py:obj:`~.cuGraphRetainUserObject`, :py:obj:`~.cuGraphCreate`
-    """
-    cdef cydriver.CUuserObject cyobject
-    if object is None:
-        cyobject = <cydriver.CUuserObject><void_ptr>0
-    elif isinstance(object, (CUuserObject,)):
-        pobject = int(object)
-        cyobject = <cydriver.CUuserObject><void_ptr>pobject
-    else:
-        pobject = int(CUuserObject(object))
-        cyobject = <cydriver.CUuserObject><void_ptr>pobject
-    cdef cydriver.CUgraph cygraph
-    if graph is None:
-        cygraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(graph, (CUgraph,)):
-        pgraph = int(graph)
-        cygraph = <cydriver.CUgraph><void_ptr>pgraph
-    else:
-        pgraph = int(CUgraph(graph))
-        cygraph = <cydriver.CUgraph><void_ptr>pgraph
-    err = cydriver.cuGraphReleaseUserObject(cygraph, cyobject, count)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphAddNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddNode(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], size_t numDependencies, nodeParams : Optional[CUgraphNodeParams]):
-    """ Adds a node of arbitrary type to a graph.
-
-    Creates a new node in `hGraph` described by `nodeParams` with
-    `numDependencies` dependencies specified via `dependencies`.
-    `numDependencies` may be 0. `dependencies` may be null if
-    `numDependencies` is 0. `dependencies` may not have any duplicate
-    entries.
-
-    `nodeParams` is a tagged union. The node type should be specified in
-    the `typename` field, and type-specific parameters in the corresponding
-    union member. All unused bytes - that is, `reserved0` and all bytes
-    past the utilized union member - must be set to zero. It is recommended
-    to use brace initialization or memset to ensure all bytes are
-    initialized.
-
-    Note that for some node types, `nodeParams` may contain "out
-    parameters" which are modified during the call, such as
-    `nodeParams->alloc.dptr`.
-
-    A handle to the new node will be returned in `phGraphNode`.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.CUgraphNodeParams`
-        Specification of the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuGraphExecNodeSetParams`
-    """
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    cdef cydriver.CUgraphNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphAddNode(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, cynodeParams_ptr)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphAddNode_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphAddNode_v2(hGraph, dependencies : Optional[Tuple[CUgraphNode] | List[CUgraphNode]], dependencyData : Optional[Tuple[CUgraphEdgeData] | List[CUgraphEdgeData]], size_t numDependencies, nodeParams : Optional[CUgraphNodeParams]):
-    """ Adds a node of arbitrary type to a graph (12.3+)
-
-    Creates a new node in `hGraph` described by `nodeParams` with
-    `numDependencies` dependencies specified via `dependencies`.
-    `numDependencies` may be 0. `dependencies` may be null if
-    `numDependencies` is 0. `dependencies` may not have any duplicate
-    entries.
-
-    `nodeParams` is a tagged union. The node type should be specified in
-    the `typename` field, and type-specific parameters in the corresponding
-    union member. All unused bytes - that is, `reserved0` and all bytes
-    past the utilized union member - must be set to zero. It is recommended
-    to use brace initialization or memset to ensure all bytes are
-    initialized.
-
-    Note that for some node types, `nodeParams` may contain "out
-    parameters" which are modified during the call, such as
-    `nodeParams->alloc.dptr`.
-
-    A handle to the new node will be returned in `phGraphNode`.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.CUgraphNode`]
-        Dependencies of the node
-    dependencyData : List[:py:obj:`~.CUgraphEdgeData`]
-        Optional edge data for the dependencies. If NULL, the data is
-        assumed to be default (zeroed) for all dependencies.
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.CUgraphNodeParams`
-        Specification of the node
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    phGraphNode : :py:obj:`~.CUgraphNode`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuGraphExecNodeSetParams`
-    """
-    dependencyData = [] if dependencyData is None else dependencyData
-    if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData):
-        raise TypeError("Argument 'dependencyData' is not instance of type (expected Tuple[cydriver.CUgraphEdgeData,] or List[cydriver.CUgraphEdgeData,]")
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cydriver.CUgraphNode,] or List[cydriver.CUgraphNode,]")
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphNode phGraphNode = CUgraphNode()
-    cdef cydriver.CUgraphNode* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cydriver.CUgraphNode*> calloc(len(dependencies), sizeof(cydriver.CUgraphNode))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cydriver.CUgraphNode>(<CUgraphNode>dependencies[idx])._ptr[0]
-    cdef cydriver.CUgraphEdgeData* cydependencyData = NULL
-    if len(dependencyData) > 0:
-        cydependencyData = <cydriver.CUgraphEdgeData*> calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData))
-        if cydependencyData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData)))
-        for idx in range(len(dependencyData)):
-            string.memcpy(&cydependencyData[idx], (<CUgraphEdgeData>dependencyData[idx])._ptr, sizeof(cydriver.CUgraphEdgeData))
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    if numDependencies > <size_t>len(dependencyData): raise RuntimeError("List is too small: " + str(len(dependencyData)) + " < " + str(numDependencies))
-    cdef cydriver.CUgraphNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphAddNode_v2(<cydriver.CUgraphNode*>phGraphNode._ptr, cyhGraph, <cydriver.CUgraphNode*>(<CUgraphNode>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, (<CUgraphEdgeData>dependencyData[0])._ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, cynodeParams_ptr)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    if cydependencyData is not NULL:
-        free(cydependencyData)
-    return (CUresult(err), phGraphNode)
-{{endif}}
-
-{{if 'cuGraphNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphNodeSetParams(hNode, nodeParams : Optional[CUgraphNodeParams]):
-    """ Update's a graph node's parameters.
-
-    Sets the parameters of graph node `hNode` to `nodeParams`. The node
-    type specified by `nodeParams->type` must match the type of `hNode`.
-    `nodeParams` must be fully initialized and all unused bytes (reserved,
-    padding) zeroed.
-
-    Modifying parameters is not supported for node types
-    CU_GRAPH_NODE_TYPE_MEM_ALLOC and CU_GRAPH_NODE_TYPE_MEM_FREE.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    nodeParams : :py:obj:`~.CUgraphNodeParams`
-        Parameters to copy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphExecNodeSetParams`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphNodeSetParams(cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphExecNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphExecNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUgraphNodeParams]):
-    """ Update's a graph node's parameters in an instantiated graph.
-
-    Sets the parameters of a node in an executable graph `hGraphExec`. The
-    node is identified by the corresponding node `hNode` in the non-
-    executable graph from which the executable graph was instantiated.
-    `hNode` must not have been removed from the original graph.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    Allowed changes to parameters on executable graphs are as follows:
-
-    **View CUDA Toolkit Documentation for a table example**
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to update the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Corresponding node from the graph from which graphExec was
-        instantiated
-    nodeParams : :py:obj:`~.CUgraphNodeParams`
-        Updated Parameters to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphNodeSetParams` :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate`
-    """
-    cdef cydriver.CUgraphNode cyhNode
-    if hNode is None:
-        cyhNode = <cydriver.CUgraphNode><void_ptr>0
-    elif isinstance(hNode, (CUgraphNode,)):
-        phNode = int(hNode)
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    else:
-        phNode = int(CUgraphNode(hNode))
-        cyhNode = <cydriver.CUgraphNode><void_ptr>phNode
-    cdef cydriver.CUgraphExec cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>0
-    elif isinstance(hGraphExec, (CUgraphExec,)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(CUgraphExec(hGraphExec))
-        cyhGraphExec = <cydriver.CUgraphExec><void_ptr>phGraphExec
-    cdef cydriver.CUgraphNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cydriver.cuGraphExecNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphConditionalHandleCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphConditionalHandleCreate(hGraph, ctx, unsigned int defaultLaunchValue, unsigned int flags):
-    """ Create a conditional handle.
-
-    Creates a conditional handle associated with `hGraph`.
-
-    The conditional handle must be associated with a conditional node in
-    this graph or one of its children.
-
-    Handles not associated with a conditional node may cause graph
-    instantiation to fail.
-
-    Handles can only be set from the context with which they are
-    associated.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph which will contain the conditional node using this handle.
-    ctx : :py:obj:`~.CUcontext`
-        Context for the handle and associated conditional node.
-    defaultLaunchValue : unsigned int
-        Optional initial value for the conditional variable.
-    flags : unsigned int
-        Currently must be CU_GRAPH_COND_ASSIGN_DEFAULT or 0.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    pHandle_out : :py:obj:`~.CUgraphConditionalHandle`
-        Pointer used to return the handle to the caller.
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`
-    """
-    cdef cydriver.CUcontext cyctx
-    if ctx is None:
-        cyctx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(ctx, (CUcontext,)):
-        pctx = int(ctx)
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    else:
-        pctx = int(CUcontext(ctx))
-        cyctx = <cydriver.CUcontext><void_ptr>pctx
-    cdef cydriver.CUgraph cyhGraph
-    if hGraph is None:
-        cyhGraph = <cydriver.CUgraph><void_ptr>0
-    elif isinstance(hGraph, (CUgraph,)):
-        phGraph = int(hGraph)
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    else:
-        phGraph = int(CUgraph(hGraph))
-        cyhGraph = <cydriver.CUgraph><void_ptr>phGraph
-    cdef CUgraphConditionalHandle pHandle_out = CUgraphConditionalHandle()
-    err = cydriver.cuGraphConditionalHandleCreate(<cydriver.CUgraphConditionalHandle*>pHandle_out._ptr, cyhGraph, cyctx, defaultLaunchValue, flags)
-    return (CUresult(err), pHandle_out)
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-
-@cython.embedsignature(True)
-def cuOccupancyMaxActiveBlocksPerMultiprocessor(func, int blockSize, size_t dynamicSMemSize):
-    """ Returns occupancy of a function.
-
-    Returns in `*numBlocks` the number of the maximum active blocks per
-    streaming multiprocessor.
-
-    Note that the API can also be used with context-less kernel
-    :py:obj:`~.CUkernel` by querying the handle using
-    :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by
-    casting to :py:obj:`~.CUfunction`. Here, the context to use for
-    calculations will be the current context.
-
-    Parameters
-    ----------
-    func : :py:obj:`~.CUfunction`
-        Kernel for which occupancy is calculated
-    blockSize : int
-        Block size the kernel is intended to be launched with
-    dynamicSMemSize : size_t
-        Per-block dynamic shared memory usage intended, in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    numBlocks : int
-        Returned occupancy
-
-    See Also
-    --------
-    :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor`
-    """
-    cdef cydriver.CUfunction cyfunc
-    if func is None:
-        cyfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(func, (CUfunction,)):
-        pfunc = int(func)
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    else:
-        pfunc = int(CUfunction(func))
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    cdef int numBlocks = 0
-    err = cydriver.cuOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, cyfunc, blockSize, dynamicSMemSize)
-    return (CUresult(err), numBlocks)
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(func, int blockSize, size_t dynamicSMemSize, unsigned int flags):
-    """ Returns occupancy of a function.
-
-    Returns in `*numBlocks` the number of the maximum active blocks per
-    streaming multiprocessor.
-
-    The `Flags` parameter controls how special cases are handled. The valid
-    flags are:
-
-    - :py:obj:`~.CU_OCCUPANCY_DEFAULT`, which maintains the default
-      behavior as :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessor`;
-
-    - :py:obj:`~.CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE`, which suppresses
-      the default behavior on platform where global caching affects
-      occupancy. On such platforms, if caching is enabled, but per-block SM
-      resource usage would result in zero occupancy, the occupancy
-      calculator will calculate the occupancy as if caching is disabled.
-      Setting :py:obj:`~.CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE` makes the
-      occupancy calculator to return 0 in such cases. More information can
-      be found about this feature in the "Unified L1/Texture Cache" section
-      of the Maxwell tuning guide.
-
-    Note that the API can also be with launch context-less kernel
-    :py:obj:`~.CUkernel` by querying the handle using
-    :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by
-    casting to :py:obj:`~.CUfunction`. Here, the context to use for
-    calculations will be the current context.
-
-    Parameters
-    ----------
-    func : :py:obj:`~.CUfunction`
-        Kernel for which occupancy is calculated
-    blockSize : int
-        Block size the kernel is intended to be launched with
-    dynamicSMemSize : size_t
-        Per-block dynamic shared memory usage intended, in bytes
-    flags : unsigned int
-        Requested behavior for the occupancy calculator
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    numBlocks : int
-        Returned occupancy
-
-    See Also
-    --------
-    :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`
-    """
-    cdef cydriver.CUfunction cyfunc
-    if func is None:
-        cyfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(func, (CUfunction,)):
-        pfunc = int(func)
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    else:
-        pfunc = int(CUfunction(func))
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    cdef int numBlocks = 0
-    err = cydriver.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, cyfunc, blockSize, dynamicSMemSize, flags)
-    return (CUresult(err), numBlocks)
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialBlockSize' in found_functions}}
-
-@cython.embedsignature(True)
-def cuOccupancyMaxPotentialBlockSize(func, blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit):
-    """ Suggest a launch configuration with reasonable occupancy.
-
-    Returns in `*blockSize` a reasonable block size that can achieve the
-    maximum occupancy (or, the maximum number of active warps with the
-    fewest blocks per multiprocessor), and in `*minGridSize` the minimum
-    grid size to achieve the maximum occupancy.
-
-    If `blockSizeLimit` is 0, the configurator will use the maximum block
-    size permitted by the device / function instead.
-
-    If per-block dynamic shared memory allocation is not needed, the user
-    should leave both `blockSizeToDynamicSMemSize` and `dynamicSMemSize` as
-    0.
-
-    If per-block dynamic shared memory allocation is needed, then if the
-    dynamic shared memory size is constant regardless of block size, the
-    size should be passed through `dynamicSMemSize`, and
-    `blockSizeToDynamicSMemSize` should be NULL.
-
-    Otherwise, if the per-block dynamic shared memory size varies with
-    different block sizes, the user needs to provide a unary function
-    through `blockSizeToDynamicSMemSize` that computes the dynamic shared
-    memory needed by `func` for any given block size. `dynamicSMemSize` is
-    ignored. An example signature is:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Note that the API can also be used with context-less kernel
-    :py:obj:`~.CUkernel` by querying the handle using
-    :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by
-    casting to :py:obj:`~.CUfunction`. Here, the context to use for
-    calculations will be the current context.
-
-    Parameters
-    ----------
-    func : :py:obj:`~.CUfunction`
-        Kernel for which launch configuration is calculated
-    blockSizeToDynamicSMemSize : :py:obj:`~.CUoccupancyB2DSize`
-        A function that calculates how much per-block dynamic shared memory
-        `func` uses based on the block size
-    dynamicSMemSize : size_t
-        Dynamic shared memory usage intended, in bytes
-    blockSizeLimit : int
-        The maximum block size `func` is designed to handle
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    minGridSize : int
-        Returned minimum grid size needed to achieve the maximum occupancy
-    blockSize : int
-        Returned maximum block size that can achieve the maximum occupancy
-
-    See Also
-    --------
-    :py:obj:`~.cudaOccupancyMaxPotentialBlockSize`
-    """
-    cdef cydriver.CUoccupancyB2DSize cyblockSizeToDynamicSMemSize
-    if blockSizeToDynamicSMemSize is None:
-        cyblockSizeToDynamicSMemSize = <cydriver.CUoccupancyB2DSize><void_ptr>0
-    elif isinstance(blockSizeToDynamicSMemSize, (CUoccupancyB2DSize,)):
-        pblockSizeToDynamicSMemSize = int(blockSizeToDynamicSMemSize)
-        cyblockSizeToDynamicSMemSize = <cydriver.CUoccupancyB2DSize><void_ptr>pblockSizeToDynamicSMemSize
-    else:
-        pblockSizeToDynamicSMemSize = int(CUoccupancyB2DSize(blockSizeToDynamicSMemSize))
-        cyblockSizeToDynamicSMemSize = <cydriver.CUoccupancyB2DSize><void_ptr>pblockSizeToDynamicSMemSize
-    cdef cydriver.CUfunction cyfunc
-    if func is None:
-        cyfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(func, (CUfunction,)):
-        pfunc = int(func)
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    else:
-        pfunc = int(CUfunction(func))
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    cdef int minGridSize = 0
-    cdef int blockSize = 0
-    err = cydriver.cuOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, cyfunc, cyblockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit)
-    return (CUresult(err), minGridSize, blockSize)
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialBlockSizeWithFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cuOccupancyMaxPotentialBlockSizeWithFlags(func, blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags):
-    """ Suggest a launch configuration with reasonable occupancy.
-
-    An extended version of :py:obj:`~.cuOccupancyMaxPotentialBlockSize`. In
-    addition to arguments passed to
-    :py:obj:`~.cuOccupancyMaxPotentialBlockSize`,
-    :py:obj:`~.cuOccupancyMaxPotentialBlockSizeWithFlags` also takes a
-    `Flags` parameter.
-
-    The `Flags` parameter controls how special cases are handled. The valid
-    flags are:
-
-    - :py:obj:`~.CU_OCCUPANCY_DEFAULT`, which maintains the default
-      behavior as :py:obj:`~.cuOccupancyMaxPotentialBlockSize`;
-
-    - :py:obj:`~.CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE`, which suppresses
-      the default behavior on platform where global caching affects
-      occupancy. On such platforms, the launch configurations that produces
-      maximal occupancy might not support global caching. Setting
-      :py:obj:`~.CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE` guarantees that the
-      the produced launch configuration is global caching compatible at a
-      potential cost of occupancy. More information can be found about this
-      feature in the "Unified L1/Texture Cache" section of the Maxwell
-      tuning guide.
-
-    Note that the API can also be used with context-less kernel
-    :py:obj:`~.CUkernel` by querying the handle using
-    :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by
-    casting to :py:obj:`~.CUfunction`. Here, the context to use for
-    calculations will be the current context.
-
-    Parameters
-    ----------
-    func : :py:obj:`~.CUfunction`
-        Kernel for which launch configuration is calculated
-    blockSizeToDynamicSMemSize : :py:obj:`~.CUoccupancyB2DSize`
-        A function that calculates how much per-block dynamic shared memory
-        `func` uses based on the block size
-    dynamicSMemSize : size_t
-        Dynamic shared memory usage intended, in bytes
-    blockSizeLimit : int
-        The maximum block size `func` is designed to handle
-    flags : unsigned int
-        Options
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    minGridSize : int
-        Returned minimum grid size needed to achieve the maximum occupancy
-    blockSize : int
-        Returned maximum block size that can achieve the maximum occupancy
-
-    See Also
-    --------
-    :py:obj:`~.cudaOccupancyMaxPotentialBlockSizeWithFlags`
-    """
-    cdef cydriver.CUoccupancyB2DSize cyblockSizeToDynamicSMemSize
-    if blockSizeToDynamicSMemSize is None:
-        cyblockSizeToDynamicSMemSize = <cydriver.CUoccupancyB2DSize><void_ptr>0
-    elif isinstance(blockSizeToDynamicSMemSize, (CUoccupancyB2DSize,)):
-        pblockSizeToDynamicSMemSize = int(blockSizeToDynamicSMemSize)
-        cyblockSizeToDynamicSMemSize = <cydriver.CUoccupancyB2DSize><void_ptr>pblockSizeToDynamicSMemSize
-    else:
-        pblockSizeToDynamicSMemSize = int(CUoccupancyB2DSize(blockSizeToDynamicSMemSize))
-        cyblockSizeToDynamicSMemSize = <cydriver.CUoccupancyB2DSize><void_ptr>pblockSizeToDynamicSMemSize
-    cdef cydriver.CUfunction cyfunc
-    if func is None:
-        cyfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(func, (CUfunction,)):
-        pfunc = int(func)
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    else:
-        pfunc = int(CUfunction(func))
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    cdef int minGridSize = 0
-    cdef int blockSize = 0
-    err = cydriver.cuOccupancyMaxPotentialBlockSizeWithFlags(&minGridSize, &blockSize, cyfunc, cyblockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, flags)
-    return (CUresult(err), minGridSize, blockSize)
-{{endif}}
-
-{{if 'cuOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-
-@cython.embedsignature(True)
-def cuOccupancyAvailableDynamicSMemPerBlock(func, int numBlocks, int blockSize):
-    """ Returns dynamic shared memory available per block when launching `numBlocks` blocks on SM.
-
-    Returns in `*dynamicSmemSize` the maximum size of dynamic shared memory
-    to allow `numBlocks` blocks per SM.
-
-    Note that the API can also be used with context-less kernel
-    :py:obj:`~.CUkernel` by querying the handle using
-    :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by
-    casting to :py:obj:`~.CUfunction`. Here, the context to use for
-    calculations will be the current context.
-
-    Parameters
-    ----------
-    func : :py:obj:`~.CUfunction`
-        Kernel function for which occupancy is calculated
-    numBlocks : int
-        Number of blocks to fit on SM
-    blockSize : int
-        Size of the blocks
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    dynamicSmemSize : int
-        Returned maximum dynamic shared memory
-    """
-    cdef cydriver.CUfunction cyfunc
-    if func is None:
-        cyfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(func, (CUfunction,)):
-        pfunc = int(func)
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    else:
-        pfunc = int(CUfunction(func))
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    cdef size_t dynamicSmemSize = 0
-    err = cydriver.cuOccupancyAvailableDynamicSMemPerBlock(&dynamicSmemSize, cyfunc, numBlocks, blockSize)
-    return (CUresult(err), dynamicSmemSize)
-{{endif}}
-
-{{if 'cuOccupancyMaxPotentialClusterSize' in found_functions}}
-
-@cython.embedsignature(True)
-def cuOccupancyMaxPotentialClusterSize(func, config : Optional[CUlaunchConfig]):
-    """ Given the kernel function (`func`) and launch configuration (`config`), return the maximum cluster size in `*clusterSize`.
-
-    The cluster dimensions in `config` are ignored. If func has a required
-    cluster size set (see :py:obj:`~.cudaFuncGetAttributes` /
-    :py:obj:`~.cuFuncGetAttribute`),`*clusterSize` will reflect the
-    required cluster size.
-
-    By default this function will always return a value that's portable on
-    future hardware. A higher value may be returned if the kernel function
-    allows non-portable cluster sizes.
-
-    This function will respect the compile time launch bounds.
-
-    Note that the API can also be used with context-less kernel
-    :py:obj:`~.CUkernel` by querying the handle using
-    :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by
-    casting to :py:obj:`~.CUfunction`. Here, the context to use for
-    calculations will either be taken from the specified stream
-    `config->hStream` or the current context in case of NULL stream.
-
-    Parameters
-    ----------
-    func : :py:obj:`~.CUfunction`
-        Kernel function for which maximum cluster size is calculated
-    config : :py:obj:`~.CUlaunchConfig`
-        Launch configuration for the given kernel function
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    clusterSize : int
-        Returned maximum cluster size that can be launched for the given
-        kernel function and launch configuration
-
-    See Also
-    --------
-    :py:obj:`~.cudaFuncGetAttributes`, :py:obj:`~.cuFuncGetAttribute`
-    """
-    cdef cydriver.CUfunction cyfunc
-    if func is None:
-        cyfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(func, (CUfunction,)):
-        pfunc = int(func)
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    else:
-        pfunc = int(CUfunction(func))
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    cdef int clusterSize = 0
-    cdef cydriver.CUlaunchConfig* cyconfig_ptr = config._ptr if config != None else NULL
-    err = cydriver.cuOccupancyMaxPotentialClusterSize(&clusterSize, cyfunc, cyconfig_ptr)
-    return (CUresult(err), clusterSize)
-{{endif}}
-
-{{if 'cuOccupancyMaxActiveClusters' in found_functions}}
-
-@cython.embedsignature(True)
-def cuOccupancyMaxActiveClusters(func, config : Optional[CUlaunchConfig]):
-    """ Given the kernel function (`func`) and launch configuration (`config`), return the maximum number of clusters that could co-exist on the target device in `*numClusters`.
-
-    If the function has required cluster size already set (see
-    :py:obj:`~.cudaFuncGetAttributes` / :py:obj:`~.cuFuncGetAttribute`),
-    the cluster size from config must either be unspecified or match the
-    required size. Without required sizes, the cluster size must be
-    specified in config, else the function will return an error.
-
-    Note that various attributes of the kernel function may affect
-    occupancy calculation. Runtime environment may affect how the hardware
-    schedules the clusters, so the calculated occupancy is not guaranteed
-    to be achievable.
-
-    Note that the API can also be used with context-less kernel
-    :py:obj:`~.CUkernel` by querying the handle using
-    :py:obj:`~.cuLibraryGetKernel()` and then passing it to the API by
-    casting to :py:obj:`~.CUfunction`. Here, the context to use for
-    calculations will either be taken from the specified stream
-    `config->hStream` or the current context in case of NULL stream.
-
-    Parameters
-    ----------
-    func : :py:obj:`~.CUfunction`
-        Kernel function for which maximum number of clusters are calculated
-    config : :py:obj:`~.CUlaunchConfig`
-        Launch configuration for the given kernel function
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_CLUSTER_SIZE`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-    numClusters : int
-        Returned maximum number of clusters that could co-exist on the
-        target device
-
-    See Also
-    --------
-    :py:obj:`~.cudaFuncGetAttributes`, :py:obj:`~.cuFuncGetAttribute`
-    """
-    cdef cydriver.CUfunction cyfunc
-    if func is None:
-        cyfunc = <cydriver.CUfunction><void_ptr>0
-    elif isinstance(func, (CUfunction,)):
-        pfunc = int(func)
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    else:
-        pfunc = int(CUfunction(func))
-        cyfunc = <cydriver.CUfunction><void_ptr>pfunc
-    cdef int numClusters = 0
-    cdef cydriver.CUlaunchConfig* cyconfig_ptr = config._ptr if config != None else NULL
-    err = cydriver.cuOccupancyMaxActiveClusters(&numClusters, cyfunc, cyconfig_ptr)
-    return (CUresult(err), numClusters)
-{{endif}}
-
-{{if 'cuTexRefSetArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetArray(hTexRef, hArray, unsigned int Flags):
-    """ Binds an array as a texture reference.
-
-    [Deprecated]
-
-    Binds the CUDA array `hArray` to the texture reference `hTexRef`. Any
-    previous address or CUDA array state associated with the texture
-    reference is superseded by this function. `Flags` must be set to
-    :py:obj:`~.CU_TRSA_OVERRIDE_FORMAT`. Any CUDA array previously bound to
-    `hTexRef` is unbound.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference to bind
-    hArray : :py:obj:`~.CUarray`
-        Array to bind
-    Flags : unsigned int
-        Options (must be :py:obj:`~.CU_TRSA_OVERRIDE_FORMAT`)
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUarray cyhArray
-    if hArray is None:
-        cyhArray = <cydriver.CUarray><void_ptr>0
-    elif isinstance(hArray, (CUarray,)):
-        phArray = int(hArray)
-        cyhArray = <cydriver.CUarray><void_ptr>phArray
-    else:
-        phArray = int(CUarray(hArray))
-        cyhArray = <cydriver.CUarray><void_ptr>phArray
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    err = cydriver.cuTexRefSetArray(cyhTexRef, cyhArray, Flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexRefSetMipmappedArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetMipmappedArray(hTexRef, hMipmappedArray, unsigned int Flags):
-    """ Binds a mipmapped array to a texture reference.
-
-    [Deprecated]
-
-    Binds the CUDA mipmapped array `hMipmappedArray` to the texture
-    reference `hTexRef`. Any previous address or CUDA array state
-    associated with the texture reference is superseded by this function.
-    `Flags` must be set to :py:obj:`~.CU_TRSA_OVERRIDE_FORMAT`. Any CUDA
-    array previously bound to `hTexRef` is unbound.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference to bind
-    hMipmappedArray : :py:obj:`~.CUmipmappedArray`
-        Mipmapped array to bind
-    Flags : unsigned int
-        Options (must be :py:obj:`~.CU_TRSA_OVERRIDE_FORMAT`)
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUmipmappedArray cyhMipmappedArray
-    if hMipmappedArray is None:
-        cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>0
-    elif isinstance(hMipmappedArray, (CUmipmappedArray,)):
-        phMipmappedArray = int(hMipmappedArray)
-        cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>phMipmappedArray
-    else:
-        phMipmappedArray = int(CUmipmappedArray(hMipmappedArray))
-        cyhMipmappedArray = <cydriver.CUmipmappedArray><void_ptr>phMipmappedArray
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    err = cydriver.cuTexRefSetMipmappedArray(cyhTexRef, cyhMipmappedArray, Flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexRefSetAddress_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetAddress(hTexRef, dptr, size_t numbytes):
-    """ Binds an address as a texture reference.
-
-    [Deprecated]
-
-    Binds a linear address range to the texture reference `hTexRef`. Any
-    previous address or CUDA array state associated with the texture
-    reference is superseded by this function. Any memory previously bound
-    to `hTexRef` is unbound.
-
-    Since the hardware enforces an alignment requirement on texture base
-    addresses, :py:obj:`~.cuTexRefSetAddress()` passes back a byte offset
-    in `*ByteOffset` that must be applied to texture fetches in order to
-    read from the desired memory. This offset must be divided by the texel
-    size and passed to kernels that read from the texture so they can be
-    applied to the :py:obj:`~.tex1Dfetch()` function.
-
-    If the device memory pointer was returned from
-    :py:obj:`~.cuMemAlloc()`, the offset is guaranteed to be 0 and NULL may
-    be passed as the `ByteOffset` parameter.
-
-    The total number of elements (or texels) in the linear address range
-    cannot exceed
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH`. The
-    number of elements is computed as (`numbytes` / bytesPerElement), where
-    bytesPerElement is determined from the data format and number of
-    components set using :py:obj:`~.cuTexRefSetFormat()`.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference to bind
-    dptr : :py:obj:`~.CUdeviceptr`
-        Device pointer to bind
-    numbytes : size_t
-        Size of memory to bind in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    ByteOffset : int
-        Returned byte offset
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUdeviceptr cydptr
-    if dptr is None:
-        cydptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dptr, (CUdeviceptr,)):
-        pdptr = int(dptr)
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    else:
-        pdptr = int(CUdeviceptr(dptr))
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    cdef size_t ByteOffset = 0
-    err = cydriver.cuTexRefSetAddress(&ByteOffset, cyhTexRef, cydptr, numbytes)
-    return (CUresult(err), ByteOffset)
-{{endif}}
-
-{{if 'cuTexRefSetAddress2D_v3' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetAddress2D(hTexRef, desc : Optional[CUDA_ARRAY_DESCRIPTOR], dptr, size_t Pitch):
-    """ Binds an address as a 2D texture reference.
-
-    [Deprecated]
-
-    Binds a linear address range to the texture reference `hTexRef`. Any
-    previous address or CUDA array state associated with the texture
-    reference is superseded by this function. Any memory previously bound
-    to `hTexRef` is unbound.
-
-    Using a :py:obj:`~.tex2D()` function inside a kernel requires a call to
-    either :py:obj:`~.cuTexRefSetArray()` to bind the corresponding texture
-    reference to an array, or :py:obj:`~.cuTexRefSetAddress2D()` to bind
-    the texture reference to linear memory.
-
-    Function calls to :py:obj:`~.cuTexRefSetFormat()` cannot follow calls
-    to :py:obj:`~.cuTexRefSetAddress2D()` for the same texture reference.
-
-    It is required that `dptr` be aligned to the appropriate hardware-
-    specific texture alignment. You can query this value using the device
-    attribute :py:obj:`~.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT`. If an
-    unaligned `dptr` is supplied, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is
-    returned.
-
-    `Pitch` has to be aligned to the hardware-specific texture pitch
-    alignment. This value can be queried using the device attribute
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT`. If an
-    unaligned `Pitch` is supplied, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is
-    returned.
-
-    Width and Height, which are specified in elements (or texels), cannot
-    exceed :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH`
-    and :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT`
-    respectively. `Pitch`, which is specified in bytes, cannot exceed
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH`.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference to bind
-    desc : :py:obj:`~.CUDA_ARRAY_DESCRIPTOR`
-        Descriptor of CUDA array
-    dptr : :py:obj:`~.CUdeviceptr`
-        Device pointer to bind
-    Pitch : size_t
-        Line pitch in bytes
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUdeviceptr cydptr
-    if dptr is None:
-        cydptr = <cydriver.CUdeviceptr><void_ptr>0
-    elif isinstance(dptr, (CUdeviceptr,)):
-        pdptr = int(dptr)
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    else:
-        pdptr = int(CUdeviceptr(dptr))
-        cydptr = <cydriver.CUdeviceptr><void_ptr>pdptr
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    cdef cydriver.CUDA_ARRAY_DESCRIPTOR* cydesc_ptr = desc._ptr if desc != None else NULL
-    err = cydriver.cuTexRefSetAddress2D(cyhTexRef, cydesc_ptr, cydptr, Pitch)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexRefSetFormat' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetFormat(hTexRef, fmt not None : CUarray_format, int NumPackedComponents):
-    """ Sets the format for a texture reference.
-
-    [Deprecated]
-
-    Specifies the format of the data to be read by the texture reference
-    `hTexRef`. `fmt` and `NumPackedComponents` are exactly analogous to the
-    :py:obj:`~.Format` and :py:obj:`~.NumChannels` members of the
-    :py:obj:`~.CUDA_ARRAY_DESCRIPTOR` structure: They specify the format of
-    each component and the number of components per array element.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-    fmt : :py:obj:`~.CUarray_format`
-        Format to set
-    NumPackedComponents : int
-        Number of components per array element
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`, :py:obj:`~.cudaCreateChannelDesc`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    cdef cydriver.CUarray_format cyfmt = fmt.value
-    err = cydriver.cuTexRefSetFormat(cyhTexRef, cyfmt, NumPackedComponents)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexRefSetAddressMode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetAddressMode(hTexRef, int dim, am not None : CUaddress_mode):
-    """ Sets the addressing mode for a texture reference.
-
-    [Deprecated]
-
-    Specifies the addressing mode `am` for the given dimension `dim` of the
-    texture reference `hTexRef`. If `dim` is zero, the addressing mode is
-    applied to the first parameter of the functions used to fetch from the
-    texture; if `dim` is 1, the second, and so on.
-    :py:obj:`~.CUaddress_mode` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Note that this call has no effect if `hTexRef` is bound to linear
-    memory. Also, if the flag, :py:obj:`~.CU_TRSF_NORMALIZED_COORDINATES`,
-    is not set, the only supported address mode is
-    :py:obj:`~.CU_TR_ADDRESS_MODE_CLAMP`.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-    dim : int
-        Dimension
-    am : :py:obj:`~.CUaddress_mode`
-        Addressing mode to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    cdef cydriver.CUaddress_mode cyam = am.value
-    err = cydriver.cuTexRefSetAddressMode(cyhTexRef, dim, cyam)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexRefSetFilterMode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetFilterMode(hTexRef, fm not None : CUfilter_mode):
-    """ Sets the filtering mode for a texture reference.
-
-    [Deprecated]
-
-    Specifies the filtering mode `fm` to be used when reading memory
-    through the texture reference `hTexRef`. :py:obj:`~.CUfilter_mode_enum`
-    is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Note that this call has no effect if `hTexRef` is bound to linear
-    memory.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-    fm : :py:obj:`~.CUfilter_mode`
-        Filtering mode to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    cdef cydriver.CUfilter_mode cyfm = fm.value
-    err = cydriver.cuTexRefSetFilterMode(cyhTexRef, cyfm)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexRefSetMipmapFilterMode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetMipmapFilterMode(hTexRef, fm not None : CUfilter_mode):
-    """ Sets the mipmap filtering mode for a texture reference.
-
-    [Deprecated]
-
-    Specifies the mipmap filtering mode `fm` to be used when reading memory
-    through the texture reference `hTexRef`. :py:obj:`~.CUfilter_mode_enum`
-    is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Note that this call has no effect if `hTexRef` is not bound to a
-    mipmapped array.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-    fm : :py:obj:`~.CUfilter_mode`
-        Filtering mode to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    cdef cydriver.CUfilter_mode cyfm = fm.value
-    err = cydriver.cuTexRefSetMipmapFilterMode(cyhTexRef, cyfm)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexRefSetMipmapLevelBias' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetMipmapLevelBias(hTexRef, float bias):
-    """ Sets the mipmap level bias for a texture reference.
-
-    [Deprecated]
-
-    Specifies the mipmap level bias `bias` to be added to the specified
-    mipmap level when reading memory through the texture reference
-    `hTexRef`.
-
-    Note that this call has no effect if `hTexRef` is not bound to a
-    mipmapped array.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-    bias : float
-        Mipmap level bias
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    err = cydriver.cuTexRefSetMipmapLevelBias(cyhTexRef, bias)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexRefSetMipmapLevelClamp' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetMipmapLevelClamp(hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp):
-    """ Sets the mipmap min/max mipmap level clamps for a texture reference.
-
-    [Deprecated]
-
-    Specifies the min/max mipmap level clamps, `minMipmapLevelClamp` and
-    `maxMipmapLevelClamp` respectively, to be used when reading memory
-    through the texture reference `hTexRef`.
-
-    Note that this call has no effect if `hTexRef` is not bound to a
-    mipmapped array.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-    minMipmapLevelClamp : float
-        Mipmap min level clamp
-    maxMipmapLevelClamp : float
-        Mipmap max level clamp
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    err = cydriver.cuTexRefSetMipmapLevelClamp(cyhTexRef, minMipmapLevelClamp, maxMipmapLevelClamp)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexRefSetMaxAnisotropy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetMaxAnisotropy(hTexRef, unsigned int maxAniso):
-    """ Sets the maximum anisotropy for a texture reference.
-
-    [Deprecated]
-
-    Specifies the maximum anisotropy `maxAniso` to be used when reading
-    memory through the texture reference `hTexRef`.
-
-    Note that this call has no effect if `hTexRef` is bound to linear
-    memory.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-    maxAniso : unsigned int
-        Maximum anisotropy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    err = cydriver.cuTexRefSetMaxAnisotropy(cyhTexRef, maxAniso)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexRefSetBorderColor' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetBorderColor(hTexRef, pBorderColor):
-    """ Sets the border color for a texture reference.
-
-    [Deprecated]
-
-    Specifies the value of the RGBA color via the `pBorderColor` to the
-    texture reference `hTexRef`. The color value supports only float type
-    and holds color components in the following sequence: pBorderColor[0]
-    holds 'R' component pBorderColor[1] holds 'G' component pBorderColor[2]
-    holds 'B' component pBorderColor[3] holds 'A' component
-
-    Note that the color values can be set only when the Address mode is set
-    to CU_TR_ADDRESS_MODE_BORDER using :py:obj:`~.cuTexRefSetAddressMode`.
-    Applications using integer border color values have to
-    "reinterpret_cast" their values to float.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-    pBorderColor : float or sequence of 4 float
-        RGBA color. A sequence must hold exactly four components in
-        R, G, B, A order. A single float is still accepted for backward
-        compatibility; it is used as the 'R' component and the remaining
-        components are set to 0.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    Raises
-    ------
-    ValueError
-        If `pBorderColor` is a sequence whose length is not 4.
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetBorderColor`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    # The driver reads four floats (R, G, B, A) through the pointer, so it
-    # must be given a 4-element buffer; the address of a single float would
-    # make it read past the end of that variable.
-    cdef float cypBorderColor[4]
-    cdef int idx
-    if hasattr(pBorderColor, '__len__'):
-        if len(pBorderColor) != 4:
-            raise ValueError("Argument 'pBorderColor' must hold exactly 4 floats (R, G, B, A)")
-        for idx in range(4):
-            cypBorderColor[idx] = pBorderColor[idx]
-    else:
-        # Legacy scalar form: only 'R' is supplied, the rest is zeroed so
-        # the driver never sees indeterminate values.
-        cypBorderColor[0] = pBorderColor
-        cypBorderColor[1] = 0
-        cypBorderColor[2] = 0
-        cypBorderColor[3] = 0
-    err = cydriver.cuTexRefSetBorderColor(cyhTexRef, &cypBorderColor[0])
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexRefSetFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefSetFlags(hTexRef, unsigned int Flags):
-    """ Sets the flags for a texture reference.
-
-    [Deprecated]
-
-    Applies the optional flags given in `Flags` to the texture reference
-    `hTexRef`. The flags control the behavior of data returned through
-    that reference. The valid flags are:
-
-    - :py:obj:`~.CU_TRSF_READ_AS_INTEGER`: suppresses the default
-      promotion of integer data to floating point data in the range
-      [0, 1]. A texture with 32-bit integer format is never promoted,
-      whether or not this flag is specified;
-
-    - :py:obj:`~.CU_TRSF_NORMALIZED_COORDINATES`: suppresses the default
-      behavior of texture coordinates ranging over [0, Dim), where Dim is
-      the width or height of the CUDA array. The coordinates [0, 1.0)
-      then reference the entire breadth of the array dimension;
-
-    - :py:obj:`~.CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION`: disables any
-      trilinear filtering optimizations. Those optimizations improve
-      texture filtering performance by allowing bilinear filtering on
-      textures in scenarios where it can closely approximate the
-      expected results.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-    Flags : unsigned int
-        Optional flags to set
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    status = cydriver.cuTexRefSetFlags(cyhTexRef, Flags)
-    return (CUresult(status),)
-{{endif}}
-
-{{if 'cuTexRefGetAddress_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetAddress(hTexRef):
-    """ Gets the address associated with a texture reference.
-
-    [Deprecated]
-
-    Looks up the base address bound to the texture reference `hTexRef`
-    and returns it as `pdptr`. If the texture reference is not bound to
-    any device memory range, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is
-    returned.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pdptr : :py:obj:`~.CUdeviceptr`
-        Returned device address
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    # The driver call receives the wrapper's `_ptr` as its output location.
-    cdef CUdeviceptr dptr_out = CUdeviceptr()
-    status = cydriver.cuTexRefGetAddress(<cydriver.CUdeviceptr*>dptr_out._ptr, cyhTexRef)
-    return (CUresult(status), dptr_out)
-{{endif}}
-
-{{if 'cuTexRefGetArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetArray(hTexRef):
-    """ Gets the array bound to a texture reference.
-
-    [Deprecated]
-
-    Looks up the CUDA array bound to the texture reference `hTexRef` and
-    returns it as `phArray`. If the texture reference is not bound to any
-    CUDA array, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phArray : :py:obj:`~.CUarray`
-        Returned array
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    # The driver call receives the wrapper's `_ptr` as its output location.
-    cdef CUarray array_out = CUarray()
-    status = cydriver.cuTexRefGetArray(<cydriver.CUarray*>array_out._ptr, cyhTexRef)
-    return (CUresult(status), array_out)
-{{endif}}
-
-{{if 'cuTexRefGetMipmappedArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetMipmappedArray(hTexRef):
-    """ Gets the mipmapped array bound to a texture reference.
-
-    [Deprecated]
-
-    Looks up the CUDA mipmapped array bound to the texture reference
-    `hTexRef` and returns it as `phMipmappedArray`. If the texture
-    reference is not bound to any CUDA mipmapped array,
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phMipmappedArray : :py:obj:`~.CUmipmappedArray`
-        Returned mipmapped array
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    # The driver call receives the wrapper's `_ptr` as its output location.
-    cdef CUmipmappedArray mipmap_out = CUmipmappedArray()
-    status = cydriver.cuTexRefGetMipmappedArray(<cydriver.CUmipmappedArray*>mipmap_out._ptr, cyhTexRef)
-    return (CUresult(status), mipmap_out)
-{{endif}}
-
-{{if 'cuTexRefGetAddressMode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetAddressMode(hTexRef, int dim):
-    """ Gets the addressing mode used by a texture reference.
-
-    [Deprecated]
-
-    Returns as `pam` the addressing mode that the texture reference
-    `hTexRef` uses for dimension `dim`. Currently, the only valid values
-    for `dim` are 0 and 1.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-    dim : int
-        Dimension
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pam : :py:obj:`~.CUaddress_mode`
-        Returned addressing mode
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    # NOTE(review): `address_mode` has no initializer; confirm the driver
-    # always writes it, even on failure, before it is converted below.
-    cdef cydriver.CUaddress_mode address_mode
-    status = cydriver.cuTexRefGetAddressMode(&address_mode, cyhTexRef, dim)
-    return (CUresult(status), CUaddress_mode(address_mode))
-{{endif}}
-
-{{if 'cuTexRefGetFilterMode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetFilterMode(hTexRef):
-    """ Gets the filter-mode used by a texture reference.
-
-    [Deprecated]
-
-    Returns as `pfm` the filtering mode of the texture reference
-    `hTexRef`.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pfm : :py:obj:`~.CUfilter_mode`
-        Returned filtering mode
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    # NOTE(review): `filter_mode` has no initializer; confirm the driver
-    # always writes it, even on failure, before it is converted below.
-    cdef cydriver.CUfilter_mode filter_mode
-    status = cydriver.cuTexRefGetFilterMode(&filter_mode, cyhTexRef)
-    return (CUresult(status), CUfilter_mode(filter_mode))
-{{endif}}
-
-{{if 'cuTexRefGetFormat' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetFormat(hTexRef):
-    """ Gets the format used by a texture reference.
-
-    [Deprecated]
-
-    Returns as `pFormat` and `pNumChannels` the format and the number of
-    components of the CUDA array bound to the texture reference
-    `hTexRef`. In the underlying driver call, a NULL `pFormat` or
-    `pNumChannels` is ignored.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pFormat : :py:obj:`~.CUarray_format`
-        Returned format
-    pNumChannels : int
-        Returned number of components
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    # NOTE(review): `array_format` has no initializer; confirm the driver
-    # always writes it, even on failure, before it is converted below.
-    cdef cydriver.CUarray_format array_format
-    cdef int channel_count = 0
-    status = cydriver.cuTexRefGetFormat(&array_format, &channel_count, cyhTexRef)
-    return (CUresult(status), CUarray_format(array_format), channel_count)
-{{endif}}
-
-{{if 'cuTexRefGetMipmapFilterMode' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetMipmapFilterMode(hTexRef):
-    """ Gets the mipmap filtering mode for a texture reference.
-
-    [Deprecated]
-
-    Returns as `pfm` the mipmap filtering mode that is used when reading
-    memory through the texture reference `hTexRef`.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pfm : :py:obj:`~.CUfilter_mode`
-        Returned mipmap filtering mode
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    # NOTE(review): `mipmap_filter` has no initializer; confirm the driver
-    # always writes it, even on failure, before it is converted below.
-    cdef cydriver.CUfilter_mode mipmap_filter
-    status = cydriver.cuTexRefGetMipmapFilterMode(&mipmap_filter, cyhTexRef)
-    return (CUresult(status), CUfilter_mode(mipmap_filter))
-{{endif}}
-
-{{if 'cuTexRefGetMipmapLevelBias' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetMipmapLevelBias(hTexRef):
-    """ Gets the mipmap level bias for a texture reference.
-
-    [Deprecated]
-
-    Returns as `pbias` the mipmap level bias that is added to the
-    specified mipmap level when reading memory through the texture
-    reference `hTexRef`.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pbias : float
-        Returned mipmap level bias
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    # Starts at 0 so a defined value is returned if the driver writes nothing.
-    cdef float level_bias = 0
-    status = cydriver.cuTexRefGetMipmapLevelBias(&level_bias, cyhTexRef)
-    return (CUresult(status), level_bias)
-{{endif}}
-
-{{if 'cuTexRefGetMipmapLevelClamp' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetMipmapLevelClamp(hTexRef):
-    """ Gets the min/max mipmap level clamps for a texture reference.
-
-    [Deprecated]
-
-    Returns as `pminMipmapLevelClamp` and `pmaxMipmapLevelClamp` the
-    min/max mipmap level clamps that are used when reading memory through
-    the texture reference `hTexRef`.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pminMipmapLevelClamp : float
-        Returned mipmap min level clamp
-    pmaxMipmapLevelClamp : float
-        Returned mipmap max level clamp
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    # Both outputs start at 0 so defined values are returned if the driver
-    # writes nothing.
-    cdef float clamp_min = 0
-    cdef float clamp_max = 0
-    status = cydriver.cuTexRefGetMipmapLevelClamp(&clamp_min, &clamp_max, cyhTexRef)
-    return (CUresult(status), clamp_min, clamp_max)
-{{endif}}
-
-{{if 'cuTexRefGetMaxAnisotropy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetMaxAnisotropy(hTexRef):
-    """ Gets the maximum anisotropy for a texture reference.
-
-    [Deprecated]
-
-    Returns as `pmaxAniso` the maximum anisotropy that is used when
-    reading memory through the texture reference `hTexRef`.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pmaxAniso : int
-        Returned maximum anisotropy
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    # Starts at 0 so a defined value is returned if the driver writes nothing.
-    cdef int max_aniso = 0
-    status = cydriver.cuTexRefGetMaxAnisotropy(&max_aniso, cyhTexRef)
-    return (CUresult(status), max_aniso)
-{{endif}}
-
-{{if 'cuTexRefGetBorderColor' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetBorderColor(hTexRef):
-    """ Gets the border color used by a texture reference.
-
-    [Deprecated]
-
-    Queries the RGBA border color used by the texture reference
-    `hTexRef`. The driver reports the color as four floats in the
-    following sequence: pBorderColor[0] holds 'R' component
-    pBorderColor[1] holds 'G' component pBorderColor[2] holds 'B'
-    component pBorderColor[3] holds 'A' component
-
-    To keep the return shape of this binding unchanged, only the first
-    ('R') component is handed back to the caller.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pBorderColor : float
-        'R' component of the returned RGBA color (0 if the driver wrote
-        nothing)
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetBorderColor`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    if hTexRef is None:
-        cyhTexRef = <cydriver.CUtexref><void_ptr>0
-    elif isinstance(hTexRef, (CUtexref,)):
-        phTexRef = int(hTexRef)
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    else:
-        phTexRef = int(CUtexref(hTexRef))
-        cyhTexRef = <cydriver.CUtexref><void_ptr>phTexRef
-    # The driver writes four floats (R, G, B, A) through the pointer, so
-    # the destination must be a 4-element buffer; a single float would be
-    # overrun by the last three components.
-    cdef float cypBorderColor[4]
-    cdef int idx
-    for idx in range(4):
-        cypBorderColor[idx] = 0
-    err = cydriver.cuTexRefGetBorderColor(&cypBorderColor[0], cyhTexRef)
-    # Element 0 is what the single-float version returned as well.
-    return (CUresult(err), cypBorderColor[0])
-{{endif}}
-
-{{if 'cuTexRefGetFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefGetFlags(hTexRef):
-    """ Gets the flags used by a texture reference.
-
-    [Deprecated]
-
-    Returns as `pFlags` the flags of the texture reference `hTexRef`.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pFlags : unsigned int
-        Returned flags
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFormat`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    # Starts at 0 so a defined value is returned if the driver writes nothing.
-    cdef unsigned int flag_bits = 0
-    status = cydriver.cuTexRefGetFlags(&flag_bits, cyhTexRef)
-    return (CUresult(status), flag_bits)
-{{endif}}
-
-{{if 'cuTexRefCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefCreate():
-    """ Creates a texture reference.
-
-    [Deprecated]
-
-    Creates a texture reference and returns its handle as `pTexRef`.
-    After creation, the application must call
-    :py:obj:`~.cuTexRefSetArray()` or :py:obj:`~.cuTexRefSetAddress()` to
-    associate the reference with allocated memory. The other texture
-    reference functions specify the format and interpretation
-    (addressing, filtering, etc.) to be used when the memory is read
-    through this texture reference.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pTexRef : :py:obj:`~.CUtexref`
-        Returned texture reference
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefDestroy`
-    """
-    # The driver call receives the wrapper's `_ptr` as its output location.
-    cdef CUtexref texref_out = CUtexref()
-    status = cydriver.cuTexRefCreate(<cydriver.CUtexref*>texref_out._ptr)
-    return (CUresult(status), texref_out)
-{{endif}}
-
-{{if 'cuTexRefDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexRefDestroy(hTexRef):
-    """ Destroys a texture reference.
-
-    [Deprecated]
-
-    Destroys the texture reference identified by `hTexRef`.
-
-    Parameters
-    ----------
-    hTexRef : :py:obj:`~.CUtexref`
-        Texture reference to destroy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexRefCreate`
-    """
-    cdef cydriver.CUtexref cyhTexRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hTexRef is None:
-        texref_addr = 0
-    elif isinstance(hTexRef, CUtexref):
-        texref_addr = int(hTexRef)
-    else:
-        # Any other input is first wrapped in a CUtexref.
-        texref_addr = int(CUtexref(hTexRef))
-    cyhTexRef = <cydriver.CUtexref><void_ptr>texref_addr
-    status = cydriver.cuTexRefDestroy(cyhTexRef)
-    return (CUresult(status),)
-{{endif}}
-
-{{if 'cuSurfRefSetArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cuSurfRefSetArray(hSurfRef, hArray, unsigned int Flags):
-    """ Sets the CUDA array for a surface reference.
-
-    [Deprecated]
-
-    Makes the CUDA array `hArray` the one that is read and written by the
-    surface reference `hSurfRef`. This supersedes any previous CUDA array
-    state associated with the surface reference, and any CUDA array
-    previously bound to `hSurfRef` is unbound. `Flags` must be set to 0.
-    The :py:obj:`~.CUDA_ARRAY3D_SURFACE_LDST` flag must have been set for
-    the CUDA array.
-
-    Parameters
-    ----------
-    hSurfRef : :py:obj:`~.CUsurfref`
-        Surface reference handle
-    hArray : :py:obj:`~.CUarray`
-        CUDA array handle
-    Flags : unsigned int
-        set to 0
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetSurfRef`, :py:obj:`~.cuSurfRefGetArray`
-    """
-    # The array handle is converted first, then the surface reference.
-    cdef cydriver.CUarray cyhArray
-    if hArray is None:
-        array_addr = 0
-    elif isinstance(hArray, CUarray):
-        array_addr = int(hArray)
-    else:
-        # Any other input is first wrapped in a CUarray.
-        array_addr = int(CUarray(hArray))
-    cyhArray = <cydriver.CUarray><void_ptr>array_addr
-    cdef cydriver.CUsurfref cyhSurfRef
-    if hSurfRef is None:
-        surfref_addr = 0
-    elif isinstance(hSurfRef, CUsurfref):
-        surfref_addr = int(hSurfRef)
-    else:
-        # Any other input is first wrapped in a CUsurfref.
-        surfref_addr = int(CUsurfref(hSurfRef))
-    cyhSurfRef = <cydriver.CUsurfref><void_ptr>surfref_addr
-    status = cydriver.cuSurfRefSetArray(cyhSurfRef, cyhArray, Flags)
-    return (CUresult(status),)
-{{endif}}
-
-{{if 'cuSurfRefGetArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cuSurfRefGetArray(hSurfRef):
-    """ Passes back the CUDA array bound to a surface reference.
-
-    [Deprecated]
-
-    Looks up the CUDA array bound to the surface reference `hSurfRef` and
-    returns it as `phArray`. If the surface reference is not bound to any
-    CUDA array, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    Parameters
-    ----------
-    hSurfRef : :py:obj:`~.CUsurfref`
-        Surface reference handle
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    phArray : :py:obj:`~.CUarray`
-        CUDA array bound to the surface reference
-
-    See Also
-    --------
-    :py:obj:`~.cuModuleGetSurfRef`, :py:obj:`~.cuSurfRefSetArray`
-    """
-    cdef cydriver.CUsurfref cyhSurfRef
-    # Reduce the handle argument to an integer value, then cast it once.
-    if hSurfRef is None:
-        surfref_addr = 0
-    elif isinstance(hSurfRef, CUsurfref):
-        surfref_addr = int(hSurfRef)
-    else:
-        # Any other input is first wrapped in a CUsurfref.
-        surfref_addr = int(CUsurfref(hSurfRef))
-    cyhSurfRef = <cydriver.CUsurfref><void_ptr>surfref_addr
-    # The driver call receives the wrapper's `_ptr` as its output location.
-    cdef CUarray array_out = CUarray()
-    status = cydriver.cuSurfRefGetArray(<cydriver.CUarray*>array_out._ptr, cyhSurfRef)
-    return (CUresult(status), array_out)
-{{endif}}
-
-{{if 'cuTexObjectCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexObjectCreate(pResDesc : Optional[CUDA_RESOURCE_DESC], pTexDesc : Optional[CUDA_TEXTURE_DESC], pResViewDesc : Optional[CUDA_RESOURCE_VIEW_DESC]):
-    """ Creates a texture object.
-
-    Creates a texture object and returns it in `pTexObject`. `pResDesc`
-    describes the data to texture from. `pTexDesc` describes how the data
-    should be sampled. `pResViewDesc` is an optional argument that
-    specifies an alternate format for the data described by `pResDesc`, and
-    also describes the subresource region to restrict access to when
-    texturing. `pResViewDesc` can only be specified if the type of resource
-    is a CUDA array or a CUDA mipmapped array not in a block compressed
-    format.
-
-    Texture objects are only supported on devices of compute capability 3.0
-    or higher. Additionally, a texture object is an opaque value, and, as
-    such, should only be accessed through CUDA API calls.
-
-    The :py:obj:`~.CUDA_RESOURCE_DESC` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.CUDA_RESOURCE_DESC.resType` specifies the type of resource
-      to texture from. CUresourceType is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.CUDA_RESOURCE_DESC.resType` is set to
-    :py:obj:`~.CU_RESOURCE_TYPE_ARRAY`,
-    :py:obj:`~.CUDA_RESOURCE_DESC`::res::array::hArray must be set to a
-    valid CUDA array handle.
-
-    If :py:obj:`~.CUDA_RESOURCE_DESC.resType` is set to
-    :py:obj:`~.CU_RESOURCE_TYPE_MIPMAPPED_ARRAY`,
-    :py:obj:`~.CUDA_RESOURCE_DESC`::res::mipmap::hMipmappedArray must be
-    set to a valid CUDA mipmapped array handle.
-
-    If :py:obj:`~.CUDA_RESOURCE_DESC.resType` is set to
-    :py:obj:`~.CU_RESOURCE_TYPE_LINEAR`,
-    :py:obj:`~.CUDA_RESOURCE_DESC`::res::linear::devPtr must be set to a
-    valid device pointer, that is aligned to
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT`.
-    :py:obj:`~.CUDA_RESOURCE_DESC`::res::linear::format and
-    :py:obj:`~.CUDA_RESOURCE_DESC`::res::linear::numChannels describe the
-    format of each component and the number of components per array
-    element. :py:obj:`~.CUDA_RESOURCE_DESC`::res::linear::sizeInBytes
-    specifies the size of the array in bytes. The total number of elements
-    in the linear address range cannot exceed
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH`. The
-    number of elements is computed as (sizeInBytes / (sizeof(format) *
-    numChannels)).
-
-    If :py:obj:`~.CUDA_RESOURCE_DESC.resType` is set to
-    :py:obj:`~.CU_RESOURCE_TYPE_PITCH2D`,
-    :py:obj:`~.CUDA_RESOURCE_DESC`::res::pitch2D::devPtr must be set to a
-    valid device pointer, that is aligned to
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT`.
-    :py:obj:`~.CUDA_RESOURCE_DESC`::res::pitch2D::format and
-    :py:obj:`~.CUDA_RESOURCE_DESC`::res::pitch2D::numChannels describe the
-    format of each component and the number of components per array
-    element. :py:obj:`~.CUDA_RESOURCE_DESC`::res::pitch2D::width and
-    :py:obj:`~.CUDA_RESOURCE_DESC`::res::pitch2D::height specify the width
-    and height of the array in elements, and cannot exceed
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH` and
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT`
-    respectively.
-    :py:obj:`~.CUDA_RESOURCE_DESC`::res::pitch2D::pitchInBytes specifies
-    the pitch between two rows in bytes and has to be aligned to
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT`. Pitch cannot
-    exceed :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH`.
-
-    - :py:obj:`~.flags` must be set to zero.
-
-    The :py:obj:`~.CUDA_TEXTURE_DESC` struct is defined as
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where
-
-    - :py:obj:`~.CUDA_TEXTURE_DESC.addressMode` specifies the addressing
-      mode for each dimension of the texture data.
-      :py:obj:`~.CUaddress_mode` is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - This is ignored if :py:obj:`~.CUDA_RESOURCE_DESC.resType` is
-      :py:obj:`~.CU_RESOURCE_TYPE_LINEAR`. Also, if the flag,
-      :py:obj:`~.CU_TRSF_NORMALIZED_COORDINATES` is not set, the only
-      supported address mode is :py:obj:`~.CU_TR_ADDRESS_MODE_CLAMP`.
-
-    - :py:obj:`~.CUDA_TEXTURE_DESC.filterMode` specifies the filtering mode
-      to be used when fetching from the texture. CUfilter_mode is defined
-      as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - This is ignored if :py:obj:`~.CUDA_RESOURCE_DESC.resType` is
-      :py:obj:`~.CU_RESOURCE_TYPE_LINEAR`.
-
-    - :py:obj:`~.CUDA_TEXTURE_DESC.flags` can be any combination of the
-      following:
-
-      - :py:obj:`~.CU_TRSF_READ_AS_INTEGER`, which suppresses the default
-        behavior of having the texture promote integer data to floating
-        point data in the range [0, 1]. Note that texture with 32-bit
-        integer format would not be promoted, regardless of whether or not
-        this flag is specified.
-
-      - :py:obj:`~.CU_TRSF_NORMALIZED_COORDINATES`, which suppresses the
-        default behavior of having the texture coordinates range from [0,
-        Dim) where Dim is the width or height of the CUDA array. Instead,
-        the texture coordinates [0, 1.0) reference the entire breadth of
-        the array dimension; Note that for CUDA mipmapped arrays, this flag
-        has to be set.
-
-      - :py:obj:`~.CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION`, which disables
-        any trilinear filtering optimizations. Trilinear optimizations
-        improve texture filtering performance by allowing bilinear
-        filtering on textures in scenarios where it can closely approximate
-        the expected results.
-
-      - :py:obj:`~.CU_TRSF_SEAMLESS_CUBEMAP`, which enables seamless cube
-        map filtering. This flag can only be specified if the underlying
-        resource is a CUDA array or a CUDA mipmapped array that was created
-        with the flag :py:obj:`~.CUDA_ARRAY3D_CUBEMAP`. When seamless cube
-        map filtering is enabled, texture address modes specified by
-        :py:obj:`~.CUDA_TEXTURE_DESC.addressMode` are ignored. Instead, if
-        the :py:obj:`~.CUDA_TEXTURE_DESC.filterMode` is set to
-        :py:obj:`~.CU_TR_FILTER_MODE_POINT` the address mode
-        :py:obj:`~.CU_TR_ADDRESS_MODE_CLAMP` will be applied for all
-        dimensions. If the :py:obj:`~.CUDA_TEXTURE_DESC.filterMode` is set
-        to :py:obj:`~.CU_TR_FILTER_MODE_LINEAR` seamless cube map filtering
-        will be performed when sampling along the cube face borders.
-
-    - :py:obj:`~.CUDA_TEXTURE_DESC.maxAnisotropy` specifies the maximum
-      anisotropy ratio to be used when doing anisotropic filtering. This
-      value will be clamped to the range [1,16].
-
-    - :py:obj:`~.CUDA_TEXTURE_DESC.mipmapFilterMode` specifies the filter
-      mode when the calculated mipmap level lies between two defined mipmap
-      levels.
-
-    - :py:obj:`~.CUDA_TEXTURE_DESC.mipmapLevelBias` specifies the offset to
-      be applied to the calculated mipmap level.
-
-    - :py:obj:`~.CUDA_TEXTURE_DESC.minMipmapLevelClamp` specifies the lower
-      end of the mipmap level range to clamp access to.
-
-    - :py:obj:`~.CUDA_TEXTURE_DESC.maxMipmapLevelClamp` specifies the upper
-      end of the mipmap level range to clamp access to.
-
-    The :py:obj:`~.CUDA_RESOURCE_VIEW_DESC` struct is defined as
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.CUDA_RESOURCE_VIEW_DESC.format` specifies how the data
-      contained in the CUDA array or CUDA mipmapped array should be
-      interpreted. Note that this can incur a change in size of the texture
-      data. If the resource view format is a block compressed format, then
-      the underlying CUDA array or CUDA mipmapped array has to have a base
-      of format :py:obj:`~.CU_AD_FORMAT_UNSIGNED_INT32`. with 2 or 4
-      channels, depending on the block compressed format. For ex., BC1 and
-      BC4 require the underlying CUDA array to have a format of
-      :py:obj:`~.CU_AD_FORMAT_UNSIGNED_INT32` with 2 channels. The other BC
-      formats require the underlying resource to have the same base format
-      but with 4 channels.
-
-    - :py:obj:`~.CUDA_RESOURCE_VIEW_DESC.width` specifies the new width of
-      the texture data. If the resource view format is a block compressed
-      format, this value has to be 4 times the original width of the
-      resource. For non block compressed formats, this value has to be
-      equal to that of the original resource.
-
-    - :py:obj:`~.CUDA_RESOURCE_VIEW_DESC.height` specifies the new height
-      of the texture data. If the resource view format is a block
-      compressed format, this value has to be 4 times the original height
-      of the resource. For non block compressed formats, this value has to
-      be equal to that of the original resource.
-
-    - :py:obj:`~.CUDA_RESOURCE_VIEW_DESC.depth` specifies the new depth of
-      the texture data. This value has to be equal to that of the original
-      resource.
-
-    - :py:obj:`~.CUDA_RESOURCE_VIEW_DESC.firstMipmapLevel` specifies the
-      most detailed mipmap level. This will be the new mipmap level zero.
-      For non-mipmapped resources, this value has to be
-      zero.:py:obj:`~.CUDA_TEXTURE_DESC.minMipmapLevelClamp` and
-      :py:obj:`~.CUDA_TEXTURE_DESC.maxMipmapLevelClamp` will be relative to
-      this value. For ex., if the firstMipmapLevel is set to 2, and a
-      minMipmapLevelClamp of 1.2 is specified, then the actual minimum
-      mipmap level clamp will be 3.2.
-
-    - :py:obj:`~.CUDA_RESOURCE_VIEW_DESC.lastMipmapLevel` specifies the
-      least detailed mipmap level. For non-mipmapped resources, this value
-      has to be zero.
-
-    - :py:obj:`~.CUDA_RESOURCE_VIEW_DESC.firstLayer` specifies the first
-      layer index for layered textures. This will be the new layer zero.
-      For non-layered resources, this value has to be zero.
-
-    - :py:obj:`~.CUDA_RESOURCE_VIEW_DESC.lastLayer` specifies the last
-      layer index for layered textures. For non-layered resources, this
-      value has to be zero.
-
-    Parameters
-    ----------
-    pResDesc : :py:obj:`~.CUDA_RESOURCE_DESC`
-        Resource descriptor
-    pTexDesc : :py:obj:`~.CUDA_TEXTURE_DESC`
-        Texture descriptor
-    pResViewDesc : :py:obj:`~.CUDA_RESOURCE_VIEW_DESC`
-        Resource view descriptor
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pTexObject : :py:obj:`~.CUtexObject`
-        Texture object to create
-
-    See Also
-    --------
-    :py:obj:`~.cuTexObjectDestroy`, :py:obj:`~.cudaCreateTextureObject`
-    """
-    cdef CUtexObject pTexObject = CUtexObject()
-    cdef cydriver.CUDA_RESOURCE_DESC* cypResDesc_ptr = pResDesc._ptr if pResDesc != None else NULL
-    cdef cydriver.CUDA_TEXTURE_DESC* cypTexDesc_ptr = pTexDesc._ptr if pTexDesc != None else NULL
-    cdef cydriver.CUDA_RESOURCE_VIEW_DESC* cypResViewDesc_ptr = pResViewDesc._ptr if pResViewDesc != None else NULL
-    err = cydriver.cuTexObjectCreate(<cydriver.CUtexObject*>pTexObject._ptr, cypResDesc_ptr, cypTexDesc_ptr, cypResViewDesc_ptr)
-    return (CUresult(err), pTexObject)
-{{endif}}
-
-{{if 'cuTexObjectDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexObjectDestroy(texObject):
-    """ Destroys a texture object.
-
-    Destroys the texture object specified by `texObject`.
-
-    Parameters
-    ----------
-    texObject : :py:obj:`~.CUtexObject`
-        Texture object to destroy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTexObjectCreate`, :py:obj:`~.cudaDestroyTextureObject`
-    """
-    cdef cydriver.CUtexObject cytexObject
-    if texObject is None:
-        cytexObject = <cydriver.CUtexObject><void_ptr>0
-    elif isinstance(texObject, (CUtexObject,)):
-        ptexObject = int(texObject)
-        cytexObject = <cydriver.CUtexObject><void_ptr>ptexObject
-    else:
-        ptexObject = int(CUtexObject(texObject))
-        cytexObject = <cydriver.CUtexObject><void_ptr>ptexObject
-    err = cydriver.cuTexObjectDestroy(cytexObject)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuTexObjectGetResourceDesc' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexObjectGetResourceDesc(texObject):
-    """ Returns a texture object's resource descriptor.
-
-    Returns the resource descriptor for the texture object specified by
-    `texObject`.
-
-    Parameters
-    ----------
-    texObject : :py:obj:`~.CUtexObject`
-        Texture object
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pResDesc : :py:obj:`~.CUDA_RESOURCE_DESC`
-        Resource descriptor
-
-    See Also
-    --------
-    :py:obj:`~.cuTexObjectCreate`, :py:obj:`~.cudaGetTextureObjectResourceDesc`,
-    """
-    cdef cydriver.CUtexObject cytexObject
-    if texObject is None:
-        cytexObject = <cydriver.CUtexObject><void_ptr>0
-    elif isinstance(texObject, (CUtexObject,)):
-        ptexObject = int(texObject)
-        cytexObject = <cydriver.CUtexObject><void_ptr>ptexObject
-    else:
-        ptexObject = int(CUtexObject(texObject))
-        cytexObject = <cydriver.CUtexObject><void_ptr>ptexObject
-    cdef CUDA_RESOURCE_DESC pResDesc = CUDA_RESOURCE_DESC()
-    err = cydriver.cuTexObjectGetResourceDesc(<cydriver.CUDA_RESOURCE_DESC*>pResDesc._ptr, cytexObject)
-    return (CUresult(err), pResDesc)
-{{endif}}
-
-{{if 'cuTexObjectGetTextureDesc' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexObjectGetTextureDesc(texObject):
-    """ Returns a texture object's texture descriptor.
-
-    Returns the texture descriptor for the texture object specified by
-    `texObject`.
-
-    Parameters
-    ----------
-    texObject : :py:obj:`~.CUtexObject`
-        Texture object
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pTexDesc : :py:obj:`~.CUDA_TEXTURE_DESC`
-        Texture descriptor
-
-    See Also
-    --------
-    :py:obj:`~.cuTexObjectCreate`, :py:obj:`~.cudaGetTextureObjectTextureDesc`
-    """
-    cdef cydriver.CUtexObject cytexObject
-    if texObject is None:
-        cytexObject = <cydriver.CUtexObject><void_ptr>0
-    elif isinstance(texObject, (CUtexObject,)):
-        ptexObject = int(texObject)
-        cytexObject = <cydriver.CUtexObject><void_ptr>ptexObject
-    else:
-        ptexObject = int(CUtexObject(texObject))
-        cytexObject = <cydriver.CUtexObject><void_ptr>ptexObject
-    cdef CUDA_TEXTURE_DESC pTexDesc = CUDA_TEXTURE_DESC()
-    err = cydriver.cuTexObjectGetTextureDesc(<cydriver.CUDA_TEXTURE_DESC*>pTexDesc._ptr, cytexObject)
-    return (CUresult(err), pTexDesc)
-{{endif}}
-
-{{if 'cuTexObjectGetResourceViewDesc' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTexObjectGetResourceViewDesc(texObject):
-    """ Returns a texture object's resource view descriptor.
-
-    Returns the resource view descriptor for the texture object specified
-    by `texObject`. If no resource view was set for `texObject`, the
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned.
-
-    Parameters
-    ----------
-    texObject : :py:obj:`~.CUtexObject`
-        Texture object
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pResViewDesc : :py:obj:`~.CUDA_RESOURCE_VIEW_DESC`
-        Resource view descriptor
-
-    See Also
-    --------
-    :py:obj:`~.cuTexObjectCreate`, :py:obj:`~.cudaGetTextureObjectResourceViewDesc`
-    """
-    cdef cydriver.CUtexObject cytexObject
-    if texObject is None:
-        cytexObject = <cydriver.CUtexObject><void_ptr>0
-    elif isinstance(texObject, (CUtexObject,)):
-        ptexObject = int(texObject)
-        cytexObject = <cydriver.CUtexObject><void_ptr>ptexObject
-    else:
-        ptexObject = int(CUtexObject(texObject))
-        cytexObject = <cydriver.CUtexObject><void_ptr>ptexObject
-    cdef CUDA_RESOURCE_VIEW_DESC pResViewDesc = CUDA_RESOURCE_VIEW_DESC()
-    err = cydriver.cuTexObjectGetResourceViewDesc(<cydriver.CUDA_RESOURCE_VIEW_DESC*>pResViewDesc._ptr, cytexObject)
-    return (CUresult(err), pResViewDesc)
-{{endif}}
-
-{{if 'cuSurfObjectCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuSurfObjectCreate(pResDesc : Optional[CUDA_RESOURCE_DESC]):
-    """ Creates a surface object.
-
-    Creates a surface object and returns it in `pSurfObject`. `pResDesc`
-    describes the data to perform surface load/stores on.
-    :py:obj:`~.CUDA_RESOURCE_DESC.resType` must be
-    :py:obj:`~.CU_RESOURCE_TYPE_ARRAY` and
-    :py:obj:`~.CUDA_RESOURCE_DESC`::res::array::hArray must be set to a
-    valid CUDA array handle. :py:obj:`~.CUDA_RESOURCE_DESC.flags` must be
-    set to zero.
-
-    Surface objects are only supported on devices of compute capability 3.0
-    or higher. Additionally, a surface object is an opaque value, and, as
-    such, should only be accessed through CUDA API calls.
-
-    Parameters
-    ----------
-    pResDesc : :py:obj:`~.CUDA_RESOURCE_DESC`
-        Resource descriptor
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pSurfObject : :py:obj:`~.CUsurfObject`
-        Surface object to create
-
-    See Also
-    --------
-    :py:obj:`~.cuSurfObjectDestroy`, :py:obj:`~.cudaCreateSurfaceObject`
-    """
-    cdef CUsurfObject pSurfObject = CUsurfObject()
-    cdef cydriver.CUDA_RESOURCE_DESC* cypResDesc_ptr = pResDesc._ptr if pResDesc != None else NULL
-    err = cydriver.cuSurfObjectCreate(<cydriver.CUsurfObject*>pSurfObject._ptr, cypResDesc_ptr)
-    return (CUresult(err), pSurfObject)
-{{endif}}
-
-{{if 'cuSurfObjectDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuSurfObjectDestroy(surfObject):
-    """ Destroys a surface object.
-
-    Destroys the surface object specified by `surfObject`.
-
-    Parameters
-    ----------
-    surfObject : :py:obj:`~.CUsurfObject`
-        Surface object to destroy
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuSurfObjectCreate`, :py:obj:`~.cudaDestroySurfaceObject`
-    """
-    cdef cydriver.CUsurfObject cysurfObject
-    if surfObject is None:
-        cysurfObject = <cydriver.CUsurfObject><void_ptr>0
-    elif isinstance(surfObject, (CUsurfObject,)):
-        psurfObject = int(surfObject)
-        cysurfObject = <cydriver.CUsurfObject><void_ptr>psurfObject
-    else:
-        psurfObject = int(CUsurfObject(surfObject))
-        cysurfObject = <cydriver.CUsurfObject><void_ptr>psurfObject
-    err = cydriver.cuSurfObjectDestroy(cysurfObject)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuSurfObjectGetResourceDesc' in found_functions}}
-
-@cython.embedsignature(True)
-def cuSurfObjectGetResourceDesc(surfObject):
-    """ Returns a surface object's resource descriptor.
-
-    Returns the resource descriptor for the surface object specified by
-    `surfObject`.
-
-    Parameters
-    ----------
-    surfObject : :py:obj:`~.CUsurfObject`
-        Surface object
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pResDesc : :py:obj:`~.CUDA_RESOURCE_DESC`
-        Resource descriptor
-
-    See Also
-    --------
-    :py:obj:`~.cuSurfObjectCreate`, :py:obj:`~.cudaGetSurfaceObjectResourceDesc`
-    """
-    cdef cydriver.CUsurfObject cysurfObject
-    if surfObject is None:
-        cysurfObject = <cydriver.CUsurfObject><void_ptr>0
-    elif isinstance(surfObject, (CUsurfObject,)):
-        psurfObject = int(surfObject)
-        cysurfObject = <cydriver.CUsurfObject><void_ptr>psurfObject
-    else:
-        psurfObject = int(CUsurfObject(surfObject))
-        cysurfObject = <cydriver.CUsurfObject><void_ptr>psurfObject
-    cdef CUDA_RESOURCE_DESC pResDesc = CUDA_RESOURCE_DESC()
-    err = cydriver.cuSurfObjectGetResourceDesc(<cydriver.CUDA_RESOURCE_DESC*>pResDesc._ptr, cysurfObject)
-    return (CUresult(err), pResDesc)
-{{endif}}
-
-{{if 'cuTensorMapEncodeTiled' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTensorMapEncodeTiled(tensorDataType not None : CUtensorMapDataType, tensorRank, globalAddress, globalDim : Optional[Tuple[cuuint64_t] | List[cuuint64_t]], globalStrides : Optional[Tuple[cuuint64_t] | List[cuuint64_t]], boxDim : Optional[Tuple[cuuint32_t] | List[cuuint32_t]], elementStrides : Optional[Tuple[cuuint32_t] | List[cuuint32_t]], interleave not None : CUtensorMapInterleave, swizzle not None : CUtensorMapSwizzle, l2Promotion not None : CUtensorMapL2promotion, oobFill not None : CUtensorMapFloatOOBfill):
-    """ Create a tensor map descriptor object representing tiled memory region.
-
-    Creates a descriptor for Tensor Memory Access (TMA) object specified by
-    the parameters describing a tiled region and returns it in `tensorMap`.
-
-    Tensor map objects are only supported on devices of compute capability
-    9.0 or higher. Additionally, a tensor map object is an opaque value,
-    and, as such, should only be accessed through CUDA API calls.
-
-    The parameters passed are bound to the following requirements:
-
-    - `tensorMap` address must be aligned to 64 bytes.
-
-    - `tensorDataType` has to be an enum from
-      :py:obj:`~.CUtensorMapDataType` which is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - `tensorRank` must be non-zero and less than or equal to the maximum
-      supported dimensionality of 5. If `interleave` is not
-      :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_NONE`, then `tensorRank` must
-      additionally be greater than or equal to 3.
-
-    - `globalAddress`, which specifies the starting address of the memory
-      region described, must be 32 byte aligned when `interleave` is
-      :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_32B` and 16 byte aligned
-      otherwise.
-
-    - `globalDim` array, which specifies tensor size of each of the
-      `tensorRank` dimensions, must be non-zero and less than or equal to
-      2^32.
-
-    - `globalStrides` array, which specifies tensor stride of each of the
-      lower `tensorRank` - 1 dimensions in bytes, must be a multiple of 16
-      and less than 2^40. Additionally, the stride must be a multiple of 32
-      when `interleave` is :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_32B`. Each
-      following dimension specified includes previous dimension stride:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - `boxDim` array, which specifies number of elements to be traversed
-      along each of the `tensorRank` dimensions, must be non-zero and less
-      than or equal to 256. When `interleave` is
-      :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_NONE`, { `boxDim`[0] *
-      elementSizeInBytes( `tensorDataType` ) } must be a multiple of 16
-      bytes.
-
-    - `elementStrides` array, which specifies the iteration step along each
-      of the `tensorRank` dimensions, must be non-zero and less than or
-      equal to 8. Note that when `interleave` is
-      :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_NONE`, the first element of this
-      array is ignored since TMA doesn’t support the stride for dimension
-      zero. When all elements of `elementStrides` array is one, `boxDim`
-      specifies the number of elements to load. However, if the
-      `elementStrides`[i] is not equal to one, then TMA loads ceil(
-      `boxDim`[i] / `elementStrides`[i]) number of elements along i-th
-      dimension. To load N elements along i-th dimension, `boxDim`[i] must
-      be set to N * `elementStrides`[i].
-
-    - `interleave` specifies the interleaved layout of type
-      :py:obj:`~.CUtensorMapInterleave`, which is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - TMA supports interleaved layouts like NC/8HWC8 where C8 utilizes 16
-      bytes in memory assuming 2 byte per channel or NC/16HWC16 where C16
-      uses 32 bytes. When `interleave` is
-      :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_NONE` and `swizzle` is not
-      :py:obj:`~.CU_TENSOR_MAP_SWIZZLE_NONE`, the bounding box inner
-      dimension (computed as `boxDim`[0] multiplied by element size derived
-      from `tensorDataType`) must be less than or equal to the swizzle
-      size.
-
-      - CU_TENSOR_MAP_SWIZZLE_32B implies the bounding box inner dimension
-        will be <= 32.
-
-      - CU_TENSOR_MAP_SWIZZLE_64B implies the bounding box inner dimension
-        will be <= 64.
-
-      - CU_TENSOR_MAP_SWIZZLE_128B implies the bounding box inner dimension
-        will be <= 128.
-
-    - `swizzle`, which specifies the shared memory bank swizzling pattern,
-      has to be of type :py:obj:`~.CUtensorMapSwizzle` which is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - Data are organized in a specific order in global memory; however,
-      this may not match the order in which the application accesses data
-      in shared memory. This difference in data organization may cause bank
-      conflicts when shared memory is accessed. In order to avoid this
-      problem, data can be loaded to shared memory with shuffling across
-      shared memory banks. When `interleave` is
-      :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_32B`, `swizzle` must be
-      :py:obj:`~.CU_TENSOR_MAP_SWIZZLE_32B`. Other interleave modes can
-      have any swizzling pattern.
-
-    - `l2Promotion` specifies L2 fetch size which indicates the byte
-      granurality at which L2 requests is filled from DRAM. It must be of
-      type :py:obj:`~.CUtensorMapL2promotion`, which is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - `oobFill`, which indicates whether zero or a special NaN constant
-      should be used to fill out-of-bound elements, must be of type
-      :py:obj:`~.CUtensorMapFloatOOBfill` which is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - Note that
-      :py:obj:`~.CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA` can
-      only be used when `tensorDataType` represents a floating-point data
-      type.
-
-    Parameters
-    ----------
-    tensorDataType : :py:obj:`~.CUtensorMapDataType`
-        Tensor data type
-    tensorRank : Any
-        Dimensionality of tensor
-    globalAddress : Any
-        Starting address of memory region described by tensor
-    globalDim : List[:py:obj:`~.cuuint64_t`]
-        Array containing tensor size (number of elements) along each of the
-        `tensorRank` dimensions
-    globalStrides : List[:py:obj:`~.cuuint64_t`]
-        Array containing stride size (in bytes) along each of the
-        `tensorRank` - 1 dimensions
-    boxDim : List[:py:obj:`~.cuuint32_t`]
-        Array containing traversal box size (number of elments) along each
-        of the `tensorRank` dimensions. Specifies how many elements to be
-        traversed along each tensor dimension.
-    elementStrides : List[:py:obj:`~.cuuint32_t`]
-        Array containing traversal stride in each of the `tensorRank`
-        dimensions
-    interleave : :py:obj:`~.CUtensorMapInterleave`
-        Type of interleaved layout the tensor addresses
-    swizzle : :py:obj:`~.CUtensorMapSwizzle`
-        Bank swizzling pattern inside shared memory
-    l2Promotion : :py:obj:`~.CUtensorMapL2promotion`
-        L2 promotion size
-    oobFill : :py:obj:`~.CUtensorMapFloatOOBfill`
-        Indicate whether zero or special NaN constant must be used to fill
-        out-of-bound elements
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    tensorMap : :py:obj:`~.CUtensorMap`
-        Tensor map object to create
-
-    See Also
-    --------
-    :py:obj:`~.cuTensorMapEncodeIm2col`, :py:obj:`~.cuTensorMapReplaceAddress`
-    """
-    elementStrides = [] if elementStrides is None else elementStrides
-    if not all(isinstance(_x, (cuuint32_t,)) for _x in elementStrides):
-        raise TypeError("Argument 'elementStrides' is not instance of type (expected Tuple[cydriver.cuuint32_t,] or List[cydriver.cuuint32_t,]")
-    boxDim = [] if boxDim is None else boxDim
-    if not all(isinstance(_x, (cuuint32_t,)) for _x in boxDim):
-        raise TypeError("Argument 'boxDim' is not instance of type (expected Tuple[cydriver.cuuint32_t,] or List[cydriver.cuuint32_t,]")
-    globalStrides = [] if globalStrides is None else globalStrides
-    if not all(isinstance(_x, (cuuint64_t,)) for _x in globalStrides):
-        raise TypeError("Argument 'globalStrides' is not instance of type (expected Tuple[cydriver.cuuint64_t,] or List[cydriver.cuuint64_t,]")
-    globalDim = [] if globalDim is None else globalDim
-    if not all(isinstance(_x, (cuuint64_t,)) for _x in globalDim):
-        raise TypeError("Argument 'globalDim' is not instance of type (expected Tuple[cydriver.cuuint64_t,] or List[cydriver.cuuint64_t,]")
-    cdef cydriver.cuuint32_t cytensorRank
-    if tensorRank is None:
-        cytensorRank = <cydriver.cuuint32_t><void_ptr>0
-    elif isinstance(tensorRank, (cuuint32_t,)):
-        ptensorRank = int(tensorRank)
-        cytensorRank = <cydriver.cuuint32_t><void_ptr>ptensorRank
-    else:
-        ptensorRank = int(cuuint32_t(tensorRank))
-        cytensorRank = <cydriver.cuuint32_t><void_ptr>ptensorRank
-    cdef CUtensorMap tensorMap = CUtensorMap()
-    cdef cydriver.CUtensorMapDataType cytensorDataType = tensorDataType.value
-    cyglobalAddress = utils.HelperInputVoidPtr(globalAddress)
-    cdef void* cyglobalAddress_ptr = <void*><void_ptr>cyglobalAddress.cptr
-    cdef cydriver.cuuint64_t* cyglobalDim = NULL
-    if len(globalDim) > 0:
-        cyglobalDim = <cydriver.cuuint64_t*> calloc(len(globalDim), sizeof(cydriver.cuuint64_t))
-        if cyglobalDim is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(globalDim)) + 'x' + str(sizeof(cydriver.cuuint64_t)))
-        else:
-            for idx in range(len(globalDim)):
-                cyglobalDim[idx] = <cydriver.cuuint64_t>(<cuuint64_t>globalDim[idx])._ptr[0]
-    cdef cydriver.cuuint64_t* cyglobalStrides = NULL
-    if len(globalStrides) > 0:
-        cyglobalStrides = <cydriver.cuuint64_t*> calloc(len(globalStrides), sizeof(cydriver.cuuint64_t))
-        if cyglobalStrides is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(globalStrides)) + 'x' + str(sizeof(cydriver.cuuint64_t)))
-        else:
-            for idx in range(len(globalStrides)):
-                cyglobalStrides[idx] = <cydriver.cuuint64_t>(<cuuint64_t>globalStrides[idx])._ptr[0]
-    cdef cydriver.cuuint32_t* cyboxDim = NULL
-    if len(boxDim) > 0:
-        cyboxDim = <cydriver.cuuint32_t*> calloc(len(boxDim), sizeof(cydriver.cuuint32_t))
-        if cyboxDim is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(boxDim)) + 'x' + str(sizeof(cydriver.cuuint32_t)))
-        else:
-            for idx in range(len(boxDim)):
-                cyboxDim[idx] = <cydriver.cuuint32_t>(<cuuint32_t>boxDim[idx])._ptr[0]
-    cdef cydriver.cuuint32_t* cyelementStrides = NULL
-    if len(elementStrides) > 0:
-        cyelementStrides = <cydriver.cuuint32_t*> calloc(len(elementStrides), sizeof(cydriver.cuuint32_t))
-        if cyelementStrides is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(elementStrides)) + 'x' + str(sizeof(cydriver.cuuint32_t)))
-        else:
-            for idx in range(len(elementStrides)):
-                cyelementStrides[idx] = <cydriver.cuuint32_t>(<cuuint32_t>elementStrides[idx])._ptr[0]
-    cdef cydriver.CUtensorMapInterleave cyinterleave = interleave.value
-    cdef cydriver.CUtensorMapSwizzle cyswizzle = swizzle.value
-    cdef cydriver.CUtensorMapL2promotion cyl2Promotion = l2Promotion.value
-    cdef cydriver.CUtensorMapFloatOOBfill cyoobFill = oobFill.value
-    err = cydriver.cuTensorMapEncodeTiled(<cydriver.CUtensorMap*>tensorMap._ptr, cytensorDataType, cytensorRank, cyglobalAddress_ptr, <cydriver.cuuint64_t*>(<cuuint64_t>globalDim[0])._ptr if len(globalDim) == 1 else cyglobalDim, <cydriver.cuuint64_t*>(<cuuint64_t>globalStrides[0])._ptr if len(globalStrides) == 1 else cyglobalStrides, <cydriver.cuuint32_t*>(<cuuint32_t>boxDim[0])._ptr if len(boxDim) == 1 else cyboxDim, <cydriver.cuuint32_t*>(<cuuint32_t>elementStrides[0])._ptr if len(elementStrides) == 1 else cyelementStrides, cyinterleave, cyswizzle, cyl2Promotion, cyoobFill)
-    if cyglobalDim is not NULL:
-        free(cyglobalDim)
-    if cyglobalStrides is not NULL:
-        free(cyglobalStrides)
-    if cyboxDim is not NULL:
-        free(cyboxDim)
-    if cyelementStrides is not NULL:
-        free(cyelementStrides)
-    return (CUresult(err), tensorMap)
-{{endif}}
-
-{{if 'cuTensorMapEncodeIm2col' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTensorMapEncodeIm2col(tensorDataType not None : CUtensorMapDataType, tensorRank, globalAddress, globalDim : Optional[Tuple[cuuint64_t] | List[cuuint64_t]], globalStrides : Optional[Tuple[cuuint64_t] | List[cuuint64_t]], pixelBoxLowerCorner : Optional[Tuple[int] | List[int]], pixelBoxUpperCorner : Optional[Tuple[int] | List[int]], channelsPerPixel, pixelsPerColumn, elementStrides : Optional[Tuple[cuuint32_t] | List[cuuint32_t]], interleave not None : CUtensorMapInterleave, swizzle not None : CUtensorMapSwizzle, l2Promotion not None : CUtensorMapL2promotion, oobFill not None : CUtensorMapFloatOOBfill):
-    """ Create a tensor map descriptor object representing im2col memory region.
-
-    Creates a descriptor for Tensor Memory Access (TMA) object specified by
-    the parameters describing a im2col memory layout and returns it in
-    `tensorMap`.
-
-    Tensor map objects are only supported on devices of compute capability
-    9.0 or higher. Additionally, a tensor map object is an opaque value,
-    and, as such, should only be accessed through CUDA API calls.
-
-    The parameters passed are bound to the following requirements:
-
-    - `tensorMap` address must be aligned to 64 bytes.
-
-    - `tensorDataType` has to be an enum from
-      :py:obj:`~.CUtensorMapDataType` which is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - `tensorRank`, which specifies the number of tensor dimensions, must
-      be 3, 4, or 5.
-
-    - `globalAddress`, which specifies the starting address of the memory
-      region described, must be 32 byte aligned when `interleave` is
-      :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_32B` and 16 byte aligned
-      otherwise.
-
-    - `globalDim` array, which specifies tensor size of each of the
-      `tensorRank` dimensions, must be non-zero and less than or equal to
-      2^32.
-
-    - `globalStrides` array, which specifies tensor stride of each of the
-      lower `tensorRank` - 1 dimensions in bytes, must be a multiple of 16
-      and less than 2^40. Additionally, the stride must be a multiple of 32
-      when `interleave` is :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_32B`. Each
-      following dimension specified includes previous dimension stride:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - `pixelBoxLowerCorner` array specifies the coordinate offsets {D, H,
-      W} of the bounding box from top/left/front corner. The number of
-      offsets and their precision depend on the tensor dimensionality:
-
-      - When `tensorRank` is 3, one signed offset within range [-32768,
-        32767] is supported.
-
-      - When `tensorRank` is 4, two signed offsets each within range [-128,
-        127] are supported.
-
-      - When `tensorRank` is 5, three offsets each within range [-16, 15]
-        are supported.
-
-    - `pixelBoxUpperCorner` array specifies the coordinate offsets {D, H,
-      W} of the bounding box from bottom/right/back corner. The number of
-      offsets and their precision depend on the tensor dimensionality:
-
-      - When `tensorRank` is 3, one signed offset within range [-32768,
-        32767] is supported.
-
-      - When `tensorRank` is 4, two signed offsets each within range [-128,
-        127] are supported.
-
-      - When `tensorRank` is 5, three offsets each within range [-16, 15]
-        are supported. The bounding box specified by `pixelBoxLowerCorner`
-        and `pixelBoxUpperCorner` must have non-zero area.
-
-    - `channelsPerPixel`, which specifies the number of elements which must
-      be accessed along C dimension, must be less than or equal to 256.
-
-    - `pixelsPerColumn`, which specifies the number of elements that must
-      be accessed along the {N, D, H, W} dimensions, must be less than or
-      equal to 1024.
-
-    - `elementStrides` array, which specifies the iteration step along each
-      of the `tensorRank` dimensions, must be non-zero and less than or
-      equal to 8. Note that when `interleave` is
-      :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_NONE`, the first element of this
-      array is ignored since TMA doesn’t support the stride for dimension
-      zero. When all elements of the `elementStrides` array are one,
-      `boxDim` specifies the number of elements to load. However, if
-      `elementStrides`[i] is not equal to one for some `i`, then TMA loads
-      ceil( `boxDim`[i] / `elementStrides`[i]) number of elements along
-      i-th dimension. To load N elements along i-th dimension, `boxDim`[i]
-      must be set to N * `elementStrides`[i].
-
-    - `interleave` specifies the interleaved layout of type
-      :py:obj:`~.CUtensorMapInterleave`, which is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - TMA supports interleaved layouts like NC/8HWC8 where C8 utilizes 16
-      bytes in memory assuming 2 byte per channel or NC/16HWC16 where C16
-      uses 32 bytes. When `interleave` is
-      :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_NONE` and `swizzle` is not
-      :py:obj:`~.CU_TENSOR_MAP_SWIZZLE_NONE`, the bounding box inner
-      dimension (computed as `boxDim`[0] multiplied by element size derived
-      from `tensorDataType`) must be less than or equal to the swizzle
-      size.
-
-      - CU_TENSOR_MAP_SWIZZLE_32B implies the bounding box inner dimension
-        will be <= 32.
-
-      - CU_TENSOR_MAP_SWIZZLE_64B implies the bounding box inner dimension
-        will be <= 64.
-
-      - CU_TENSOR_MAP_SWIZZLE_128B implies the bounding box inner dimension
-        will be <= 128.
-
-    - `swizzle`, which specifies the shared memory bank swizzling pattern,
-      has to be of type :py:obj:`~.CUtensorMapSwizzle` which is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - Data are organized in a specific order in global memory; however,
-      this may not match the order in which the application accesses data
-      in shared memory. This difference in data organization may cause bank
-      conflicts when shared memory is accessed. In order to avoid this
-      problem, data can be loaded to shared memory with shuffling across
-      shared memory banks. When `interleave` is
-      :py:obj:`~.CU_TENSOR_MAP_INTERLEAVE_32B`, `swizzle` must be
-      :py:obj:`~.CU_TENSOR_MAP_SWIZZLE_32B`. Other interleave modes can
-      have any swizzling pattern.
-
-    - `l2Promotion` specifies L2 fetch size which indicates the byte
-      granularity at which L2 requests are filled from DRAM. It must be of
-      type :py:obj:`~.CUtensorMapL2promotion`, which is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - `oobFill`, which indicates whether zero or a special NaN constant
-      should be used to fill out-of-bound elements, must be of type
-      :py:obj:`~.CUtensorMapFloatOOBfill` which is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - Note that
-      :py:obj:`~.CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA` can
-      only be used when `tensorDataType` represents a floating-point data
-      type.
-
-    Parameters
-    ----------
-    tensorDataType : :py:obj:`~.CUtensorMapDataType`
-        Tensor data type
-    tensorRank : Any
-        Dimensionality of tensor; must be at least 3
-    globalAddress : Any
-        Starting address of memory region described by tensor
-    globalDim : List[:py:obj:`~.cuuint64_t`]
-        Array containing tensor size (number of elements) along each of the
-        `tensorRank` dimensions
-    globalStrides : List[:py:obj:`~.cuuint64_t`]
-        Array containing stride size (in bytes) along each of the
-        `tensorRank` - 1 dimensions
-    pixelBoxLowerCorner : List[int]
-        Array containing DHW dimensions of lower box corner
-    pixelBoxUpperCorner : List[int]
-        Array containing DHW dimensions of upper box corner
-    channelsPerPixel : Any
-        Number of channels per pixel
-    pixelsPerColumn : Any
-        Number of pixels per column
-    elementStrides : List[:py:obj:`~.cuuint32_t`]
-        Array containing traversal stride in each of the `tensorRank`
-        dimensions
-    interleave : :py:obj:`~.CUtensorMapInterleave`
-        Type of interleaved layout the tensor addresses
-    swizzle : :py:obj:`~.CUtensorMapSwizzle`
-        Bank swizzling pattern inside shared memory
-    l2Promotion : :py:obj:`~.CUtensorMapL2promotion`
-        L2 promotion size
-    oobFill : :py:obj:`~.CUtensorMapFloatOOBfill`
-        Indicate whether zero or special NaN constant will be used to fill
-        out-of-bound elements
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    tensorMap : :py:obj:`~.CUtensorMap`
-        Tensor map object to create
-
-    See Also
-    --------
-    :py:obj:`~.cuTensorMapEncodeTiled`, :py:obj:`~.cuTensorMapReplaceAddress`
-    """
-    elementStrides = [] if elementStrides is None else elementStrides
-    if not all(isinstance(_x, (cuuint32_t,)) for _x in elementStrides):
-        raise TypeError("Argument 'elementStrides' is not instance of type (expected Tuple[cydriver.cuuint32_t,] or List[cydriver.cuuint32_t,]")
-    cdef cydriver.cuuint32_t cypixelsPerColumn
-    if pixelsPerColumn is None:
-        cypixelsPerColumn = <cydriver.cuuint32_t><void_ptr>0
-    elif isinstance(pixelsPerColumn, (cuuint32_t,)):
-        ppixelsPerColumn = int(pixelsPerColumn)
-        cypixelsPerColumn = <cydriver.cuuint32_t><void_ptr>ppixelsPerColumn
-    else:
-        ppixelsPerColumn = int(cuuint32_t(pixelsPerColumn))
-        cypixelsPerColumn = <cydriver.cuuint32_t><void_ptr>ppixelsPerColumn
-    cdef cydriver.cuuint32_t cychannelsPerPixel
-    if channelsPerPixel is None:
-        cychannelsPerPixel = <cydriver.cuuint32_t><void_ptr>0
-    elif isinstance(channelsPerPixel, (cuuint32_t,)):
-        pchannelsPerPixel = int(channelsPerPixel)
-        cychannelsPerPixel = <cydriver.cuuint32_t><void_ptr>pchannelsPerPixel
-    else:
-        pchannelsPerPixel = int(cuuint32_t(channelsPerPixel))
-        cychannelsPerPixel = <cydriver.cuuint32_t><void_ptr>pchannelsPerPixel
-    pixelBoxUpperCorner = [] if pixelBoxUpperCorner is None else pixelBoxUpperCorner
-    if not all(isinstance(_x, (int)) for _x in pixelBoxUpperCorner):
-        raise TypeError("Argument 'pixelBoxUpperCorner' is not instance of type (expected Tuple[int] or List[int]")
-    pixelBoxLowerCorner = [] if pixelBoxLowerCorner is None else pixelBoxLowerCorner
-    if not all(isinstance(_x, (int)) for _x in pixelBoxLowerCorner):
-        raise TypeError("Argument 'pixelBoxLowerCorner' is not instance of type (expected Tuple[int] or List[int]")
-    globalStrides = [] if globalStrides is None else globalStrides
-    if not all(isinstance(_x, (cuuint64_t,)) for _x in globalStrides):
-        raise TypeError("Argument 'globalStrides' is not instance of type (expected Tuple[cydriver.cuuint64_t,] or List[cydriver.cuuint64_t,]")
-    globalDim = [] if globalDim is None else globalDim
-    if not all(isinstance(_x, (cuuint64_t,)) for _x in globalDim):
-        raise TypeError("Argument 'globalDim' is not instance of type (expected Tuple[cydriver.cuuint64_t,] or List[cydriver.cuuint64_t,]")
-    cdef cydriver.cuuint32_t cytensorRank
-    if tensorRank is None:
-        cytensorRank = <cydriver.cuuint32_t><void_ptr>0
-    elif isinstance(tensorRank, (cuuint32_t,)):
-        ptensorRank = int(tensorRank)
-        cytensorRank = <cydriver.cuuint32_t><void_ptr>ptensorRank
-    else:
-        ptensorRank = int(cuuint32_t(tensorRank))
-        cytensorRank = <cydriver.cuuint32_t><void_ptr>ptensorRank
-    cdef CUtensorMap tensorMap = CUtensorMap()
-    cdef cydriver.CUtensorMapDataType cytensorDataType = tensorDataType.value
-    cyglobalAddress = utils.HelperInputVoidPtr(globalAddress)
-    cdef void* cyglobalAddress_ptr = <void*><void_ptr>cyglobalAddress.cptr
-    cdef cydriver.cuuint64_t* cyglobalDim = NULL
-    if len(globalDim) > 0:
-        cyglobalDim = <cydriver.cuuint64_t*> calloc(len(globalDim), sizeof(cydriver.cuuint64_t))
-        if cyglobalDim is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(globalDim)) + 'x' + str(sizeof(cydriver.cuuint64_t)))
-        else:
-            for idx in range(len(globalDim)):
-                cyglobalDim[idx] = <cydriver.cuuint64_t>(<cuuint64_t>globalDim[idx])._ptr[0]
-    cdef cydriver.cuuint64_t* cyglobalStrides = NULL
-    if len(globalStrides) > 0:
-        cyglobalStrides = <cydriver.cuuint64_t*> calloc(len(globalStrides), sizeof(cydriver.cuuint64_t))
-        if cyglobalStrides is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(globalStrides)) + 'x' + str(sizeof(cydriver.cuuint64_t)))
-        else:
-            for idx in range(len(globalStrides)):
-                cyglobalStrides[idx] = <cydriver.cuuint64_t>(<cuuint64_t>globalStrides[idx])._ptr[0]
-    cdef vector[int] cypixelBoxLowerCorner = pixelBoxLowerCorner
-    cdef vector[int] cypixelBoxUpperCorner = pixelBoxUpperCorner
-    cdef cydriver.cuuint32_t* cyelementStrides = NULL
-    if len(elementStrides) > 0:
-        cyelementStrides = <cydriver.cuuint32_t*> calloc(len(elementStrides), sizeof(cydriver.cuuint32_t))
-        if cyelementStrides is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(elementStrides)) + 'x' + str(sizeof(cydriver.cuuint32_t)))
-        else:
-            for idx in range(len(elementStrides)):
-                cyelementStrides[idx] = <cydriver.cuuint32_t>(<cuuint32_t>elementStrides[idx])._ptr[0]
-    cdef cydriver.CUtensorMapInterleave cyinterleave = interleave.value
-    cdef cydriver.CUtensorMapSwizzle cyswizzle = swizzle.value
-    cdef cydriver.CUtensorMapL2promotion cyl2Promotion = l2Promotion.value
-    cdef cydriver.CUtensorMapFloatOOBfill cyoobFill = oobFill.value
-    err = cydriver.cuTensorMapEncodeIm2col(<cydriver.CUtensorMap*>tensorMap._ptr, cytensorDataType, cytensorRank, cyglobalAddress_ptr, <cydriver.cuuint64_t*>(<cuuint64_t>globalDim[0])._ptr if len(globalDim) == 1 else cyglobalDim, <cydriver.cuuint64_t*>(<cuuint64_t>globalStrides[0])._ptr if len(globalStrides) == 1 else cyglobalStrides, cypixelBoxLowerCorner.data(), cypixelBoxUpperCorner.data(), cychannelsPerPixel, cypixelsPerColumn, <cydriver.cuuint32_t*>(<cuuint32_t>elementStrides[0])._ptr if len(elementStrides) == 1 else cyelementStrides, cyinterleave, cyswizzle, cyl2Promotion, cyoobFill)
-    if cyglobalDim is not NULL:
-        free(cyglobalDim)
-    if cyglobalStrides is not NULL:
-        free(cyglobalStrides)
-    if cyelementStrides is not NULL:
-        free(cyelementStrides)
-    return (CUresult(err), tensorMap)
-{{endif}}
-
-{{if 'cuTensorMapReplaceAddress' in found_functions}}
-
-@cython.embedsignature(True)
-def cuTensorMapReplaceAddress(tensorMap : Optional[CUtensorMap], globalAddress):
-    """ Modify an existing tensor map descriptor with an updated global address.
-
-    Modifies the descriptor for Tensor Memory Access (TMA) object passed in
-    `tensorMap` with an updated `globalAddress`.
-
-    Tensor map objects are only supported on devices of compute capability
-    9.0 or higher. Additionally, a tensor map object is an opaque value,
-    and, as such, should only be accessed through CUDA API calls.
-
-    Parameters
-    ----------
-    tensorMap : :py:obj:`~.CUtensorMap`
-        Tensor map object to modify
-    globalAddress : Any
-        Starting address of memory region described by tensor, must follow
-        previous alignment requirements
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuTensorMapEncodeTiled`, :py:obj:`~.cuTensorMapEncodeIm2col`
-    """
-    cdef cydriver.CUtensorMap* cytensorMap_ptr = tensorMap._ptr if tensorMap != None else NULL
-    cyglobalAddress = utils.HelperInputVoidPtr(globalAddress)
-    cdef void* cyglobalAddress_ptr = <void*><void_ptr>cyglobalAddress.cptr
-    err = cydriver.cuTensorMapReplaceAddress(cytensorMap_ptr, cyglobalAddress_ptr)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuDeviceCanAccessPeer' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceCanAccessPeer(dev, peerDev):
-    """ Queries if a device may directly access a peer device's memory.
-
-    Returns in `*canAccessPeer` a value of 1 if contexts on `dev` are
-    capable of directly accessing memory from contexts on `peerDev` and 0
-    otherwise. If direct access of `peerDev` from `dev` is possible, then
-    access may be enabled on two specific contexts by calling
-    :py:obj:`~.cuCtxEnablePeerAccess()`.
-
-    Parameters
-    ----------
-    dev : :py:obj:`~.CUdevice`
-        Device from which allocations on `peerDev` are to be directly
-        accessed.
-    peerDev : :py:obj:`~.CUdevice`
-        Device on which the allocations to be directly accessed by `dev`
-        reside.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    canAccessPeer : int
-        Returned access capability
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxEnablePeerAccess`, :py:obj:`~.cuCtxDisablePeerAccess`, :py:obj:`~.cudaDeviceCanAccessPeer`
-    """
-    cdef cydriver.CUdevice cypeerDev
-    if peerDev is None:
-        cypeerDev = <cydriver.CUdevice>0
-    elif isinstance(peerDev, (CUdevice,)):
-        ppeerDev = int(peerDev)
-        cypeerDev = <cydriver.CUdevice>ppeerDev
-    else:
-        ppeerDev = int(CUdevice(peerDev))
-        cypeerDev = <cydriver.CUdevice>ppeerDev
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef int canAccessPeer = 0
-    err = cydriver.cuDeviceCanAccessPeer(&canAccessPeer, cydev, cypeerDev)
-    return (CUresult(err), canAccessPeer)
-{{endif}}
-
-{{if 'cuCtxEnablePeerAccess' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxEnablePeerAccess(peerContext, unsigned int Flags):
-    """ Enables direct access to memory allocations in a peer context.
-
-    If both the current context and `peerContext` are on devices which
-    support unified addressing (as may be queried using
-    :py:obj:`~.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING`) and same major
-    compute capability, then on success all allocations from `peerContext`
-    will immediately be accessible by the current context. See
-    :py:obj:`~.Unified Addressing` for additional details.
-
-    Note that access granted by this call is unidirectional and that in
-    order to access memory from the current context in `peerContext`, a
-    separate symmetric call to :py:obj:`~.cuCtxEnablePeerAccess()` is
-    required.
-
-    Note that there are both device-wide and system-wide limitations per
-    system configuration, as noted in the CUDA Programming Guide under the
-    section "Peer-to-Peer Memory Access".
-
-    Returns :py:obj:`~.CUDA_ERROR_PEER_ACCESS_UNSUPPORTED` if
-    :py:obj:`~.cuDeviceCanAccessPeer()` indicates that the
-    :py:obj:`~.CUdevice` of the current context cannot directly access
-    memory from the :py:obj:`~.CUdevice` of `peerContext`.
-
-    Returns :py:obj:`~.CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED` if direct
-    access of `peerContext` from the current context has already been
-    enabled.
-
-    Returns :py:obj:`~.CUDA_ERROR_TOO_MANY_PEERS` if direct peer access is
-    not possible because hardware resources required for peer access have
-    been exhausted.
-
-    Returns :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT` if there is no current
-    context, `peerContext` is not a valid context, or if the current
-    context is `peerContext`.
-
-    Returns :py:obj:`~.CUDA_ERROR_INVALID_VALUE` if `Flags` is not 0.
-
-    Parameters
-    ----------
-    peerContext : :py:obj:`~.CUcontext`
-        Peer context to enable direct access to from the current context
-    Flags : unsigned int
-        Reserved for future use and must be set to 0
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED`, :py:obj:`~.CUDA_ERROR_TOO_MANY_PEERS`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_PEER_ACCESS_UNSUPPORTED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceCanAccessPeer`, :py:obj:`~.cuCtxDisablePeerAccess`, :py:obj:`~.cudaDeviceEnablePeerAccess`
-    """
-    cdef cydriver.CUcontext cypeerContext
-    if peerContext is None:
-        cypeerContext = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(peerContext, (CUcontext,)):
-        ppeerContext = int(peerContext)
-        cypeerContext = <cydriver.CUcontext><void_ptr>ppeerContext
-    else:
-        ppeerContext = int(CUcontext(peerContext))
-        cypeerContext = <cydriver.CUcontext><void_ptr>ppeerContext
-    err = cydriver.cuCtxEnablePeerAccess(cypeerContext, Flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxDisablePeerAccess' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxDisablePeerAccess(peerContext):
-    """ Disables direct access to memory allocations in a peer context and unregisters any registered allocations.
-
-    Returns :py:obj:`~.CUDA_ERROR_PEER_ACCESS_NOT_ENABLED` if direct peer
-    access has not yet been enabled from `peerContext` to the current
-    context.
-
-    Returns :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT` if there is no current
-    context, or if `peerContext` is not a valid context.
-
-    Parameters
-    ----------
-    peerContext : :py:obj:`~.CUcontext`
-        Peer context to disable direct access to
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_PEER_ACCESS_NOT_ENABLED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`,
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceCanAccessPeer`, :py:obj:`~.cuCtxEnablePeerAccess`, :py:obj:`~.cudaDeviceDisablePeerAccess`
-    """
-    cdef cydriver.CUcontext cypeerContext
-    if peerContext is None:
-        cypeerContext = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(peerContext, (CUcontext,)):
-        ppeerContext = int(peerContext)
-        cypeerContext = <cydriver.CUcontext><void_ptr>ppeerContext
-    else:
-        ppeerContext = int(CUcontext(peerContext))
-        cypeerContext = <cydriver.CUcontext><void_ptr>ppeerContext
-    err = cydriver.cuCtxDisablePeerAccess(cypeerContext)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuDeviceGetP2PAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetP2PAttribute(attrib not None : CUdevice_P2PAttribute, srcDevice, dstDevice):
-    """ Queries attributes of the link between two devices.
-
-    Returns in `*value` the value of the requested attribute `attrib` of
-    the link between `srcDevice` and `dstDevice`. The supported attributes
-    are:
-
-    - :py:obj:`~.CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK`: A relative
-      value indicating the performance of the link between two devices.
-
-    - :py:obj:`~.CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED` P2P: 1 if P2P
-      Access is enable.
-
-    - :py:obj:`~.CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED`: 1 if
-      Atomic operations over the link are supported.
-
-    - :py:obj:`~.CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED`: 1 if
-      cudaArray can be accessed over the link.
-
-    Returns :py:obj:`~.CUDA_ERROR_INVALID_DEVICE` if `srcDevice` or
-    `dstDevice` are not valid or if they represent the same device.
-
-    Returns :py:obj:`~.CUDA_ERROR_INVALID_VALUE` if `attrib` is not valid
-    or if `value` is a null pointer.
-
-    Parameters
-    ----------
-    attrib : :py:obj:`~.CUdevice_P2PAttribute`
-        The requested attribute of the link between `srcDevice` and
-        `dstDevice`.
-    srcDevice : :py:obj:`~.CUdevice`
-        The source device of the target link.
-    dstDevice : :py:obj:`~.CUdevice`
-        The destination device of the target link.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    value : int
-        Returned value of the requested attribute
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxEnablePeerAccess`, :py:obj:`~.cuCtxDisablePeerAccess`, :py:obj:`~.cuDeviceCanAccessPeer`, :py:obj:`~.cudaDeviceGetP2PAttribute`
-    """
-    cdef cydriver.CUdevice cydstDevice
-    if dstDevice is None:
-        cydstDevice = <cydriver.CUdevice>0
-    elif isinstance(dstDevice, (CUdevice,)):
-        pdstDevice = int(dstDevice)
-        cydstDevice = <cydriver.CUdevice>pdstDevice
-    else:
-        pdstDevice = int(CUdevice(dstDevice))
-        cydstDevice = <cydriver.CUdevice>pdstDevice
-    cdef cydriver.CUdevice cysrcDevice
-    if srcDevice is None:
-        cysrcDevice = <cydriver.CUdevice>0
-    elif isinstance(srcDevice, (CUdevice,)):
-        psrcDevice = int(srcDevice)
-        cysrcDevice = <cydriver.CUdevice>psrcDevice
-    else:
-        psrcDevice = int(CUdevice(srcDevice))
-        cysrcDevice = <cydriver.CUdevice>psrcDevice
-    cdef int value = 0
-    cdef cydriver.CUdevice_P2PAttribute cyattrib = attrib.value
-    err = cydriver.cuDeviceGetP2PAttribute(&value, cyattrib, cysrcDevice, cydstDevice)
-    return (CUresult(err), value)
-{{endif}}
-
-{{if 'cuGraphicsUnregisterResource' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphicsUnregisterResource(resource):
-    """ Unregisters a graphics resource for access by CUDA.
-
-    Unregisters the graphics resource `resource` so it is not accessible by
-    CUDA unless registered again.
-
-    If `resource` is invalid then :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` is
-    returned.
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.CUgraphicsResource`
-        Resource to unregister
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphicsD3D9RegisterResource`, :py:obj:`~.cuGraphicsD3D10RegisterResource`, :py:obj:`~.cuGraphicsD3D11RegisterResource`, :py:obj:`~.cuGraphicsGLRegisterBuffer`, :py:obj:`~.cuGraphicsGLRegisterImage`, :py:obj:`~.cudaGraphicsUnregisterResource`
-    """
-    cdef cydriver.CUgraphicsResource cyresource
-    if resource is None:
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>0
-    elif isinstance(resource, (CUgraphicsResource,)):
-        presource = int(resource)
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    else:
-        presource = int(CUgraphicsResource(resource))
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    err = cydriver.cuGraphicsUnregisterResource(cyresource)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphicsSubResourceGetMappedArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphicsSubResourceGetMappedArray(resource, unsigned int arrayIndex, unsigned int mipLevel):
-    """ Get an array through which to access a subresource of a mapped graphics resource.
-
-    Returns in `*pArray` an array through which the subresource of the
-    mapped graphics resource `resource` which corresponds to array index
-    `arrayIndex` and mipmap level `mipLevel` may be accessed. The value set
-    in `*pArray` may change every time that `resource` is mapped.
-
-    If `resource` is not a texture then it cannot be accessed via an array
-    and :py:obj:`~.CUDA_ERROR_NOT_MAPPED_AS_ARRAY` is returned. If
-    `arrayIndex` is not a valid array index for `resource` then
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned. If `mipLevel` is not
-    a valid mipmap level for `resource` then
-    :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is returned. If `resource` is not
-    mapped then :py:obj:`~.CUDA_ERROR_NOT_MAPPED` is returned.
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.CUgraphicsResource`
-        Mapped resource to access
-    arrayIndex : unsigned int
-        Array index for array textures or cubemap face index as defined by
-        :py:obj:`~.CUarray_cubemap_face` for cubemap textures for the
-        subresource to access
-    mipLevel : unsigned int
-        Mipmap level for the subresource to access
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_MAPPED`, :py:obj:`~.CUDA_ERROR_NOT_MAPPED_AS_ARRAY`
-    pArray : :py:obj:`~.CUarray`
-        Returned array through which a subresource of `resource` may be
-        accessed
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphicsResourceGetMappedPointer`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`
-    """
-    cdef cydriver.CUgraphicsResource cyresource
-    if resource is None:
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>0
-    elif isinstance(resource, (CUgraphicsResource,)):
-        presource = int(resource)
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    else:
-        presource = int(CUgraphicsResource(resource))
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    cdef CUarray pArray = CUarray()
-    err = cydriver.cuGraphicsSubResourceGetMappedArray(<cydriver.CUarray*>pArray._ptr, cyresource, arrayIndex, mipLevel)
-    return (CUresult(err), pArray)
-{{endif}}
-
-{{if 'cuGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphicsResourceGetMappedMipmappedArray(resource):
-    """ Get a mipmapped array through which to access a mapped graphics resource.
-
-    Returns in `*pMipmappedArray` a mipmapped array through which the
-    mapped graphics resource `resource`. The value set in
-    `*pMipmappedArray` may change every time that `resource` is mapped.
-
-    If `resource` is not a texture then it cannot be accessed via a
-    mipmapped array and :py:obj:`~.CUDA_ERROR_NOT_MAPPED_AS_ARRAY` is
-    returned. If `resource` is not mapped then
-    :py:obj:`~.CUDA_ERROR_NOT_MAPPED` is returned.
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.CUgraphicsResource`
-        Mapped resource to access
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_MAPPED`, :py:obj:`~.CUDA_ERROR_NOT_MAPPED_AS_ARRAY`
-    pMipmappedArray : :py:obj:`~.CUmipmappedArray`
-        Returned mipmapped array through which `resource` may be accessed
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphicsResourceGetMappedPointer`, :py:obj:`~.cudaGraphicsResourceGetMappedMipmappedArray`
-    """
-    cdef cydriver.CUgraphicsResource cyresource
-    if resource is None:
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>0
-    elif isinstance(resource, (CUgraphicsResource,)):
-        presource = int(resource)
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    else:
-        presource = int(CUgraphicsResource(resource))
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    cdef CUmipmappedArray pMipmappedArray = CUmipmappedArray()
-    err = cydriver.cuGraphicsResourceGetMappedMipmappedArray(<cydriver.CUmipmappedArray*>pMipmappedArray._ptr, cyresource)
-    return (CUresult(err), pMipmappedArray)
-{{endif}}
-
-{{if 'cuGraphicsResourceGetMappedPointer_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphicsResourceGetMappedPointer(resource):
-    """ Get a device pointer through which to access a mapped graphics resource.
-
-    Returns in `*pDevPtr` a pointer through which the mapped graphics
-    resource `resource` may be accessed. Returns in `pSize` the size of the
-    memory in bytes which may be accessed from that pointer. The value set
-    in `pPointer` may change every time that `resource` is mapped.
-
-    If `resource` is not a buffer then it cannot be accessed via a pointer
-    and :py:obj:`~.CUDA_ERROR_NOT_MAPPED_AS_POINTER` is returned. If
-    `resource` is not mapped then :py:obj:`~.CUDA_ERROR_NOT_MAPPED` is
-    returned.
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.CUgraphicsResource`
-        None
-
-    Returns
-    -------
-    CUresult
-
-    pDevPtr : :py:obj:`~.CUdeviceptr`
-        None
-    pSize : int
-        None
-    """
-    cdef cydriver.CUgraphicsResource cyresource
-    if resource is None:
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>0
-    elif isinstance(resource, (CUgraphicsResource,)):
-        presource = int(resource)
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    else:
-        presource = int(CUgraphicsResource(resource))
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    cdef CUdeviceptr pDevPtr = CUdeviceptr()
-    cdef size_t pSize = 0
-    err = cydriver.cuGraphicsResourceGetMappedPointer(<cydriver.CUdeviceptr*>pDevPtr._ptr, &pSize, cyresource)
-    return (CUresult(err), pDevPtr, pSize)
-{{endif}}
-
-{{if 'cuGraphicsResourceSetMapFlags_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphicsResourceSetMapFlags(resource, unsigned int flags):
-    """ Set usage flags for mapping a graphics resource.
-
-    Set `flags` for mapping the graphics resource `resource`.
-
-    Changes to `flags` will take effect the next time `resource` is mapped.
-    The `flags` argument may be any of the following:
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE`: Specifies no hints
-      about how this resource will be used. It is therefore assumed that
-      this resource will be read from and written to by CUDA kernels. This
-      is the default value.
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_READONLY`: Specifies that
-      CUDA kernels which access this resource will not write to this
-      resource.
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITEDISCARD`: Specifies
-      that CUDA kernels which access this resource will not read from this
-      resource and will write over the entire contents of the resource, so
-      none of the data previously stored in the resource will be preserved.
-
-    If `resource` is presently mapped for access by CUDA then
-    :py:obj:`~.CUDA_ERROR_ALREADY_MAPPED` is returned. If `flags` is not
-    one of the above values then :py:obj:`~.CUDA_ERROR_INVALID_VALUE` is
-    returned.
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.CUgraphicsResource`
-        Registered resource to set flags for
-    flags : unsigned int
-        Parameters for resource mapping
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_ALREADY_MAPPED`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cudaGraphicsResourceSetMapFlags`
-    """
-    cdef cydriver.CUgraphicsResource cyresource
-    if resource is None:
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>0
-    elif isinstance(resource, (CUgraphicsResource,)):
-        presource = int(resource)
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    else:
-        presource = int(CUgraphicsResource(resource))
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    err = cydriver.cuGraphicsResourceSetMapFlags(cyresource, flags)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphicsMapResources' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphicsMapResources(unsigned int count, resources, hStream):
-    """ Map graphics resources for access by CUDA.
-
-    Maps the `count` graphics resources in `resources` for access by CUDA.
-
-    The resources in `resources` may be accessed by CUDA until they are
-    unmapped. The graphics API from which `resources` were registered
-    should not access any resources while they are mapped by CUDA. If an
-    application does so, the results are undefined.
-
-    This function provides the synchronization guarantee that any graphics
-    calls issued before :py:obj:`~.cuGraphicsMapResources()` will complete
-    before any subsequent CUDA work issued in `stream` begins.
-
-    If `resources` includes any duplicate entries then
-    :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` is returned. If any of
-    `resources` are presently mapped for access by CUDA then
-    :py:obj:`~.CUDA_ERROR_ALREADY_MAPPED` is returned.
-
-    Parameters
-    ----------
-    count : unsigned int
-        Number of resources to map
-    resources : :py:obj:`~.CUgraphicsResource`
-        Resources to map for CUDA usage
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream with which to synchronize
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_ALREADY_MAPPED`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cudaGraphicsMapResources`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUgraphicsResource *cyresources
-    if resources is None:
-        cyresources = <cydriver.CUgraphicsResource*><void_ptr>NULL
-    elif isinstance(resources, (CUgraphicsResource,)):
-        presources = resources.getPtr()
-        cyresources = <cydriver.CUgraphicsResource*><void_ptr>presources
-    elif isinstance(resources, (int)):
-        cyresources = <cydriver.CUgraphicsResource*><void_ptr>resources
-    else:
-        raise TypeError("Argument 'resources' is not instance of type (expected <class 'int, driver.CUgraphicsResource'>, found " + str(type(resources)))
-    err = cydriver.cuGraphicsMapResources(count, cyresources, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGraphicsUnmapResources' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGraphicsUnmapResources(unsigned int count, resources, hStream):
-    """ Unmap graphics resources.
-
-    Unmaps the `count` graphics resources in `resources`.
-
-    Once unmapped, the resources in `resources` may not be accessed by CUDA
-    until they are mapped again.
-
-    This function provides the synchronization guarantee that any CUDA work
-    issued in `stream` before :py:obj:`~.cuGraphicsUnmapResources()` will
-    complete before any subsequently issued graphics work begins.
-
-    If `resources` includes any duplicate entries then
-    :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` is returned. If any of
-    `resources` are not presently mapped for access by CUDA then
-    :py:obj:`~.CUDA_ERROR_NOT_MAPPED` is returned.
-
-    Parameters
-    ----------
-    count : unsigned int
-        Number of resources to unmap
-    resources : :py:obj:`~.CUgraphicsResource`
-        Resources to unmap
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream with which to synchronize
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_NOT_MAPPED`, :py:obj:`~.CUDA_ERROR_UNKNOWN`
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cudaGraphicsUnmapResources`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef cydriver.CUgraphicsResource *cyresources
-    if resources is None:
-        cyresources = <cydriver.CUgraphicsResource*><void_ptr>NULL
-    elif isinstance(resources, (CUgraphicsResource,)):
-        presources = resources.getPtr()
-        cyresources = <cydriver.CUgraphicsResource*><void_ptr>presources
-    elif isinstance(resources, (int)):
-        cyresources = <cydriver.CUgraphicsResource*><void_ptr>resources
-    else:
-        raise TypeError("Argument 'resources' is not instance of type (expected <class 'int, driver.CUgraphicsResource'>, found " + str(type(resources)))
-    err = cydriver.cuGraphicsUnmapResources(count, cyresources, cyhStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGetProcAddress_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGetProcAddress(char* symbol, int cudaVersion, flags):
-    """ Returns the requested driver API function pointer.
-
-    Returns in `**pfn` the address of the CUDA driver function for the
-    requested CUDA version and flags.
-
-    The CUDA version is specified as (1000 * major + 10 * minor), so CUDA
-    11.2 should be specified as 11020. For a requested driver symbol, if
-    the specified CUDA version is greater than or equal to the CUDA version
-    in which the driver symbol was introduced, this API will return the
-    function pointer to the corresponding versioned function.
-
-    The pointer returned by the API should be cast to a function pointer
-    matching the requested driver function's definition in the API header
-    file. The function pointer typedef can be picked up from the
-    corresponding typedefs header file. For example, cudaTypedefs.h
-    consists of function pointer typedefs for driver APIs defined in
-    :py:obj:`~.cuda.h`.
-
-    The API will return :py:obj:`~.CUDA_SUCCESS` and set the returned `pfn`
-    to NULL if the requested driver function is not supported on the
-    platform, no ABI compatible driver function exists for the specified
-    `cudaVersion` or if the driver symbol is invalid.
-
-    It will also set the optional `symbolStatus` to one of the values in
-    :py:obj:`~.CUdriverProcAddressQueryResult` with the following meanings:
-
-    - :py:obj:`~.CU_GET_PROC_ADDRESS_SUCCESS` - The requested symbol was
-      succesfully found based on input arguments and `pfn` is valid
-
-    - :py:obj:`~.CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND` - The requested
-      symbol was not found
-
-    - :py:obj:`~.CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT` - The
-      requested symbol was found but is not supported by cudaVersion
-      specified
-
-    The requested flags can be:
-
-    - :py:obj:`~.CU_GET_PROC_ADDRESS_DEFAULT`: This is the default mode.
-      This is equivalent to
-      :py:obj:`~.CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM` if the code
-      is compiled with --default-stream per-thread compilation flag or the
-      macro CUDA_API_PER_THREAD_DEFAULT_STREAM is defined;
-      :py:obj:`~.CU_GET_PROC_ADDRESS_LEGACY_STREAM` otherwise.
-
-    - :py:obj:`~.CU_GET_PROC_ADDRESS_LEGACY_STREAM`: This will enable the
-      search for all driver symbols that match the requested driver symbol
-      name except the corresponding per-thread versions.
-
-    - :py:obj:`~.CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM`: This will
-      enable the search for all driver symbols that match the requested
-      driver symbol name including the per-thread versions. If a per-thread
-      version is not found, the API will return the legacy version of the
-      driver function.
-
-    Parameters
-    ----------
-    symbol : bytes
-        The base name of the driver API function to look for. As an
-        example, for the driver API :py:obj:`~.cuMemAlloc_v2`, `symbol`
-        would be cuMemAlloc and `cudaVersion` would be the ABI compatible
-        CUDA version for the _v2 variant.
-    cudaVersion : int
-        The CUDA version to look for the requested driver symbol
-    flags : Any
-        Flags to specify search options.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    pfn : Any
-        Location to return the function pointer to the requested driver
-        function
-    symbolStatus : :py:obj:`~.CUdriverProcAddressQueryResult`
-        Optional location to store the status of the search for `symbol`
-        based on `cudaVersion`. See
-        :py:obj:`~.CUdriverProcAddressQueryResult` for possible values.
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetDriverEntryPoint`
-    """
-    cdef cydriver.cuuint64_t cyflags
-    if flags is None:
-        cyflags = <cydriver.cuuint64_t><void_ptr>0
-    elif isinstance(flags, (cuuint64_t,)):
-        pflags = int(flags)
-        cyflags = <cydriver.cuuint64_t><void_ptr>pflags
-    else:
-        pflags = int(cuuint64_t(flags))
-        cyflags = <cydriver.cuuint64_t><void_ptr>pflags
-    cdef void_ptr pfn = 0
-    cdef cydriver.CUdriverProcAddressQueryResult symbolStatus
-    err = cydriver.cuGetProcAddress(symbol, <void**>&pfn, cudaVersion, cyflags, &symbolStatus)
-    return (CUresult(err), pfn, CUdriverProcAddressQueryResult(symbolStatus))
-{{endif}}
-
-{{if 'cuCoredumpGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCoredumpGetAttribute(attrib not None : CUcoredumpSettings):
-    """ Allows caller to fetch a coredump attribute value for the current context.
-
-    Returns in `*value` the requested value specified by `attrib`. It is up
-    to the caller to ensure that the data type and size of `*value` matches
-    the request.
-
-    If the caller calls this function with `*value` equal to NULL, the size
-    of the memory region (in bytes) expected for `attrib` will be placed in
-    `size`.
-
-    The supported attributes are:
-
-    - :py:obj:`~.CU_COREDUMP_ENABLE_ON_EXCEPTION`: Bool where
-      :py:obj:`~.true` means that GPU exceptions from this context will
-      create a coredump at the location specified by
-      :py:obj:`~.CU_COREDUMP_FILE`. The default value is :py:obj:`~.false`
-      unless set to :py:obj:`~.true` globally or locally, or the
-      CU_CTX_USER_COREDUMP_ENABLE flag was set during context creation.
-
-    - :py:obj:`~.CU_COREDUMP_TRIGGER_HOST`: Bool where :py:obj:`~.true`
-      means that the host CPU will also create a coredump. The default
-      value is :py:obj:`~.true` unless set to :py:obj:`~.false` globally or
-      or locally. This value is deprecated as of CUDA 12.5 - raise the
-      :py:obj:`~.CU_COREDUMP_SKIP_ABORT` flag to disable host device
-      abort() if needed.
-
-    - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT`: Bool where :py:obj:`~.true`
-      means that any resulting coredumps will not have a dump of GPU memory
-      or non-reloc ELF images. The default value is :py:obj:`~.false`
-      unless set to :py:obj:`~.true` globally or locally. This attribute is
-      deprecated as of CUDA 12.5, please use
-      :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS` instead.
-
-    - :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER`: Bool where
-      :py:obj:`~.true` means that a coredump can be created by writing to
-      the system pipe specified by :py:obj:`~.CU_COREDUMP_PIPE`. The
-      default value is :py:obj:`~.false` unless set to :py:obj:`~.true`
-      globally or locally.
-
-    - :py:obj:`~.CU_COREDUMP_FILE`: String of up to 1023 characters that
-      defines the location where any coredumps generated by this context
-      will be written. The default value is
-      :py:obj:`~.core`.cuda.HOSTNAME.PID where :py:obj:`~.HOSTNAME` is the
-      host name of the machine running the CUDA applications and
-      :py:obj:`~.PID` is the process ID of the CUDA application.
-
-    - :py:obj:`~.CU_COREDUMP_PIPE`: String of up to 1023 characters that
-      defines the name of the pipe that will be monitored if user-triggered
-      coredumps are enabled. The default value is
-      :py:obj:`~.corepipe`.cuda.HOSTNAME.PID where :py:obj:`~.HOSTNAME` is
-      the host name of the machine running the CUDA application and
-      :py:obj:`~.PID` is the process ID of the CUDA application.
-
-    - :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS`: An integer with values to
-      allow granular control the data contained in a coredump specified as
-      a bitwise OR combination of the following values:
-
-      - :py:obj:`~.CU_COREDUMP_DEFAULT_FLAGS` - if set by itself, coredump
-        generation returns to its default settings of including all memory
-        regions that it is able to access
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES` - Coredump
-        will not include the data from CUDA source modules that are not
-        relocated at runtime.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_GLOBAL_MEMORY` - Coredump will not
-        include device-side global data that does not belong to any
-        context.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_SHARED_MEMORY` - Coredump will not
-        include grid-scale shared memory for the warp that the dumped
-        kernel belonged to.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_LOCAL_MEMORY` - Coredump will not
-        include local memory from the kernel.
-
-      - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT_FLAGS` - Enables all of the
-        above options. Equiavlent to setting the
-        :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT` attribute to :py:obj:`~.true`.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_ABORT` - If set, GPU exceptions will
-        not raise an abort() in the host CPU process. Same functional goal
-        as :py:obj:`~.CU_COREDUMP_TRIGGER_HOST` but better reflects the
-        default behavior.
-
-    Parameters
-    ----------
-    attrib : :py:obj:`~.CUcoredumpSettings`
-        The enum defining which value to fetch.
-    size : int
-        The size of the memory region `value` points to.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_CONTEXT_IS_DESTROYED`
-    value : Any
-        void* containing the requested data.
-    size : int
-        The size of the memory region `value` points to.
-
-    See Also
-    --------
-    :py:obj:`~.cuCoredumpGetAttributeGlobal`, :py:obj:`~.cuCoredumpSetAttribute`, :py:obj:`~.cuCoredumpSetAttributeGlobal`
-    """
-    cdef cydriver.CUcoredumpSettings cyattrib = attrib.value
-    cdef utils.HelperCUcoredumpSettings cyvalue = utils.HelperCUcoredumpSettings(attrib, 0, is_getter=True)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    cdef size_t size = cyvalue.size()
-    err = cydriver.cuCoredumpGetAttribute(cyattrib, cyvalue_ptr, &size)
-    return (CUresult(err), cyvalue.pyObj())
-{{endif}}
-
-{{if 'cuCoredumpGetAttributeGlobal' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCoredumpGetAttributeGlobal(attrib not None : CUcoredumpSettings):
-    """ Allows caller to fetch a coredump attribute value for the entire application.
-
-    Returns in `*value` the requested value specified by `attrib`. It is up
-    to the caller to ensure that the data type and size of `*value` matches
-    the request.
-
-    If the caller calls this function with `*value` equal to NULL, the size
-    of the memory region (in bytes) expected for `attrib` will be placed in
-    `size`.
-
-    The supported attributes are:
-
-    - :py:obj:`~.CU_COREDUMP_ENABLE_ON_EXCEPTION`: Bool where
-      :py:obj:`~.true` means that GPU exceptions from this context will
-      create a coredump at the location specified by
-      :py:obj:`~.CU_COREDUMP_FILE`. The default value is :py:obj:`~.false`.
-
-    - :py:obj:`~.CU_COREDUMP_TRIGGER_HOST`: Bool where :py:obj:`~.true`
-      means that the host CPU will also create a coredump. The default
-      value is :py:obj:`~.true` unless set to :py:obj:`~.false` globally or
-      or locally. This value is deprecated as of CUDA 12.5 - raise the
-      :py:obj:`~.CU_COREDUMP_SKIP_ABORT` flag to disable host device
-      abort() if needed.
-
-    - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT`: Bool where :py:obj:`~.true`
-      means that any resulting coredumps will not have a dump of GPU memory
-      or non-reloc ELF images. The default value is :py:obj:`~.false`. This
-      attribute is deprecated as of CUDA 12.5, please use
-      :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS` instead.
-
-    - :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER`: Bool where
-      :py:obj:`~.true` means that a coredump can be created by writing to
-      the system pipe specified by :py:obj:`~.CU_COREDUMP_PIPE`. The
-      default value is :py:obj:`~.false`.
-
-    - :py:obj:`~.CU_COREDUMP_FILE`: String of up to 1023 characters that
-      defines the location where any coredumps generated by this context
-      will be written. The default value is
-      :py:obj:`~.core`.cuda.HOSTNAME.PID where :py:obj:`~.HOSTNAME` is the
-      host name of the machine running the CUDA applications and
-      :py:obj:`~.PID` is the process ID of the CUDA application.
-
-    - :py:obj:`~.CU_COREDUMP_PIPE`: String of up to 1023 characters that
-      defines the name of the pipe that will be monitored if user-triggered
-      coredumps are enabled. The default value is
-      :py:obj:`~.corepipe`.cuda.HOSTNAME.PID where :py:obj:`~.HOSTNAME` is
-      the host name of the machine running the CUDA application and
-      :py:obj:`~.PID` is the process ID of the CUDA application.
-
-    - :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS`: An integer with values to
-      allow granular control the data contained in a coredump specified as
-      a bitwise OR combination of the following values:
-
-      - :py:obj:`~.CU_COREDUMP_DEFAULT_FLAGS` - if set by itself, coredump
-        generation returns to its default settings of including all memory
-        regions that it is able to access
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES` - Coredump
-        will not include the data from CUDA source modules that are not
-        relocated at runtime.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_GLOBAL_MEMORY` - Coredump will not
-        include device-side global data that does not belong to any
-        context.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_SHARED_MEMORY` - Coredump will not
-        include grid-scale shared memory for the warp that the dumped
-        kernel belonged to.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_LOCAL_MEMORY` - Coredump will not
-        include local memory from the kernel.
-
-      - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT_FLAGS` - Enables all of the
-        above options. Equiavlent to setting the
-        :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT` attribute to :py:obj:`~.true`.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_ABORT` - If set, GPU exceptions will
-        not raise an abort() in the host CPU process. Same functional goal
-        as :py:obj:`~.CU_COREDUMP_TRIGGER_HOST` but better reflects the
-        default behavior.
-
-    Parameters
-    ----------
-    attrib : :py:obj:`~.CUcoredumpSettings`
-        The enum defining which value to fetch.
-    size : int
-        The size of the memory region `value` points to.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    value : Any
-        void* containing the requested data.
-    size : int
-        The size of the memory region `value` points to.
-
-    See Also
-    --------
-    :py:obj:`~.cuCoredumpGetAttribute`, :py:obj:`~.cuCoredumpSetAttribute`, :py:obj:`~.cuCoredumpSetAttributeGlobal`
-    """
-    cdef cydriver.CUcoredumpSettings cyattrib = attrib.value
-    cdef utils.HelperCUcoredumpSettings cyvalue = utils.HelperCUcoredumpSettings(attrib, 0, is_getter=True)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    cdef size_t size = cyvalue.size()
-    err = cydriver.cuCoredumpGetAttributeGlobal(cyattrib, cyvalue_ptr, &size)
-    return (CUresult(err), cyvalue.pyObj())
-{{endif}}
-
-{{if 'cuCoredumpSetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCoredumpSetAttribute(attrib not None : CUcoredumpSettings, value):
-    """ Allows caller to set a coredump attribute value for the current context.
-
-    This function should be considered an alternate interface to the CUDA-
-    GDB environment variables defined in this document:
-    https://docs.nvidia.com/cuda/cuda-gdb/index.html#gpu-coredump
-
-    An important design decision to note is that any coredump environment
-    variable values set before CUDA initializes will take permanent
-    precedence over any values set with this function. This decision was
-    made to ensure no change in behavior for any users that may be
-    currently using these variables to get coredumps.
-
-    `*value` shall contain the requested value specified by `set`. It is up
-    to the caller to ensure that the data type and size of `*value` matches
-    the request.
-
-    If the caller calls this function with `*value` equal to NULL, the size
-    of the memory region (in bytes) expected for `set` will be placed in
-    `size`.
-
-    /note This function will return :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` if
-    the caller attempts to set :py:obj:`~.CU_COREDUMP_ENABLE_ON_EXCEPTION`
-    on a GPU of with Compute Capability < 6.0.
-    :py:obj:`~.cuCoredumpSetAttributeGlobal` works on those platforms as an
-    alternative.
-
-    /note :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER` and
-    :py:obj:`~.CU_COREDUMP_PIPE` cannot be set on a per-context basis.
-
-    The supported attributes are:
-
-    - :py:obj:`~.CU_COREDUMP_ENABLE_ON_EXCEPTION`: Bool where
-      :py:obj:`~.true` means that GPU exceptions from this context will
-      create a coredump at the location specified by
-      :py:obj:`~.CU_COREDUMP_FILE`. The default value is :py:obj:`~.false`.
-
-    - :py:obj:`~.CU_COREDUMP_TRIGGER_HOST`: Bool where :py:obj:`~.true`
-      means that the host CPU will also create a coredump. The default
-      value is :py:obj:`~.true` unless set to :py:obj:`~.false` globally or
-      or locally. This value is deprecated as of CUDA 12.5 - raise the
-      :py:obj:`~.CU_COREDUMP_SKIP_ABORT` flag to disable host device
-      abort() if needed.
-
-    - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT`: Bool where :py:obj:`~.true`
-      means that any resulting coredumps will not have a dump of GPU memory
-      or non-reloc ELF images. The default value is :py:obj:`~.false`. This
-      attribute is deprecated as of CUDA 12.5, please use
-      :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS` instead.
-
-    - :py:obj:`~.CU_COREDUMP_FILE`: String of up to 1023 characters that
-      defines the location where any coredumps generated by this context
-      will be written. The default value is
-      :py:obj:`~.core`.cuda.HOSTNAME.PID where :py:obj:`~.HOSTNAME` is the
-      host name of the machine running the CUDA applications and
-      :py:obj:`~.PID` is the process ID of the CUDA application.
-
-    - :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS`: An integer with values to
-      allow granular control the data contained in a coredump specified as
-      a bitwise OR combination of the following values:
-
-      - :py:obj:`~.CU_COREDUMP_DEFAULT_FLAGS` - if set by itself, coredump
-        generation returns to its default settings of including all memory
-        regions that it is able to access
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES` - Coredump
-        will not include the data from CUDA source modules that are not
-        relocated at runtime.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_GLOBAL_MEMORY` - Coredump will not
-        include device-side global data that does not belong to any
-        context.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_SHARED_MEMORY` - Coredump will not
-        include grid-scale shared memory for the warp that the dumped
-        kernel belonged to.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_LOCAL_MEMORY` - Coredump will not
-        include local memory from the kernel.
-
-      - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT_FLAGS` - Enables all of the
-        above options. Equiavlent to setting the
-        :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT` attribute to :py:obj:`~.true`.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_ABORT` - If set, GPU exceptions will
-        not raise an abort() in the host CPU process. Same functional goal
-        as :py:obj:`~.CU_COREDUMP_TRIGGER_HOST` but better reflects the
-        default behavior.
-
-    Parameters
-    ----------
-    attrib : :py:obj:`~.CUcoredumpSettings`
-        The enum defining which value to set.
-    value : Any
-        void* containing the requested data.
-    size : int
-        The size of the memory region `value` points to.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_CONTEXT_IS_DESTROYED`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    size : int
-        The size of the memory region `value` points to.
-
-    See Also
-    --------
-    :py:obj:`~.cuCoredumpGetAttributeGlobal`, :py:obj:`~.cuCoredumpGetAttribute`, :py:obj:`~.cuCoredumpSetAttributeGlobal`
-    """
-    cdef cydriver.CUcoredumpSettings cyattrib = attrib.value
-    cdef utils.HelperCUcoredumpSettings cyvalue = utils.HelperCUcoredumpSettings(attrib, value, is_getter=False)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    cdef size_t size = cyvalue.size()
-    err = cydriver.cuCoredumpSetAttribute(cyattrib, cyvalue_ptr, &size)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCoredumpSetAttributeGlobal' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCoredumpSetAttributeGlobal(attrib not None : CUcoredumpSettings, value):
-    """ Allows caller to set a coredump attribute value globally.
-
-    This function should be considered an alternate interface to the CUDA-
-    GDB environment variables defined in this document:
-    https://docs.nvidia.com/cuda/cuda-gdb/index.html#gpu-coredump
-
-    An important design decision to note is that any coredump environment
-    variable values set before CUDA initializes will take permanent
-    precedence over any values set with this function. This decision was
-    made to ensure no change in behavior for any users that may be
-    currently using these variables to get coredumps.
-
-    `*value` shall contain the requested value specified by `set`. It is up
-    to the caller to ensure that the data type and size of `*value` matches
-    the request.
-
-    If the caller calls this function with `*value` equal to NULL, the size
-    of the memory region (in bytes) expected for `set` will be placed in
-    `size`.
-
-    The supported attributes are:
-
-    - :py:obj:`~.CU_COREDUMP_ENABLE_ON_EXCEPTION`: Bool where
-      :py:obj:`~.true` means that GPU exceptions from this context will
-      create a coredump at the location specified by
-      :py:obj:`~.CU_COREDUMP_FILE`. The default value is :py:obj:`~.false`.
-
-    - :py:obj:`~.CU_COREDUMP_TRIGGER_HOST`: Bool where :py:obj:`~.true`
-      means that the host CPU will also create a coredump. The default
-      value is :py:obj:`~.true` unless set to :py:obj:`~.false` globally or
-      or locally. This value is deprecated as of CUDA 12.5 - raise the
-      :py:obj:`~.CU_COREDUMP_SKIP_ABORT` flag to disable host device
-      abort() if needed.
-
-    - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT`: Bool where :py:obj:`~.true`
-      means that any resulting coredumps will not have a dump of GPU memory
-      or non-reloc ELF images. The default value is :py:obj:`~.false`. This
-      attribute is deprecated as of CUDA 12.5, please use
-      :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS` instead.
-
-    - :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER`: Bool where
-      :py:obj:`~.true` means that a coredump can be created by writing to
-      the system pipe specified by :py:obj:`~.CU_COREDUMP_PIPE`. The
-      default value is :py:obj:`~.false`.
-
-    - :py:obj:`~.CU_COREDUMP_FILE`: String of up to 1023 characters that
-      defines the location where any coredumps generated by this context
-      will be written. The default value is
-      :py:obj:`~.core`.cuda.HOSTNAME.PID where :py:obj:`~.HOSTNAME` is the
-      host name of the machine running the CUDA applications and
-      :py:obj:`~.PID` is the process ID of the CUDA application.
-
-    - :py:obj:`~.CU_COREDUMP_PIPE`: String of up to 1023 characters that
-      defines the name of the pipe that will be monitored if user-triggered
-      coredumps are enabled. This value may not be changed after
-      :py:obj:`~.CU_COREDUMP_ENABLE_USER_TRIGGER` is set to
-      :py:obj:`~.true`. The default value is
-      :py:obj:`~.corepipe`.cuda.HOSTNAME.PID where :py:obj:`~.HOSTNAME` is
-      the host name of the machine running the CUDA application and
-      :py:obj:`~.PID` is the process ID of the CUDA application.
-
-    - :py:obj:`~.CU_COREDUMP_GENERATION_FLAGS`: An integer with values to
-      allow granular control the data contained in a coredump specified as
-      a bitwise OR combination of the following values:
-
-      - :py:obj:`~.CU_COREDUMP_DEFAULT_FLAGS` - if set by itself, coredump
-        generation returns to its default settings of including all memory
-        regions that it is able to access
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES` - Coredump
-        will not include the data from CUDA source modules that are not
-        relocated at runtime.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_GLOBAL_MEMORY` - Coredump will not
-        include device-side global data that does not belong to any
-        context.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_SHARED_MEMORY` - Coredump will not
-        include grid-scale shared memory for the warp that the dumped
-        kernel belonged to.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_LOCAL_MEMORY` - Coredump will not
-        include local memory from the kernel.
-
-      - :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT_FLAGS` - Enables all of the
-        above options. Equiavlent to setting the
-        :py:obj:`~.CU_COREDUMP_LIGHTWEIGHT` attribute to :py:obj:`~.true`.
-
-      - :py:obj:`~.CU_COREDUMP_SKIP_ABORT` - If set, GPU exceptions will
-        not raise an abort() in the host CPU process. Same functional goal
-        as :py:obj:`~.CU_COREDUMP_TRIGGER_HOST` but better reflects the
-        default behavior.
-
-    Parameters
-    ----------
-    attrib : :py:obj:`~.CUcoredumpSettings`
-        The enum defining which value to set.
-    value : Any
-        void* containing the requested data.
-    size : int
-        The size of the memory region `value` points to.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_PERMITTED`
-    size : int
-        The size of the memory region `value` points to.
-
-    See Also
-    --------
-    :py:obj:`~.cuCoredumpGetAttribute`, :py:obj:`~.cuCoredumpGetAttributeGlobal`, :py:obj:`~.cuCoredumpSetAttribute`
-    """
-    cdef cydriver.CUcoredumpSettings cyattrib = attrib.value
-    cdef utils.HelperCUcoredumpSettings cyvalue = utils.HelperCUcoredumpSettings(attrib, value, is_getter=False)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    cdef size_t size = cyvalue.size()
-    err = cydriver.cuCoredumpSetAttributeGlobal(cyattrib, cyvalue_ptr, &size)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGetExportTable' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGetExportTable(pExportTableId : Optional[CUuuid]):
-    """ 
-
-    Parameters
-    ----------
-    pExportTableId : :py:obj:`~.CUuuid`
-        None
-
-    Returns
-    -------
-    CUresult
-
-    ppExportTable : Any
-        None
-    """
-    cdef void_ptr ppExportTable = 0
-    cdef cydriver.CUuuid* cypExportTableId_ptr = pExportTableId._ptr if pExportTableId != None else NULL
-    err = cydriver.cuGetExportTable(<const void**>&ppExportTable, cypExportTableId_ptr)
-    return (CUresult(err), ppExportTable)
-{{endif}}
-
-{{if 'cuGreenCtxCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGreenCtxCreate(desc, dev, unsigned int flags):
-    """ Creates a green context with a specified set of resources.
-
-    This API creates a green context with the resources specified in the
-    descriptor `desc` and returns it in the handle represented by `phCtx`.
-    This API will retain the primary context on device `dev`, which will is
-    released when the green context is destroyed. It is advised to have the
-    primary context active before calling this API to avoid the heavy cost
-    of triggering primary context initialization and deinitialization
-    multiple times.
-
-    The API does not set the green context current. In order to set it
-    current, you need to explicitly set it current by first converting the
-    green context to a CUcontext using :py:obj:`~.cuCtxFromGreenCtx` and
-    subsequently calling :py:obj:`~.cuCtxSetCurrent` /
-    :py:obj:`~.cuCtxPushCurrent`. It should be noted that a green context
-    can be current to only one thread at a time. There is no internal
-    synchronization to make API calls accessing the same green context from
-    multiple threads work.
-
-    Note: The API is not supported on 32-bit platforms.
-
-    The supported flags are:
-
-    - `CU_GREEN_CTX_DEFAULT_STREAM` : Creates a default stream to use
-      inside the green context. Required.
-
-    Parameters
-    ----------
-    desc : :py:obj:`~.CUdevResourceDesc`
-        Descriptor generated via :py:obj:`~.cuDevResourceGenerateDesc`
-        which contains the set of resources to be used
-    dev : :py:obj:`~.CUdevice`
-        Device on which to create the green context.
-    flags : unsigned int
-        One of the supported green context creation flags.
-        `CU_GREEN_CTX_DEFAULT_STREAM` is required.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    phCtx : :py:obj:`~.CUgreenCtx`
-        Pointer for the output handle to the green context
-
-    See Also
-    --------
-    :py:obj:`~.cuGreenCtxDestroy`, :py:obj:`~.cuCtxFromGreenCtx`, :py:obj:`~.cuCtxSetCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuDevResourceGenerateDesc`, :py:obj:`~.cuDevicePrimaryCtxRetain`, :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxCreate_v3`
-    """
-    cdef cydriver.CUdevice cydev
-    if dev is None:
-        cydev = <cydriver.CUdevice>0
-    elif isinstance(dev, (CUdevice,)):
-        pdev = int(dev)
-        cydev = <cydriver.CUdevice>pdev
-    else:
-        pdev = int(CUdevice(dev))
-        cydev = <cydriver.CUdevice>pdev
-    cdef cydriver.CUdevResourceDesc cydesc
-    if desc is None:
-        cydesc = <cydriver.CUdevResourceDesc><void_ptr>0
-    elif isinstance(desc, (CUdevResourceDesc,)):
-        pdesc = int(desc)
-        cydesc = <cydriver.CUdevResourceDesc><void_ptr>pdesc
-    else:
-        pdesc = int(CUdevResourceDesc(desc))
-        cydesc = <cydriver.CUdevResourceDesc><void_ptr>pdesc
-    cdef CUgreenCtx phCtx = CUgreenCtx()
-    err = cydriver.cuGreenCtxCreate(<cydriver.CUgreenCtx*>phCtx._ptr, cydesc, cydev, flags)
-    return (CUresult(err), phCtx)
-{{endif}}
-
-{{if 'cuGreenCtxDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGreenCtxDestroy(hCtx):
-    """ Destroys a green context.
-
-    Destroys the green context, releasing the primary context of the device
-    that this green context was created for. Any resources provisioned for
-    this green context (that were initially available via the resource
-    descriptor) are released as well.
-
-    Parameters
-    ----------
-    hCtx : :py:obj:`~.CUgreenCtx`
-        Green context to be destroyed
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_CONTEXT_IS_DESTROYED`
-
-    See Also
-    --------
-    :py:obj:`~.cuGreenCtxCreate`, :py:obj:`~.cuCtxDestroy`
-    """
-    cdef cydriver.CUgreenCtx cyhCtx
-    if hCtx is None:
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>0
-    elif isinstance(hCtx, (CUgreenCtx,)):
-        phCtx = int(hCtx)
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>phCtx
-    else:
-        phCtx = int(CUgreenCtx(hCtx))
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>phCtx
-    err = cydriver.cuGreenCtxDestroy(cyhCtx)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuCtxFromGreenCtx' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxFromGreenCtx(hCtx):
-    """ Converts a green context into the primary context.
-
-    The API converts a green context into the primary context returned in
-    `pContext`. It is important to note that the converted context
-    `pContext` is a normal primary context but with the resources of the
-    specified green context `hCtx`. Once converted, it can then be used to
-    set the context current with :py:obj:`~.cuCtxSetCurrent` or with any of
-    the CUDA APIs that accept a CUcontext parameter.
-
-    Users are expected to call this API before calling any CUDA APIs that
-    accept a CUcontext. Failing to do so will result in the APIs returning
-    :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`.
-
-    Parameters
-    ----------
-    hCtx : :py:obj:`~.CUgreenCtx`
-        Green context to convert
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pContext : :py:obj:`~.CUcontext`
-        Returned primary context with green context resources
-
-    See Also
-    --------
-    :py:obj:`~.cuGreenCtxCreate`
-    """
-    cdef cydriver.CUgreenCtx cyhCtx
-    if hCtx is None:
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>0
-    elif isinstance(hCtx, (CUgreenCtx,)):
-        phCtx = int(hCtx)
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>phCtx
-    else:
-        phCtx = int(CUgreenCtx(hCtx))
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>phCtx
-    cdef CUcontext pContext = CUcontext()
-    err = cydriver.cuCtxFromGreenCtx(<cydriver.CUcontext*>pContext._ptr, cyhCtx)
-    return (CUresult(err), pContext)
-{{endif}}
-
-{{if 'cuDeviceGetDevResource' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDeviceGetDevResource(device, typename not None : CUdevResourceType):
-    """ Get device resources.
-
-    Get the `typename` resources available to the `device`. This may often
-    be the starting point for further partitioning or configuring of
-    resources.
-
-    Note: The API is not supported on 32-bit platforms.
-
-    Parameters
-    ----------
-    device : :py:obj:`~.CUdevice`
-        Device to get resource for
-    typename : :py:obj:`~.CUdevResourceType`
-        Type of resource to retrieve
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_RESOURCE_TYPE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`
-    resource : :py:obj:`~.CUdevResource`
-        Output pointer to a :py:obj:`~.CUdevResource` structure
-
-    See Also
-    --------
-    :py:obj:`~.cuDevResourceGenerateDesc`
-    """
-    cdef cydriver.CUdevice cydevice
-    if device is None:
-        cydevice = <cydriver.CUdevice>0
-    elif isinstance(device, (CUdevice,)):
-        pdevice = int(device)
-        cydevice = <cydriver.CUdevice>pdevice
-    else:
-        pdevice = int(CUdevice(device))
-        cydevice = <cydriver.CUdevice>pdevice
-    cdef CUdevResource resource = CUdevResource()
-    cdef cydriver.CUdevResourceType cytypename = typename.value
-    err = cydriver.cuDeviceGetDevResource(cydevice, <cydriver.CUdevResource*>resource._ptr, cytypename)
-    return (CUresult(err), resource)
-{{endif}}
-
-{{if 'cuCtxGetDevResource' in found_functions}}
-
-@cython.embedsignature(True)
-def cuCtxGetDevResource(hCtx, typename not None : CUdevResourceType):
-    """ Get context resources.
-
-    Get the `typename` resources available to the context represented by
-    `hCtx`  Note: The API is not supported on 32-bit platforms.
-
-    Parameters
-    ----------
-    hCtx : :py:obj:`~.CUcontext`
-        Context to get resource for
-    typename : :py:obj:`~.CUdevResourceType`
-        Type of resource to retrieve
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_RESOURCE_TYPE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-    resource : :py:obj:`~.CUdevResource`
-        Output pointer to a :py:obj:`~.CUdevResource` structure
-
-    See Also
-    --------
-    :py:obj:`~.cuDevResourceGenerateDesc`
-    """
-    cdef cydriver.CUcontext cyhCtx
-    if hCtx is None:
-        cyhCtx = <cydriver.CUcontext><void_ptr>0
-    elif isinstance(hCtx, (CUcontext,)):
-        phCtx = int(hCtx)
-        cyhCtx = <cydriver.CUcontext><void_ptr>phCtx
-    else:
-        phCtx = int(CUcontext(hCtx))
-        cyhCtx = <cydriver.CUcontext><void_ptr>phCtx
-    cdef CUdevResource resource = CUdevResource()
-    cdef cydriver.CUdevResourceType cytypename = typename.value
-    err = cydriver.cuCtxGetDevResource(cyhCtx, <cydriver.CUdevResource*>resource._ptr, cytypename)
-    return (CUresult(err), resource)
-{{endif}}
-
-{{if 'cuGreenCtxGetDevResource' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGreenCtxGetDevResource(hCtx, typename not None : CUdevResourceType):
-    """ Get green context resources.
-
-    Get the `typename` resources available to the green context represented
-    by `hCtx`
-
-    Parameters
-    ----------
-    hCtx : :py:obj:`~.CUgreenCtx`
-        Green context to get resource for
-    typename : :py:obj:`~.CUdevResourceType`
-        Type of resource to retrieve
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_RESOURCE_TYPE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    resource : :py:obj:`~.CUdevResource`
-        Output pointer to a :py:obj:`~.CUdevResource` structure
-
-    See Also
-    --------
-    :py:obj:`~.cuDevResourceGenerateDesc`
-    """
-    cdef cydriver.CUgreenCtx cyhCtx
-    if hCtx is None:
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>0
-    elif isinstance(hCtx, (CUgreenCtx,)):
-        phCtx = int(hCtx)
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>phCtx
-    else:
-        phCtx = int(CUgreenCtx(hCtx))
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>phCtx
-    cdef CUdevResource resource = CUdevResource()
-    cdef cydriver.CUdevResourceType cytypename = typename.value
-    err = cydriver.cuGreenCtxGetDevResource(cyhCtx, <cydriver.CUdevResource*>resource._ptr, cytypename)
-    return (CUresult(err), resource)
-{{endif}}
-
-{{if 'cuDevSmResourceSplitByCount' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDevSmResourceSplitByCount(unsigned int nbGroups, input_ : Optional[CUdevResource], unsigned int useFlags, unsigned int minCount):
-    """ Splits `CU_DEV_RESOURCE_TYPE_SM` resources.
-
-    Splits `CU_DEV_RESOURCE_TYPE_SM` resources into `nbGroups`, adhering to
-    the minimum SM count specified in `minCount` and the usage flags in
-    `useFlags`. If `result` is NULL, the API simulates a split and provides
-    the amount of groups that would be created in `nbGroups`. Otherwise,
-    `nbGroups` must point to the amount of elements in `result` and on
-    return, the API will overwrite `nbGroups` with the amount actually
-    created. The groups are written to the array in `result`. `nbGroups`
-    can be less than the total amount if a smaller number of groups is
-    needed.
-
-    This API is used to spatially partition the input resource. The input
-    resource needs to come from one of :py:obj:`~.cuDeviceGetDevResource`,
-    :py:obj:`~.cuCtxGetDevResource`, or
-    :py:obj:`~.cuGreenCtxGetDevResource`. A limitation of the API is that
-    the output results cannot be split again without first creating a
-    descriptor and a green context with that descriptor.
-
-    When creating the groups, the API will take into account the
-    performance and functional characteristics of the input resource, and
-    guarantee a split that will create a disjoint set of symmetrical
-    partitions. This may lead to fewer groups created than purely dividing
-    the total SM count by the `minCount` due to cluster requirements or
-    alignment and granularity requirements for the minCount.
-
-    The `remainder` set does not have the same functional or performance
-    guarantees as the groups in `result`. Its use should be carefully
-    planned and future partitions of the `remainder` set are discouraged.
-
-    The following flags are supported:
-
-    - `CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING` : Lower the minimum
-      SM count and alignment, and treat each SM independent of its
-      hierarchy. This allows more fine grained partitions but at the cost
-      of advanced features (such as large clusters on compute capability
-      9.0+).
-
-    - `CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE` : Compute
-      Capability 9.0+ only. Attempt to create groups that may allow for
-      maximally sized thread clusters. This can be queried post green
-      context creation using
-      :py:obj:`~.cuOccupancyMaxPotentialClusterSize`.
-
-    A successful API call must either have:
-
-    - A valid array of `result` pointers of size passed in `nbGroups`, with
-      `input` of type `CU_DEV_RESOURCE_TYPE_SM`. Value of `minCount` must
-      be between 0 and the SM count specified in `input`. `remaining` may
-      be NULL.
-
-    - NULL passed in for `result`, with a valid integer pointer in
-      `nbGroups` and `input` of type `CU_DEV_RESOURCE_TYPE_SM`. Value of
-      `minCount` must be between 0 and the SM count specified in `input`.
-      `remaining` may be NULL. This queries the number of groups that would
-      be created by the API.
-
-    Note: The API is not supported on 32-bit platforms.
-
-    Parameters
-    ----------
-    nbGroups : unsigned int
-        This is a pointer, specifying the number of groups that would be or
-        should be created as described below.
-    input : :py:obj:`~.CUdevResource`
-        Input SM resource to be split. Must be a valid
-        `CU_DEV_RESOURCE_TYPE_SM` resource.
-    useFlags : unsigned int
-        Flags specifying how these partitions are used or which constraints
-        to abide by when splitting the input. Zero is valid for default
-        behavior.
-    minCount : unsigned int
-        Minimum number of SMs required
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_DEVICE`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_RESOURCE_TYPE`, :py:obj:`~.CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION`
-    result : List[:py:obj:`~.CUdevResource`]
-        Output array of `None` resources. Can be NULL to query the number
-        of groups.
-    nbGroups : unsigned int
-        This is a pointer, specifying the number of groups that would be or
-        should be created as described below.
-    remaining : :py:obj:`~.CUdevResource`
-        If the input resource cannot be cleanly split among `nbGroups`, the
-        remaining is placed in here. Can be ommitted (NULL) if the user
-        does not need the remaining set.
-
-    See Also
-    --------
-    :py:obj:`~.cuGreenCtxGetDevResource`, :py:obj:`~.cuCtxGetDevResource`, :py:obj:`~.cuDeviceGetDevResource`
-    """
-    cdef cydriver.CUdevResource* cyresult = NULL
-    pyresult = [CUdevResource() for idx in range(nbGroups)]
-    if nbGroups != 0:
-        cyresult = <cydriver.CUdevResource*>calloc(nbGroups, sizeof(cydriver.CUdevResource))
-        if cyresult is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cydriver.CUdevResource)))
-    cdef unsigned int cynbGroups = nbGroups
-    cdef cydriver.CUdevResource* cyinput__ptr = input_._ptr if input_ != None else NULL
-    cdef CUdevResource remaining = CUdevResource()
-    err = cydriver.cuDevSmResourceSplitByCount(cyresult, &cynbGroups, cyinput__ptr, <cydriver.CUdevResource*>remaining._ptr, useFlags, minCount)
-    if CUresult(err) == CUresult(0):
-        for idx in range(nbGroups):
-            string.memcpy((<CUdevResource>pyresult[idx])._ptr, &cyresult[idx], sizeof(cydriver.CUdevResource))
-    if cyresult is not NULL:
-        free(cyresult)
-    return (CUresult(err), pyresult, cynbGroups, remaining)
-{{endif}}
-
-{{if 'cuDevResourceGenerateDesc' in found_functions}}
-
-@cython.embedsignature(True)
-def cuDevResourceGenerateDesc(resources : Optional[Tuple[CUdevResource] | List[CUdevResource]], unsigned int nbResources):
-    """ Generate a resource descriptor.
-
-    Generates a single resource descriptor with the set of resources
-    specified in `resources`. The generated resource descriptor is
-    necessary for the creation of green contexts via the
-    :py:obj:`~.cuGreenCtxCreate` API. Resources of the same type can be
-    passed in, provided they meet the requirements as noted below.
-
-    A successful API call must have:
-
-    - A valid output pointer for the `phDesc` descriptor as well as a valid
-      array of `resources` pointers, with the array size passed in
-      `nbResources`. If multiple resources are provided in `resources`, the
-      device they came from must be the same, otherwise
-      CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION is returned. If multiple
-      resources are provided in `resources` and they are of type
-      :py:obj:`~.CU_DEV_RESOURCE_TYPE_SM`, they must be outputs (whether
-      `result` or `remaining`) from the same split API instance, otherwise
-      CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION is returned.
-
-    Note: The API is not supported on 32-bit platforms.
-
-    Parameters
-    ----------
-    resources : List[:py:obj:`~.CUdevResource`]
-        Array of resources to be included in the descriptor
-    nbResources : unsigned int
-        Number of resources passed in `resources`
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_INVALID_RESOURCE_TYPE`, :py:obj:`~.CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION`
-    phDesc : :py:obj:`~.CUdevResourceDesc`
-        Output descriptor
-
-    See Also
-    --------
-    :py:obj:`~.cuDevSmResourceSplitByCount`
-    """
-    resources = [] if resources is None else resources
-    if not all(isinstance(_x, (CUdevResource,)) for _x in resources):
-        raise TypeError("Argument 'resources' is not instance of type (expected Tuple[cydriver.CUdevResource,] or List[cydriver.CUdevResource,]")
-    cdef CUdevResourceDesc phDesc = CUdevResourceDesc()
-    cdef cydriver.CUdevResource* cyresources = NULL
-    if len(resources) > 0:
-        cyresources = <cydriver.CUdevResource*> calloc(len(resources), sizeof(cydriver.CUdevResource))
-        if cyresources is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(resources)) + 'x' + str(sizeof(cydriver.CUdevResource)))
-        for idx in range(len(resources)):
-            string.memcpy(&cyresources[idx], (<CUdevResource>resources[idx])._ptr, sizeof(cydriver.CUdevResource))
-    if nbResources > len(resources): raise RuntimeError("List is too small: " + str(len(resources)) + " < " + str(nbResources))
-    err = cydriver.cuDevResourceGenerateDesc(<cydriver.CUdevResourceDesc*>phDesc._ptr, (<CUdevResource>resources[0])._ptr if len(resources) == 1 else cyresources, nbResources)
-    if cyresources is not NULL:
-        free(cyresources)
-    return (CUresult(err), phDesc)
-{{endif}}
-
-{{if 'cuGreenCtxRecordEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGreenCtxRecordEvent(hCtx, hEvent):
-    """ Records an event.
-
-    Captures in `hEvent` all the activities of the green context of `hCtx`
-    at the time of this call. `hEvent` and `hCtx` must be from the same
-    primary context otherwise :py:obj:`~.CUDA_ERROR_INVALID_HANDLE` is
-    returned. Calls such as :py:obj:`~.cuEventQuery()` or
-    :py:obj:`~.cuGreenCtxWaitEvent()` will then examine or wait for
-    completion of the work that was captured. Uses of `hCtx` after this
-    call do not modify `hEvent`.
-
-    Parameters
-    ----------
-    hCtx : :py:obj:`~.CUgreenCtx`
-        Green context to record event for
-    hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to record
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuGreenCtxWaitEvent`, :py:obj:`~.cuEventRecord`, :py:obj:`~.cuCtxRecordEvent`, :py:obj:`~.cuCtxWaitEvent`
-
-    Notes
-    -----
-    The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` if the specified green context `hCtx` has a stream in the capture mode. In such a case, the call will invalidate all the conflicting captures.
-    """
-    cdef cydriver.CUevent cyhEvent
-    if hEvent is None:
-        cyhEvent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hEvent, (CUevent,)):
-        phEvent = int(hEvent)
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    else:
-        phEvent = int(CUevent(hEvent))
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    cdef cydriver.CUgreenCtx cyhCtx
-    if hCtx is None:
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>0
-    elif isinstance(hCtx, (CUgreenCtx,)):
-        phCtx = int(hCtx)
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>phCtx
-    else:
-        phCtx = int(CUgreenCtx(hCtx))
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>phCtx
-    err = cydriver.cuGreenCtxRecordEvent(cyhCtx, cyhEvent)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuGreenCtxWaitEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGreenCtxWaitEvent(hCtx, hEvent):
-    """ Make a green context wait on an event.
-
-    Makes all future work submitted to green context `hCtx` wait for all
-    work captured in `hEvent`. The synchronization will be performed on the
-    device and will not block the calling CPU thread. See
-    :py:obj:`~.cuGreenCtxRecordEvent()` or :py:obj:`~.cuEventRecord()`, for
-    details on what is captured by an event.
-
-    Parameters
-    ----------
-    hCtx : :py:obj:`~.CUgreenCtx`
-        Green context to wait
-    hEvent : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to wait on
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED`
-
-    See Also
-    --------
-    :py:obj:`~.cuGreenCtxRecordEvent`, :py:obj:`~.cuStreamWaitEvent` :py:obj:`~.cuCtxRecordEvent`, :py:obj:`~.cuCtxWaitEvent`
-
-    Notes
-    -----
-    `hEvent` may be from a different context or device than `hCtx`.
-
-    The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` and invalidate the capture if the specified event `hEvent` is part of an ongoing capture sequence or if the specified green context `hCtx` has a stream in the capture mode.
-    """
-    cdef cydriver.CUevent cyhEvent
-    if hEvent is None:
-        cyhEvent = <cydriver.CUevent><void_ptr>0
-    elif isinstance(hEvent, (CUevent,)):
-        phEvent = int(hEvent)
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    else:
-        phEvent = int(CUevent(hEvent))
-        cyhEvent = <cydriver.CUevent><void_ptr>phEvent
-    cdef cydriver.CUgreenCtx cyhCtx
-    if hCtx is None:
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>0
-    elif isinstance(hCtx, (CUgreenCtx,)):
-        phCtx = int(hCtx)
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>phCtx
-    else:
-        phCtx = int(CUgreenCtx(hCtx))
-        cyhCtx = <cydriver.CUgreenCtx><void_ptr>phCtx
-    err = cydriver.cuGreenCtxWaitEvent(cyhCtx, cyhEvent)
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuStreamGetGreenCtx' in found_functions}}
-
-@cython.embedsignature(True)
-def cuStreamGetGreenCtx(hStream):
-    """ Query the green context associated with a stream.
-
-    Returns the CUDA green context that the stream is associated with, or
-    NULL if the stream is not associated with any green context.
-
-    The stream handle `hStream` can refer to any of the following:
-
-    - a stream created via any of the CUDA driver APIs such as
-      :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority`
-      and :py:obj:`~.cuGreenCtxStreamCreate`, or their runtime API
-      equivalents such as :py:obj:`~.cudaStreamCreate`,
-      :py:obj:`~.cudaStreamCreateWithFlags` and
-      :py:obj:`~.cudaStreamCreateWithPriority`. If during stream creation
-      the context that was active in the calling thread was obtained with
-      cuCtxFromGreenCtx, that green context is returned in `phCtx`.
-      Otherwise, `*phCtx` is set to NULL instead.
-
-    - special stream such as the NULL stream or
-      :py:obj:`~.CU_STREAM_LEGACY`. In that case if context that is active
-      in the calling thread was obtained with cuCtxFromGreenCtx, that green
-      context is returned. Otherwise, `*phCtx` is set to NULL instead.
-
-    Passing an invalid handle will result in undefined behavior.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Handle to the stream to be queried
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`,
-    phCtx : :py:obj:`~.CUgreenCtx`
-        Returned green context associated with the stream
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuStreamGetCtx_v2`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`
-    """
-    cdef cydriver.CUstream cyhStream
-    if hStream is None:
-        cyhStream = <cydriver.CUstream><void_ptr>0
-    elif isinstance(hStream, (CUstream,)):
-        phStream = int(hStream)
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    else:
-        phStream = int(CUstream(hStream))
-        cyhStream = <cydriver.CUstream><void_ptr>phStream
-    cdef CUgreenCtx phCtx = CUgreenCtx()
-    err = cydriver.cuStreamGetGreenCtx(cyhStream, <cydriver.CUgreenCtx*>phCtx._ptr)
-    return (CUresult(err), phCtx)
-{{endif}}
-
-{{if 'cuGreenCtxStreamCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cuGreenCtxStreamCreate(greenCtx, unsigned int flags, int priority):
-    """ Create a stream for use in the green context.
-
-    Creates a stream for use in the specified green context `greenCtx` and
-    returns a handle in `phStream`. The stream can be destroyed by calling
-    :py:obj:`~.cuStreamDestroy()`. Note that the API ignores the context
-    that is current to the calling thread and creates a stream in the
-    specified green context `greenCtx`.
-
-    The supported values for `flags` are:
-
-    - :py:obj:`~.CU_STREAM_NON_BLOCKING`: This must be specified. It
-      indicates that work running in the created stream may run
-      concurrently with work in the default stream, and that the created
-      stream should perform no implicit synchronization with the default
-      stream.
-
-    Specifying `priority` affects the scheduling priority of work in the
-    stream. Priorities provide a hint to preferentially run work with
-    higher priority when possible, but do not preempt already-running work
-    or provide any other functional guarantee on execution order.
-    `priority` follows a convention where lower numbers represent higher
-    priorities. '0' represents default priority. The range of meaningful
-    numerical priorities can be queried using
-    :py:obj:`~.cuCtxGetStreamPriorityRange`. If the specified priority is
-    outside the numerical range returned by
-    :py:obj:`~.cuCtxGetStreamPriorityRange`, it will automatically be
-    clamped to the lowest or the highest number in the range.
-
-    Parameters
-    ----------
-    greenCtx : :py:obj:`~.CUgreenCtx`
-        Green context for which to create the stream for
-    flags : unsigned int
-        Flags for stream creation. `CU_STREAM_NON_BLOCKING` must be
-        specified.
-    priority : int
-        Stream priority. Lower numbers represent higher priorities. See
-        :py:obj:`~.cuCtxGetStreamPriorityRange` for more information about
-        meaningful stream priorities that can be passed.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    phStream : :py:obj:`~.CUstream`
-        Returned newly created stream
-
-    See Also
-    --------
-    :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuGreenCtxCreate` :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreateWithPriority`
-
-    Notes
-    -----
-    In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations.
-    """
-    cdef cydriver.CUgreenCtx cygreenCtx
-    if greenCtx is None:
-        cygreenCtx = <cydriver.CUgreenCtx><void_ptr>0
-    elif isinstance(greenCtx, (CUgreenCtx,)):
-        pgreenCtx = int(greenCtx)
-        cygreenCtx = <cydriver.CUgreenCtx><void_ptr>pgreenCtx
-    else:
-        pgreenCtx = int(CUgreenCtx(greenCtx))
-        cygreenCtx = <cydriver.CUgreenCtx><void_ptr>pgreenCtx
-    cdef CUstream phStream = CUstream()
-    err = cydriver.cuGreenCtxStreamCreate(<cydriver.CUstream*>phStream._ptr, cygreenCtx, flags, priority)
-    return (CUresult(err), phStream)
-{{endif}}
-
-{{if 'cuProfilerStart' in found_functions}}
-
-@cython.embedsignature(True)
-def cuProfilerStart():
-    """ Enable profiling.
-
-    Enables profile collection by the active profiling tool for the current
-    context. If profiling is already enabled, then
-    :py:obj:`~.cuProfilerStart()` has no effect.
-
-    cuProfilerStart and cuProfilerStop APIs are used to programmatically
-    control the profiling granularity by allowing profiling to be done only
-    on selective pieces of code.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-
-    See Also
-    --------
-    :py:obj:`~.cuProfilerInitialize`, :py:obj:`~.cuProfilerStop`, :py:obj:`~.cudaProfilerStart`
-    """
-    err = cydriver.cuProfilerStart()
-    return (CUresult(err),)
-{{endif}}
-
-{{if 'cuProfilerStop' in found_functions}}
-
-@cython.embedsignature(True)
-def cuProfilerStop():
-    """ Disable profiling.
-
-    Disables profile collection by the active profiling tool for the
-    current context. If profiling is already disabled, then
-    :py:obj:`~.cuProfilerStop()` has no effect.
-
-    cuProfilerStart and cuProfilerStop APIs are used to programmatically
-    control the profiling granularity by allowing profiling to be done only
-    on selective pieces of code.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`
-
-    See Also
-    --------
-    :py:obj:`~.cuProfilerInitialize`, :py:obj:`~.cuProfilerStart`, :py:obj:`~.cudaProfilerStop`
-    """
-    err = cydriver.cuProfilerStop()
-    return (CUresult(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuGraphicsEGLRegisterImage(image, unsigned int flags):
-    """ Registers an EGL image.
-
-    Registers the EGLImageKHR specified by `image` for access by CUDA. A
-    handle to the registered object is returned as `pCudaResource`.
-    Additional Mapping/Unmapping is not required for the registered
-    resource and :py:obj:`~.cuGraphicsResourceGetMappedEglFrame` can be
-    directly called on the `pCudaResource`.
-
-    The application will be responsible for synchronizing access to shared
-    objects. The application must ensure that any pending operation which
-    access the objects have completed before passing control to CUDA. This
-    may be accomplished by issuing and waiting for glFinish command on all
-    GLcontexts (for OpenGL and likewise for other APIs). The application
-    will be also responsible for ensuring that any pending operation on the
-    registered CUDA resource has completed prior to executing subsequent
-    commands in other APIs accesing the same memory objects. This can be
-    accomplished by calling cuCtxSynchronize or cuEventSynchronize
-    (preferably).
-
-    The surface's intended usage is specified using `flags`, as follows:
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE`: Specifies no hints
-      about how this resource will be used. It is therefore assumed that
-      this resource will be read from and written to by CUDA. This is the
-      default value.
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY`: Specifies that
-      CUDA will not write to this resource.
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD`: Specifies
-      that CUDA will not read from this resource and will write over the
-      entire contents of the resource, so none of the data previously
-      stored in the resource will be preserved.
-
-    The EGLImageKHR is an object which can be used to create EGLImage
-    target resource. It is defined as a void pointer. typedef void*
-    EGLImageKHR
-
-    Parameters
-    ----------
-    image : :py:obj:`~.EGLImageKHR`
-        An EGLImageKHR image which can be used to create target resource.
-    flags : unsigned int
-        Map flags
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_ALREADY_MAPPED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`,
-    pCudaResource : :py:obj:`~.CUgraphicsResource`
-        Pointer to the returned object handle
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphicsEGLRegisterImage`, :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsResourceSetMapFlags`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cudaGraphicsEGLRegisterImage`
-    """
-    cdef cydriver.EGLImageKHR cyimage
-    if image is None:
-        cyimage = <cydriver.EGLImageKHR><void_ptr>0
-    elif isinstance(image, (EGLImageKHR,)):
-        pimage = int(image)
-        cyimage = <cydriver.EGLImageKHR><void_ptr>pimage
-    else:
-        pimage = int(EGLImageKHR(image))
-        cyimage = <cydriver.EGLImageKHR><void_ptr>pimage
-    cdef CUgraphicsResource pCudaResource = CUgraphicsResource()
-    err = cydriver.cuGraphicsEGLRegisterImage(<cydriver.CUgraphicsResource*>pCudaResource._ptr, cyimage, flags)
-    return (CUresult(err), pCudaResource)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuEGLStreamConsumerConnect(stream):
-    """ Connect CUDA to EGLStream as a consumer.
-
-    Connect CUDA as a consumer to EGLStreamKHR specified by `stream`.
-
-    The EGLStreamKHR is an EGL object that transfers a sequence of image
-    frames from one API to another.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.EGLStreamKHR`
-        EGLStreamKHR handle
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`,
-    conn : :py:obj:`~.CUeglStreamConnection`
-        Pointer to the returned connection handle
-
-    See Also
-    --------
-    :py:obj:`~.cuEGLStreamConsumerConnect`, :py:obj:`~.cuEGLStreamConsumerDisconnect`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`, :py:obj:`~.cudaEGLStreamConsumerConnect`
-    """
-    cdef cydriver.EGLStreamKHR cystream
-    if stream is None:
-        cystream = <cydriver.EGLStreamKHR><void_ptr>0
-    elif isinstance(stream, (EGLStreamKHR,)):
-        pstream = int(stream)
-        cystream = <cydriver.EGLStreamKHR><void_ptr>pstream
-    else:
-        pstream = int(EGLStreamKHR(stream))
-        cystream = <cydriver.EGLStreamKHR><void_ptr>pstream
-    cdef CUeglStreamConnection conn = CUeglStreamConnection()
-    err = cydriver.cuEGLStreamConsumerConnect(<cydriver.CUeglStreamConnection*>conn._ptr, cystream)
-    return (CUresult(err), conn)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuEGLStreamConsumerConnectWithFlags(stream, unsigned int flags):
-    """ Connect CUDA to EGLStream as a consumer with given flags.
-
-    Connect CUDA as a consumer to EGLStreamKHR specified by `stream` with
-    specified `flags` defined by CUeglResourceLocationFlags.
-
-    The flags specify whether the consumer wants to access frames from
-    system memory or video memory. Default is
-    :py:obj:`~.CU_EGL_RESOURCE_LOCATION_VIDMEM`.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.EGLStreamKHR`
-        EGLStreamKHR handle
-    flags : unsigned int
-        Flags denote intended location - system or video.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`,
-    conn : :py:obj:`~.CUeglStreamConnection`
-        Pointer to the returned connection handle
-
-    See Also
-    --------
-    :py:obj:`~.cuEGLStreamConsumerConnect`, :py:obj:`~.cuEGLStreamConsumerDisconnect`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`, :py:obj:`~.cudaEGLStreamConsumerConnectWithFlags`
-    """
-    cdef cydriver.EGLStreamKHR cystream
-    if stream is None:
-        cystream = <cydriver.EGLStreamKHR><void_ptr>0
-    elif isinstance(stream, (EGLStreamKHR,)):
-        pstream = int(stream)
-        cystream = <cydriver.EGLStreamKHR><void_ptr>pstream
-    else:
-        pstream = int(EGLStreamKHR(stream))
-        cystream = <cydriver.EGLStreamKHR><void_ptr>pstream
-    cdef CUeglStreamConnection conn = CUeglStreamConnection()
-    err = cydriver.cuEGLStreamConsumerConnectWithFlags(<cydriver.CUeglStreamConnection*>conn._ptr, cystream, flags)
-    return (CUresult(err), conn)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuEGLStreamConsumerDisconnect(conn):
-    """ Disconnect CUDA as a consumer to EGLStream .
-
-    Disconnect CUDA as a consumer to EGLStreamKHR.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.CUeglStreamConnection`
-        Conection to disconnect.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`,
-
-    See Also
-    --------
-    :py:obj:`~.cuEGLStreamConsumerConnect`, :py:obj:`~.cuEGLStreamConsumerDisconnect`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`, :py:obj:`~.cudaEGLStreamConsumerDisconnect`
-    """
-    cdef cydriver.CUeglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (CUeglStreamConnection,)):
-        pconn = conn.getPtr()
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, driver.CUeglStreamConnection'>, found " + str(type(conn)))
-    err = cydriver.cuEGLStreamConsumerDisconnect(cyconn)
-    return (CUresult(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, unsigned int timeout):
-    """ Acquire an image frame from the EGLStream with CUDA as a consumer.
-
-    Acquire an image frame from EGLStreamKHR. This API can also acquire an
-    old frame presented by the producer unless explicitly disabled by
-    setting EGL_SUPPORT_REUSE_NV flag to EGL_FALSE during stream
-    initialization. By default, EGLStream is created with this flag set to
-    EGL_TRUE. :py:obj:`~.cuGraphicsResourceGetMappedEglFrame` can be called
-    on `pCudaResource` to get :py:obj:`~.CUeglFrame`.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.CUeglStreamConnection`
-        Connection on which to acquire
-    pCudaResource : :py:obj:`~.CUgraphicsResource`
-        CUDA resource on which the stream frame will be mapped for use.
-    pStream : :py:obj:`~.CUstream`
-        CUDA stream for synchronization and any data migrations implied by
-        :py:obj:`~.CUeglResourceLocationFlags`.
-    timeout : unsigned int
-        Desired timeout in usec for a new frame to be acquired. If set as
-        :py:obj:`~.CUDA_EGL_INFINITE_TIMEOUT`, acquire waits infinitely.
-        After timeout occurs CUDA consumer tries to acquire an old frame if
-        available and EGL_SUPPORT_REUSE_NV flag is set.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_LAUNCH_TIMEOUT`,
-
-    See Also
-    --------
-    :py:obj:`~.cuEGLStreamConsumerConnect`, :py:obj:`~.cuEGLStreamConsumerDisconnect`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`
-    """
-    cdef cydriver.CUstream *cypStream
-    if pStream is None:
-        cypStream = <cydriver.CUstream*><void_ptr>NULL
-    elif isinstance(pStream, (CUstream,)):
-        ppStream = pStream.getPtr()
-        cypStream = <cydriver.CUstream*><void_ptr>ppStream
-    elif isinstance(pStream, (int)):
-        cypStream = <cydriver.CUstream*><void_ptr>pStream
-    else:
-        raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, driver.CUstream'>, found " + str(type(pStream)))
-    cdef cydriver.CUgraphicsResource *cypCudaResource
-    if pCudaResource is None:
-        cypCudaResource = <cydriver.CUgraphicsResource*><void_ptr>NULL
-    elif isinstance(pCudaResource, (CUgraphicsResource,)):
-        ppCudaResource = pCudaResource.getPtr()
-        cypCudaResource = <cydriver.CUgraphicsResource*><void_ptr>ppCudaResource
-    elif isinstance(pCudaResource, (int)):
-        cypCudaResource = <cydriver.CUgraphicsResource*><void_ptr>pCudaResource
-    else:
-        raise TypeError("Argument 'pCudaResource' is not instance of type (expected <class 'int, driver.CUgraphicsResource'>, found " + str(type(pCudaResource)))
-    cdef cydriver.CUeglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (CUeglStreamConnection,)):
-        pconn = conn.getPtr()
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, driver.CUeglStreamConnection'>, found " + str(type(conn)))
-    err = cydriver.cuEGLStreamConsumerAcquireFrame(cyconn, cypCudaResource, cypStream, timeout)
-    return (CUresult(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream):
-    """ Releases the last frame acquired from the EGLStream.
-
-    Release the acquired image frame specified by `pCudaResource` to
-    EGLStreamKHR. If EGL_SUPPORT_REUSE_NV flag is set to EGL_TRUE, at the
-    time of EGL creation this API doesn't release the last frame acquired
-    on the EGLStream. By default, EGLStream is created with this flag set
-    to EGL_TRUE.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.CUeglStreamConnection`
-        Connection on which to release
-    pCudaResource : :py:obj:`~.CUgraphicsResource`
-        CUDA resource whose corresponding frame is to be released
-    pStream : :py:obj:`~.CUstream`
-        CUDA stream on which release will be done.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuEGLStreamConsumerConnect`, :py:obj:`~.cuEGLStreamConsumerDisconnect`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`
-    """
-    cdef cydriver.CUstream *cypStream
-    if pStream is None:
-        cypStream = <cydriver.CUstream*><void_ptr>NULL
-    elif isinstance(pStream, (CUstream,)):
-        ppStream = pStream.getPtr()
-        cypStream = <cydriver.CUstream*><void_ptr>ppStream
-    elif isinstance(pStream, (int)):
-        cypStream = <cydriver.CUstream*><void_ptr>pStream
-    else:
-        raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, driver.CUstream'>, found " + str(type(pStream)))
-    cdef cydriver.CUgraphicsResource cypCudaResource
-    if pCudaResource is None:
-        cypCudaResource = <cydriver.CUgraphicsResource><void_ptr>0
-    elif isinstance(pCudaResource, (CUgraphicsResource,)):
-        ppCudaResource = int(pCudaResource)
-        cypCudaResource = <cydriver.CUgraphicsResource><void_ptr>ppCudaResource
-    else:
-        ppCudaResource = int(CUgraphicsResource(pCudaResource))
-        cypCudaResource = <cydriver.CUgraphicsResource><void_ptr>ppCudaResource
-    cdef cydriver.CUeglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (CUeglStreamConnection,)):
-        pconn = conn.getPtr()
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, driver.CUeglStreamConnection'>, found " + str(type(conn)))
-    err = cydriver.cuEGLStreamConsumerReleaseFrame(cyconn, cypCudaResource, cypStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuEGLStreamProducerConnect(stream, width, height):
-    """ Connect CUDA to EGLStream as a producer.
-
-    Connect CUDA as a producer to EGLStreamKHR specified by `stream`.
-
-    The EGLStreamKHR is an EGL object that transfers a sequence of image
-    frames from one API to another.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.EGLStreamKHR`
-        EGLStreamKHR handle
-    width : :py:obj:`~.EGLint`
-        width of the image to be submitted to the stream
-    height : :py:obj:`~.EGLint`
-        height of the image to be submitted to the stream
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`,
-    conn : :py:obj:`~.CUeglStreamConnection`
-        Pointer to the returned connection handle
-
-    See Also
-    --------
-    :py:obj:`~.cuEGLStreamProducerConnect`, :py:obj:`~.cuEGLStreamProducerDisconnect`, :py:obj:`~.cuEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerConnect`
-    """
-    cdef cydriver.EGLint cyheight
-    if height is None:
-        cyheight = <cydriver.EGLint><void_ptr>0
-    elif isinstance(height, (EGLint,)):
-        pheight = int(height)
-        cyheight = <cydriver.EGLint><void_ptr>pheight
-    else:
-        pheight = int(EGLint(height))
-        cyheight = <cydriver.EGLint><void_ptr>pheight
-    cdef cydriver.EGLint cywidth
-    if width is None:
-        cywidth = <cydriver.EGLint><void_ptr>0
-    elif isinstance(width, (EGLint,)):
-        pwidth = int(width)
-        cywidth = <cydriver.EGLint><void_ptr>pwidth
-    else:
-        pwidth = int(EGLint(width))
-        cywidth = <cydriver.EGLint><void_ptr>pwidth
-    cdef cydriver.EGLStreamKHR cystream
-    if stream is None:
-        cystream = <cydriver.EGLStreamKHR><void_ptr>0
-    elif isinstance(stream, (EGLStreamKHR,)):
-        pstream = int(stream)
-        cystream = <cydriver.EGLStreamKHR><void_ptr>pstream
-    else:
-        pstream = int(EGLStreamKHR(stream))
-        cystream = <cydriver.EGLStreamKHR><void_ptr>pstream
-    cdef CUeglStreamConnection conn = CUeglStreamConnection()
-    err = cydriver.cuEGLStreamProducerConnect(<cydriver.CUeglStreamConnection*>conn._ptr, cystream, cywidth, cyheight)
-    return (CUresult(err), conn)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuEGLStreamProducerDisconnect(conn):
-    """ Disconnect CUDA as a producer to EGLStream .
-
-    Disconnect CUDA as a producer to EGLStreamKHR.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.CUeglStreamConnection`
-        Conection to disconnect.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`,
-
-    See Also
-    --------
-    :py:obj:`~.cuEGLStreamProducerConnect`, :py:obj:`~.cuEGLStreamProducerDisconnect`, :py:obj:`~.cuEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerDisconnect`
-    """
-    cdef cydriver.CUeglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (CUeglStreamConnection,)):
-        pconn = conn.getPtr()
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, driver.CUeglStreamConnection'>, found " + str(type(conn)))
-    err = cydriver.cuEGLStreamProducerDisconnect(cyconn)
-    return (CUresult(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuEGLStreamProducerPresentFrame(conn, eglframe not None : CUeglFrame, pStream):
-    """ Present a CUDA eglFrame to the EGLStream with CUDA as a producer.
-
-    When a frame is presented by the producer, it gets associated with the
-    EGLStream and thus it is illegal to free the frame before the producer
-    is disconnected. If a frame is freed and reused it may lead to
-    undefined behavior.
-
-    If producer and consumer are on different GPUs (iGPU and dGPU) then
-    frametype :py:obj:`~.CU_EGL_FRAME_TYPE_ARRAY` is not supported.
-    :py:obj:`~.CU_EGL_FRAME_TYPE_PITCH` can be used for such cross-device
-    applications.
-
-    The :py:obj:`~.CUeglFrame` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For :py:obj:`~.CUeglFrame` of type :py:obj:`~.CU_EGL_FRAME_TYPE_PITCH`,
-    the application may present sub-region of a memory allocation. In that
-    case, the pitched pointer will specify the start address of the sub-
-    region in the allocation and corresponding :py:obj:`~.CUeglFrame`
-    fields will specify the dimensions of the sub-region.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.CUeglStreamConnection`
-        Connection on which to present the CUDA array
-    eglframe : :py:obj:`~.CUeglFrame`
-        CUDA Eglstream Proucer Frame handle to be sent to the consumer over
-        EglStream.
-    pStream : :py:obj:`~.CUstream`
-        CUDA stream on which to present the frame.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`,
-
-    See Also
-    --------
-    :py:obj:`~.cuEGLStreamProducerConnect`, :py:obj:`~.cuEGLStreamProducerDisconnect`, :py:obj:`~.cuEGLStreamProducerReturnFrame`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`
-    """
-    cdef cydriver.CUstream *cypStream
-    if pStream is None:
-        cypStream = <cydriver.CUstream*><void_ptr>NULL
-    elif isinstance(pStream, (CUstream,)):
-        ppStream = pStream.getPtr()
-        cypStream = <cydriver.CUstream*><void_ptr>ppStream
-    elif isinstance(pStream, (int)):
-        cypStream = <cydriver.CUstream*><void_ptr>pStream
-    else:
-        raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, driver.CUstream'>, found " + str(type(pStream)))
-    cdef cydriver.CUeglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (CUeglStreamConnection,)):
-        pconn = conn.getPtr()
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, driver.CUeglStreamConnection'>, found " + str(type(conn)))
-    err = cydriver.cuEGLStreamProducerPresentFrame(cyconn, eglframe._ptr[0], cypStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuEGLStreamProducerReturnFrame(conn, eglframe : Optional[CUeglFrame], pStream):
-    """ Return the CUDA eglFrame to the EGLStream released by the consumer.
-
-    This API can potentially return CUDA_ERROR_LAUNCH_TIMEOUT if the
-    consumer has not returned a frame to EGL stream. If timeout is returned
-    the application can retry.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.CUeglStreamConnection`
-        Connection on which to return
-    eglframe : :py:obj:`~.CUeglFrame`
-        CUDA Eglstream Proucer Frame handle returned from the consumer over
-        EglStream.
-    pStream : :py:obj:`~.CUstream`
-        CUDA stream on which to return the frame.
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_LAUNCH_TIMEOUT`
-
-    See Also
-    --------
-    :py:obj:`~.cuEGLStreamProducerConnect`, :py:obj:`~.cuEGLStreamProducerDisconnect`, :py:obj:`~.cuEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`
-    """
-    cdef cydriver.CUstream *cypStream
-    if pStream is None:
-        cypStream = <cydriver.CUstream*><void_ptr>NULL
-    elif isinstance(pStream, (CUstream,)):
-        ppStream = pStream.getPtr()
-        cypStream = <cydriver.CUstream*><void_ptr>ppStream
-    elif isinstance(pStream, (int)):
-        cypStream = <cydriver.CUstream*><void_ptr>pStream
-    else:
-        raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, driver.CUstream'>, found " + str(type(pStream)))
-    cdef cydriver.CUeglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (CUeglStreamConnection,)):
-        pconn = conn.getPtr()
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cydriver.CUeglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, driver.CUeglStreamConnection'>, found " + str(type(conn)))
-    cdef cydriver.CUeglFrame* cyeglframe_ptr = eglframe._ptr if eglframe != None else NULL
-    err = cydriver.cuEGLStreamProducerReturnFrame(cyconn, cyeglframe_ptr, cypStream)
-    return (CUresult(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuGraphicsResourceGetMappedEglFrame(resource, unsigned int index, unsigned int mipLevel):
-    """ Get an eglFrame through which to access a registered EGL graphics resource.
-
-    Returns in `*eglFrame` an eglFrame pointer through which the registered
-    graphics resource `resource` may be accessed. This API can only be
-    called for registered EGL graphics resources.
-
-    The :py:obj:`~.CUeglFrame` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    If `resource` is not registered then :py:obj:`~.CUDA_ERROR_NOT_MAPPED`
-    is returned.
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.CUgraphicsResource`
-        None
-    index : unsigned int
-        None
-    mipLevel : unsigned int
-        None
-
-    Returns
-    -------
-    CUresult
-
-    eglFrame : :py:obj:`~.CUeglFrame`
-        None
-    """
-    cdef cydriver.CUgraphicsResource cyresource
-    if resource is None:
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>0
-    elif isinstance(resource, (CUgraphicsResource,)):
-        presource = int(resource)
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    else:
-        presource = int(CUgraphicsResource(resource))
-        cyresource = <cydriver.CUgraphicsResource><void_ptr>presource
-    cdef CUeglFrame eglFrame = CUeglFrame()
-    err = cydriver.cuGraphicsResourceGetMappedEglFrame(<cydriver.CUeglFrame*>eglFrame._ptr, cyresource, index, mipLevel)
-    return (CUresult(err), eglFrame)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuEventCreateFromEGLSync(eglSync, unsigned int flags):
-    """ Creates an event from EGLSync object.
-
-    Creates an event *phEvent from an EGLSyncKHR eglSync with the flags
-    specified via `flags`. Valid flags include:
-
-    - :py:obj:`~.CU_EVENT_DEFAULT`: Default event creation flag.
-
-    - :py:obj:`~.CU_EVENT_BLOCKING_SYNC`: Specifies that the created event
-      should use blocking synchronization. A CPU thread that uses
-      :py:obj:`~.cuEventSynchronize()` to wait on an event created with
-      this flag will block until the event has actually been completed.
-
-    Once the `eglSync` gets destroyed, :py:obj:`~.cuEventDestroy` is the
-    only API that can be invoked on the event.
-
-    :py:obj:`~.cuEventRecord` and TimingData are not supported for events
-    created from EGLSync.
-
-    The EGLSyncKHR is an opaque handle to an EGL sync object. typedef void*
-    EGLSyncKHR
-
-    Parameters
-    ----------
-    eglSync : :py:obj:`~.EGLSyncKHR`
-        Opaque handle to EGLSync object
-    flags : unsigned int
-        Event creation flags
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    phEvent : :py:obj:`~.CUevent`
-        Returns newly created event
-
-    See Also
-    --------
-    :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventDestroy`
-    """
-    cdef cydriver.EGLSyncKHR cyeglSync
-    if eglSync is None:
-        cyeglSync = <cydriver.EGLSyncKHR><void_ptr>0
-    elif isinstance(eglSync, (EGLSyncKHR,)):
-        peglSync = int(eglSync)
-        cyeglSync = <cydriver.EGLSyncKHR><void_ptr>peglSync
-    else:
-        peglSync = int(EGLSyncKHR(eglSync))
-        cyeglSync = <cydriver.EGLSyncKHR><void_ptr>peglSync
-    cdef CUevent phEvent = CUevent()
-    err = cydriver.cuEventCreateFromEGLSync(<cydriver.CUevent*>phEvent._ptr, cyeglSync, flags)
-    return (CUresult(err), phEvent)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuGraphicsGLRegisterBuffer(buffer, unsigned int Flags):
-    """ Registers an OpenGL buffer object.
-
-    Registers the buffer object specified by `buffer` for access by CUDA. A
-    handle to the registered object is returned as `pCudaResource`. The
-    register flags `Flags` specify the intended usage, as follows:
-
-    - :py:obj:`~.CU_GRAPHICS_REGISTER_FLAGS_NONE`: Specifies no hints about
-      how this resource will be used. It is therefore assumed that this
-      resource will be read from and written to by CUDA. This is the
-      default value.
-
-    - :py:obj:`~.CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY`: Specifies that CUDA
-      will not write to this resource.
-
-    - :py:obj:`~.CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD`: Specifies that
-      CUDA will not read from this resource and will write over the entire
-      contents of the resource, so none of the data previously stored in
-      the resource will be preserved.
-
-    Parameters
-    ----------
-    buffer : :py:obj:`~.GLuint`
-        name of buffer object to be registered
-    Flags : unsigned int
-        Register flags
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_ALREADY_MAPPED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_OPERATING_SYSTEM`
-    pCudaResource : :py:obj:`~.CUgraphicsResource`
-        Pointer to the returned object handle
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsResourceGetMappedPointer`, :py:obj:`~.cudaGraphicsGLRegisterBuffer`
-    """
-    cdef cydriver.GLuint cybuffer
-    if buffer is None:
-        cybuffer = <cydriver.GLuint><void_ptr>0
-    elif isinstance(buffer, (GLuint,)):
-        pbuffer = int(buffer)
-        cybuffer = <cydriver.GLuint><void_ptr>pbuffer
-    else:
-        pbuffer = int(GLuint(buffer))
-        cybuffer = <cydriver.GLuint><void_ptr>pbuffer
-    cdef CUgraphicsResource pCudaResource = CUgraphicsResource()
-    err = cydriver.cuGraphicsGLRegisterBuffer(<cydriver.CUgraphicsResource*>pCudaResource._ptr, cybuffer, Flags)
-    return (CUresult(err), pCudaResource)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuGraphicsGLRegisterImage(image, target, unsigned int Flags):
-    """ Register an OpenGL texture or renderbuffer object.
-
-    Registers the texture or renderbuffer object specified by `image` for
-    access by CUDA.   A handle to the registered object is returned as
-    `pCudaResource`.
-
-    `target` must match the type of the object, and must be one of
-    :py:obj:`~.GL_TEXTURE_2D`, :py:obj:`~.GL_TEXTURE_RECTANGLE`,
-    :py:obj:`~.GL_TEXTURE_CUBE_MAP`, :py:obj:`~.GL_TEXTURE_3D`,
-    :py:obj:`~.GL_TEXTURE_2D_ARRAY`, or :py:obj:`~.GL_RENDERBUFFER`.
-
-    The register flags `Flags` specify the intended usage, as follows:
-
-    - :py:obj:`~.CU_GRAPHICS_REGISTER_FLAGS_NONE`: Specifies no hints about
-      how this resource will be used. It is therefore assumed that this
-      resource will be read from and written to by CUDA. This is the
-      default value.
-
-    - :py:obj:`~.CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY`: Specifies that CUDA
-      will not write to this resource.
-
-    - :py:obj:`~.CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD`: Specifies that
-      CUDA will not read from this resource and will write over the entire
-      contents of the resource, so none of the data previously stored in
-      the resource will be preserved.
-
-    - :py:obj:`~.CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST`: Specifies that
-      CUDA will bind this resource to a surface reference.
-
-    - :py:obj:`~.CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER`: Specifies that
-      CUDA will perform texture gather operations on this resource.
-
-    The following image formats are supported. For brevity's sake, the list
-    is abbreviated. For ex., {GL_R, GL_RG} X {8, 16} would expand to the
-    following 4 formats {GL_R8, GL_R16, GL_RG8, GL_RG16} :
-
-    - GL_RED, GL_RG, GL_RGBA, GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA,
-      GL_INTENSITY
-
-    - {GL_R, GL_RG, GL_RGBA} X {8, 16, 16F, 32F, 8UI, 16UI, 32UI, 8I, 16I,
-      32I}
-
-    - {GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY} X {8, 16,
-      16F_ARB, 32F_ARB, 8UI_EXT, 16UI_EXT, 32UI_EXT, 8I_EXT, 16I_EXT,
-      32I_EXT}
-
-    The following image classes are currently disallowed:
-
-    - Textures with borders
-
-    - Multisampled renderbuffers
-
-    Parameters
-    ----------
-    image : :py:obj:`~.GLuint`
-        name of texture or renderbuffer object to be registered
-    target : :py:obj:`~.GLenum`
-        Identifies the type of object specified by `image`
-    Flags : unsigned int
-        Register flags
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_ALREADY_MAPPED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_OPERATING_SYSTEM`
-    pCudaResource : :py:obj:`~.CUgraphicsResource`
-        Pointer to the returned object handle
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaGraphicsGLRegisterImage`
-    """
-    cdef cydriver.GLenum cytarget
-    if target is None:
-        cytarget = <cydriver.GLenum><void_ptr>0
-    elif isinstance(target, (GLenum,)):
-        ptarget = int(target)
-        cytarget = <cydriver.GLenum><void_ptr>ptarget
-    else:
-        ptarget = int(GLenum(target))
-        cytarget = <cydriver.GLenum><void_ptr>ptarget
-    cdef cydriver.GLuint cyimage
-    if image is None:
-        cyimage = <cydriver.GLuint><void_ptr>0
-    elif isinstance(image, (GLuint,)):
-        pimage = int(image)
-        cyimage = <cydriver.GLuint><void_ptr>pimage
-    else:
-        pimage = int(GLuint(image))
-        cyimage = <cydriver.GLuint><void_ptr>pimage
-    cdef CUgraphicsResource pCudaResource = CUgraphicsResource()
-    err = cydriver.cuGraphicsGLRegisterImage(<cydriver.CUgraphicsResource*>pCudaResource._ptr, cyimage, cytarget, Flags)
-    return (CUresult(err), pCudaResource)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuGLGetDevices(unsigned int cudaDeviceCount, deviceList not None : CUGLDeviceList):
-    """ Gets the CUDA devices associated with the current OpenGL context.
-
-    Returns in `*pCudaDeviceCount` the number of CUDA-compatible devices
-    corresponding to the current OpenGL context. Also returns in
-    `*pCudaDevices` at most cudaDeviceCount of the CUDA-compatible devices
-    corresponding to the current OpenGL context. If any of the GPUs being
-    used by the current OpenGL context are not CUDA capable then the call
-    will return CUDA_ERROR_NO_DEVICE.
-
-    The `deviceList` argument may be any of the following:
-    CU_GL_DEVICE_LIST_ALL: Query all devices used by the current OpenGL
-    context. CU_GL_DEVICE_LIST_CURRENT_FRAME: Query the devices used by the
-    current OpenGL context to render the current frame (in SLI).
-    CU_GL_DEVICE_LIST_NEXT_FRAME: Query the devices used by the current
-    OpenGL context to render the next frame (in SLI). Note that this is a
-    prediction, it can't be guaranteed that this is correct in all cases.
-
-    Parameters
-    ----------
-    cudaDeviceCount : unsigned int
-        The size of the output device array pCudaDevices.
-    deviceList : CUGLDeviceList
-        The set of devices to return.
-
-    Returns
-    -------
-    CUresult
-        CUDA_SUCCESS
-        CUDA_ERROR_NO_DEVICE
-        CUDA_ERROR_INVALID_VALUE
-        CUDA_ERROR_INVALID_CONTEXT
-        CUDA_ERROR_INVALID_GRAPHICS_CONTEXT
-    pCudaDeviceCount : unsigned int
-        Returned number of CUDA devices.
-    pCudaDevices : List[CUdevice]
-        Returned CUDA devices.
-
-    See Also
-    --------
-    ~.cudaGLGetDevices
-
-    Notes
-    -----
-    This function is not supported on Mac OS X.
-
-    """
-    cdef unsigned int pCudaDeviceCount = 0
-    cdef cydriver.CUdevice* cypCudaDevices = NULL
-    pypCudaDevices = []
-    if cudaDeviceCount != 0:
-        cypCudaDevices = <cydriver.CUdevice*>calloc(cudaDeviceCount, sizeof(cydriver.CUdevice))
-        if cypCudaDevices is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(cudaDeviceCount) + 'x' + str(sizeof(cydriver.CUdevice)))
-    cdef cydriver.CUGLDeviceList cydeviceList = deviceList.value
-    err = cydriver.cuGLGetDevices(&pCudaDeviceCount, cypCudaDevices, cudaDeviceCount, cydeviceList)
-    if CUresult(err) == CUresult(0):
-        pypCudaDevices = [CUdevice(init_value=<void_ptr>cypCudaDevices[idx]) for idx in range(cudaDeviceCount)]
-    if cypCudaDevices is not NULL:
-        free(cypCudaDevices)
-    return (CUresult(err), pCudaDeviceCount, pypCudaDevices)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuVDPAUGetDevice(vdpDevice, vdpGetProcAddress):
-    """ Gets the CUDA device associated with a VDPAU device.
-
-    Returns in `*pDevice` the CUDA device associated with a `vdpDevice`, if
-    applicable.
-
-    Parameters
-    ----------
-    vdpDevice : :py:obj:`~.VdpDevice`
-        A VdpDevice handle
-    vdpGetProcAddress : :py:obj:`~.VdpGetProcAddress`
-        VDPAU's VdpGetProcAddress function pointer
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`
-    pDevice : :py:obj:`~.CUdevice`
-        Device associated with vdpDevice
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuVDPAUCtxCreate`, :py:obj:`~.cuGraphicsVDPAURegisterVideoSurface`, :py:obj:`~.cuGraphicsVDPAURegisterOutputSurface`, :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsResourceSetMapFlags`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaVDPAUGetDevice`
-    """
-    cdef cydriver.VdpGetProcAddress *cyvdpGetProcAddress
-    if vdpGetProcAddress is None:
-        cyvdpGetProcAddress = <cydriver.VdpGetProcAddress*><void_ptr>NULL
-    elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)):
-        pvdpGetProcAddress = vdpGetProcAddress.getPtr()
-        cyvdpGetProcAddress = <cydriver.VdpGetProcAddress*><void_ptr>pvdpGetProcAddress
-    elif isinstance(vdpGetProcAddress, (int)):
-        cyvdpGetProcAddress = <cydriver.VdpGetProcAddress*><void_ptr>vdpGetProcAddress
-    else:
-        raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected <class 'int, driver.VdpGetProcAddress'>, found " + str(type(vdpGetProcAddress)))
-    cdef cydriver.VdpDevice cyvdpDevice
-    if vdpDevice is None:
-        cyvdpDevice = <cydriver.VdpDevice><void_ptr>0
-    elif isinstance(vdpDevice, (VdpDevice,)):
-        pvdpDevice = int(vdpDevice)
-        cyvdpDevice = <cydriver.VdpDevice><void_ptr>pvdpDevice
-    else:
-        pvdpDevice = int(VdpDevice(vdpDevice))
-        cyvdpDevice = <cydriver.VdpDevice><void_ptr>pvdpDevice
-    cdef CUdevice pDevice = CUdevice()
-    err = cydriver.cuVDPAUGetDevice(<cydriver.CUdevice*>pDevice._ptr, cyvdpDevice, cyvdpGetProcAddress)
-    return (CUresult(err), pDevice)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuVDPAUCtxCreate(unsigned int flags, device, vdpDevice, vdpGetProcAddress):
-    """ Create a CUDA context for interoperability with VDPAU.
-
-    Creates a new CUDA context, initializes VDPAU interoperability, and
-    associates the CUDA context with the calling thread. It must be called
-    before performing any other VDPAU interoperability operations. It may
-    fail if the needed VDPAU driver facilities are not available. For usage
-    of the `flags` parameter, see :py:obj:`~.cuCtxCreate()`.
-
-    Parameters
-    ----------
-    flags : unsigned int
-        Options for CUDA context creation
-    device : :py:obj:`~.CUdevice`
-        Device on which to create the context
-    vdpDevice : :py:obj:`~.VdpDevice`
-        The VdpDevice to interop with
-    vdpGetProcAddress : :py:obj:`~.VdpGetProcAddress`
-        VDPAU's VdpGetProcAddress function pointer
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    pCtx : :py:obj:`~.CUcontext`
-        Returned CUDA context
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuGraphicsVDPAURegisterVideoSurface`, :py:obj:`~.cuGraphicsVDPAURegisterOutputSurface`, :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsResourceSetMapFlags`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuVDPAUGetDevice`
-    """
-    cdef cydriver.VdpGetProcAddress *cyvdpGetProcAddress
-    if vdpGetProcAddress is None:
-        cyvdpGetProcAddress = <cydriver.VdpGetProcAddress*><void_ptr>NULL
-    elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)):
-        pvdpGetProcAddress = vdpGetProcAddress.getPtr()
-        cyvdpGetProcAddress = <cydriver.VdpGetProcAddress*><void_ptr>pvdpGetProcAddress
-    elif isinstance(vdpGetProcAddress, (int)):
-        cyvdpGetProcAddress = <cydriver.VdpGetProcAddress*><void_ptr>vdpGetProcAddress
-    else:
-        raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected <class 'int, driver.VdpGetProcAddress'>, found " + str(type(vdpGetProcAddress)))
-    cdef cydriver.VdpDevice cyvdpDevice
-    if vdpDevice is None:
-        cyvdpDevice = <cydriver.VdpDevice><void_ptr>0
-    elif isinstance(vdpDevice, (VdpDevice,)):
-        pvdpDevice = int(vdpDevice)
-        cyvdpDevice = <cydriver.VdpDevice><void_ptr>pvdpDevice
-    else:
-        pvdpDevice = int(VdpDevice(vdpDevice))
-        cyvdpDevice = <cydriver.VdpDevice><void_ptr>pvdpDevice
-    cdef cydriver.CUdevice cydevice
-    if device is None:
-        cydevice = <cydriver.CUdevice>0
-    elif isinstance(device, (CUdevice,)):
-        pdevice = int(device)
-        cydevice = <cydriver.CUdevice>pdevice
-    else:
-        pdevice = int(CUdevice(device))
-        cydevice = <cydriver.CUdevice>pdevice
-    cdef CUcontext pCtx = CUcontext()
-    err = cydriver.cuVDPAUCtxCreate(<cydriver.CUcontext*>pCtx._ptr, flags, cydevice, cyvdpDevice, cyvdpGetProcAddress)
-    return (CUresult(err), pCtx)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuGraphicsVDPAURegisterVideoSurface(vdpSurface, unsigned int flags):
-    """ Registers a VDPAU VdpVideoSurface object.
-
-    Registers the VdpVideoSurface specified by `vdpSurface` for access by
-    CUDA. A handle to the registered object is returned as `pCudaResource`.
-    The surface's intended usage is specified using `flags`, as follows:
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE`: Specifies no hints
-      about how this resource will be used. It is therefore assumed that
-      this resource will be read from and written to by CUDA. This is the
-      default value.
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY`: Specifies that
-      CUDA will not write to this resource.
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD`: Specifies
-      that CUDA will not read from this resource and will write over the
-      entire contents of the resource, so none of the data previously
-      stored in the resource will be preserved.
-
-    The VdpVideoSurface is presented as an array of subresources that may
-    be accessed using pointers returned by
-    :py:obj:`~.cuGraphicsSubResourceGetMappedArray`. The exact number of
-    valid `arrayIndex` values depends on the VDPAU surface format. The
-    mapping is shown in the table below. `mipLevel` must be 0.
-
-    Parameters
-    ----------
-    vdpSurface : :py:obj:`~.VdpVideoSurface`
-        The VdpVideoSurface to be registered
-    flags : unsigned int
-        Map flags
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_ALREADY_MAPPED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`,
-    pCudaResource : :py:obj:`~.CUgraphicsResource`
-        Pointer to the returned object handle
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuVDPAUCtxCreate`, :py:obj:`~.cuGraphicsVDPAURegisterOutputSurface`, :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsResourceSetMapFlags`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuVDPAUGetDevice`, :py:obj:`~.cudaGraphicsVDPAURegisterVideoSurface`
-    """
-    cdef cydriver.VdpVideoSurface cyvdpSurface
-    if vdpSurface is None:
-        cyvdpSurface = <cydriver.VdpVideoSurface><void_ptr>0
-    elif isinstance(vdpSurface, (VdpVideoSurface,)):
-        pvdpSurface = int(vdpSurface)
-        cyvdpSurface = <cydriver.VdpVideoSurface><void_ptr>pvdpSurface
-    else:
-        pvdpSurface = int(VdpVideoSurface(vdpSurface))
-        cyvdpSurface = <cydriver.VdpVideoSurface><void_ptr>pvdpSurface
-    cdef CUgraphicsResource pCudaResource = CUgraphicsResource()
-    err = cydriver.cuGraphicsVDPAURegisterVideoSurface(<cydriver.CUgraphicsResource*>pCudaResource._ptr, cyvdpSurface, flags)
-    return (CUresult(err), pCudaResource)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cuGraphicsVDPAURegisterOutputSurface(vdpSurface, unsigned int flags):
-    """ Registers a VDPAU VdpOutputSurface object.
-
-    Registers the VdpOutputSurface specified by `vdpSurface` for access by
-    CUDA. A handle to the registered object is returned as `pCudaResource`.
-    The surface's intended usage is specified using `flags`, as follows:
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE`: Specifies no hints
-      about how this resource will be used. It is therefore assumed that
-      this resource will be read from and written to by CUDA. This is the
-      default value.
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY`: Specifies that
-      CUDA will not write to this resource.
-
-    - :py:obj:`~.CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD`: Specifies
-      that CUDA will not read from this resource and will write over the
-      entire contents of the resource, so none of the data previously
-      stored in the resource will be preserved.
-
-    The VdpOutputSurface is presented as an array of subresources that may
-    be accessed using pointers returned by
-    :py:obj:`~.cuGraphicsSubResourceGetMappedArray`. The exact number of
-    valid `arrayIndex` values depends on the VDPAU surface format. The
-    mapping is shown in the table below. `mipLevel` must be 0.
-
-    Parameters
-    ----------
-    vdpSurface : :py:obj:`~.VdpOutputSurface`
-        The VdpOutputSurface to be registered
-    flags : unsigned int
-        Map flags
-
-    Returns
-    -------
-    CUresult
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_HANDLE`, :py:obj:`~.CUDA_ERROR_ALREADY_MAPPED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`,
-    pCudaResource : :py:obj:`~.CUgraphicsResource`
-        Pointer to the returned object handle
-
-    See Also
-    --------
-    :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuVDPAUCtxCreate`, :py:obj:`~.cuGraphicsVDPAURegisterVideoSurface`, :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsResourceSetMapFlags`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuVDPAUGetDevice`, :py:obj:`~.cudaGraphicsVDPAURegisterOutputSurface`
-    """
-    cdef cydriver.VdpOutputSurface cyvdpSurface
-    if vdpSurface is None:
-        cyvdpSurface = <cydriver.VdpOutputSurface><void_ptr>0
-    elif isinstance(vdpSurface, (VdpOutputSurface,)):
-        pvdpSurface = int(vdpSurface)
-        cyvdpSurface = <cydriver.VdpOutputSurface><void_ptr>pvdpSurface
-    else:
-        pvdpSurface = int(VdpOutputSurface(vdpSurface))
-        cyvdpSurface = <cydriver.VdpOutputSurface><void_ptr>pvdpSurface
-    cdef CUgraphicsResource pCudaResource = CUgraphicsResource()
-    err = cydriver.cuGraphicsVDPAURegisterOutputSurface(<cydriver.CUgraphicsResource*>pCudaResource._ptr, cyvdpSurface, flags)
-    return (CUresult(err), pCudaResource)
-{{endif}}
-
-@cython.embedsignature(True)
-def sizeof(objType):
-    """ Returns the size of provided CUDA Python structure in bytes
-
-    Parameters
-    ----------
-    objType : Any
-        CUDA Python object
-
-    Returns
-    -------
-    lowered_name : int
-        The size of `objType` in bytes
-    """
-    {{if 'cuuint32_t' in found_types}}
-    if objType == cuuint32_t:
-        return sizeof(cydriver.cuuint32_t){{endif}}
-    {{if 'cuuint64_t' in found_types}}
-    if objType == cuuint64_t:
-        return sizeof(cydriver.cuuint64_t){{endif}}
-    {{if 'CUdeviceptr_v2' in found_types}}
-    if objType == CUdeviceptr_v2:
-        return sizeof(cydriver.CUdeviceptr_v2){{endif}}
-    {{if 'CUdeviceptr' in found_types}}
-    if objType == CUdeviceptr:
-        return sizeof(cydriver.CUdeviceptr){{endif}}
-    {{if 'CUdevice_v1' in found_types}}
-    if objType == CUdevice_v1:
-        return sizeof(cydriver.CUdevice_v1){{endif}}
-    {{if 'CUdevice' in found_types}}
-    if objType == CUdevice:
-        return sizeof(cydriver.CUdevice){{endif}}
-    {{if 'CUcontext' in found_types}}
-    if objType == CUcontext:
-        return sizeof(cydriver.CUcontext){{endif}}
-    {{if 'CUmodule' in found_types}}
-    if objType == CUmodule:
-        return sizeof(cydriver.CUmodule){{endif}}
-    {{if 'CUfunction' in found_types}}
-    if objType == CUfunction:
-        return sizeof(cydriver.CUfunction){{endif}}
-    {{if 'CUlibrary' in found_types}}
-    if objType == CUlibrary:
-        return sizeof(cydriver.CUlibrary){{endif}}
-    {{if 'CUkernel' in found_types}}
-    if objType == CUkernel:
-        return sizeof(cydriver.CUkernel){{endif}}
-    {{if 'CUarray' in found_types}}
-    if objType == CUarray:
-        return sizeof(cydriver.CUarray){{endif}}
-    {{if 'CUmipmappedArray' in found_types}}
-    if objType == CUmipmappedArray:
-        return sizeof(cydriver.CUmipmappedArray){{endif}}
-    {{if 'CUtexref' in found_types}}
-    if objType == CUtexref:
-        return sizeof(cydriver.CUtexref){{endif}}
-    {{if 'CUsurfref' in found_types}}
-    if objType == CUsurfref:
-        return sizeof(cydriver.CUsurfref){{endif}}
-    {{if 'CUevent' in found_types}}
-    if objType == CUevent:
-        return sizeof(cydriver.CUevent){{endif}}
-    {{if 'CUstream' in found_types}}
-    if objType == CUstream:
-        return sizeof(cydriver.CUstream){{endif}}
-    {{if 'CUgraphicsResource' in found_types}}
-    if objType == CUgraphicsResource:
-        return sizeof(cydriver.CUgraphicsResource){{endif}}
-    {{if 'CUtexObject_v1' in found_types}}
-    if objType == CUtexObject_v1:
-        return sizeof(cydriver.CUtexObject_v1){{endif}}
-    {{if 'CUtexObject' in found_types}}
-    if objType == CUtexObject:
-        return sizeof(cydriver.CUtexObject){{endif}}
-    {{if 'CUsurfObject_v1' in found_types}}
-    if objType == CUsurfObject_v1:
-        return sizeof(cydriver.CUsurfObject_v1){{endif}}
-    {{if 'CUsurfObject' in found_types}}
-    if objType == CUsurfObject:
-        return sizeof(cydriver.CUsurfObject){{endif}}
-    {{if 'CUexternalMemory' in found_types}}
-    if objType == CUexternalMemory:
-        return sizeof(cydriver.CUexternalMemory){{endif}}
-    {{if 'CUexternalSemaphore' in found_types}}
-    if objType == CUexternalSemaphore:
-        return sizeof(cydriver.CUexternalSemaphore){{endif}}
-    {{if 'CUgraph' in found_types}}
-    if objType == CUgraph:
-        return sizeof(cydriver.CUgraph){{endif}}
-    {{if 'CUgraphNode' in found_types}}
-    if objType == CUgraphNode:
-        return sizeof(cydriver.CUgraphNode){{endif}}
-    {{if 'CUgraphExec' in found_types}}
-    if objType == CUgraphExec:
-        return sizeof(cydriver.CUgraphExec){{endif}}
-    {{if 'CUmemoryPool' in found_types}}
-    if objType == CUmemoryPool:
-        return sizeof(cydriver.CUmemoryPool){{endif}}
-    {{if 'CUuserObject' in found_types}}
-    if objType == CUuserObject:
-        return sizeof(cydriver.CUuserObject){{endif}}
-    {{if 'CUgraphConditionalHandle' in found_types}}
-    if objType == CUgraphConditionalHandle:
-        return sizeof(cydriver.CUgraphConditionalHandle){{endif}}
-    {{if 'CUgraphDeviceNode' in found_types}}
-    if objType == CUgraphDeviceNode:
-        return sizeof(cydriver.CUgraphDeviceNode){{endif}}
-    {{if 'CUasyncCallbackHandle' in found_types}}
-    if objType == CUasyncCallbackHandle:
-        return sizeof(cydriver.CUasyncCallbackHandle){{endif}}
-    {{if 'CUgreenCtx' in found_types}}
-    if objType == CUgreenCtx:
-        return sizeof(cydriver.CUgreenCtx){{endif}}
-    {{if 'struct CUuuid_st' in found_types}}
-    if objType == CUuuid_st:
-        return sizeof(cydriver.CUuuid_st){{endif}}
-    {{if 'CUuuid' in found_types}}
-    if objType == CUuuid:
-        return sizeof(cydriver.CUuuid){{endif}}
-    {{if 'struct CUmemFabricHandle_st' in found_types}}
-    if objType == CUmemFabricHandle_st:
-        return sizeof(cydriver.CUmemFabricHandle_st){{endif}}
-    {{if 'CUmemFabricHandle_v1' in found_types}}
-    if objType == CUmemFabricHandle_v1:
-        return sizeof(cydriver.CUmemFabricHandle_v1){{endif}}
-    {{if 'CUmemFabricHandle' in found_types}}
-    if objType == CUmemFabricHandle:
-        return sizeof(cydriver.CUmemFabricHandle){{endif}}
-    {{if 'struct CUipcEventHandle_st' in found_types}}
-    if objType == CUipcEventHandle_st:
-        return sizeof(cydriver.CUipcEventHandle_st){{endif}}
-    {{if 'CUipcEventHandle_v1' in found_types}}
-    if objType == CUipcEventHandle_v1:
-        return sizeof(cydriver.CUipcEventHandle_v1){{endif}}
-    {{if 'CUipcEventHandle' in found_types}}
-    if objType == CUipcEventHandle:
-        return sizeof(cydriver.CUipcEventHandle){{endif}}
-    {{if 'struct CUipcMemHandle_st' in found_types}}
-    if objType == CUipcMemHandle_st:
-        return sizeof(cydriver.CUipcMemHandle_st){{endif}}
-    {{if 'CUipcMemHandle_v1' in found_types}}
-    if objType == CUipcMemHandle_v1:
-        return sizeof(cydriver.CUipcMemHandle_v1){{endif}}
-    {{if 'CUipcMemHandle' in found_types}}
-    if objType == CUipcMemHandle:
-        return sizeof(cydriver.CUipcMemHandle){{endif}}
-    {{if 'union CUstreamBatchMemOpParams_union' in found_types}}
-    if objType == CUstreamBatchMemOpParams_union:
-        return sizeof(cydriver.CUstreamBatchMemOpParams_union){{endif}}
-    {{if 'CUstreamBatchMemOpParams_v1' in found_types}}
-    if objType == CUstreamBatchMemOpParams_v1:
-        return sizeof(cydriver.CUstreamBatchMemOpParams_v1){{endif}}
-    {{if 'CUstreamBatchMemOpParams' in found_types}}
-    if objType == CUstreamBatchMemOpParams:
-        return sizeof(cydriver.CUstreamBatchMemOpParams){{endif}}
-    {{if 'struct CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st' in found_types}}
-    if objType == CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st:
-        return sizeof(cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st){{endif}}
-    {{if 'CUDA_BATCH_MEM_OP_NODE_PARAMS_v1' in found_types}}
-    if objType == CUDA_BATCH_MEM_OP_NODE_PARAMS_v1:
-        return sizeof(cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v1){{endif}}
-    {{if 'CUDA_BATCH_MEM_OP_NODE_PARAMS' in found_types}}
-    if objType == CUDA_BATCH_MEM_OP_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS){{endif}}
-    {{if 'struct CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st' in found_types}}
-    if objType == CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st:
-        return sizeof(cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st){{endif}}
-    {{if 'CUDA_BATCH_MEM_OP_NODE_PARAMS_v2' in found_types}}
-    if objType == CUDA_BATCH_MEM_OP_NODE_PARAMS_v2:
-        return sizeof(cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v2){{endif}}
-    {{if 'struct CUasyncNotificationInfo_st' in found_types}}
-    if objType == CUasyncNotificationInfo_st:
-        return sizeof(cydriver.CUasyncNotificationInfo_st){{endif}}
-    {{if 'CUasyncNotificationInfo' in found_types}}
-    if objType == CUasyncNotificationInfo:
-        return sizeof(cydriver.CUasyncNotificationInfo){{endif}}
-    {{if 'CUasyncCallback' in found_types}}
-    if objType == CUasyncCallback:
-        return sizeof(cydriver.CUasyncCallback){{endif}}
-    {{if 'struct CUdevprop_st' in found_types}}
-    if objType == CUdevprop_st:
-        return sizeof(cydriver.CUdevprop_st){{endif}}
-    {{if 'CUdevprop_v1' in found_types}}
-    if objType == CUdevprop_v1:
-        return sizeof(cydriver.CUdevprop_v1){{endif}}
-    {{if 'CUdevprop' in found_types}}
-    if objType == CUdevprop:
-        return sizeof(cydriver.CUdevprop){{endif}}
-    {{if 'CUlinkState' in found_types}}
-    if objType == CUlinkState:
-        return sizeof(cydriver.CUlinkState){{endif}}
-    {{if 'CUhostFn' in found_types}}
-    if objType == CUhostFn:
-        return sizeof(cydriver.CUhostFn){{endif}}
-    {{if 'struct CUaccessPolicyWindow_st' in found_types}}
-    if objType == CUaccessPolicyWindow_st:
-        return sizeof(cydriver.CUaccessPolicyWindow_st){{endif}}
-    {{if 'CUaccessPolicyWindow_v1' in found_types}}
-    if objType == CUaccessPolicyWindow_v1:
-        return sizeof(cydriver.CUaccessPolicyWindow_v1){{endif}}
-    {{if 'CUaccessPolicyWindow' in found_types}}
-    if objType == CUaccessPolicyWindow:
-        return sizeof(cydriver.CUaccessPolicyWindow){{endif}}
-    {{if 'struct CUDA_KERNEL_NODE_PARAMS_st' in found_types}}
-    if objType == CUDA_KERNEL_NODE_PARAMS_st:
-        return sizeof(cydriver.CUDA_KERNEL_NODE_PARAMS_st){{endif}}
-    {{if 'CUDA_KERNEL_NODE_PARAMS_v1' in found_types}}
-    if objType == CUDA_KERNEL_NODE_PARAMS_v1:
-        return sizeof(cydriver.CUDA_KERNEL_NODE_PARAMS_v1){{endif}}
-    {{if 'struct CUDA_KERNEL_NODE_PARAMS_v2_st' in found_types}}
-    if objType == CUDA_KERNEL_NODE_PARAMS_v2_st:
-        return sizeof(cydriver.CUDA_KERNEL_NODE_PARAMS_v2_st){{endif}}
-    {{if 'CUDA_KERNEL_NODE_PARAMS_v2' in found_types}}
-    if objType == CUDA_KERNEL_NODE_PARAMS_v2:
-        return sizeof(cydriver.CUDA_KERNEL_NODE_PARAMS_v2){{endif}}
-    {{if 'CUDA_KERNEL_NODE_PARAMS' in found_types}}
-    if objType == CUDA_KERNEL_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_KERNEL_NODE_PARAMS){{endif}}
-    {{if 'struct CUDA_KERNEL_NODE_PARAMS_v3_st' in found_types}}
-    if objType == CUDA_KERNEL_NODE_PARAMS_v3_st:
-        return sizeof(cydriver.CUDA_KERNEL_NODE_PARAMS_v3_st){{endif}}
-    {{if 'CUDA_KERNEL_NODE_PARAMS_v3' in found_types}}
-    if objType == CUDA_KERNEL_NODE_PARAMS_v3:
-        return sizeof(cydriver.CUDA_KERNEL_NODE_PARAMS_v3){{endif}}
-    {{if 'struct CUDA_MEMSET_NODE_PARAMS_st' in found_types}}
-    if objType == CUDA_MEMSET_NODE_PARAMS_st:
-        return sizeof(cydriver.CUDA_MEMSET_NODE_PARAMS_st){{endif}}
-    {{if 'CUDA_MEMSET_NODE_PARAMS_v1' in found_types}}
-    if objType == CUDA_MEMSET_NODE_PARAMS_v1:
-        return sizeof(cydriver.CUDA_MEMSET_NODE_PARAMS_v1){{endif}}
-    {{if 'CUDA_MEMSET_NODE_PARAMS' in found_types}}
-    if objType == CUDA_MEMSET_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_MEMSET_NODE_PARAMS){{endif}}
-    {{if 'struct CUDA_MEMSET_NODE_PARAMS_v2_st' in found_types}}
-    if objType == CUDA_MEMSET_NODE_PARAMS_v2_st:
-        return sizeof(cydriver.CUDA_MEMSET_NODE_PARAMS_v2_st){{endif}}
-    {{if 'CUDA_MEMSET_NODE_PARAMS_v2' in found_types}}
-    if objType == CUDA_MEMSET_NODE_PARAMS_v2:
-        return sizeof(cydriver.CUDA_MEMSET_NODE_PARAMS_v2){{endif}}
-    {{if 'struct CUDA_HOST_NODE_PARAMS_st' in found_types}}
-    if objType == CUDA_HOST_NODE_PARAMS_st:
-        return sizeof(cydriver.CUDA_HOST_NODE_PARAMS_st){{endif}}
-    {{if 'CUDA_HOST_NODE_PARAMS_v1' in found_types}}
-    if objType == CUDA_HOST_NODE_PARAMS_v1:
-        return sizeof(cydriver.CUDA_HOST_NODE_PARAMS_v1){{endif}}
-    {{if 'CUDA_HOST_NODE_PARAMS' in found_types}}
-    if objType == CUDA_HOST_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_HOST_NODE_PARAMS){{endif}}
-    {{if 'struct CUDA_HOST_NODE_PARAMS_v2_st' in found_types}}
-    if objType == CUDA_HOST_NODE_PARAMS_v2_st:
-        return sizeof(cydriver.CUDA_HOST_NODE_PARAMS_v2_st){{endif}}
-    {{if 'CUDA_HOST_NODE_PARAMS_v2' in found_types}}
-    if objType == CUDA_HOST_NODE_PARAMS_v2:
-        return sizeof(cydriver.CUDA_HOST_NODE_PARAMS_v2){{endif}}
-    {{if 'struct CUDA_CONDITIONAL_NODE_PARAMS' in found_types}}
-    if objType == CUDA_CONDITIONAL_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_CONDITIONAL_NODE_PARAMS){{endif}}
-    {{if 'struct CUgraphEdgeData_st' in found_types}}
-    if objType == CUgraphEdgeData_st:
-        return sizeof(cydriver.CUgraphEdgeData_st){{endif}}
-    {{if 'CUgraphEdgeData' in found_types}}
-    if objType == CUgraphEdgeData:
-        return sizeof(cydriver.CUgraphEdgeData){{endif}}
-    {{if 'struct CUDA_GRAPH_INSTANTIATE_PARAMS_st' in found_types}}
-    if objType == CUDA_GRAPH_INSTANTIATE_PARAMS_st:
-        return sizeof(cydriver.CUDA_GRAPH_INSTANTIATE_PARAMS_st){{endif}}
-    {{if 'CUDA_GRAPH_INSTANTIATE_PARAMS' in found_types}}
-    if objType == CUDA_GRAPH_INSTANTIATE_PARAMS:
-        return sizeof(cydriver.CUDA_GRAPH_INSTANTIATE_PARAMS){{endif}}
-    {{if 'struct CUlaunchMemSyncDomainMap_st' in found_types}}
-    if objType == CUlaunchMemSyncDomainMap_st:
-        return sizeof(cydriver.CUlaunchMemSyncDomainMap_st){{endif}}
-    {{if 'CUlaunchMemSyncDomainMap' in found_types}}
-    if objType == CUlaunchMemSyncDomainMap:
-        return sizeof(cydriver.CUlaunchMemSyncDomainMap){{endif}}
-    {{if 'union CUlaunchAttributeValue_union' in found_types}}
-    if objType == CUlaunchAttributeValue_union:
-        return sizeof(cydriver.CUlaunchAttributeValue_union){{endif}}
-    {{if 'CUlaunchAttributeValue' in found_types}}
-    if objType == CUlaunchAttributeValue:
-        return sizeof(cydriver.CUlaunchAttributeValue){{endif}}
-    {{if 'struct CUlaunchAttribute_st' in found_types}}
-    if objType == CUlaunchAttribute_st:
-        return sizeof(cydriver.CUlaunchAttribute_st){{endif}}
-    {{if 'CUlaunchAttribute' in found_types}}
-    if objType == CUlaunchAttribute:
-        return sizeof(cydriver.CUlaunchAttribute){{endif}}
-    {{if 'struct CUlaunchConfig_st' in found_types}}
-    if objType == CUlaunchConfig_st:
-        return sizeof(cydriver.CUlaunchConfig_st){{endif}}
-    {{if 'CUlaunchConfig' in found_types}}
-    if objType == CUlaunchConfig:
-        return sizeof(cydriver.CUlaunchConfig){{endif}}
-    {{if 'CUkernelNodeAttrValue_v1' in found_types}}
-    if objType == CUkernelNodeAttrValue_v1:
-        return sizeof(cydriver.CUkernelNodeAttrValue_v1){{endif}}
-    {{if 'CUkernelNodeAttrValue' in found_types}}
-    if objType == CUkernelNodeAttrValue:
-        return sizeof(cydriver.CUkernelNodeAttrValue){{endif}}
-    {{if 'CUstreamAttrValue_v1' in found_types}}
-    if objType == CUstreamAttrValue_v1:
-        return sizeof(cydriver.CUstreamAttrValue_v1){{endif}}
-    {{if 'CUstreamAttrValue' in found_types}}
-    if objType == CUstreamAttrValue:
-        return sizeof(cydriver.CUstreamAttrValue){{endif}}
-    {{if 'struct CUexecAffinitySmCount_st' in found_types}}
-    if objType == CUexecAffinitySmCount_st:
-        return sizeof(cydriver.CUexecAffinitySmCount_st){{endif}}
-    {{if 'CUexecAffinitySmCount_v1' in found_types}}
-    if objType == CUexecAffinitySmCount_v1:
-        return sizeof(cydriver.CUexecAffinitySmCount_v1){{endif}}
-    {{if 'CUexecAffinitySmCount' in found_types}}
-    if objType == CUexecAffinitySmCount:
-        return sizeof(cydriver.CUexecAffinitySmCount){{endif}}
-    {{if 'struct CUexecAffinityParam_st' in found_types}}
-    if objType == CUexecAffinityParam_st:
-        return sizeof(cydriver.CUexecAffinityParam_st){{endif}}
-    {{if 'CUexecAffinityParam_v1' in found_types}}
-    if objType == CUexecAffinityParam_v1:
-        return sizeof(cydriver.CUexecAffinityParam_v1){{endif}}
-    {{if 'CUexecAffinityParam' in found_types}}
-    if objType == CUexecAffinityParam:
-        return sizeof(cydriver.CUexecAffinityParam){{endif}}
-    {{if 'struct CUctxCigParam_st' in found_types}}
-    if objType == CUctxCigParam_st:
-        return sizeof(cydriver.CUctxCigParam_st){{endif}}
-    {{if 'CUctxCigParam' in found_types}}
-    if objType == CUctxCigParam:
-        return sizeof(cydriver.CUctxCigParam){{endif}}
-    {{if 'struct CUctxCreateParams_st' in found_types}}
-    if objType == CUctxCreateParams_st:
-        return sizeof(cydriver.CUctxCreateParams_st){{endif}}
-    {{if 'CUctxCreateParams' in found_types}}
-    if objType == CUctxCreateParams:
-        return sizeof(cydriver.CUctxCreateParams){{endif}}
-    {{if 'struct CUlibraryHostUniversalFunctionAndDataTable_st' in found_types}}
-    if objType == CUlibraryHostUniversalFunctionAndDataTable_st:
-        return sizeof(cydriver.CUlibraryHostUniversalFunctionAndDataTable_st){{endif}}
-    {{if 'CUlibraryHostUniversalFunctionAndDataTable' in found_types}}
-    if objType == CUlibraryHostUniversalFunctionAndDataTable:
-        return sizeof(cydriver.CUlibraryHostUniversalFunctionAndDataTable){{endif}}
-    {{if 'CUstreamCallback' in found_types}}
-    if objType == CUstreamCallback:
-        return sizeof(cydriver.CUstreamCallback){{endif}}
-    {{if 'CUoccupancyB2DSize' in found_types}}
-    if objType == CUoccupancyB2DSize:
-        return sizeof(cydriver.CUoccupancyB2DSize){{endif}}
-    {{if 'struct CUDA_MEMCPY2D_st' in found_types}}
-    if objType == CUDA_MEMCPY2D_st:
-        return sizeof(cydriver.CUDA_MEMCPY2D_st){{endif}}
-    {{if 'CUDA_MEMCPY2D_v2' in found_types}}
-    if objType == CUDA_MEMCPY2D_v2:
-        return sizeof(cydriver.CUDA_MEMCPY2D_v2){{endif}}
-    {{if 'CUDA_MEMCPY2D' in found_types}}
-    if objType == CUDA_MEMCPY2D:
-        return sizeof(cydriver.CUDA_MEMCPY2D){{endif}}
-    {{if 'struct CUDA_MEMCPY3D_st' in found_types}}
-    if objType == CUDA_MEMCPY3D_st:
-        return sizeof(cydriver.CUDA_MEMCPY3D_st){{endif}}
-    {{if 'CUDA_MEMCPY3D_v2' in found_types}}
-    if objType == CUDA_MEMCPY3D_v2:
-        return sizeof(cydriver.CUDA_MEMCPY3D_v2){{endif}}
-    {{if 'CUDA_MEMCPY3D' in found_types}}
-    if objType == CUDA_MEMCPY3D:
-        return sizeof(cydriver.CUDA_MEMCPY3D){{endif}}
-    {{if 'struct CUDA_MEMCPY3D_PEER_st' in found_types}}
-    if objType == CUDA_MEMCPY3D_PEER_st:
-        return sizeof(cydriver.CUDA_MEMCPY3D_PEER_st){{endif}}
-    {{if 'CUDA_MEMCPY3D_PEER_v1' in found_types}}
-    if objType == CUDA_MEMCPY3D_PEER_v1:
-        return sizeof(cydriver.CUDA_MEMCPY3D_PEER_v1){{endif}}
-    {{if 'CUDA_MEMCPY3D_PEER' in found_types}}
-    if objType == CUDA_MEMCPY3D_PEER:
-        return sizeof(cydriver.CUDA_MEMCPY3D_PEER){{endif}}
-    {{if 'struct CUDA_MEMCPY_NODE_PARAMS_st' in found_types}}
-    if objType == CUDA_MEMCPY_NODE_PARAMS_st:
-        return sizeof(cydriver.CUDA_MEMCPY_NODE_PARAMS_st){{endif}}
-    {{if 'CUDA_MEMCPY_NODE_PARAMS' in found_types}}
-    if objType == CUDA_MEMCPY_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_MEMCPY_NODE_PARAMS){{endif}}
-    {{if 'struct CUDA_ARRAY_DESCRIPTOR_st' in found_types}}
-    if objType == CUDA_ARRAY_DESCRIPTOR_st:
-        return sizeof(cydriver.CUDA_ARRAY_DESCRIPTOR_st){{endif}}
-    {{if 'CUDA_ARRAY_DESCRIPTOR_v2' in found_types}}
-    if objType == CUDA_ARRAY_DESCRIPTOR_v2:
-        return sizeof(cydriver.CUDA_ARRAY_DESCRIPTOR_v2){{endif}}
-    {{if 'CUDA_ARRAY_DESCRIPTOR' in found_types}}
-    if objType == CUDA_ARRAY_DESCRIPTOR:
-        return sizeof(cydriver.CUDA_ARRAY_DESCRIPTOR){{endif}}
-    {{if 'struct CUDA_ARRAY3D_DESCRIPTOR_st' in found_types}}
-    if objType == CUDA_ARRAY3D_DESCRIPTOR_st:
-        return sizeof(cydriver.CUDA_ARRAY3D_DESCRIPTOR_st){{endif}}
-    {{if 'CUDA_ARRAY3D_DESCRIPTOR_v2' in found_types}}
-    if objType == CUDA_ARRAY3D_DESCRIPTOR_v2:
-        return sizeof(cydriver.CUDA_ARRAY3D_DESCRIPTOR_v2){{endif}}
-    {{if 'CUDA_ARRAY3D_DESCRIPTOR' in found_types}}
-    if objType == CUDA_ARRAY3D_DESCRIPTOR:
-        return sizeof(cydriver.CUDA_ARRAY3D_DESCRIPTOR){{endif}}
-    {{if 'struct CUDA_ARRAY_SPARSE_PROPERTIES_st' in found_types}}
-    if objType == CUDA_ARRAY_SPARSE_PROPERTIES_st:
-        return sizeof(cydriver.CUDA_ARRAY_SPARSE_PROPERTIES_st){{endif}}
-    {{if 'CUDA_ARRAY_SPARSE_PROPERTIES_v1' in found_types}}
-    if objType == CUDA_ARRAY_SPARSE_PROPERTIES_v1:
-        return sizeof(cydriver.CUDA_ARRAY_SPARSE_PROPERTIES_v1){{endif}}
-    {{if 'CUDA_ARRAY_SPARSE_PROPERTIES' in found_types}}
-    if objType == CUDA_ARRAY_SPARSE_PROPERTIES:
-        return sizeof(cydriver.CUDA_ARRAY_SPARSE_PROPERTIES){{endif}}
-    {{if 'struct CUDA_ARRAY_MEMORY_REQUIREMENTS_st' in found_types}}
-    if objType == CUDA_ARRAY_MEMORY_REQUIREMENTS_st:
-        return sizeof(cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS_st){{endif}}
-    {{if 'CUDA_ARRAY_MEMORY_REQUIREMENTS_v1' in found_types}}
-    if objType == CUDA_ARRAY_MEMORY_REQUIREMENTS_v1:
-        return sizeof(cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS_v1){{endif}}
-    {{if 'CUDA_ARRAY_MEMORY_REQUIREMENTS' in found_types}}
-    if objType == CUDA_ARRAY_MEMORY_REQUIREMENTS:
-        return sizeof(cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS){{endif}}
-    {{if 'struct CUDA_RESOURCE_DESC_st' in found_types}}
-    if objType == CUDA_RESOURCE_DESC_st:
-        return sizeof(cydriver.CUDA_RESOURCE_DESC_st){{endif}}
-    {{if 'CUDA_RESOURCE_DESC_v1' in found_types}}
-    if objType == CUDA_RESOURCE_DESC_v1:
-        return sizeof(cydriver.CUDA_RESOURCE_DESC_v1){{endif}}
-    {{if 'CUDA_RESOURCE_DESC' in found_types}}
-    if objType == CUDA_RESOURCE_DESC:
-        return sizeof(cydriver.CUDA_RESOURCE_DESC){{endif}}
-    {{if 'struct CUDA_TEXTURE_DESC_st' in found_types}}
-    if objType == CUDA_TEXTURE_DESC_st:
-        return sizeof(cydriver.CUDA_TEXTURE_DESC_st){{endif}}
-    {{if 'CUDA_TEXTURE_DESC_v1' in found_types}}
-    if objType == CUDA_TEXTURE_DESC_v1:
-        return sizeof(cydriver.CUDA_TEXTURE_DESC_v1){{endif}}
-    {{if 'CUDA_TEXTURE_DESC' in found_types}}
-    if objType == CUDA_TEXTURE_DESC:
-        return sizeof(cydriver.CUDA_TEXTURE_DESC){{endif}}
-    {{if 'struct CUDA_RESOURCE_VIEW_DESC_st' in found_types}}
-    if objType == CUDA_RESOURCE_VIEW_DESC_st:
-        return sizeof(cydriver.CUDA_RESOURCE_VIEW_DESC_st){{endif}}
-    {{if 'CUDA_RESOURCE_VIEW_DESC_v1' in found_types}}
-    if objType == CUDA_RESOURCE_VIEW_DESC_v1:
-        return sizeof(cydriver.CUDA_RESOURCE_VIEW_DESC_v1){{endif}}
-    {{if 'CUDA_RESOURCE_VIEW_DESC' in found_types}}
-    if objType == CUDA_RESOURCE_VIEW_DESC:
-        return sizeof(cydriver.CUDA_RESOURCE_VIEW_DESC){{endif}}
-    {{if 'struct CUtensorMap_st' in found_types}}
-    if objType == CUtensorMap_st:
-        return sizeof(cydriver.CUtensorMap_st){{endif}}
-    {{if 'CUtensorMap' in found_types}}
-    if objType == CUtensorMap:
-        return sizeof(cydriver.CUtensorMap){{endif}}
-    {{if 'struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st' in found_types}}
-    if objType == CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st:
-        return sizeof(cydriver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st){{endif}}
-    {{if 'CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1' in found_types}}
-    if objType == CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1:
-        return sizeof(cydriver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1){{endif}}
-    {{if 'CUDA_POINTER_ATTRIBUTE_P2P_TOKENS' in found_types}}
-    if objType == CUDA_POINTER_ATTRIBUTE_P2P_TOKENS:
-        return sizeof(cydriver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS){{endif}}
-    {{if 'struct CUDA_LAUNCH_PARAMS_st' in found_types}}
-    if objType == CUDA_LAUNCH_PARAMS_st:
-        return sizeof(cydriver.CUDA_LAUNCH_PARAMS_st){{endif}}
-    {{if 'CUDA_LAUNCH_PARAMS_v1' in found_types}}
-    if objType == CUDA_LAUNCH_PARAMS_v1:
-        return sizeof(cydriver.CUDA_LAUNCH_PARAMS_v1){{endif}}
-    {{if 'CUDA_LAUNCH_PARAMS' in found_types}}
-    if objType == CUDA_LAUNCH_PARAMS:
-        return sizeof(cydriver.CUDA_LAUNCH_PARAMS){{endif}}
-    {{if 'struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st' in found_types}}
-    if objType == CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st:
-        return sizeof(cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st){{endif}}
-    {{if 'CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1' in found_types}}
-    if objType == CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1:
-        return sizeof(cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1){{endif}}
-    {{if 'CUDA_EXTERNAL_MEMORY_HANDLE_DESC' in found_types}}
-    if objType == CUDA_EXTERNAL_MEMORY_HANDLE_DESC:
-        return sizeof(cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC){{endif}}
-    {{if 'struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st' in found_types}}
-    if objType == CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st:
-        return sizeof(cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st){{endif}}
-    {{if 'CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1' in found_types}}
-    if objType == CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1:
-        return sizeof(cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1){{endif}}
-    {{if 'CUDA_EXTERNAL_MEMORY_BUFFER_DESC' in found_types}}
-    if objType == CUDA_EXTERNAL_MEMORY_BUFFER_DESC:
-        return sizeof(cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC){{endif}}
-    {{if 'struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st' in found_types}}
-    if objType == CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st:
-        return sizeof(cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st){{endif}}
-    {{if 'CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1' in found_types}}
-    if objType == CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1:
-        return sizeof(cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1){{endif}}
-    {{if 'CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC' in found_types}}
-    if objType == CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC:
-        return sizeof(cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC){{endif}}
-    {{if 'struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st' in found_types}}
-    if objType == CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st:
-        return sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st){{endif}}
-    {{if 'CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1' in found_types}}
-    if objType == CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1:
-        return sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1){{endif}}
-    {{if 'CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC' in found_types}}
-    if objType == CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC:
-        return sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC){{endif}}
-    {{if 'struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st' in found_types}}
-    if objType == CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st:
-        return sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st){{endif}}
-    {{if 'CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1' in found_types}}
-    if objType == CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1:
-        return sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1){{endif}}
-    {{if 'CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS' in found_types}}
-    if objType == CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS:
-        return sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS){{endif}}
-    {{if 'struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st' in found_types}}
-    if objType == CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st:
-        return sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st){{endif}}
-    {{if 'CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1' in found_types}}
-    if objType == CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1:
-        return sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1){{endif}}
-    {{if 'CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS' in found_types}}
-    if objType == CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS:
-        return sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS){{endif}}
-    {{if 'struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st' in found_types}}
-    if objType == CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st:
-        return sizeof(cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st){{endif}}
-    {{if 'CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1' in found_types}}
-    if objType == CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1:
-        return sizeof(cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1){{endif}}
-    {{if 'CUDA_EXT_SEM_SIGNAL_NODE_PARAMS' in found_types}}
-    if objType == CUDA_EXT_SEM_SIGNAL_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS){{endif}}
-    {{if 'struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st' in found_types}}
-    if objType == CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st:
-        return sizeof(cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st){{endif}}
-    {{if 'CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2' in found_types}}
-    if objType == CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2:
-        return sizeof(cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2){{endif}}
-    {{if 'struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_st' in found_types}}
-    if objType == CUDA_EXT_SEM_WAIT_NODE_PARAMS_st:
-        return sizeof(cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_st){{endif}}
-    {{if 'CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1' in found_types}}
-    if objType == CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1:
-        return sizeof(cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1){{endif}}
-    {{if 'CUDA_EXT_SEM_WAIT_NODE_PARAMS' in found_types}}
-    if objType == CUDA_EXT_SEM_WAIT_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS){{endif}}
-    {{if 'struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st' in found_types}}
-    if objType == CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st:
-        return sizeof(cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st){{endif}}
-    {{if 'CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2' in found_types}}
-    if objType == CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2:
-        return sizeof(cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2){{endif}}
-    {{if 'CUmemGenericAllocationHandle_v1' in found_types}}
-    if objType == CUmemGenericAllocationHandle_v1:
-        return sizeof(cydriver.CUmemGenericAllocationHandle_v1){{endif}}
-    {{if 'CUmemGenericAllocationHandle' in found_types}}
-    if objType == CUmemGenericAllocationHandle:
-        return sizeof(cydriver.CUmemGenericAllocationHandle){{endif}}
-    {{if 'struct CUarrayMapInfo_st' in found_types}}
-    if objType == CUarrayMapInfo_st:
-        return sizeof(cydriver.CUarrayMapInfo_st){{endif}}
-    {{if 'CUarrayMapInfo_v1' in found_types}}
-    if objType == CUarrayMapInfo_v1:
-        return sizeof(cydriver.CUarrayMapInfo_v1){{endif}}
-    {{if 'CUarrayMapInfo' in found_types}}
-    if objType == CUarrayMapInfo:
-        return sizeof(cydriver.CUarrayMapInfo){{endif}}
-    {{if 'struct CUmemLocation_st' in found_types}}
-    if objType == CUmemLocation_st:
-        return sizeof(cydriver.CUmemLocation_st){{endif}}
-    {{if 'CUmemLocation_v1' in found_types}}
-    if objType == CUmemLocation_v1:
-        return sizeof(cydriver.CUmemLocation_v1){{endif}}
-    {{if 'CUmemLocation' in found_types}}
-    if objType == CUmemLocation:
-        return sizeof(cydriver.CUmemLocation){{endif}}
-    {{if 'struct CUmemAllocationProp_st' in found_types}}
-    if objType == CUmemAllocationProp_st:
-        return sizeof(cydriver.CUmemAllocationProp_st){{endif}}
-    {{if 'CUmemAllocationProp_v1' in found_types}}
-    if objType == CUmemAllocationProp_v1:
-        return sizeof(cydriver.CUmemAllocationProp_v1){{endif}}
-    {{if 'CUmemAllocationProp' in found_types}}
-    if objType == CUmemAllocationProp:
-        return sizeof(cydriver.CUmemAllocationProp){{endif}}
-    {{if 'struct CUmulticastObjectProp_st' in found_types}}
-    if objType == CUmulticastObjectProp_st:
-        return sizeof(cydriver.CUmulticastObjectProp_st){{endif}}
-    {{if 'CUmulticastObjectProp_v1' in found_types}}
-    if objType == CUmulticastObjectProp_v1:
-        return sizeof(cydriver.CUmulticastObjectProp_v1){{endif}}
-    {{if 'CUmulticastObjectProp' in found_types}}
-    if objType == CUmulticastObjectProp:
-        return sizeof(cydriver.CUmulticastObjectProp){{endif}}
-    {{if 'struct CUmemAccessDesc_st' in found_types}}
-    if objType == CUmemAccessDesc_st:
-        return sizeof(cydriver.CUmemAccessDesc_st){{endif}}
-    {{if 'CUmemAccessDesc_v1' in found_types}}
-    if objType == CUmemAccessDesc_v1:
-        return sizeof(cydriver.CUmemAccessDesc_v1){{endif}}
-    {{if 'CUmemAccessDesc' in found_types}}
-    if objType == CUmemAccessDesc:
-        return sizeof(cydriver.CUmemAccessDesc){{endif}}
-    {{if 'struct CUgraphExecUpdateResultInfo_st' in found_types}}
-    if objType == CUgraphExecUpdateResultInfo_st:
-        return sizeof(cydriver.CUgraphExecUpdateResultInfo_st){{endif}}
-    {{if 'CUgraphExecUpdateResultInfo_v1' in found_types}}
-    if objType == CUgraphExecUpdateResultInfo_v1:
-        return sizeof(cydriver.CUgraphExecUpdateResultInfo_v1){{endif}}
-    {{if 'CUgraphExecUpdateResultInfo' in found_types}}
-    if objType == CUgraphExecUpdateResultInfo:
-        return sizeof(cydriver.CUgraphExecUpdateResultInfo){{endif}}
-    {{if 'struct CUmemPoolProps_st' in found_types}}
-    if objType == CUmemPoolProps_st:
-        return sizeof(cydriver.CUmemPoolProps_st){{endif}}
-    {{if 'CUmemPoolProps_v1' in found_types}}
-    if objType == CUmemPoolProps_v1:
-        return sizeof(cydriver.CUmemPoolProps_v1){{endif}}
-    {{if 'CUmemPoolProps' in found_types}}
-    if objType == CUmemPoolProps:
-        return sizeof(cydriver.CUmemPoolProps){{endif}}
-    {{if 'struct CUmemPoolPtrExportData_st' in found_types}}
-    if objType == CUmemPoolPtrExportData_st:
-        return sizeof(cydriver.CUmemPoolPtrExportData_st){{endif}}
-    {{if 'CUmemPoolPtrExportData_v1' in found_types}}
-    if objType == CUmemPoolPtrExportData_v1:
-        return sizeof(cydriver.CUmemPoolPtrExportData_v1){{endif}}
-    {{if 'CUmemPoolPtrExportData' in found_types}}
-    if objType == CUmemPoolPtrExportData:
-        return sizeof(cydriver.CUmemPoolPtrExportData){{endif}}
-    {{if 'struct CUDA_MEM_ALLOC_NODE_PARAMS_v1_st' in found_types}}
-    if objType == CUDA_MEM_ALLOC_NODE_PARAMS_v1_st:
-        return sizeof(cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v1_st){{endif}}
-    {{if 'CUDA_MEM_ALLOC_NODE_PARAMS_v1' in found_types}}
-    if objType == CUDA_MEM_ALLOC_NODE_PARAMS_v1:
-        return sizeof(cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v1){{endif}}
-    {{if 'CUDA_MEM_ALLOC_NODE_PARAMS' in found_types}}
-    if objType == CUDA_MEM_ALLOC_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_MEM_ALLOC_NODE_PARAMS){{endif}}
-    {{if 'struct CUDA_MEM_ALLOC_NODE_PARAMS_v2_st' in found_types}}
-    if objType == CUDA_MEM_ALLOC_NODE_PARAMS_v2_st:
-        return sizeof(cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v2_st){{endif}}
-    {{if 'CUDA_MEM_ALLOC_NODE_PARAMS_v2' in found_types}}
-    if objType == CUDA_MEM_ALLOC_NODE_PARAMS_v2:
-        return sizeof(cydriver.CUDA_MEM_ALLOC_NODE_PARAMS_v2){{endif}}
-    {{if 'struct CUDA_MEM_FREE_NODE_PARAMS_st' in found_types}}
-    if objType == CUDA_MEM_FREE_NODE_PARAMS_st:
-        return sizeof(cydriver.CUDA_MEM_FREE_NODE_PARAMS_st){{endif}}
-    {{if 'CUDA_MEM_FREE_NODE_PARAMS' in found_types}}
-    if objType == CUDA_MEM_FREE_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_MEM_FREE_NODE_PARAMS){{endif}}
-    {{if 'struct CUDA_CHILD_GRAPH_NODE_PARAMS_st' in found_types}}
-    if objType == CUDA_CHILD_GRAPH_NODE_PARAMS_st:
-        return sizeof(cydriver.CUDA_CHILD_GRAPH_NODE_PARAMS_st){{endif}}
-    {{if 'CUDA_CHILD_GRAPH_NODE_PARAMS' in found_types}}
-    if objType == CUDA_CHILD_GRAPH_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_CHILD_GRAPH_NODE_PARAMS){{endif}}
-    {{if 'struct CUDA_EVENT_RECORD_NODE_PARAMS_st' in found_types}}
-    if objType == CUDA_EVENT_RECORD_NODE_PARAMS_st:
-        return sizeof(cydriver.CUDA_EVENT_RECORD_NODE_PARAMS_st){{endif}}
-    {{if 'CUDA_EVENT_RECORD_NODE_PARAMS' in found_types}}
-    if objType == CUDA_EVENT_RECORD_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_EVENT_RECORD_NODE_PARAMS){{endif}}
-    {{if 'struct CUDA_EVENT_WAIT_NODE_PARAMS_st' in found_types}}
-    if objType == CUDA_EVENT_WAIT_NODE_PARAMS_st:
-        return sizeof(cydriver.CUDA_EVENT_WAIT_NODE_PARAMS_st){{endif}}
-    {{if 'CUDA_EVENT_WAIT_NODE_PARAMS' in found_types}}
-    if objType == CUDA_EVENT_WAIT_NODE_PARAMS:
-        return sizeof(cydriver.CUDA_EVENT_WAIT_NODE_PARAMS){{endif}}
-    {{if 'struct CUgraphNodeParams_st' in found_types}}
-    if objType == CUgraphNodeParams_st:
-        return sizeof(cydriver.CUgraphNodeParams_st){{endif}}
-    {{if 'CUgraphNodeParams' in found_types}}
-    if objType == CUgraphNodeParams:
-        return sizeof(cydriver.CUgraphNodeParams){{endif}}
-    {{if 'CUdevResourceDesc' in found_types}}
-    if objType == CUdevResourceDesc:
-        return sizeof(cydriver.CUdevResourceDesc){{endif}}
-    {{if 'struct CUdevSmResource_st' in found_types}}
-    if objType == CUdevSmResource_st:
-        return sizeof(cydriver.CUdevSmResource_st){{endif}}
-    {{if 'CUdevSmResource' in found_types}}
-    if objType == CUdevSmResource:
-        return sizeof(cydriver.CUdevSmResource){{endif}}
-    {{if 'struct CUdevResource_st' in found_types}}
-    if objType == CUdevResource_st:
-        return sizeof(cydriver.CUdevResource_st){{endif}}
-    {{if 'struct CUdevResource_st' in found_types}}
-    if objType == CUdevResource_v1:
-        return sizeof(cydriver.CUdevResource_v1){{endif}}
-    {{if 'struct CUdevResource_st' in found_types}}
-    if objType == CUdevResource:
-        return sizeof(cydriver.CUdevResource){{endif}}
-    {{if True}}
-    if objType == CUeglFrame_st:
-        return sizeof(cydriver.CUeglFrame_st){{endif}}
-    {{if True}}
-    if objType == CUeglFrame_v1:
-        return sizeof(cydriver.CUeglFrame_v1){{endif}}
-    {{if True}}
-    if objType == CUeglFrame:
-        return sizeof(cydriver.CUeglFrame){{endif}}
-    {{if True}}
-    if objType == CUeglStreamConnection:
-        return sizeof(cydriver.CUeglStreamConnection){{endif}}
-    {{if True}}
-    if objType == GLenum:
-        return sizeof(cydriver.GLenum){{endif}}
-    {{if True}}
-    if objType == GLuint:
-        return sizeof(cydriver.GLuint){{endif}}
-    {{if True}}
-    if objType == EGLImageKHR:
-        return sizeof(cydriver.EGLImageKHR){{endif}}
-    {{if True}}
-    if objType == EGLStreamKHR:
-        return sizeof(cydriver.EGLStreamKHR){{endif}}
-    {{if True}}
-    if objType == EGLint:
-        return sizeof(cydriver.EGLint){{endif}}
-    {{if True}}
-    if objType == EGLSyncKHR:
-        return sizeof(cydriver.EGLSyncKHR){{endif}}
-    {{if True}}
-    if objType == VdpDevice:
-        return sizeof(cydriver.VdpDevice){{endif}}
-    {{if True}}
-    if objType == VdpGetProcAddress:
-        return sizeof(cydriver.VdpGetProcAddress){{endif}}
-    {{if True}}
-    if objType == VdpVideoSurface:
-        return sizeof(cydriver.VdpVideoSurface){{endif}}
-    {{if True}}
-    if objType == VdpOutputSurface:
-        return sizeof(cydriver.VdpOutputSurface){{endif}}
-    raise TypeError("Unknown type: " + str(objType))
diff --git a/cuda_bindings/cuda/bindings/nvrtc.pxd.in b/cuda_bindings/cuda/bindings/nvrtc.pxd.in
deleted file mode 100644
index fdd23e2f..00000000
--- a/cuda_bindings/cuda/bindings/nvrtc.pxd.in
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-cimport cuda.bindings.cynvrtc as cynvrtc
-cimport cuda.bindings._lib.utils as utils
-
-{{if 'nvrtcProgram' in found_types}}
-
-cdef class nvrtcProgram:
-    """ nvrtcProgram is the unit of compilation, and an opaque handle for a program.
-
-    To compile a CUDA program string, an instance of nvrtcProgram must be created first with nvrtcCreateProgram, then compiled with nvrtcCompileProgram.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cynvrtc.nvrtcProgram  __val
-    cdef cynvrtc.nvrtcProgram* _ptr
-{{endif}}
diff --git a/cuda_bindings/cuda/bindings/nvrtc.pyx.in b/cuda_bindings/cuda/bindings/nvrtc.pyx.in
deleted file mode 100644
index 73e2adeb..00000000
--- a/cuda_bindings/cuda/bindings/nvrtc.pyx.in
+++ /dev/null
@@ -1,882 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from typing import List, Tuple, Any, Optional
-from enum import IntEnum
-import cython
-import ctypes
-from libc.stdlib cimport calloc, free
-from libc cimport string
-from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t
-from libc.stddef cimport wchar_t
-from libc.limits cimport CHAR_MIN
-from libcpp.vector cimport vector
-from cpython.buffer cimport PyObject_CheckBuffer, PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS
-from cpython.bytes cimport PyBytes_FromStringAndSize
-
-ctypedef unsigned long long signed_char_ptr
-ctypedef unsigned long long unsigned_char_ptr
-ctypedef unsigned long long char_ptr
-ctypedef unsigned long long short_ptr
-ctypedef unsigned long long unsigned_short_ptr
-ctypedef unsigned long long int_ptr
-ctypedef unsigned long long long_int_ptr
-ctypedef unsigned long long long_long_int_ptr
-ctypedef unsigned long long unsigned_int_ptr
-ctypedef unsigned long long unsigned_long_int_ptr
-ctypedef unsigned long long unsigned_long_long_int_ptr
-ctypedef unsigned long long uint32_t_ptr
-ctypedef unsigned long long uint64_t_ptr
-ctypedef unsigned long long int32_t_ptr
-ctypedef unsigned long long int64_t_ptr
-ctypedef unsigned long long unsigned_ptr
-ctypedef unsigned long long unsigned_long_long_ptr
-ctypedef unsigned long long long_long_ptr
-ctypedef unsigned long long size_t_ptr
-ctypedef unsigned long long float_ptr
-ctypedef unsigned long long double_ptr
-ctypedef unsigned long long void_ptr
-
-
-{{if 'nvrtcResult' in found_types}}
-
-class nvrtcResult(IntEnum):
-    """
-    The enumerated type nvrtcResult defines API call result codes.
-    NVRTC API functions return nvrtcResult to indicate the call result.
-    """
-    {{if 'NVRTC_SUCCESS' in found_values}}
-    NVRTC_SUCCESS = cynvrtc.nvrtcResult.NVRTC_SUCCESS{{endif}}
-    {{if 'NVRTC_ERROR_OUT_OF_MEMORY' in found_values}}
-    NVRTC_ERROR_OUT_OF_MEMORY = cynvrtc.nvrtcResult.NVRTC_ERROR_OUT_OF_MEMORY{{endif}}
-    {{if 'NVRTC_ERROR_PROGRAM_CREATION_FAILURE' in found_values}}
-    NVRTC_ERROR_PROGRAM_CREATION_FAILURE = cynvrtc.nvrtcResult.NVRTC_ERROR_PROGRAM_CREATION_FAILURE{{endif}}
-    {{if 'NVRTC_ERROR_INVALID_INPUT' in found_values}}
-    NVRTC_ERROR_INVALID_INPUT = cynvrtc.nvrtcResult.NVRTC_ERROR_INVALID_INPUT{{endif}}
-    {{if 'NVRTC_ERROR_INVALID_PROGRAM' in found_values}}
-    NVRTC_ERROR_INVALID_PROGRAM = cynvrtc.nvrtcResult.NVRTC_ERROR_INVALID_PROGRAM{{endif}}
-    {{if 'NVRTC_ERROR_INVALID_OPTION' in found_values}}
-    NVRTC_ERROR_INVALID_OPTION = cynvrtc.nvrtcResult.NVRTC_ERROR_INVALID_OPTION{{endif}}
-    {{if 'NVRTC_ERROR_COMPILATION' in found_values}}
-    NVRTC_ERROR_COMPILATION = cynvrtc.nvrtcResult.NVRTC_ERROR_COMPILATION{{endif}}
-    {{if 'NVRTC_ERROR_BUILTIN_OPERATION_FAILURE' in found_values}}
-    NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = cynvrtc.nvrtcResult.NVRTC_ERROR_BUILTIN_OPERATION_FAILURE{{endif}}
-    {{if 'NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION' in found_values}}
-    NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = cynvrtc.nvrtcResult.NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION{{endif}}
-    {{if 'NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION' in found_values}}
-    NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = cynvrtc.nvrtcResult.NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION{{endif}}
-    {{if 'NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID' in found_values}}
-    NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = cynvrtc.nvrtcResult.NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID{{endif}}
-    {{if 'NVRTC_ERROR_INTERNAL_ERROR' in found_values}}
-    NVRTC_ERROR_INTERNAL_ERROR = cynvrtc.nvrtcResult.NVRTC_ERROR_INTERNAL_ERROR{{endif}}
-    {{if 'NVRTC_ERROR_TIME_FILE_WRITE_FAILED' in found_values}}
-    NVRTC_ERROR_TIME_FILE_WRITE_FAILED = cynvrtc.nvrtcResult.NVRTC_ERROR_TIME_FILE_WRITE_FAILED{{endif}}
-{{endif}}
-{{if 'nvrtcProgram' in found_types}}
-
-cdef class nvrtcProgram:
-    """ nvrtcProgram is the unit of compilation, and an opaque handle for a program.
-
-    To compile a CUDA program string, an instance of nvrtcProgram must be created first with nvrtcCreateProgram, then compiled with nvrtcCompileProgram.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cynvrtc.nvrtcProgram>init_value
-        else:
-            self._ptr = <cynvrtc.nvrtcProgram *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<nvrtcProgram ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'nvrtcGetErrorString' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetErrorString(result not None : nvrtcResult):
-    """ nvrtcGetErrorString is a helper function that returns a string describing the given nvrtcResult code, e.g., NVRTC_SUCCESS to `"NVRTC_SUCCESS"`. For unrecognized enumeration values, it returns `"NVRTC_ERROR unknown"`.
-
-    Parameters
-    ----------
-    result : :py:obj:`~.nvrtcResult`
-        CUDA Runtime Compilation API result code.
-
-    Returns
-    -------
-    nvrtcResult.NVRTC_SUCCESS
-        nvrtcResult.NVRTC_SUCCESS
-    bytes
-        Message string for the given :py:obj:`~.nvrtcResult` code.
-    """
-    cdef cynvrtc.nvrtcResult cyresult = result.value
-    err = cynvrtc.nvrtcGetErrorString(cyresult)
-    return (nvrtcResult.NVRTC_SUCCESS, err)
-{{endif}}
-
-{{if 'nvrtcVersion' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcVersion():
-    """ nvrtcVersion sets the output parameters `major` and `minor` with the CUDA Runtime Compilation version number.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-    major : int
-        CUDA Runtime Compilation major version number.
-    minor : int
-        CUDA Runtime Compilation minor version number.
-    """
-    cdef int major = 0
-    cdef int minor = 0
-    err = cynvrtc.nvrtcVersion(&major, &minor)
-    return (nvrtcResult(err), major, minor)
-{{endif}}
-
-{{if 'nvrtcGetNumSupportedArchs' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetNumSupportedArchs():
-    """ nvrtcGetNumSupportedArchs sets the output parameter `numArchs` with the number of architectures supported by NVRTC. This can then be used to pass an array to :py:obj:`~.nvrtcGetSupportedArchs` to get the supported architectures.
-
-    see :py:obj:`~.nvrtcGetSupportedArchs`
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-    numArchs : int
-        number of supported architectures.
-    """
-    cdef int numArchs = 0
-    err = cynvrtc.nvrtcGetNumSupportedArchs(&numArchs)
-    return (nvrtcResult(err), numArchs)
-{{endif}}
-
-{{if 'nvrtcGetSupportedArchs' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetSupportedArchs():
-    """ nvrtcGetSupportedArchs populates the array passed via the output parameter `supportedArchs` with the architectures supported by NVRTC. The array is sorted in the ascending order. The size of the array to be passed can be determined using :py:obj:`~.nvrtcGetNumSupportedArchs`.
-
-    see :py:obj:`~.nvrtcGetNumSupportedArchs`
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-    supportedArchs : List[int]
-        sorted array of supported architectures.
-    """
-    cdef vector[int] supportedArchs
-    _, s = nvrtcGetNumSupportedArchs()
-    supportedArchs.resize(s)
-
-    err = cynvrtc.nvrtcGetSupportedArchs(supportedArchs.data())
-    return (nvrtcResult(err), supportedArchs)
-{{endif}}
-
-{{if 'nvrtcCreateProgram' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcCreateProgram(char* src, char* name, int numHeaders, headers : Optional[Tuple[bytes] | List[bytes]], includeNames : Optional[Tuple[bytes] | List[bytes]]):
-    """ nvrtcCreateProgram creates an instance of nvrtcProgram with the given input parameters, and sets the output parameter `prog` with it.
-
-    Parameters
-    ----------
-    src : bytes
-        CUDA program source.
-    name : bytes
-        CUDA program name.  `name` can be `NULL`; `"default_program"` is
-        used when `name` is `NULL` or "".
-    numHeaders : int
-        Number of headers used.  `numHeaders` must be greater than or equal
-        to 0.
-    headers : List[bytes]
-        Sources of the headers.  `headers` can be `NULL` when `numHeaders`
-        is 0.
-    includeNames : List[bytes]
-        Name of each header by which they can be included in the CUDA
-        program source.  `includeNames` can be `NULL` when `numHeaders` is
-        0. These headers must be included with the exact names specified
-        here.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_OUT_OF_MEMORY`
-        - :py:obj:`~.NVRTC_ERROR_PROGRAM_CREATION_FAILURE`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcDestroyProgram`
-    """
-    includeNames = [] if includeNames is None else includeNames
-    if not all(isinstance(_x, (bytes)) for _x in includeNames):
-        raise TypeError("Argument 'includeNames' is not instance of type (expected Tuple[bytes] or List[bytes]")
-    headers = [] if headers is None else headers
-    if not all(isinstance(_x, (bytes)) for _x in headers):
-        raise TypeError("Argument 'headers' is not instance of type (expected Tuple[bytes] or List[bytes]")
-    cdef nvrtcProgram prog = nvrtcProgram()
-    if numHeaders > len(headers): raise RuntimeError("List is too small: " + str(len(headers)) + " < " + str(numHeaders))
-    if numHeaders > len(includeNames): raise RuntimeError("List is too small: " + str(len(includeNames)) + " < " + str(numHeaders))
-    cdef vector[const char*] cyheaders = headers
-    cdef vector[const char*] cyincludeNames = includeNames
-    err = cynvrtc.nvrtcCreateProgram(<cynvrtc.nvrtcProgram*>prog._ptr, src, name, numHeaders, cyheaders.data(), cyincludeNames.data())
-    return (nvrtcResult(err), prog)
-{{endif}}
-
-{{if 'nvrtcDestroyProgram' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcDestroyProgram(prog):
-    """ nvrtcDestroyProgram destroys the given program.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcCreateProgram`
-    """
-    cdef cynvrtc.nvrtcProgram *cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram*><void_ptr>NULL
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = prog.getPtr()
-        cyprog = <cynvrtc.nvrtcProgram*><void_ptr>pprog
-    elif isinstance(prog, (int)):
-        cyprog = <cynvrtc.nvrtcProgram*><void_ptr>prog
-    else:
-        raise TypeError("Argument 'prog' is not instance of type (expected <class 'int, nvrtc.nvrtcProgram'>, found " + str(type(prog)))
-    err = cynvrtc.nvrtcDestroyProgram(cyprog)
-    return (nvrtcResult(err),)
-{{endif}}
-
-{{if 'nvrtcCompileProgram' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcCompileProgram(prog, int numOptions, options : Optional[Tuple[bytes] | List[bytes]]):
-    """ nvrtcCompileProgram compiles the given program.
-
-    It supports compile options listed in :py:obj:`~.Supported Compile
-    Options`.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-    numOptions : int
-        Number of compiler options passed.
-    options : List[bytes]
-        Compiler options in the form of C string array.  `options` can be
-        `NULL` when `numOptions` is 0.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_OUT_OF_MEMORY`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_OPTION`
-        - :py:obj:`~.NVRTC_ERROR_COMPILATION`
-        - :py:obj:`~.NVRTC_ERROR_BUILTIN_OPERATION_FAILURE`
-        - :py:obj:`~.NVRTC_ERROR_TIME_FILE_WRITE_FAILED`
-    """
-    options = [] if options is None else options
-    if not all(isinstance(_x, (bytes)) for _x in options):
-        raise TypeError("Argument 'options' is not instance of type (expected Tuple[bytes] or List[bytes]")
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions))
-    cdef vector[const char*] cyoptions = options
-    err = cynvrtc.nvrtcCompileProgram(cyprog, numOptions, cyoptions.data())
-    return (nvrtcResult(err),)
-{{endif}}
-
-{{if 'nvrtcGetPTXSize' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetPTXSize(prog):
-    """ nvrtcGetPTXSize sets the value of `ptxSizeRet` with the size of the PTX generated by the previous compilation of `prog` (including the trailing `NULL`).
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-    ptxSizeRet : int
-        Size of the generated PTX (including the trailing `NULL`).
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcGetPTX`
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    cdef size_t ptxSizeRet = 0
-    err = cynvrtc.nvrtcGetPTXSize(cyprog, &ptxSizeRet)
-    return (nvrtcResult(err), ptxSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetPTX' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetPTX(prog, char* ptx):
-    """ nvrtcGetPTX stores the PTX generated by the previous compilation of `prog` in the memory pointed by `ptx`.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-    ptx : bytes
-        Compiled result.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcGetPTXSize`
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    err = cynvrtc.nvrtcGetPTX(cyprog, ptx)
-    return (nvrtcResult(err),)
-{{endif}}
-
-{{if 'nvrtcGetCUBINSize' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetCUBINSize(prog):
-    """ nvrtcGetCUBINSize sets the value of `cubinSizeRet` with the size of the cubin generated by the previous compilation of `prog`. The value of cubinSizeRet is set to 0 if the value specified to `-arch` is a virtual architecture instead of an actual architecture.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-    cubinSizeRet : int
-        Size of the generated cubin.
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcGetCUBIN`
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    cdef size_t cubinSizeRet = 0
-    err = cynvrtc.nvrtcGetCUBINSize(cyprog, &cubinSizeRet)
-    return (nvrtcResult(err), cubinSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetCUBIN' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetCUBIN(prog, char* cubin):
-    """ nvrtcGetCUBIN stores the cubin generated by the previous compilation of `prog` in the memory pointed by `cubin`. No cubin is available if the value specified to `-arch` is a virtual architecture instead of an actual architecture.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-    cubin : bytes
-        Compiled and assembled result.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcGetCUBINSize`
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    err = cynvrtc.nvrtcGetCUBIN(cyprog, cubin)
-    return (nvrtcResult(err),)
-{{endif}}
-
-{{if 'nvrtcGetNVVMSize' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetNVVMSize(prog):
-    """ DEPRECATION NOTICE: This function will be removed in a future release. Please use nvrtcGetLTOIRSize (and nvrtcGetLTOIR) instead.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        None
-
-    Returns
-    -------
-    nvrtcResult
-
-    nvvmSizeRet : int
-        None
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    cdef size_t nvvmSizeRet = 0
-    err = cynvrtc.nvrtcGetNVVMSize(cyprog, &nvvmSizeRet)
-    return (nvrtcResult(err), nvvmSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetNVVM' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetNVVM(prog, char* nvvm):
-    """ DEPRECATION NOTICE: This function will be removed in a future release. Please use nvrtcGetLTOIR (and nvrtcGetLTOIRSize) instead.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        None
-    nvvm : bytes
-        None
-
-    Returns
-    -------
-    nvrtcResult
-
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    err = cynvrtc.nvrtcGetNVVM(cyprog, nvvm)
-    return (nvrtcResult(err),)
-{{endif}}
-
-{{if 'nvrtcGetLTOIRSize' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetLTOIRSize(prog):
-    """ nvrtcGetLTOIRSize sets the value of `LTOIRSizeRet` with the size of the LTO IR generated by the previous compilation of `prog`. The value of LTOIRSizeRet is set to 0 if the program was not compiled with `-dlto`.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-    LTOIRSizeRet : int
-        Size of the generated LTO IR.
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcGetLTOIR`
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    cdef size_t LTOIRSizeRet = 0
-    err = cynvrtc.nvrtcGetLTOIRSize(cyprog, &LTOIRSizeRet)
-    return (nvrtcResult(err), LTOIRSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetLTOIR' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetLTOIR(prog, char* LTOIR):
-    """ nvrtcGetLTOIR stores the LTO IR generated by the previous compilation of `prog` in the memory pointed by `LTOIR`. No LTO IR is available if the program was compiled without `-dlto`.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-    LTOIR : bytes
-        Compiled result.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcGetLTOIRSize`
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    err = cynvrtc.nvrtcGetLTOIR(cyprog, LTOIR)
-    return (nvrtcResult(err),)
-{{endif}}
-
-{{if 'nvrtcGetOptiXIRSize' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetOptiXIRSize(prog):
-    """ nvrtcGetOptiXIRSize sets the value of `optixirSizeRet` with the size of the OptiX IR generated by the previous compilation of `prog`. The value of nvrtcGetOptiXIRSize is set to 0 if the program was compiled with options incompatible with OptiX IR generation.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-    optixirSizeRet : int
-        Size of the generated LTO IR.
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcGetOptiXIR`
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    cdef size_t optixirSizeRet = 0
-    err = cynvrtc.nvrtcGetOptiXIRSize(cyprog, &optixirSizeRet)
-    return (nvrtcResult(err), optixirSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetOptiXIR' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetOptiXIR(prog, char* optixir):
-    """ nvrtcGetOptiXIR stores the OptiX IR generated by the previous compilation of `prog` in the memory pointed by `optixir`. No OptiX IR is available if the program was compiled with options incompatible with OptiX IR generation.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-    Optix : bytes
-        IR Compiled result.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcGetOptiXIRSize`
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    err = cynvrtc.nvrtcGetOptiXIR(cyprog, optixir)
-    return (nvrtcResult(err),)
-{{endif}}
-
-{{if 'nvrtcGetProgramLogSize' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetProgramLogSize(prog):
-    """ nvrtcGetProgramLogSize sets `logSizeRet` with the size of the log generated by the previous compilation of `prog` (including the trailing `NULL`).
-
-    Note that compilation log may be generated with warnings and
-    informative messages, even when the compilation of `prog` succeeds.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-    logSizeRet : int
-        Size of the compilation log (including the trailing `NULL`).
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcGetProgramLog`
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    cdef size_t logSizeRet = 0
-    err = cynvrtc.nvrtcGetProgramLogSize(cyprog, &logSizeRet)
-    return (nvrtcResult(err), logSizeRet)
-{{endif}}
-
-{{if 'nvrtcGetProgramLog' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetProgramLog(prog, char* log):
-    """ nvrtcGetProgramLog stores the log generated by the previous compilation of `prog` in the memory pointed by `log`.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-    log : bytes
-        Compilation log.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcGetProgramLogSize`
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    err = cynvrtc.nvrtcGetProgramLog(cyprog, log)
-    return (nvrtcResult(err),)
-{{endif}}
-
-{{if 'nvrtcAddNameExpression' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcAddNameExpression(prog, char* name_expression):
-    """ nvrtcAddNameExpression notes the given name expression denoting the address of a global function or device/__constant__ variable.
-
-    The identical name expression string must be provided on a subsequent
-    call to nvrtcGetLoweredName to extract the lowered name.
-
-    Parameters
-    ----------
-    prog : :py:obj:`~.nvrtcProgram`
-        CUDA Runtime Compilation program.
-    name_expression : bytes
-        constant expression denoting the address of a global function or
-        device/__constant__ variable.
-
-    Returns
-    -------
-    nvrtcResult
-        - :py:obj:`~.NVRTC_SUCCESS`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM`
-        - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT`
-        - :py:obj:`~.NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION`
-
-    See Also
-    --------
-    :py:obj:`~.nvrtcGetLoweredName`
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    err = cynvrtc.nvrtcAddNameExpression(cyprog, name_expression)
-    return (nvrtcResult(err),)
-{{endif}}
-
-{{if 'nvrtcGetLoweredName' in found_functions}}
-
-@cython.embedsignature(True)
-def nvrtcGetLoweredName(prog, char* name_expression):
-    """ nvrtcGetLoweredName extracts the lowered (mangled) name for a global function or device/__constant__ variable, and updates lowered_name to point to it. The memory containing the name is released when the NVRTC program is destroyed by nvrtcDestroyProgram. The identical name expression must have been previously provided to nvrtcAddNameExpression.
-
-    Parameters
-    ----------
-    prog : nvrtcProgram
-        CUDA Runtime Compilation program.
-    name_expression : bytes
-        constant expression denoting the address of a global function or
-        device/__constant__ variable.
-
-    Returns
-    -------
-    nvrtcResult
-        NVRTC_SUCCESS
-        NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION
-        NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID
-    lowered_name : bytes
-        initialized by the function to point to a C string containing the
-        lowered (mangled) name corresponding to the provided name
-        expression.
-
-    See Also
-    --------
-    nvrtcAddNameExpression
-    """
-    cdef cynvrtc.nvrtcProgram cyprog
-    if prog is None:
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>0
-    elif isinstance(prog, (nvrtcProgram,)):
-        pprog = int(prog)
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    else:
-        pprog = int(nvrtcProgram(prog))
-        cyprog = <cynvrtc.nvrtcProgram><void_ptr>pprog
-    cdef const char* lowered_name = NULL
-    err = cynvrtc.nvrtcGetLoweredName(cyprog, name_expression, &lowered_name)
-    return (nvrtcResult(err), <bytes>lowered_name)
-{{endif}}
-
-@cython.embedsignature(True)
-def sizeof(objType):
-    """ Returns the size of provided CUDA Python structure in bytes
-
-    Parameters
-    ----------
-    objType : Any
-        CUDA Python object
-
-    Returns
-    -------
-    lowered_name : int
-        The size of `objType` in bytes
-    """
-    {{if 'nvrtcProgram' in found_types}}
-    if objType == nvrtcProgram:
-        return sizeof(cynvrtc.nvrtcProgram){{endif}}
-    raise TypeError("Unknown type: " + str(objType))
diff --git a/cuda_bindings/cuda/bindings/runtime.pxd.in b/cuda_bindings/cuda/bindings/runtime.pxd.in
deleted file mode 100644
index a3727e18..00000000
--- a/cuda_bindings/cuda/bindings/runtime.pxd.in
+++ /dev/null
@@ -1,3645 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-cimport cuda.bindings.cyruntime as cyruntime
-cimport cuda.bindings._lib.utils as utils
-cimport cuda.bindings.driver as driver
-
-{{if 'cudaArray_t' in found_types}}
-
-cdef class cudaArray_t:
-    """
-
-    CUDA array
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaArray_t  __val
-    cdef cyruntime.cudaArray_t* _ptr
-{{endif}}
-
-{{if 'cudaArray_const_t' in found_types}}
-
-cdef class cudaArray_const_t:
-    """
-
-    CUDA array (as source copy argument)
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaArray_const_t  __val
-    cdef cyruntime.cudaArray_const_t* _ptr
-{{endif}}
-
-{{if 'cudaMipmappedArray_t' in found_types}}
-
-cdef class cudaMipmappedArray_t:
-    """
-
-    CUDA mipmapped array
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMipmappedArray_t  __val
-    cdef cyruntime.cudaMipmappedArray_t* _ptr
-{{endif}}
-
-{{if 'cudaMipmappedArray_const_t' in found_types}}
-
-cdef class cudaMipmappedArray_const_t:
-    """
-
-    CUDA mipmapped array (as source argument)
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMipmappedArray_const_t  __val
-    cdef cyruntime.cudaMipmappedArray_const_t* _ptr
-{{endif}}
-
-{{if 'cudaGraphicsResource_t' in found_types}}
-
-cdef class cudaGraphicsResource_t:
-    """
-
-    CUDA graphics resource types
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaGraphicsResource_t  __val
-    cdef cyruntime.cudaGraphicsResource_t* _ptr
-{{endif}}
-
-{{if 'cudaExternalMemory_t' in found_types}}
-
-cdef class cudaExternalMemory_t:
-    """
-
-    CUDA external memory
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalMemory_t  __val
-    cdef cyruntime.cudaExternalMemory_t* _ptr
-{{endif}}
-
-{{if 'cudaExternalSemaphore_t' in found_types}}
-
-cdef class cudaExternalSemaphore_t:
-    """
-
-    CUDA external semaphore
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphore_t  __val
-    cdef cyruntime.cudaExternalSemaphore_t* _ptr
-{{endif}}
-
-{{if 'cudaKernel_t' in found_types}}
-
-cdef class cudaKernel_t:
-    """
-
-    CUDA kernel
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaKernel_t  __val
-    cdef cyruntime.cudaKernel_t* _ptr
-{{endif}}
-
-{{if 'cudaGraphDeviceNode_t' in found_types}}
-
-cdef class cudaGraphDeviceNode_t:
-    """
-
-    CUDA device node handle for device-side node update
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaGraphDeviceNode_t  __val
-    cdef cyruntime.cudaGraphDeviceNode_t* _ptr
-{{endif}}
-
-{{if 'cudaAsyncCallbackHandle_t' in found_types}}
-
-cdef class cudaAsyncCallbackHandle_t:
-    """
-
-    CUDA async callback handle
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaAsyncCallbackHandle_t  __val
-    cdef cyruntime.cudaAsyncCallbackHandle_t* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLImageKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.EGLImageKHR  __val
-    cdef cyruntime.EGLImageKHR* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLStreamKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.EGLStreamKHR  __val
-    cdef cyruntime.EGLStreamKHR* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLSyncKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.EGLSyncKHR  __val
-    cdef cyruntime.EGLSyncKHR* _ptr
-{{endif}}
-
-{{if 'cudaHostFn_t' in found_types}}
-
-cdef class cudaHostFn_t:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaHostFn_t  __val
-    cdef cyruntime.cudaHostFn_t* _ptr
-{{endif}}
-
-{{if 'cudaAsyncCallback' in found_types}}
-
-cdef class cudaAsyncCallback:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaAsyncCallback  __val
-    cdef cyruntime.cudaAsyncCallback* _ptr
-{{endif}}
-
-{{if 'cudaStreamCallback_t' in found_types}}
-
-cdef class cudaStreamCallback_t:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaStreamCallback_t  __val
-    cdef cyruntime.cudaStreamCallback_t* _ptr
-{{endif}}
-
-{{if 'struct dim3' in found_types}}
-
-cdef class dim3:
-    """
-    Attributes
-    ----------
-    x : unsigned int
-
-    y : unsigned int
-
-    z : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.dim3 __val
-    cdef cyruntime.dim3* _ptr
-{{endif}}
-{{if 'struct cudaChannelFormatDesc' in found_types}}
-
-cdef class cudaChannelFormatDesc:
-    """
-    CUDA Channel format descriptor
-
-    Attributes
-    ----------
-    x : int
-        x
-    y : int
-        y
-    z : int
-        z
-    w : int
-        w
-    f : cudaChannelFormatKind
-        Channel format kind
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaChannelFormatDesc __val
-    cdef cyruntime.cudaChannelFormatDesc* _ptr
-{{endif}}
-{{if 'struct cudaArraySparseProperties' in found_types}}
-
-cdef class anon_struct0:
-    """
-    Attributes
-    ----------
-    width : unsigned int
-
-    height : unsigned int
-
-    depth : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaArraySparseProperties* _ptr
-{{endif}}
-{{if 'struct cudaArraySparseProperties' in found_types}}
-
-cdef class cudaArraySparseProperties:
-    """
-    Sparse CUDA array and CUDA mipmapped array properties
-
-    Attributes
-    ----------
-    tileExtent : anon_struct0
-
-    miptailFirstLevel : unsigned int
-        First mip level at which the mip tail begins
-    miptailSize : unsigned long long
-        Total size of the mip tail.
-    flags : unsigned int
-        Flags will either be zero or cudaArraySparsePropertiesSingleMipTail
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaArraySparseProperties __val
-    cdef cyruntime.cudaArraySparseProperties* _ptr
-    cdef anon_struct0 _tileExtent
-{{endif}}
-{{if 'struct cudaArrayMemoryRequirements' in found_types}}
-
-cdef class cudaArrayMemoryRequirements:
-    """
-    CUDA array and CUDA mipmapped array memory requirements
-
-    Attributes
-    ----------
-    size : size_t
-        Total size of the array.
-    alignment : size_t
-        Alignment necessary for mapping the array.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaArrayMemoryRequirements __val
-    cdef cyruntime.cudaArrayMemoryRequirements* _ptr
-{{endif}}
-{{if 'struct cudaPitchedPtr' in found_types}}
-
-cdef class cudaPitchedPtr:
-    """
-    CUDA Pitched memory pointer  ::make_cudaPitchedPtr
-
-    Attributes
-    ----------
-    ptr : Any
-        Pointer to allocated memory
-    pitch : size_t
-        Pitch of allocated memory in bytes
-    xsize : size_t
-        Logical width of allocation in elements
-    ysize : size_t
-        Logical height of allocation in elements
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaPitchedPtr __val
-    cdef cyruntime.cudaPitchedPtr* _ptr
-{{endif}}
-{{if 'struct cudaExtent' in found_types}}
-
-cdef class cudaExtent:
-    """
-    CUDA extent  ::make_cudaExtent
-
-    Attributes
-    ----------
-    width : size_t
-        Width in elements when referring to array memory, in bytes when
-        referring to linear memory
-    height : size_t
-        Height in elements
-    depth : size_t
-        Depth in elements
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExtent __val
-    cdef cyruntime.cudaExtent* _ptr
-{{endif}}
-{{if 'struct cudaPos' in found_types}}
-
-cdef class cudaPos:
-    """
-    CUDA 3D position  ::make_cudaPos
-
-    Attributes
-    ----------
-    x : size_t
-        x
-    y : size_t
-        y
-    z : size_t
-        z
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaPos __val
-    cdef cyruntime.cudaPos* _ptr
-{{endif}}
-{{if 'struct cudaMemcpy3DParms' in found_types}}
-
-cdef class cudaMemcpy3DParms:
-    """
-    CUDA 3D memory copying parameters
-
-    Attributes
-    ----------
-    srcArray : cudaArray_t
-        Source memory address
-    srcPos : cudaPos
-        Source position offset
-    srcPtr : cudaPitchedPtr
-        Pitched source memory address
-    dstArray : cudaArray_t
-        Destination memory address
-    dstPos : cudaPos
-        Destination position offset
-    dstPtr : cudaPitchedPtr
-        Pitched destination memory address
-    extent : cudaExtent
-        Requested memory copy size
-    kind : cudaMemcpyKind
-        Type of transfer
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemcpy3DParms __val
-    cdef cyruntime.cudaMemcpy3DParms* _ptr
-    cdef cudaArray_t _srcArray
-    cdef cudaPos _srcPos
-    cdef cudaPitchedPtr _srcPtr
-    cdef cudaArray_t _dstArray
-    cdef cudaPos _dstPos
-    cdef cudaPitchedPtr _dstPtr
-    cdef cudaExtent _extent
-{{endif}}
-{{if 'struct cudaMemcpyNodeParams' in found_types}}
-
-cdef class cudaMemcpyNodeParams:
-    """
-    Memcpy node parameters
-
-    Attributes
-    ----------
-    flags : int
-        Must be zero
-    reserved : List[int]
-        Must be zero
-    copyParams : cudaMemcpy3DParms
-        Parameters for the memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemcpyNodeParams __val
-    cdef cyruntime.cudaMemcpyNodeParams* _ptr
-    cdef cudaMemcpy3DParms _copyParams
-{{endif}}
-{{if 'struct cudaMemcpy3DPeerParms' in found_types}}
-
-cdef class cudaMemcpy3DPeerParms:
-    """
-    CUDA 3D cross-device memory copying parameters
-
-    Attributes
-    ----------
-    srcArray : cudaArray_t
-        Source memory address
-    srcPos : cudaPos
-        Source position offset
-    srcPtr : cudaPitchedPtr
-        Pitched source memory address
-    srcDevice : int
-        Source device
-    dstArray : cudaArray_t
-        Destination memory address
-    dstPos : cudaPos
-        Destination position offset
-    dstPtr : cudaPitchedPtr
-        Pitched destination memory address
-    dstDevice : int
-        Destination device
-    extent : cudaExtent
-        Requested memory copy size
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemcpy3DPeerParms __val
-    cdef cyruntime.cudaMemcpy3DPeerParms* _ptr
-    cdef cudaArray_t _srcArray
-    cdef cudaPos _srcPos
-    cdef cudaPitchedPtr _srcPtr
-    cdef cudaArray_t _dstArray
-    cdef cudaPos _dstPos
-    cdef cudaPitchedPtr _dstPtr
-    cdef cudaExtent _extent
-{{endif}}
-{{if 'struct cudaMemsetParams' in found_types}}
-
-cdef class cudaMemsetParams:
-    """
-    CUDA Memset node parameters
-
-    Attributes
-    ----------
-    dst : Any
-        Destination device pointer
-    pitch : size_t
-        Pitch of destination device pointer. Unused if height is 1
-    value : unsigned int
-        Value to be set
-    elementSize : unsigned int
-        Size of each element in bytes. Must be 1, 2, or 4.
-    width : size_t
-        Width of the row in elements
-    height : size_t
-        Number of rows
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemsetParams __val
-    cdef cyruntime.cudaMemsetParams* _ptr
-{{endif}}
-{{if 'struct cudaMemsetParamsV2' in found_types}}
-
-cdef class cudaMemsetParamsV2:
-    """
-    CUDA Memset node parameters
-
-    Attributes
-    ----------
-    dst : Any
-        Destination device pointer
-    pitch : size_t
-        Pitch of destination device pointer. Unused if height is 1
-    value : unsigned int
-        Value to be set
-    elementSize : unsigned int
-        Size of each element in bytes. Must be 1, 2, or 4.
-    width : size_t
-        Width of the row in elements
-    height : size_t
-        Number of rows
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemsetParamsV2 __val
-    cdef cyruntime.cudaMemsetParamsV2* _ptr
-{{endif}}
-{{if 'struct cudaAccessPolicyWindow' in found_types}}
-
-cdef class cudaAccessPolicyWindow:
-    """
-    Specifies an access policy for a window, a contiguous extent of
-    memory beginning at base_ptr and ending at base_ptr + num_bytes.
-    Partition into many segments and assign segments such that. sum of
-    "hit segments" / window == approx. ratio. sum of "miss segments" /
-    window == approx 1-ratio. Segments and ratio specifications are
-    fitted to the capabilities of the architecture. Accesses in a hit
-    segment apply the hitProp access policy. Accesses in a miss segment
-    apply the missProp access policy.
-
-    Attributes
-    ----------
-    base_ptr : Any
-        Starting address of the access policy window. CUDA driver may align
-        it.
-    num_bytes : size_t
-        Size in bytes of the window policy. CUDA driver may restrict the
-        maximum size and alignment.
-    hitRatio : float
-        hitRatio specifies percentage of lines assigned hitProp, rest are
-        assigned missProp.
-    hitProp : cudaAccessProperty
-        ::CUaccessProperty set for hit.
-    missProp : cudaAccessProperty
-        ::CUaccessProperty set for miss. Must be either NORMAL or
-        STREAMING.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaAccessPolicyWindow __val
-    cdef cyruntime.cudaAccessPolicyWindow* _ptr
-{{endif}}
-{{if 'struct cudaHostNodeParams' in found_types}}
-
-cdef class cudaHostNodeParams:
-    """
-    CUDA host node parameters
-
-    Attributes
-    ----------
-    fn : cudaHostFn_t
-        The function to call when the node executes
-    userData : Any
-        Argument to pass to the function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaHostNodeParams __val
-    cdef cyruntime.cudaHostNodeParams* _ptr
-    cdef cudaHostFn_t _fn
-{{endif}}
-{{if 'struct cudaHostNodeParamsV2' in found_types}}
-
-cdef class cudaHostNodeParamsV2:
-    """
-    CUDA host node parameters
-
-    Attributes
-    ----------
-    fn : cudaHostFn_t
-        The function to call when the node executes
-    userData : Any
-        Argument to pass to the function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaHostNodeParamsV2 __val
-    cdef cyruntime.cudaHostNodeParamsV2* _ptr
-    cdef cudaHostFn_t _fn
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class anon_struct1:
-    """
-    Attributes
-    ----------
-    array : cudaArray_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaResourceDesc* _ptr
-    cdef cudaArray_t _array
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class anon_struct2:
-    """
-    Attributes
-    ----------
-    mipmap : cudaMipmappedArray_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaResourceDesc* _ptr
-    cdef cudaMipmappedArray_t _mipmap
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class anon_struct3:
-    """
-    Attributes
-    ----------
-    devPtr : Any
-
-    desc : cudaChannelFormatDesc
-
-    sizeInBytes : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaResourceDesc* _ptr
-    cdef cudaChannelFormatDesc _desc
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class anon_struct4:
-    """
-    Attributes
-    ----------
-    devPtr : Any
-
-    desc : cudaChannelFormatDesc
-
-    width : size_t
-
-    height : size_t
-
-    pitchInBytes : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaResourceDesc* _ptr
-    cdef cudaChannelFormatDesc _desc
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class anon_union0:
-    """
-    Attributes
-    ----------
-    array : anon_struct1
-
-    mipmap : anon_struct2
-
-    linear : anon_struct3
-
-    pitch2D : anon_struct4
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaResourceDesc* _ptr
-    cdef anon_struct1 _array
-    cdef anon_struct2 _mipmap
-    cdef anon_struct3 _linear
-    cdef anon_struct4 _pitch2D
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class cudaResourceDesc:
-    """
-    CUDA resource descriptor
-
-    Attributes
-    ----------
-    resType : cudaResourceType
-        Resource type
-    res : anon_union0
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaResourceDesc* _val_ptr
-    cdef cyruntime.cudaResourceDesc* _ptr
-    cdef anon_union0 _res
-{{endif}}
-{{if 'struct cudaResourceViewDesc' in found_types}}
-
-cdef class cudaResourceViewDesc:
-    """
-    CUDA resource view descriptor
-
-    Attributes
-    ----------
-    format : cudaResourceViewFormat
-        Resource view format
-    width : size_t
-        Width of the resource view
-    height : size_t
-        Height of the resource view
-    depth : size_t
-        Depth of the resource view
-    firstMipmapLevel : unsigned int
-        First defined mipmap level
-    lastMipmapLevel : unsigned int
-        Last defined mipmap level
-    firstLayer : unsigned int
-        First layer index
-    lastLayer : unsigned int
-        Last layer index
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaResourceViewDesc __val
-    cdef cyruntime.cudaResourceViewDesc* _ptr
-{{endif}}
-{{if 'struct cudaPointerAttributes' in found_types}}
-
-cdef class cudaPointerAttributes:
-    """
-    CUDA pointer attributes
-
-    Attributes
-    ----------
-    type : cudaMemoryType
-        The type of memory - cudaMemoryTypeUnregistered,
-        cudaMemoryTypeHost, cudaMemoryTypeDevice or cudaMemoryTypeManaged.
-    device : int
-        The device against which the memory was allocated or registered. If
-        the memory type is cudaMemoryTypeDevice then this identifies the
-        device on which the memory referred physically resides. If the
-        memory type is cudaMemoryTypeHost or::cudaMemoryTypeManaged then
-        this identifies the device which was current when the memory was
-        allocated or registered (and if that device is deinitialized then
-        this allocation will vanish with that device's state).
-    devicePointer : Any
-        The address which may be dereferenced on the current device to
-        access the memory or NULL if no such address exists.
-    hostPointer : Any
-        The address which may be dereferenced on the host to access the
-        memory or NULL if no such address exists.  CUDA doesn't check if
-        unregistered memory is allocated so this field may contain invalid
-        pointer if an invalid pointer has been passed to CUDA.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaPointerAttributes __val
-    cdef cyruntime.cudaPointerAttributes* _ptr
-{{endif}}
-{{if 'struct cudaFuncAttributes' in found_types}}
-
-cdef class cudaFuncAttributes:
-    """
-    CUDA function attributes
-
-    Attributes
-    ----------
-    sharedSizeBytes : size_t
-        The size in bytes of statically-allocated shared memory per block
-        required by this function. This does not include dynamically-
-        allocated shared memory requested by the user at runtime.
-    constSizeBytes : size_t
-        The size in bytes of user-allocated constant memory required by
-        this function.
-    localSizeBytes : size_t
-        The size in bytes of local memory used by each thread of this
-        function.
-    maxThreadsPerBlock : int
-        The maximum number of threads per block, beyond which a launch of
-        the function would fail. This number depends on both the function
-        and the device on which the function is currently loaded.
-    numRegs : int
-        The number of registers used by each thread of this function.
-    ptxVersion : int
-        The PTX virtual architecture version for which the function was
-        compiled. This value is the major PTX version * 10 + the minor PTX
-        version, so a PTX version 1.3 function would return the value 13.
-    binaryVersion : int
-        The binary architecture version for which the function was
-        compiled. This value is the major binary version * 10 + the minor
-        binary version, so a binary version 1.3 function would return the
-        value 13.
-    cacheModeCA : int
-        The attribute to indicate whether the function has been compiled
-        with user specified option "-Xptxas --dlcm=ca" set.
-    maxDynamicSharedSizeBytes : int
-        The maximum size in bytes of dynamic shared memory per block for
-        this function. Any launch must have a dynamic shared memory size
-        smaller than this value.
-    preferredShmemCarveout : int
-        On devices where the L1 cache and shared memory use the same
-        hardware resources, this sets the shared memory carveout
-        preference, in percent of the maximum shared memory. Refer to
-        cudaDevAttrMaxSharedMemoryPerMultiprocessor. This is only a hint,
-        and the driver can choose a different ratio if required to execute
-        the function. See cudaFuncSetAttribute
-    clusterDimMustBeSet : int
-        If this attribute is set, the kernel must launch with a valid
-        cluster dimension specified.
-    requiredClusterWidth : int
-        The required cluster width/height/depth in blocks. The values must
-        either all be 0 or all be positive. The validity of the cluster
-        dimensions is otherwise checked at launch time.  If the value is
-        set during compile time, it cannot be set at runtime. Setting it at
-        runtime should return cudaErrorNotPermitted. See
-        cudaFuncSetAttribute
-    requiredClusterHeight : int
-
-    requiredClusterDepth : int
-
-    clusterSchedulingPolicyPreference : int
-        The block scheduling policy of a function. See cudaFuncSetAttribute
-    nonPortableClusterSizeAllowed : int
-        Whether the function can be launched with non-portable cluster
-        size. 1 is allowed, 0 is disallowed. A non-portable cluster size
-        may only function on the specific SKUs the program is tested on.
-        The launch might fail if the program is run on a different hardware
-        platform.  CUDA API provides cudaOccupancyMaxActiveClusters to
-        assist with checking whether the desired size can be launched on
-        the current device.  Portable Cluster Size  A portable cluster size
-        is guaranteed to be functional on all compute capabilities higher
-        than the target compute capability. The portable cluster size for
-        sm_90 is 8 blocks per cluster. This value may increase for future
-        compute capabilities.  The specific hardware unit may support
-        higher cluster sizes that’s not guaranteed to be portable. See
-        cudaFuncSetAttribute
-    reserved : List[int]
-        Reserved for future use.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaFuncAttributes __val
-    cdef cyruntime.cudaFuncAttributes* _ptr
-{{endif}}
-{{if 'struct cudaMemLocation' in found_types}}
-
-cdef class cudaMemLocation:
-    """
-    Specifies a memory location.  To specify a gpu, set type =
-    cudaMemLocationTypeDevice and set id = the gpu's device ordinal. To
-    specify a cpu NUMA node, set type = cudaMemLocationTypeHostNuma and
-    set id = host NUMA node id.
-
-    Attributes
-    ----------
-    type : cudaMemLocationType
-        Specifies the location type, which modifies the meaning of id.
-    id : int
-        identifier for a given this location's ::CUmemLocationType.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemLocation __val
-    cdef cyruntime.cudaMemLocation* _ptr
-{{endif}}
-{{if 'struct cudaMemAccessDesc' in found_types}}
-
-cdef class cudaMemAccessDesc:
-    """
-    Memory access descriptor
-
-    Attributes
-    ----------
-    location : cudaMemLocation
-        Location on which the request is to change it's accessibility
-    flags : cudaMemAccessFlags
-        ::CUmemProt accessibility flags to set on the request
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemAccessDesc __val
-    cdef cyruntime.cudaMemAccessDesc* _ptr
-    cdef cudaMemLocation _location
-{{endif}}
-{{if 'struct cudaMemPoolProps' in found_types}}
-
-cdef class cudaMemPoolProps:
-    """
-    Specifies the properties of allocations made from the pool.
-
-    Attributes
-    ----------
-    allocType : cudaMemAllocationType
-        Allocation type. Currently must be specified as
-        cudaMemAllocationTypePinned
-    handleTypes : cudaMemAllocationHandleType
-        Handle types that will be supported by allocations from the pool.
-    location : cudaMemLocation
-        Location allocations should reside.
-    win32SecurityAttributes : Any
-        Windows-specific LPSECURITYATTRIBUTES required when
-        cudaMemHandleTypeWin32 is specified. This security attribute
-        defines the scope of which exported allocations may be tranferred
-        to other processes. In all other cases, this field is required to
-        be zero.
-    maxSize : size_t
-        Maximum pool size. When set to 0, defaults to a system dependent
-        value.
-    usage : unsigned short
-        Bitmask indicating intended usage for the pool.
-    reserved : bytes
-        reserved for future use, must be 0
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemPoolProps __val
-    cdef cyruntime.cudaMemPoolProps* _ptr
-    cdef cudaMemLocation _location
-{{endif}}
-{{if 'struct cudaMemPoolPtrExportData' in found_types}}
-
-cdef class cudaMemPoolPtrExportData:
-    """
-    Opaque data for exporting a pool allocation
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemPoolPtrExportData __val
-    cdef cyruntime.cudaMemPoolPtrExportData* _ptr
-{{endif}}
-{{if 'struct cudaMemAllocNodeParams' in found_types}}
-
-cdef class cudaMemAllocNodeParams:
-    """
-    Memory allocation node parameters
-
-    Attributes
-    ----------
-    poolProps : cudaMemPoolProps
-        in: location where the allocation should reside (specified in
-        ::location). ::handleTypes must be cudaMemHandleTypeNone. IPC is
-        not supported. in: array of memory access descriptors. Used to
-        describe peer GPU access
-    accessDescs : cudaMemAccessDesc
-        in: number of memory access descriptors. Must not exceed the number
-        of GPUs.
-    accessDescCount : size_t
-        in: Number of `accessDescs`s
-    bytesize : size_t
-        in: size in bytes of the requested allocation
-    dptr : Any
-        out: address of the allocation returned by CUDA
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemAllocNodeParams __val
-    cdef cyruntime.cudaMemAllocNodeParams* _ptr
-    cdef cudaMemPoolProps _poolProps
-    cdef size_t _accessDescs_length
-    cdef cyruntime.cudaMemAccessDesc* _accessDescs
-
-{{endif}}
-{{if 'struct cudaMemAllocNodeParamsV2' in found_types}}
-
-cdef class cudaMemAllocNodeParamsV2:
-    """
-    Memory allocation node parameters
-
-    Attributes
-    ----------
-    poolProps : cudaMemPoolProps
-        in: location where the allocation should reside (specified in
-        ::location). ::handleTypes must be cudaMemHandleTypeNone. IPC is
-        not supported. in: array of memory access descriptors. Used to
-        describe peer GPU access
-    accessDescs : cudaMemAccessDesc
-        in: number of memory access descriptors. Must not exceed the number
-        of GPUs.
-    accessDescCount : size_t
-        in: Number of `accessDescs`s
-    bytesize : size_t
-        in: size in bytes of the requested allocation
-    dptr : Any
-        out: address of the allocation returned by CUDA
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemAllocNodeParamsV2 __val
-    cdef cyruntime.cudaMemAllocNodeParamsV2* _ptr
-    cdef cudaMemPoolProps _poolProps
-    cdef size_t _accessDescs_length
-    cdef cyruntime.cudaMemAccessDesc* _accessDescs
-
-{{endif}}
-{{if 'struct cudaMemFreeNodeParams' in found_types}}
-
-cdef class cudaMemFreeNodeParams:
-    """
-    Memory free node parameters
-
-    Attributes
-    ----------
-    dptr : Any
-        in: the pointer to free
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemFreeNodeParams __val
-    cdef cyruntime.cudaMemFreeNodeParams* _ptr
-{{endif}}
-{{if 'struct CUuuid_st' in found_types}}
-
-cdef class CUuuid_st:
-    """
-    Attributes
-    ----------
-    bytes : bytes
-        < CUDA definition of UUID
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.CUuuid_st __val
-    cdef cyruntime.CUuuid_st* _ptr
-{{endif}}
-{{if 'struct cudaDeviceProp' in found_types}}
-
-cdef class cudaDeviceProp:
-    """
-    CUDA device properties
-
-    Attributes
-    ----------
-    name : bytes
-        ASCII string identifying device
-    uuid : cudaUUID_t
-        16-byte unique identifier
-    luid : bytes
-        8-byte locally unique identifier. Value is undefined on TCC and
-        non-Windows platforms
-    luidDeviceNodeMask : unsigned int
-        LUID device node mask. Value is undefined on TCC and non-Windows
-        platforms
-    totalGlobalMem : size_t
-        Global memory available on device in bytes
-    sharedMemPerBlock : size_t
-        Shared memory available per block in bytes
-    regsPerBlock : int
-        32-bit registers available per block
-    warpSize : int
-        Warp size in threads
-    memPitch : size_t
-        Maximum pitch in bytes allowed by memory copies
-    maxThreadsPerBlock : int
-        Maximum number of threads per block
-    maxThreadsDim : List[int]
-        Maximum size of each dimension of a block
-    maxGridSize : List[int]
-        Maximum size of each dimension of a grid
-    clockRate : int
-        Deprecated, Clock frequency in kilohertz
-    totalConstMem : size_t
-        Constant memory available on device in bytes
-    major : int
-        Major compute capability
-    minor : int
-        Minor compute capability
-    textureAlignment : size_t
-        Alignment requirement for textures
-    texturePitchAlignment : size_t
-        Pitch alignment requirement for texture references bound to pitched
-        memory
-    deviceOverlap : int
-        Device can concurrently copy memory and execute a kernel.
-        Deprecated. Use instead asyncEngineCount.
-    multiProcessorCount : int
-        Number of multiprocessors on device
-    kernelExecTimeoutEnabled : int
-        Deprecated, Specified whether there is a run time limit on kernels
-    integrated : int
-        Device is integrated as opposed to discrete
-    canMapHostMemory : int
-        Device can map host memory with
-        cudaHostAlloc/cudaHostGetDevicePointer
-    computeMode : int
-        Deprecated, Compute mode (See cudaComputeMode)
-    maxTexture1D : int
-        Maximum 1D texture size
-    maxTexture1DMipmap : int
-        Maximum 1D mipmapped texture size
-    maxTexture1DLinear : int
-        Deprecated, do not use. Use cudaDeviceGetTexture1DLinearMaxWidth()
-        or cuDeviceGetTexture1DLinearMaxWidth() instead.
-    maxTexture2D : List[int]
-        Maximum 2D texture dimensions
-    maxTexture2DMipmap : List[int]
-        Maximum 2D mipmapped texture dimensions
-    maxTexture2DLinear : List[int]
-        Maximum dimensions (width, height, pitch) for 2D textures bound to
-        pitched memory
-    maxTexture2DGather : List[int]
-        Maximum 2D texture dimensions if texture gather operations have to
-        be performed
-    maxTexture3D : List[int]
-        Maximum 3D texture dimensions
-    maxTexture3DAlt : List[int]
-        Maximum alternate 3D texture dimensions
-    maxTextureCubemap : int
-        Maximum Cubemap texture dimensions
-    maxTexture1DLayered : List[int]
-        Maximum 1D layered texture dimensions
-    maxTexture2DLayered : List[int]
-        Maximum 2D layered texture dimensions
-    maxTextureCubemapLayered : List[int]
-        Maximum Cubemap layered texture dimensions
-    maxSurface1D : int
-        Maximum 1D surface size
-    maxSurface2D : List[int]
-        Maximum 2D surface dimensions
-    maxSurface3D : List[int]
-        Maximum 3D surface dimensions
-    maxSurface1DLayered : List[int]
-        Maximum 1D layered surface dimensions
-    maxSurface2DLayered : List[int]
-        Maximum 2D layered surface dimensions
-    maxSurfaceCubemap : int
-        Maximum Cubemap surface dimensions
-    maxSurfaceCubemapLayered : List[int]
-        Maximum Cubemap layered surface dimensions
-    surfaceAlignment : size_t
-        Alignment requirements for surfaces
-    concurrentKernels : int
-        Device can possibly execute multiple kernels concurrently
-    ECCEnabled : int
-        Device has ECC support enabled
-    pciBusID : int
-        PCI bus ID of the device
-    pciDeviceID : int
-        PCI device ID of the device
-    pciDomainID : int
-        PCI domain ID of the device
-    tccDriver : int
-        1 if device is a Tesla device using TCC driver, 0 otherwise
-    asyncEngineCount : int
-        Number of asynchronous engines
-    unifiedAddressing : int
-        Device shares a unified address space with the host
-    memoryClockRate : int
-        Deprecated, Peak memory clock frequency in kilohertz
-    memoryBusWidth : int
-        Global memory bus width in bits
-    l2CacheSize : int
-        Size of L2 cache in bytes
-    persistingL2CacheMaxSize : int
-        Device's maximum l2 persisting lines capacity setting in bytes
-    maxThreadsPerMultiProcessor : int
-        Maximum resident threads per multiprocessor
-    streamPrioritiesSupported : int
-        Device supports stream priorities
-    globalL1CacheSupported : int
-        Device supports caching globals in L1
-    localL1CacheSupported : int
-        Device supports caching locals in L1
-    sharedMemPerMultiprocessor : size_t
-        Shared memory available per multiprocessor in bytes
-    regsPerMultiprocessor : int
-        32-bit registers available per multiprocessor
-    managedMemory : int
-        Device supports allocating managed memory on this system
-    isMultiGpuBoard : int
-        Device is on a multi-GPU board
-    multiGpuBoardGroupID : int
-        Unique identifier for a group of devices on the same multi-GPU
-        board
-    hostNativeAtomicSupported : int
-        Link between the device and the host supports native atomic
-        operations
-    singleToDoublePrecisionPerfRatio : int
-        Deprecated, Ratio of single precision performance (in floating-
-        point operations per second) to double precision performance
-    pageableMemoryAccess : int
-        Device supports coherently accessing pageable memory without
-        calling cudaHostRegister on it
-    concurrentManagedAccess : int
-        Device can coherently access managed memory concurrently with the
-        CPU
-    computePreemptionSupported : int
-        Device supports Compute Preemption
-    canUseHostPointerForRegisteredMem : int
-        Device can access host registered memory at the same virtual
-        address as the CPU
-    cooperativeLaunch : int
-        Device supports launching cooperative kernels via
-        cudaLaunchCooperativeKernel
-    cooperativeMultiDeviceLaunch : int
-        Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated.
-    sharedMemPerBlockOptin : size_t
-        Per device maximum shared memory per block usable by special opt in
-    pageableMemoryAccessUsesHostPageTables : int
-        Device accesses pageable memory via the host's page tables
-    directManagedMemAccessFromHost : int
-        Host can directly access managed memory on the device without
-        migration.
-    maxBlocksPerMultiProcessor : int
-        Maximum number of resident blocks per multiprocessor
-    accessPolicyMaxWindowSize : int
-        The maximum value of cudaAccessPolicyWindow::num_bytes.
-    reservedSharedMemPerBlock : size_t
-        Shared memory reserved by CUDA driver per block in bytes
-    hostRegisterSupported : int
-        Device supports host memory registration via cudaHostRegister.
-    sparseCudaArraySupported : int
-        1 if the device supports sparse CUDA arrays and sparse CUDA
-        mipmapped arrays, 0 otherwise
-    hostRegisterReadOnlySupported : int
-        Device supports using the cudaHostRegister flag
-        cudaHostRegisterReadOnly to register memory that must be mapped as
-        read-only to the GPU
-    timelineSemaphoreInteropSupported : int
-        External timeline semaphore interop is supported on the device
-    memoryPoolsSupported : int
-        1 if the device supports using the cudaMallocAsync and cudaMemPool
-        family of APIs, 0 otherwise
-    gpuDirectRDMASupported : int
-        1 if the device supports GPUDirect RDMA APIs, 0 otherwise
-    gpuDirectRDMAFlushWritesOptions : unsigned int
-        Bitmask to be interpreted according to the
-        cudaFlushGPUDirectRDMAWritesOptions enum
-    gpuDirectRDMAWritesOrdering : int
-        See the cudaGPUDirectRDMAWritesOrdering enum for numerical values
-    memoryPoolSupportedHandleTypes : unsigned int
-        Bitmask of handle types supported with mempool-based IPC
-    deferredMappingCudaArraySupported : int
-        1 if the device supports deferred mapping CUDA arrays and CUDA
-        mipmapped arrays
-    ipcEventSupported : int
-        Device supports IPC Events.
-    clusterLaunch : int
-        Indicates device supports cluster launch
-    unifiedFunctionPointers : int
-        Indicates device supports unified pointers
-    reserved2 : List[int]
-
-    reserved1 : List[int]
-        Reserved for future use
-    reserved : List[int]
-        Reserved for future use
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaDeviceProp __val
-    cdef cyruntime.cudaDeviceProp* _ptr
-    cdef cudaUUID_t _uuid
-{{endif}}
-{{if 'struct cudaIpcEventHandle_st' in found_types}}
-
-cdef class cudaIpcEventHandle_st:
-    """
-    CUDA IPC event handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaIpcEventHandle_st __val
-    cdef cyruntime.cudaIpcEventHandle_st* _ptr
-{{endif}}
-{{if 'struct cudaIpcMemHandle_st' in found_types}}
-
-cdef class cudaIpcMemHandle_st:
-    """
-    CUDA IPC memory handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaIpcMemHandle_st __val
-    cdef cyruntime.cudaIpcMemHandle_st* _ptr
-{{endif}}
-{{if 'struct cudaMemFabricHandle_st' in found_types}}
-
-cdef class cudaMemFabricHandle_st:
-    """
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaMemFabricHandle_st __val
-    cdef cyruntime.cudaMemFabricHandle_st* _ptr
-{{endif}}
-{{if 'struct cudaExternalMemoryHandleDesc' in found_types}}
-
-cdef class anon_struct5:
-    """
-    Attributes
-    ----------
-    handle : Any
-
-    name : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalMemoryHandleDesc* _ptr
-{{endif}}
-{{if 'struct cudaExternalMemoryHandleDesc' in found_types}}
-
-cdef class anon_union1:
-    """
-    Attributes
-    ----------
-    fd : int
-
-    win32 : anon_struct5
-
-    nvSciBufObject : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalMemoryHandleDesc* _ptr
-    cdef anon_struct5 _win32
-{{endif}}
-{{if 'struct cudaExternalMemoryHandleDesc' in found_types}}
-
-cdef class cudaExternalMemoryHandleDesc:
-    """
-    External memory handle descriptor
-
-    Attributes
-    ----------
-    type : cudaExternalMemoryHandleType
-        Type of the handle
-    handle : anon_union1
-
-    size : unsigned long long
-        Size of the memory allocation
-    flags : unsigned int
-        Flags must either be zero or cudaExternalMemoryDedicated
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalMemoryHandleDesc* _val_ptr
-    cdef cyruntime.cudaExternalMemoryHandleDesc* _ptr
-    cdef anon_union1 _handle
-{{endif}}
-{{if 'struct cudaExternalMemoryBufferDesc' in found_types}}
-
-cdef class cudaExternalMemoryBufferDesc:
-    """
-    External memory buffer descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the buffer's base is
-    size : unsigned long long
-        Size of the buffer
-    flags : unsigned int
-        Flags reserved for future use. Must be zero.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalMemoryBufferDesc __val
-    cdef cyruntime.cudaExternalMemoryBufferDesc* _ptr
-{{endif}}
-{{if 'struct cudaExternalMemoryMipmappedArrayDesc' in found_types}}
-
-cdef class cudaExternalMemoryMipmappedArrayDesc:
-    """
-    External memory mipmap descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the base level of the mipmap
-        chain is.
-    formatDesc : cudaChannelFormatDesc
-        Format of base level of the mipmap chain
-    extent : cudaExtent
-        Dimensions of base level of the mipmap chain
-    flags : unsigned int
-        Flags associated with CUDA mipmapped arrays. See
-        cudaMallocMipmappedArray
-    numLevels : unsigned int
-        Total number of levels in the mipmap chain
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalMemoryMipmappedArrayDesc __val
-    cdef cyruntime.cudaExternalMemoryMipmappedArrayDesc* _ptr
-    cdef cudaChannelFormatDesc _formatDesc
-    cdef cudaExtent _extent
-{{endif}}
-{{if 'struct cudaExternalSemaphoreHandleDesc' in found_types}}
-
-cdef class anon_struct6:
-    """
-    Attributes
-    ----------
-    handle : Any
-
-    name : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreHandleDesc* _ptr
-{{endif}}
-{{if 'struct cudaExternalSemaphoreHandleDesc' in found_types}}
-
-cdef class anon_union2:
-    """
-    Attributes
-    ----------
-    fd : int
-
-    win32 : anon_struct6
-
-    nvSciSyncObj : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreHandleDesc* _ptr
-    cdef anon_struct6 _win32
-{{endif}}
-{{if 'struct cudaExternalSemaphoreHandleDesc' in found_types}}
-
-cdef class cudaExternalSemaphoreHandleDesc:
-    """
-    External semaphore handle descriptor
-
-    Attributes
-    ----------
-    type : cudaExternalSemaphoreHandleType
-        Type of the handle
-    handle : anon_union2
-
-    flags : unsigned int
-        Flags reserved for the future. Must be zero.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreHandleDesc* _val_ptr
-    cdef cyruntime.cudaExternalSemaphoreHandleDesc* _ptr
-    cdef anon_union2 _handle
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalParams' in found_types}}
-
-cdef class anon_struct13:
-    """
-    Attributes
-    ----------
-    value : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreSignalParams* _ptr
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalParams' in found_types}}
-
-cdef class anon_union5:
-    """
-    Attributes
-    ----------
-    fence : Any
-
-    reserved : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreSignalParams* _ptr
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalParams' in found_types}}
-
-cdef class anon_struct14:
-    """
-    Attributes
-    ----------
-    key : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreSignalParams* _ptr
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalParams' in found_types}}
-
-cdef class anon_struct15:
-    """
-    Attributes
-    ----------
-    fence : anon_struct13
-
-    nvSciSync : anon_union5
-
-    keyedMutex : anon_struct14
-
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreSignalParams* _ptr
-    cdef anon_struct13 _fence
-    cdef anon_union5 _nvSciSync
-    cdef anon_struct14 _keyedMutex
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalParams' in found_types}}
-
-cdef class cudaExternalSemaphoreSignalParams:
-    """
-    External semaphore signal parameters, compatible with driver type
-
-    Attributes
-    ----------
-    params : anon_struct15
-
-    flags : unsigned int
-        Only when cudaExternalSemaphoreSignalParams is used to signal a
-        cudaExternalSemaphore_t of type
-        cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
-        cudaExternalSemaphoreSignalSkipNvSciBufMemSync: which indicates
-        that while signaling the cudaExternalSemaphore_t, no memory
-        synchronization operations should be performed for any external
-        memory object imported as cudaExternalMemoryHandleTypeNvSciBuf. For
-        all other types of cudaExternalSemaphore_t, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreSignalParams __val
-    cdef cyruntime.cudaExternalSemaphoreSignalParams* _ptr
-    cdef anon_struct15 _params
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitParams' in found_types}}
-
-cdef class anon_struct16:
-    """
-    Attributes
-    ----------
-    value : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreWaitParams* _ptr
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitParams' in found_types}}
-
-cdef class anon_union6:
-    """
-    Attributes
-    ----------
-    fence : Any
-
-    reserved : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreWaitParams* _ptr
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitParams' in found_types}}
-
-cdef class anon_struct17:
-    """
-    Attributes
-    ----------
-    key : unsigned long long
-
-    timeoutMs : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreWaitParams* _ptr
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitParams' in found_types}}
-
-cdef class anon_struct18:
-    """
-    Attributes
-    ----------
-    fence : anon_struct16
-
-    nvSciSync : anon_union6
-
-    keyedMutex : anon_struct17
-
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreWaitParams* _ptr
-    cdef anon_struct16 _fence
-    cdef anon_union6 _nvSciSync
-    cdef anon_struct17 _keyedMutex
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitParams' in found_types}}
-
-cdef class cudaExternalSemaphoreWaitParams:
-    """
-    External semaphore wait parameters, compatible with driver type
-
-    Attributes
-    ----------
-    params : anon_struct18
-
-    flags : unsigned int
-        Only when cudaExternalSemaphoreSignalParams is used to signal a
-        cudaExternalSemaphore_t of type
-        cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
-        cudaExternalSemaphoreSignalSkipNvSciBufMemSync: which indicates
-        that while waiting for the cudaExternalSemaphore_t, no memory
-        synchronization operations should be performed for any external
-        memory object imported as cudaExternalMemoryHandleTypeNvSciBuf. For
-        all other types of cudaExternalSemaphore_t, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreWaitParams __val
-    cdef cyruntime.cudaExternalSemaphoreWaitParams* _ptr
-    cdef anon_struct18 _params
-{{endif}}
-{{if 'struct cudaKernelNodeParams' in found_types}}
-
-cdef class cudaKernelNodeParams:
-    """
-    CUDA GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : Any
-        Kernel to launch
-    gridDim : dim3
-        Grid dimensions
-    blockDim : dim3
-        Block dimensions
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to individual kernel arguments
-    extra : Any
-        Pointer to kernel arguments in the "extra" format
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaKernelNodeParams __val
-    cdef cyruntime.cudaKernelNodeParams* _ptr
-    cdef dim3 _gridDim
-    cdef dim3 _blockDim
-    cdef utils.HelperKernelParams _cykernelParams
-{{endif}}
-{{if 'struct cudaKernelNodeParamsV2' in found_types}}
-
-cdef class cudaKernelNodeParamsV2:
-    """
-    CUDA GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : Any
-        Kernel to launch
-    gridDim : dim3
-        Grid dimensions
-    blockDim : dim3
-        Block dimensions
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to individual kernel arguments
-    extra : Any
-        Pointer to kernel arguments in the "extra" format
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaKernelNodeParamsV2 __val
-    cdef cyruntime.cudaKernelNodeParamsV2* _ptr
-    cdef dim3 _gridDim
-    cdef dim3 _blockDim
-    cdef utils.HelperKernelParams _cykernelParams
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalNodeParams' in found_types}}
-
-cdef class cudaExternalSemaphoreSignalNodeParams:
-    """
-    External semaphore signal node parameters
-
-    Attributes
-    ----------
-    extSemArray : cudaExternalSemaphore_t
-        Array of external semaphore handles.
-    paramsArray : cudaExternalSemaphoreSignalParams
-        Array of external semaphore signal parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreSignalNodeParams __val
-    cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* _ptr
-    cdef size_t _extSemArray_length
-    cdef cyruntime.cudaExternalSemaphore_t* _extSemArray
-
-    cdef size_t _paramsArray_length
-    cdef cyruntime.cudaExternalSemaphoreSignalParams* _paramsArray
-
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalNodeParamsV2' in found_types}}
-
-cdef class cudaExternalSemaphoreSignalNodeParamsV2:
-    """
-    External semaphore signal node parameters
-
-    Attributes
-    ----------
-    extSemArray : cudaExternalSemaphore_t
-        Array of external semaphore handles.
-    paramsArray : cudaExternalSemaphoreSignalParams
-        Array of external semaphore signal parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreSignalNodeParamsV2 __val
-    cdef cyruntime.cudaExternalSemaphoreSignalNodeParamsV2* _ptr
-    cdef size_t _extSemArray_length
-    cdef cyruntime.cudaExternalSemaphore_t* _extSemArray
-
-    cdef size_t _paramsArray_length
-    cdef cyruntime.cudaExternalSemaphoreSignalParams* _paramsArray
-
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitNodeParams' in found_types}}
-
-cdef class cudaExternalSemaphoreWaitNodeParams:
-    """
-    External semaphore wait node parameters
-
-    Attributes
-    ----------
-    extSemArray : cudaExternalSemaphore_t
-        Array of external semaphore handles.
-    paramsArray : cudaExternalSemaphoreWaitParams
-        Array of external semaphore wait parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreWaitNodeParams __val
-    cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* _ptr
-    cdef size_t _extSemArray_length
-    cdef cyruntime.cudaExternalSemaphore_t* _extSemArray
-
-    cdef size_t _paramsArray_length
-    cdef cyruntime.cudaExternalSemaphoreWaitParams* _paramsArray
-
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitNodeParamsV2' in found_types}}
-
-cdef class cudaExternalSemaphoreWaitNodeParamsV2:
-    """
-    External semaphore wait node parameters
-
-    Attributes
-    ----------
-    extSemArray : cudaExternalSemaphore_t
-        Array of external semaphore handles.
-    paramsArray : cudaExternalSemaphoreWaitParams
-        Array of external semaphore wait parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaExternalSemaphoreWaitNodeParamsV2 __val
-    cdef cyruntime.cudaExternalSemaphoreWaitNodeParamsV2* _ptr
-    cdef size_t _extSemArray_length
-    cdef cyruntime.cudaExternalSemaphore_t* _extSemArray
-
-    cdef size_t _paramsArray_length
-    cdef cyruntime.cudaExternalSemaphoreWaitParams* _paramsArray
-
-{{endif}}
-{{if 'struct cudaConditionalNodeParams' in found_types}}
-
-cdef class cudaConditionalNodeParams:
-    """
-    CUDA conditional node parameters
-
-    Attributes
-    ----------
-    handle : cudaGraphConditionalHandle
-        Conditional node handle. Handles must be created in advance of
-        creating the node using cudaGraphConditionalHandleCreate.
-    type : cudaGraphConditionalNodeType
-        Type of conditional node.
-    size : unsigned int
-        Size of graph output array. Must be 1.
-    phGraph_out : cudaGraph_t
-        CUDA-owned array populated with conditional node child graphs
-        during creation of the node. Valid for the lifetime of the
-        conditional node. The contents of the graph(s) are subject to the
-        following constraints:   - Allowed node types are kernel nodes,
-        empty nodes, child graphs, memsets, memcopies, and conditionals.
-        This applies recursively to child graphs and conditional bodies.
-        - All kernels, including kernels in nested conditionals or child
-        graphs at any level, must belong to the same CUDA context.
-        These graphs may be populated using graph node creation APIs or
-        cudaStreamBeginCaptureToGraph.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaConditionalNodeParams __val
-    cdef cyruntime.cudaConditionalNodeParams* _ptr
-    cdef cudaGraphConditionalHandle _handle
-    cdef size_t _phGraph_out_length
-    cdef cyruntime.cudaGraph_t* _phGraph_out
-
-{{endif}}
-{{if 'struct cudaChildGraphNodeParams' in found_types}}
-
-cdef class cudaChildGraphNodeParams:
-    """
-    Child graph node parameters
-
-    Attributes
-    ----------
-    graph : cudaGraph_t
-        The child graph to clone into the node for node creation, or a
-        handle to the graph owned by the node for node query
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaChildGraphNodeParams __val
-    cdef cyruntime.cudaChildGraphNodeParams* _ptr
-    cdef cudaGraph_t _graph
-{{endif}}
-{{if 'struct cudaEventRecordNodeParams' in found_types}}
-
-cdef class cudaEventRecordNodeParams:
-    """
-    Event record node parameters
-
-    Attributes
-    ----------
-    event : cudaEvent_t
-        The event to record when the node executes
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaEventRecordNodeParams __val
-    cdef cyruntime.cudaEventRecordNodeParams* _ptr
-    cdef cudaEvent_t _event
-{{endif}}
-{{if 'struct cudaEventWaitNodeParams' in found_types}}
-
-cdef class cudaEventWaitNodeParams:
-    """
-    Event wait node parameters
-
-    Attributes
-    ----------
-    event : cudaEvent_t
-        The event to wait on from the node
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaEventWaitNodeParams __val
-    cdef cyruntime.cudaEventWaitNodeParams* _ptr
-    cdef cudaEvent_t _event
-{{endif}}
-{{if 'struct cudaGraphNodeParams' in found_types}}
-
-cdef class cudaGraphNodeParams:
-    """
-    Graph node parameters. See cudaGraphAddNode.
-
-    Attributes
-    ----------
-    type : cudaGraphNodeType
-        Type of the node
-    reserved0 : List[int]
-        Reserved. Must be zero.
-    reserved1 : List[long long]
-        Padding. Unused bytes must be zero.
-    kernel : cudaKernelNodeParamsV2
-        Kernel node parameters.
-    memcpy : cudaMemcpyNodeParams
-        Memcpy node parameters.
-    memset : cudaMemsetParamsV2
-        Memset node parameters.
-    host : cudaHostNodeParamsV2
-        Host node parameters.
-    graph : cudaChildGraphNodeParams
-        Child graph node parameters.
-    eventWait : cudaEventWaitNodeParams
-        Event wait node parameters.
-    eventRecord : cudaEventRecordNodeParams
-        Event record node parameters.
-    extSemSignal : cudaExternalSemaphoreSignalNodeParamsV2
-        External semaphore signal node parameters.
-    extSemWait : cudaExternalSemaphoreWaitNodeParamsV2
-        External semaphore wait node parameters.
-    alloc : cudaMemAllocNodeParamsV2
-        Memory allocation node parameters.
-    free : cudaMemFreeNodeParams
-        Memory free node parameters.
-    conditional : cudaConditionalNodeParams
-        Conditional node parameters.
-    reserved2 : long long
-        Reserved bytes. Must be zero.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaGraphNodeParams* _val_ptr
-    cdef cyruntime.cudaGraphNodeParams* _ptr
-    cdef cudaKernelNodeParamsV2 _kernel
-    cdef cudaMemcpyNodeParams _memcpy
-    cdef cudaMemsetParamsV2 _memset
-    cdef cudaHostNodeParamsV2 _host
-    cdef cudaChildGraphNodeParams _graph
-    cdef cudaEventWaitNodeParams _eventWait
-    cdef cudaEventRecordNodeParams _eventRecord
-    cdef cudaExternalSemaphoreSignalNodeParamsV2 _extSemSignal
-    cdef cudaExternalSemaphoreWaitNodeParamsV2 _extSemWait
-    cdef cudaMemAllocNodeParamsV2 _alloc
-    cdef cudaMemFreeNodeParams _free
-    cdef cudaConditionalNodeParams _conditional
-{{endif}}
-{{if 'struct cudaGraphEdgeData_st' in found_types}}
-
-cdef class cudaGraphEdgeData_st:
-    """
-    Optional annotation for edges in a CUDA graph. Note, all edges
-    implicitly have annotations and default to a zero-initialized value
-    if not specified. A zero-initialized struct indicates a standard
-    full serialization of two nodes with memory visibility.
-
-    Attributes
-    ----------
-    from_port : bytes
-        This indicates when the dependency is triggered from the upstream
-        node on the edge. The meaning is specfic to the node type. A value
-        of 0 in all cases means full completion of the upstream node, with
-        memory visibility to the downstream node or portion thereof
-        (indicated by `to_port`).   Only kernel nodes define non-zero
-        ports. A kernel node can use the following output port types:
-        cudaGraphKernelNodePortDefault,
-        cudaGraphKernelNodePortProgrammatic, or
-        cudaGraphKernelNodePortLaunchCompletion.
-    to_port : bytes
-        This indicates what portion of the downstream node is dependent on
-        the upstream node or portion thereof (indicated by `from_port`).
-        The meaning is specific to the node type. A value of 0 in all cases
-        means the entirety of the downstream node is dependent on the
-        upstream work.   Currently no node types define non-zero ports.
-        Accordingly, this field must be set to zero.
-    type : bytes
-        This should be populated with a value from
-        ::cudaGraphDependencyType. (It is typed as char due to compiler-
-        specific layout of bitfields.) See ::cudaGraphDependencyType.
-    reserved : bytes
-        These bytes are unused and must be zeroed. This ensures
-        compatibility if additional fields are added in the future.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaGraphEdgeData_st __val
-    cdef cyruntime.cudaGraphEdgeData_st* _ptr
-{{endif}}
-{{if 'struct cudaGraphInstantiateParams_st' in found_types}}
-
-cdef class cudaGraphInstantiateParams_st:
-    """
-    Graph instantiation parameters
-
-    Attributes
-    ----------
-    flags : unsigned long long
-        Instantiation flags
-    uploadStream : cudaStream_t
-        Upload stream
-    errNode_out : cudaGraphNode_t
-        The node which caused instantiation to fail, if any
-    result_out : cudaGraphInstantiateResult
-        Whether instantiation was successful. If it failed, the reason why
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaGraphInstantiateParams_st __val
-    cdef cyruntime.cudaGraphInstantiateParams_st* _ptr
-    cdef cudaStream_t _uploadStream
-    cdef cudaGraphNode_t _errNode_out
-{{endif}}
-{{if 'struct cudaGraphExecUpdateResultInfo_st' in found_types}}
-
-cdef class cudaGraphExecUpdateResultInfo_st:
-    """
-    Result information returned by cudaGraphExecUpdate
-
-    Attributes
-    ----------
-    result : cudaGraphExecUpdateResult
-        Gives more specific detail when a cuda graph update fails.
-    errorNode : cudaGraphNode_t
-        The "to node" of the error edge when the topologies do not match.
-        The error node when the error is associated with a specific node.
-        NULL when the error is generic.
-    errorFromNode : cudaGraphNode_t
-        The from node of error edge when the topologies do not match.
-        Otherwise NULL.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaGraphExecUpdateResultInfo_st __val
-    cdef cyruntime.cudaGraphExecUpdateResultInfo_st* _ptr
-    cdef cudaGraphNode_t _errorNode
-    cdef cudaGraphNode_t _errorFromNode
-{{endif}}
-{{if 'struct cudaGraphKernelNodeUpdate' in found_types}}
-
-cdef class anon_struct19:
-    """
-    Attributes
-    ----------
-    pValue : Any
-
-    offset : size_t
-
-    size : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaGraphKernelNodeUpdate* _ptr
-{{endif}}
-{{if 'struct cudaGraphKernelNodeUpdate' in found_types}}
-
-cdef class anon_union8:
-    """
-    Attributes
-    ----------
-    gridDim : dim3
-
-    param : anon_struct19
-
-    isEnabled : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaGraphKernelNodeUpdate* _ptr
-    cdef dim3 _gridDim
-    cdef anon_struct19 _param
-{{endif}}
-{{if 'struct cudaGraphKernelNodeUpdate' in found_types}}
-
-cdef class cudaGraphKernelNodeUpdate:
-    """
-    Struct to specify a single node update to pass as part of a larger
-    array to ::cudaGraphKernelNodeUpdatesApply
-
-    Attributes
-    ----------
-    node : cudaGraphDeviceNode_t
-        Node to update
-    field : cudaGraphKernelNodeField
-        Which type of update to apply. Determines how updateData is
-        interpreted
-    updateData : anon_union8
-        Update data to apply. Which field is used depends on field's value
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaGraphKernelNodeUpdate* _val_ptr
-    cdef cyruntime.cudaGraphKernelNodeUpdate* _ptr
-    cdef cudaGraphDeviceNode_t _node
-    cdef anon_union8 _updateData
-{{endif}}
-{{if 'struct cudaLaunchMemSyncDomainMap_st' in found_types}}
-
-cdef class cudaLaunchMemSyncDomainMap_st:
-    """
-    Memory Synchronization Domain map  See cudaLaunchMemSyncDomain.  By
-    default, kernels are launched in domain 0. Kernel launched with
-    cudaLaunchMemSyncDomainRemote will have a different domain ID. User
-    may also alter the domain ID with ::cudaLaunchMemSyncDomainMap for
-    a specific stream / graph node / kernel launch. See
-    cudaLaunchAttributeMemSyncDomainMap.  Domain ID range is available
-    through cudaDevAttrMemSyncDomainCount.
-
-    Attributes
-    ----------
-    default_ : bytes
-        The default domain ID to use for designated kernels
-    remote : bytes
-        The remote domain ID to use for designated kernels
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaLaunchMemSyncDomainMap_st __val
-    cdef cyruntime.cudaLaunchMemSyncDomainMap_st* _ptr
-{{endif}}
-{{if 'union cudaLaunchAttributeValue' in found_types}}
-
-cdef class anon_struct20:
-    """
-    Attributes
-    ----------
-    x : unsigned int
-
-    y : unsigned int
-
-    z : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaLaunchAttributeValue* _ptr
-{{endif}}
-{{if 'union cudaLaunchAttributeValue' in found_types}}
-
-cdef class anon_struct21:
-    """
-    Attributes
-    ----------
-    event : cudaEvent_t
-
-    flags : int
-
-    triggerAtBlockStart : int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaLaunchAttributeValue* _ptr
-    cdef cudaEvent_t _event
-{{endif}}
-{{if 'union cudaLaunchAttributeValue' in found_types}}
-
-cdef class anon_struct22:
-    """
-    Attributes
-    ----------
-    event : cudaEvent_t
-
-    flags : int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaLaunchAttributeValue* _ptr
-    cdef cudaEvent_t _event
-{{endif}}
-{{if 'union cudaLaunchAttributeValue' in found_types}}
-
-cdef class anon_struct23:
-    """
-    Attributes
-    ----------
-    deviceUpdatable : int
-
-    devNode : cudaGraphDeviceNode_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaLaunchAttributeValue* _ptr
-    cdef cudaGraphDeviceNode_t _devNode
-{{endif}}
-{{if 'union cudaLaunchAttributeValue' in found_types}}
-
-cdef class cudaLaunchAttributeValue:
-    """
-    Launch attributes union; used as value field of
-    ::cudaLaunchAttribute
-
-    Attributes
-    ----------
-    pad : bytes
-
-    accessPolicyWindow : cudaAccessPolicyWindow
-        Value of launch attribute cudaLaunchAttributeAccessPolicyWindow.
-    cooperative : int
-        Value of launch attribute cudaLaunchAttributeCooperative. Nonzero
-        indicates a cooperative kernel (see cudaLaunchCooperativeKernel).
-    syncPolicy : cudaSynchronizationPolicy
-        Value of launch attribute cudaLaunchAttributeSynchronizationPolicy.
-        ::cudaSynchronizationPolicy for work queued up in this stream.
-    clusterDim : anon_struct20
-        Value of launch attribute cudaLaunchAttributeClusterDimension that
-        represents the desired cluster dimensions for the kernel. Opaque
-        type with the following fields: - `x` - The X dimension of the
-        cluster, in blocks. Must be a divisor of the grid X dimension.    -
-        `y` - The Y dimension of the cluster, in blocks. Must be a divisor
-        of the grid Y dimension.    - `z` - The Z dimension of the cluster,
-        in blocks. Must be a divisor of the grid Z dimension.
-    clusterSchedulingPolicyPreference : cudaClusterSchedulingPolicy
-        Value of launch attribute
-        cudaLaunchAttributeClusterSchedulingPolicyPreference. Cluster
-        scheduling policy preference for the kernel.
-    programmaticStreamSerializationAllowed : int
-        Value of launch attribute
-        cudaLaunchAttributeProgrammaticStreamSerialization.
-    programmaticEvent : anon_struct21
-        Value of launch attribute cudaLaunchAttributeProgrammaticEvent with
-        the following fields: - `cudaEvent_t` event - Event to fire when
-        all blocks trigger it.    - `int` flags; - Event record flags, see
-        cudaEventRecordWithFlags. Does not accept cudaEventRecordExternal.
-        - `int` triggerAtBlockStart - If this is set to non-0, each block
-        launch will automatically trigger the event.
-    priority : int
-        Value of launch attribute cudaLaunchAttributePriority. Execution
-        priority of the kernel.
-    memSyncDomainMap : cudaLaunchMemSyncDomainMap
-        Value of launch attribute cudaLaunchAttributeMemSyncDomainMap. See
-        ::cudaLaunchMemSyncDomainMap.
-    memSyncDomain : cudaLaunchMemSyncDomain
-        Value of launch attribute cudaLaunchAttributeMemSyncDomain. See
-        cudaLaunchMemSyncDomain.
-    launchCompletionEvent : anon_struct22
-        Value of launch attribute cudaLaunchAttributeLaunchCompletionEvent
-        with the following fields: - `cudaEvent_t` event - Event to fire
-        when the last block launches.    - `int` flags - Event record
-        flags, see cudaEventRecordWithFlags. Does not accept
-        cudaEventRecordExternal.
-    deviceUpdatableKernelNode : anon_struct23
-        Value of launch attribute
-        cudaLaunchAttributeDeviceUpdatableKernelNode with the following
-        fields: - `int` deviceUpdatable - Whether or not the resulting
-        kernel node should be device-updatable.    -
-        `cudaGraphDeviceNode_t` devNode - Returns a handle to pass to the
-        various device-side update functions.
-    sharedMemCarveout : unsigned int
-        Value of launch attribute
-        cudaLaunchAttributePreferredSharedMemoryCarveout.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaLaunchAttributeValue __val
-    cdef cyruntime.cudaLaunchAttributeValue* _ptr
-    cdef cudaAccessPolicyWindow _accessPolicyWindow
-    cdef anon_struct20 _clusterDim
-    cdef anon_struct21 _programmaticEvent
-    cdef cudaLaunchMemSyncDomainMap _memSyncDomainMap
-    cdef anon_struct22 _launchCompletionEvent
-    cdef anon_struct23 _deviceUpdatableKernelNode
-{{endif}}
-{{if 'struct cudaLaunchAttribute_st' in found_types}}
-
-cdef class cudaLaunchAttribute_st:
-    """
-    Launch attribute
-
-    Attributes
-    ----------
-    id : cudaLaunchAttributeID
-        Attribute to set
-    val : cudaLaunchAttributeValue
-        Value of the attribute
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaLaunchAttribute_st __val
-    cdef cyruntime.cudaLaunchAttribute_st* _ptr
-    cdef cudaLaunchAttributeValue _val
-{{endif}}
-{{if 'struct cudaAsyncNotificationInfo' in found_types}}
-
-cdef class anon_struct24:
-    """
-    Attributes
-    ----------
-    bytesOverBudget : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaAsyncNotificationInfo* _ptr
-{{endif}}
-{{if 'struct cudaAsyncNotificationInfo' in found_types}}
-
-cdef class anon_union9:
-    """
-    Attributes
-    ----------
-    overBudget : anon_struct24
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaAsyncNotificationInfo* _ptr
-    cdef anon_struct24 _overBudget
-{{endif}}
-{{if 'struct cudaAsyncNotificationInfo' in found_types}}
-
-cdef class cudaAsyncNotificationInfo:
-    """
-    Information describing an async notification event
-
-    Attributes
-    ----------
-    type : cudaAsyncNotificationType
-
-    info : anon_union9
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaAsyncNotificationInfo* _val_ptr
-    cdef cyruntime.cudaAsyncNotificationInfo* _ptr
-    cdef anon_union9 _info
-{{endif}}
-{{if 'struct cudaTextureDesc' in found_types}}
-
-cdef class cudaTextureDesc:
-    """
-    CUDA texture descriptor
-
-    Attributes
-    ----------
-    addressMode : List[cudaTextureAddressMode]
-        Texture address mode for up to 3 dimensions
-    filterMode : cudaTextureFilterMode
-        Texture filter mode
-    readMode : cudaTextureReadMode
-        Texture read mode
-    sRGB : int
-        Perform sRGB->linear conversion during texture read
-    borderColor : List[float]
-        Texture Border Color
-    normalizedCoords : int
-        Indicates whether texture reads are normalized or not
-    maxAnisotropy : unsigned int
-        Limit to the anisotropy ratio
-    mipmapFilterMode : cudaTextureFilterMode
-        Mipmap filter mode
-    mipmapLevelBias : float
-        Offset applied to the supplied mipmap level
-    minMipmapLevelClamp : float
-        Lower end of the mipmap level range to clamp access to
-    maxMipmapLevelClamp : float
-        Upper end of the mipmap level range to clamp access to
-    disableTrilinearOptimization : int
-        Disable any trilinear filtering optimizations.
-    seamlessCubemap : int
-        Enable seamless cube map filtering.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaTextureDesc __val
-    cdef cyruntime.cudaTextureDesc* _ptr
-{{endif}}
-{{if True}}
-
-cdef class cudaEglPlaneDesc_st:
-    """
-    CUDA EGL Plane Descriptor - structure defining each plane of a CUDA
-    EGLFrame
-
-    Attributes
-    ----------
-    width : unsigned int
-        Width of plane
-    height : unsigned int
-        Height of plane
-    depth : unsigned int
-        Depth of plane
-    pitch : unsigned int
-        Pitch of plane
-    numChannels : unsigned int
-        Number of channels for the plane
-    channelDesc : cudaChannelFormatDesc
-        Channel Format Descriptor
-    reserved : List[unsigned int]
-        Reserved for future use
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaEglPlaneDesc_st __val
-    cdef cyruntime.cudaEglPlaneDesc_st* _ptr
-    cdef cudaChannelFormatDesc _channelDesc
-{{endif}}
-{{if True}}
-
-cdef class anon_union10:
-    """
-    Attributes
-    ----------
-    pArray : List[cudaArray_t]
-
-    pPitch : List[cudaPitchedPtr]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaEglFrame_st* _ptr
-{{endif}}
-{{if True}}
-
-cdef class cudaEglFrame_st:
-    """
-    CUDA EGLFrame Descriptor - structure defining one frame of EGL.
-    Each frame may contain one or more planes depending on whether the
-    surface is Multiplanar or not. Each plane of EGLFrame is
-    represented by cudaEglPlaneDesc which is defined as:
-    typedefstructcudaEglPlaneDesc_st unsignedintwidth;
-    unsignedintheight; unsignedintdepth; unsignedintpitch;
-    unsignedintnumChannels; structcudaChannelFormatDescchannelDesc;
-    unsignedintreserved[4]; cudaEglPlaneDesc;
-
-    Attributes
-    ----------
-    frame : anon_union10
-
-    planeDesc : List[cudaEglPlaneDesc]
-        CUDA EGL Plane Descriptor cudaEglPlaneDesc
-    planeCount : unsigned int
-        Number of planes
-    frameType : cudaEglFrameType
-        Array or Pitch
-    eglColorFormat : cudaEglColorFormat
-        CUDA EGL Color Format
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaEglFrame_st* _val_ptr
-    cdef cyruntime.cudaEglFrame_st* _ptr
-    cdef anon_union10 _frame
-{{endif}}
-{{if 'CUuuid' in found_types}}
-
-cdef class CUuuid(CUuuid_st):
-    """
-    Attributes
-    ----------
-    bytes : bytes
-        < CUDA definition of UUID
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cudaUUID_t' in found_types}}
-
-cdef class cudaUUID_t(CUuuid_st):
-    """
-    Attributes
-    ----------
-    bytes : bytes
-        < CUDA definition of UUID
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cudaIpcEventHandle_t' in found_types}}
-
-cdef class cudaIpcEventHandle_t(cudaIpcEventHandle_st):
-    """
-    CUDA IPC event handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cudaIpcMemHandle_t' in found_types}}
-
-cdef class cudaIpcMemHandle_t(cudaIpcMemHandle_st):
-    """
-    CUDA IPC memory handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cudaMemFabricHandle_t' in found_types}}
-
-cdef class cudaMemFabricHandle_t(cudaMemFabricHandle_st):
-    """
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cudaGraphEdgeData' in found_types}}
-
-cdef class cudaGraphEdgeData(cudaGraphEdgeData_st):
-    """
-    Optional annotation for edges in a CUDA graph. Note, all edges
-    implicitly have annotations and default to a zero-initialized value
-    if not specified. A zero-initialized struct indicates a standard
-    full serialization of two nodes with memory visibility.
-
-    Attributes
-    ----------
-    from_port : bytes
-        This indicates when the dependency is triggered from the upstream
-        node on the edge. The meaning is specfic to the node type. A value
-        of 0 in all cases means full completion of the upstream node, with
-        memory visibility to the downstream node or portion thereof
-        (indicated by `to_port`).   Only kernel nodes define non-zero
-        ports. A kernel node can use the following output port types:
-        cudaGraphKernelNodePortDefault,
-        cudaGraphKernelNodePortProgrammatic, or
-        cudaGraphKernelNodePortLaunchCompletion.
-    to_port : bytes
-        This indicates what portion of the downstream node is dependent on
-        the upstream node or portion thereof (indicated by `from_port`).
-        The meaning is specific to the node type. A value of 0 in all cases
-        means the entirety of the downstream node is dependent on the
-        upstream work.   Currently no node types define non-zero ports.
-        Accordingly, this field must be set to zero.
-    type : bytes
-        This should be populated with a value from
-        ::cudaGraphDependencyType. (It is typed as char due to compiler-
-        specific layout of bitfields.) See ::cudaGraphDependencyType.
-    reserved : bytes
-        These bytes are unused and must be zeroed. This ensures
-        compatibility if additional fields are added in the future.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cudaGraphInstantiateParams' in found_types}}
-
-cdef class cudaGraphInstantiateParams(cudaGraphInstantiateParams_st):
-    """
-    Graph instantiation parameters
-
-    Attributes
-    ----------
-    flags : unsigned long long
-        Instantiation flags
-    uploadStream : cudaStream_t
-        Upload stream
-    errNode_out : cudaGraphNode_t
-        The node which caused instantiation to fail, if any
-    result_out : cudaGraphInstantiateResult
-        Whether instantiation was successful. If it failed, the reason why
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cudaGraphExecUpdateResultInfo' in found_types}}
-
-cdef class cudaGraphExecUpdateResultInfo(cudaGraphExecUpdateResultInfo_st):
-    """
-    Result information returned by cudaGraphExecUpdate
-
-    Attributes
-    ----------
-    result : cudaGraphExecUpdateResult
-        Gives more specific detail when a cuda graph update fails.
-    errorNode : cudaGraphNode_t
-        The "to node" of the error edge when the topologies do not match.
-        The error node when the error is associated with a specific node.
-        NULL when the error is generic.
-    errorFromNode : cudaGraphNode_t
-        The from node of error edge when the topologies do not match.
-        Otherwise NULL.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cudaLaunchMemSyncDomainMap' in found_types}}
-
-cdef class cudaLaunchMemSyncDomainMap(cudaLaunchMemSyncDomainMap_st):
-    """
-    Memory Synchronization Domain map  See cudaLaunchMemSyncDomain.  By
-    default, kernels are launched in domain 0. Kernel launched with
-    cudaLaunchMemSyncDomainRemote will have a different domain ID. User
-    may also alter the domain ID with ::cudaLaunchMemSyncDomainMap for
-    a specific stream / graph node / kernel launch. See
-    cudaLaunchAttributeMemSyncDomainMap.  Domain ID range is available
-    through cudaDevAttrMemSyncDomainCount.
-
-    Attributes
-    ----------
-    default_ : bytes
-        The default domain ID to use for designated kernels
-    remote : bytes
-        The remote domain ID to use for designated kernels
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cudaLaunchAttribute' in found_types}}
-
-cdef class cudaLaunchAttribute(cudaLaunchAttribute_st):
-    """
-    Launch attribute
-
-    Attributes
-    ----------
-    id : cudaLaunchAttributeID
-        Attribute to set
-    val : cudaLaunchAttributeValue
-        Value of the attribute
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cudaAsyncNotificationInfo_t' in found_types}}
-
-cdef class cudaAsyncNotificationInfo_t(cudaAsyncNotificationInfo):
-    """
-    Information describing an async notification event
-
-    Attributes
-    ----------
-    type : cudaAsyncNotificationType
-
-    info : anon_union9
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if True}}
-
-cdef class cudaStreamAttrValue(cudaLaunchAttributeValue):
-    """
-    Launch attributes union; used as value field of
-    ::cudaLaunchAttribute
-
-    Attributes
-    ----------
-    pad : bytes
-
-    accessPolicyWindow : cudaAccessPolicyWindow
-        Value of launch attribute cudaLaunchAttributeAccessPolicyWindow.
-    cooperative : int
-        Value of launch attribute cudaLaunchAttributeCooperative. Nonzero
-        indicates a cooperative kernel (see cudaLaunchCooperativeKernel).
-    syncPolicy : cudaSynchronizationPolicy
-        Value of launch attribute cudaLaunchAttributeSynchronizationPolicy.
-        ::cudaSynchronizationPolicy for work queued up in this stream.
-    clusterDim : anon_struct20
-        Value of launch attribute cudaLaunchAttributeClusterDimension that
-        represents the desired cluster dimensions for the kernel. Opaque
-        type with the following fields: - `x` - The X dimension of the
-        cluster, in blocks. Must be a divisor of the grid X dimension.    -
-        `y` - The Y dimension of the cluster, in blocks. Must be a divisor
-        of the grid Y dimension.    - `z` - The Z dimension of the cluster,
-        in blocks. Must be a divisor of the grid Z dimension.
-    clusterSchedulingPolicyPreference : cudaClusterSchedulingPolicy
-        Value of launch attribute
-        cudaLaunchAttributeClusterSchedulingPolicyPreference. Cluster
-        scheduling policy preference for the kernel.
-    programmaticStreamSerializationAllowed : int
-        Value of launch attribute
-        cudaLaunchAttributeProgrammaticStreamSerialization.
-    programmaticEvent : anon_struct21
-        Value of launch attribute cudaLaunchAttributeProgrammaticEvent with
-        the following fields: - `cudaEvent_t` event - Event to fire when
-        all blocks trigger it.    - `int` flags; - Event record flags, see
-        cudaEventRecordWithFlags. Does not accept cudaEventRecordExternal.
-        - `int` triggerAtBlockStart - If this is set to non-0, each block
-        launch will automatically trigger the event.
-    priority : int
-        Value of launch attribute cudaLaunchAttributePriority. Execution
-        priority of the kernel.
-    memSyncDomainMap : cudaLaunchMemSyncDomainMap
-        Value of launch attribute cudaLaunchAttributeMemSyncDomainMap. See
-        ::cudaLaunchMemSyncDomainMap.
-    memSyncDomain : cudaLaunchMemSyncDomain
-        Value of launch attribute cudaLaunchAttributeMemSyncDomain. See
-        cudaLaunchMemSyncDomain.
-    launchCompletionEvent : anon_struct22
-        Value of launch attribute cudaLaunchAttributeLaunchCompletionEvent
-        with the following fields: - `cudaEvent_t` event - Event to fire
-        when the last block launches.    - `int` flags - Event record
-        flags, see cudaEventRecordWithFlags. Does not accept
-        cudaEventRecordExternal.
-    deviceUpdatableKernelNode : anon_struct23
-        Value of launch attribute
-        cudaLaunchAttributeDeviceUpdatableKernelNode with the following
-        fields: - `int` deviceUpdatable - Whether or not the resulting
-        kernel node should be device-updatable.    -
-        `cudaGraphDeviceNode_t` devNode - Returns a handle to pass to the
-        various device-side update functions.
-    sharedMemCarveout : unsigned int
-        Value of launch attribute
-        cudaLaunchAttributePreferredSharedMemoryCarveout.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if True}}
-
-cdef class cudaKernelNodeAttrValue(cudaLaunchAttributeValue):
-    """
-    Launch attributes union; used as value field of
-    ::cudaLaunchAttribute
-
-    Attributes
-    ----------
-    pad : bytes
-
-    accessPolicyWindow : cudaAccessPolicyWindow
-        Value of launch attribute cudaLaunchAttributeAccessPolicyWindow.
-    cooperative : int
-        Value of launch attribute cudaLaunchAttributeCooperative. Nonzero
-        indicates a cooperative kernel (see cudaLaunchCooperativeKernel).
-    syncPolicy : cudaSynchronizationPolicy
-        Value of launch attribute cudaLaunchAttributeSynchronizationPolicy.
-        ::cudaSynchronizationPolicy for work queued up in this stream.
-    clusterDim : anon_struct20
-        Value of launch attribute cudaLaunchAttributeClusterDimension that
-        represents the desired cluster dimensions for the kernel. Opaque
-        type with the following fields: - `x` - The X dimension of the
-        cluster, in blocks. Must be a divisor of the grid X dimension.    -
-        `y` - The Y dimension of the cluster, in blocks. Must be a divisor
-        of the grid Y dimension.    - `z` - The Z dimension of the cluster,
-        in blocks. Must be a divisor of the grid Z dimension.
-    clusterSchedulingPolicyPreference : cudaClusterSchedulingPolicy
-        Value of launch attribute
-        cudaLaunchAttributeClusterSchedulingPolicyPreference. Cluster
-        scheduling policy preference for the kernel.
-    programmaticStreamSerializationAllowed : int
-        Value of launch attribute
-        cudaLaunchAttributeProgrammaticStreamSerialization.
-    programmaticEvent : anon_struct21
-        Value of launch attribute cudaLaunchAttributeProgrammaticEvent with
-        the following fields: - `cudaEvent_t` event - Event to fire when
-        all blocks trigger it.    - `int` flags; - Event record flags, see
-        cudaEventRecordWithFlags. Does not accept cudaEventRecordExternal.
-        - `int` triggerAtBlockStart - If this is set to non-0, each block
-        launch will automatically trigger the event.
-    priority : int
-        Value of launch attribute cudaLaunchAttributePriority. Execution
-        priority of the kernel.
-    memSyncDomainMap : cudaLaunchMemSyncDomainMap
-        Value of launch attribute cudaLaunchAttributeMemSyncDomainMap. See
-        ::cudaLaunchMemSyncDomainMap.
-    memSyncDomain : cudaLaunchMemSyncDomain
-        Value of launch attribute cudaLaunchAttributeMemSyncDomain. See
-        cudaLaunchMemSyncDomain.
-    launchCompletionEvent : anon_struct22
-        Value of launch attribute cudaLaunchAttributeLaunchCompletionEvent
-        with the following fields: - `cudaEvent_t` event - Event to fire
-        when the last block launches.    - `int` flags - Event record
-        flags, see cudaEventRecordWithFlags. Does not accept
-        cudaEventRecordExternal.
-    deviceUpdatableKernelNode : anon_struct23
-        Value of launch attribute
-        cudaLaunchAttributeDeviceUpdatableKernelNode with the following
-        fields: - `int` deviceUpdatable - Whether or not the resulting
-        kernel node should be device-updatable.    -
-        `cudaGraphDeviceNode_t` devNode - Returns a handle to pass to the
-        various device-side update functions.
-    sharedMemCarveout : unsigned int
-        Value of launch attribute
-        cudaLaunchAttributePreferredSharedMemoryCarveout.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if True}}
-
-cdef class cudaEglPlaneDesc(cudaEglPlaneDesc_st):
-    """
-    CUDA EGL Plane Descriptor - structure defining each plane of a CUDA
-    EGLFrame
-
-    Attributes
-    ----------
-    width : unsigned int
-        Width of plane
-    height : unsigned int
-        Height of plane
-    depth : unsigned int
-        Depth of plane
-    pitch : unsigned int
-        Pitch of plane
-    numChannels : unsigned int
-        Number of channels for the plane
-    channelDesc : cudaChannelFormatDesc
-        Channel Format Descriptor
-    reserved : List[unsigned int]
-        Reserved for future use
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if True}}
-
-cdef class cudaEglFrame(cudaEglFrame_st):
-    """
-    CUDA EGLFrame Descriptor - structure defining one frame of EGL.
-    Each frame may contain one or more planes depending on whether the
-    surface is Multiplanar or not. Each plane of EGLFrame is
-    represented by cudaEglPlaneDesc which is defined as:
-    typedefstructcudaEglPlaneDesc_st unsignedintwidth;
-    unsignedintheight; unsignedintdepth; unsignedintpitch;
-    unsignedintnumChannels; structcudaChannelFormatDescchannelDesc;
-    unsignedintreserved[4]; cudaEglPlaneDesc;
-
-    Attributes
-    ----------
-    frame : anon_union10
-
-    planeDesc : List[cudaEglPlaneDesc]
-        CUDA EGL Plane Descriptor cudaEglPlaneDesc
-    planeCount : unsigned int
-        Number of planes
-    frameType : cudaEglFrameType
-        Array or Pitch
-    eglColorFormat : cudaEglColorFormat
-        CUDA EGL Color Format
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-{{if 'cudaStream_t' in found_types}}
-
-cdef class cudaStream_t(driver.CUstream):
-    """
-
-    CUDA stream
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-
-{{if 'cudaEvent_t' in found_types}}
-
-cdef class cudaEvent_t(driver.CUevent):
-    """
-
-    CUDA event types
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-
-{{if 'cudaGraph_t' in found_types}}
-
-cdef class cudaGraph_t(driver.CUgraph):
-    """
-
-    CUDA graph
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-
-{{if 'cudaGraphNode_t' in found_types}}
-
-cdef class cudaGraphNode_t(driver.CUgraphNode):
-    """
-
-    CUDA graph node.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-
-{{if 'cudaUserObject_t' in found_types}}
-
-cdef class cudaUserObject_t(driver.CUuserObject):
-    """
-
-    CUDA user object for graphs
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-
-{{if 'cudaFunction_t' in found_types}}
-
-cdef class cudaFunction_t(driver.CUfunction):
-    """
-
-    CUDA function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-
-{{if 'cudaMemPool_t' in found_types}}
-
-cdef class cudaMemPool_t(driver.CUmemoryPool):
-    """
-
-    CUDA memory pool
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-
-{{if 'cudaGraphExec_t' in found_types}}
-
-cdef class cudaGraphExec_t(driver.CUgraphExec):
-    """
-
-    CUDA executable (launchable) graph
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-
-{{if True}}
-
-cdef class cudaEglStreamConnection(driver.CUeglStreamConnection):
-    """
-
-    CUDA EGLSream Connection
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    pass
-{{endif}}
-
-{{if 'cudaGraphConditionalHandle' in found_types}}
-
-cdef class cudaGraphConditionalHandle:
-    """
-
-    CUDA handle for conditional graph nodes
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaGraphConditionalHandle  __val
-    cdef cyruntime.cudaGraphConditionalHandle* _ptr
-{{endif}}
-
-{{if 'cudaSurfaceObject_t' in found_types}}
-
-cdef class cudaSurfaceObject_t:
-    """
-
-    An opaque value that represents a CUDA Surface object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaSurfaceObject_t  __val
-    cdef cyruntime.cudaSurfaceObject_t* _ptr
-{{endif}}
-
-{{if 'cudaTextureObject_t' in found_types}}
-
-cdef class cudaTextureObject_t:
-    """
-
-    An opaque value that represents a CUDA texture object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.cudaTextureObject_t  __val
-    cdef cyruntime.cudaTextureObject_t* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class GLenum:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.GLenum  __val
-    cdef cyruntime.GLenum* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class GLuint:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.GLuint  __val
-    cdef cyruntime.GLuint* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLint:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.EGLint  __val
-    cdef cyruntime.EGLint* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpDevice:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.VdpDevice  __val
-    cdef cyruntime.VdpDevice* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpGetProcAddress:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.VdpGetProcAddress  __val
-    cdef cyruntime.VdpGetProcAddress* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpVideoSurface:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.VdpVideoSurface  __val
-    cdef cyruntime.VdpVideoSurface* _ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpOutputSurface:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    cdef cyruntime.VdpOutputSurface  __val
-    cdef cyruntime.VdpOutputSurface* _ptr
-{{endif}}
diff --git a/cuda_bindings/cuda/bindings/runtime.pyx.in b/cuda_bindings/cuda/bindings/runtime.pyx.in
deleted file mode 100644
index e735ee44..00000000
--- a/cuda_bindings/cuda/bindings/runtime.pyx.in
+++ /dev/null
@@ -1,32369 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from typing import List, Tuple, Any, Optional
-from enum import IntEnum
-import cython
-import ctypes
-from libc.stdlib cimport calloc, free
-from libc cimport string
-from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t
-from libc.stddef cimport wchar_t
-from libc.limits cimport CHAR_MIN
-from libcpp.vector cimport vector
-from cpython.buffer cimport PyObject_CheckBuffer, PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS
-from cpython.bytes cimport PyBytes_FromStringAndSize
-import cuda.bindings.driver
-
-ctypedef unsigned long long signed_char_ptr
-ctypedef unsigned long long unsigned_char_ptr
-ctypedef unsigned long long char_ptr
-ctypedef unsigned long long short_ptr
-ctypedef unsigned long long unsigned_short_ptr
-ctypedef unsigned long long int_ptr
-ctypedef unsigned long long long_int_ptr
-ctypedef unsigned long long long_long_int_ptr
-ctypedef unsigned long long unsigned_int_ptr
-ctypedef unsigned long long unsigned_long_int_ptr
-ctypedef unsigned long long unsigned_long_long_int_ptr
-ctypedef unsigned long long uint32_t_ptr
-ctypedef unsigned long long uint64_t_ptr
-ctypedef unsigned long long int32_t_ptr
-ctypedef unsigned long long int64_t_ptr
-ctypedef unsigned long long unsigned_ptr
-ctypedef unsigned long long unsigned_long_long_ptr
-ctypedef unsigned long long long_long_ptr
-ctypedef unsigned long long size_t_ptr
-ctypedef unsigned long long float_ptr
-ctypedef unsigned long long double_ptr
-ctypedef unsigned long long void_ptr
-
-#: Default page-locked allocation flag
-cudaHostAllocDefault = cyruntime.cudaHostAllocDefault
-
-#: Pinned memory accessible by all CUDA contexts
-cudaHostAllocPortable = cyruntime.cudaHostAllocPortable
-
-#: Map allocation into device space
-cudaHostAllocMapped = cyruntime.cudaHostAllocMapped
-
-#: Write-combined memory
-cudaHostAllocWriteCombined = cyruntime.cudaHostAllocWriteCombined
-
-#: Default host memory registration flag
-cudaHostRegisterDefault = cyruntime.cudaHostRegisterDefault
-
-#: Pinned memory accessible by all CUDA contexts
-cudaHostRegisterPortable = cyruntime.cudaHostRegisterPortable
-
-#: Map registered memory into device space
-cudaHostRegisterMapped = cyruntime.cudaHostRegisterMapped
-
-#: Memory-mapped I/O space
-cudaHostRegisterIoMemory = cyruntime.cudaHostRegisterIoMemory
-
-#: Memory-mapped read-only
-cudaHostRegisterReadOnly = cyruntime.cudaHostRegisterReadOnly
-
-#: Default peer addressing enable flag
-cudaPeerAccessDefault = cyruntime.cudaPeerAccessDefault
-
-#: Default stream flag
-cudaStreamDefault = cyruntime.cudaStreamDefault
-
-#: Stream does not synchronize with stream 0 (the NULL stream)
-cudaStreamNonBlocking = cyruntime.cudaStreamNonBlocking
-
-#: Legacy stream handle
-#:
-#: Stream handle that can be passed as a cudaStream_t to use an implicit
-#: stream with legacy synchronization behavior.
-#:
-#: See details of the \link_sync_behavior
-cudaStreamLegacy = cyruntime.cudaStreamLegacy
-
-#: Per-thread stream handle
-#:
-#: Stream handle that can be passed as a cudaStream_t to use an implicit
-#: stream with per-thread synchronization behavior.
-#:
-#: See details of the \link_sync_behavior
-cudaStreamPerThread = cyruntime.cudaStreamPerThread
-
-#: Default event flag
-cudaEventDefault = cyruntime.cudaEventDefault
-
-#: Event uses blocking synchronization
-cudaEventBlockingSync = cyruntime.cudaEventBlockingSync
-
-#: Event will not record timing data
-cudaEventDisableTiming = cyruntime.cudaEventDisableTiming
-
-#: Event is suitable for interprocess use. cudaEventDisableTiming must be
-#: set
-cudaEventInterprocess = cyruntime.cudaEventInterprocess
-
-#: Default event record flag
-cudaEventRecordDefault = cyruntime.cudaEventRecordDefault
-
-#: Event is captured in the graph as an external event node when performing
-#: stream capture
-cudaEventRecordExternal = cyruntime.cudaEventRecordExternal
-
-#: Default event wait flag
-cudaEventWaitDefault = cyruntime.cudaEventWaitDefault
-
-#: Event is captured in the graph as an external event node when performing
-#: stream capture
-cudaEventWaitExternal = cyruntime.cudaEventWaitExternal
-
-#: Device flag - Automatic scheduling
-cudaDeviceScheduleAuto = cyruntime.cudaDeviceScheduleAuto
-
-#: Device flag - Spin default scheduling
-cudaDeviceScheduleSpin = cyruntime.cudaDeviceScheduleSpin
-
-#: Device flag - Yield default scheduling
-cudaDeviceScheduleYield = cyruntime.cudaDeviceScheduleYield
-
-#: Device flag - Use blocking synchronization
-cudaDeviceScheduleBlockingSync = cyruntime.cudaDeviceScheduleBlockingSync
-
-#: Device flag - Use blocking synchronization [Deprecated]
-cudaDeviceBlockingSync = cyruntime.cudaDeviceBlockingSync
-
-#: Device schedule flags mask
-cudaDeviceScheduleMask = cyruntime.cudaDeviceScheduleMask
-
-#: Device flag - Support mapped pinned allocations
-cudaDeviceMapHost = cyruntime.cudaDeviceMapHost
-
-#: Device flag - Keep local memory allocation after launch
-cudaDeviceLmemResizeToMax = cyruntime.cudaDeviceLmemResizeToMax
-
-#: Device flag - Ensure synchronous memory operations on this context will
-#: synchronize
-cudaDeviceSyncMemops = cyruntime.cudaDeviceSyncMemops
-
-#: Device flags mask
-cudaDeviceMask = cyruntime.cudaDeviceMask
-
-#: Default CUDA array allocation flag
-cudaArrayDefault = cyruntime.cudaArrayDefault
-
-#: Must be set in cudaMalloc3DArray to create a layered CUDA array
-cudaArrayLayered = cyruntime.cudaArrayLayered
-
-#: Must be set in cudaMallocArray or cudaMalloc3DArray in order to bind
-#: surfaces to the CUDA array
-cudaArraySurfaceLoadStore = cyruntime.cudaArraySurfaceLoadStore
-
-#: Must be set in cudaMalloc3DArray to create a cubemap CUDA array
-cudaArrayCubemap = cyruntime.cudaArrayCubemap
-
-#: Must be set in cudaMallocArray or cudaMalloc3DArray in order to perform
-#: texture gather operations on the CUDA array
-cudaArrayTextureGather = cyruntime.cudaArrayTextureGather
-
-#: Must be set in cudaExternalMemoryGetMappedMipmappedArray if the
-#: mipmapped array is used as a color target in a graphics API
-cudaArrayColorAttachment = cyruntime.cudaArrayColorAttachment
-
-#: Must be set in cudaMallocArray, cudaMalloc3DArray or
-#: cudaMallocMipmappedArray in order to create a sparse CUDA array or CUDA
-#: mipmapped array
-cudaArraySparse = cyruntime.cudaArraySparse
-
-#: Must be set in cudaMallocArray, cudaMalloc3DArray or
-#: cudaMallocMipmappedArray in order to create a deferred mapping CUDA
-#: array or CUDA mipmapped array
-cudaArrayDeferredMapping = cyruntime.cudaArrayDeferredMapping
-
-#: Automatically enable peer access between remote devices as needed
-cudaIpcMemLazyEnablePeerAccess = cyruntime.cudaIpcMemLazyEnablePeerAccess
-
-#: Memory can be accessed by any stream on any device
-cudaMemAttachGlobal = cyruntime.cudaMemAttachGlobal
-
-#: Memory cannot be accessed by any stream on any device
-cudaMemAttachHost = cyruntime.cudaMemAttachHost
-
-#: Memory can only be accessed by a single stream on the associated device
-cudaMemAttachSingle = cyruntime.cudaMemAttachSingle
-
-#: Default behavior
-cudaOccupancyDefault = cyruntime.cudaOccupancyDefault
-
-#: Assume global caching is enabled and cannot be automatically turned off
-cudaOccupancyDisableCachingOverride = cyruntime.cudaOccupancyDisableCachingOverride
-
-#: Device id that represents the CPU
-cudaCpuDeviceId = cyruntime.cudaCpuDeviceId
-
-#: Device id that represents an invalid device
-cudaInvalidDeviceId = cyruntime.cudaInvalidDeviceId
-
-#: Tell the CUDA runtime that DeviceFlags is being set in cudaInitDevice
-#: call
-cudaInitDeviceFlagsAreValid = cyruntime.cudaInitDeviceFlagsAreValid
-
-#: If set, each kernel launched as part of
-#: :py:obj:`~.cudaLaunchCooperativeKernelMultiDevice` only waits for prior
-#: work in the stream corresponding to that GPU to complete before the
-#: kernel begins execution.
-cudaCooperativeLaunchMultiDeviceNoPreSync = cyruntime.cudaCooperativeLaunchMultiDeviceNoPreSync
-
-#: If set, any subsequent work pushed in a stream that participated in a
-#: call to :py:obj:`~.cudaLaunchCooperativeKernelMultiDevice` will only
-#: wait for the kernel launched on the GPU corresponding to that stream to
-#: complete before it begins execution.
-cudaCooperativeLaunchMultiDeviceNoPostSync = cyruntime.cudaCooperativeLaunchMultiDeviceNoPostSync
-
-#: Indicates that the layered sparse CUDA array or CUDA mipmapped array has
-#: a single mip tail region for all layers
-cudaArraySparsePropertiesSingleMipTail = cyruntime.cudaArraySparsePropertiesSingleMipTail
-
-#: CUDA IPC Handle Size
-CUDA_IPC_HANDLE_SIZE = cyruntime.CUDA_IPC_HANDLE_SIZE
-
-#: Indicates that the external memory object is a dedicated resource
-cudaExternalMemoryDedicated = cyruntime.cudaExternalMemoryDedicated
-
-#: When the /p flags parameter of
-#: :py:obj:`~.cudaExternalSemaphoreSignalParams` contains this flag, it
-#: indicates that signaling an external semaphore object should skip
-#: performing appropriate memory synchronization operations over all the
-#: external memory objects that are imported as
-#: :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, which otherwise are
-#: performed by default to ensure data coherency with other importers of
-#: the same NvSciBuf memory objects.
-cudaExternalSemaphoreSignalSkipNvSciBufMemSync = cyruntime.cudaExternalSemaphoreSignalSkipNvSciBufMemSync
-
-#: When the /p flags parameter of
-#: :py:obj:`~.cudaExternalSemaphoreWaitParams` contains this flag, it
-#: indicates that waiting an external semaphore object should skip
-#: performing appropriate memory synchronization operations over all the
-#: external memory objects that are imported as
-#: :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, which otherwise are
-#: performed by default to ensure data coherency with other importers of
-#: the same NvSciBuf memory objects.
-cudaExternalSemaphoreWaitSkipNvSciBufMemSync = cyruntime.cudaExternalSemaphoreWaitSkipNvSciBufMemSync
-
-#: When /p flags of :py:obj:`~.cudaDeviceGetNvSciSyncAttributes` is set to
-#: this, it indicates that application need signaler specific NvSciSyncAttr
-#: to be filled by :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`.
-cudaNvSciSyncAttrSignal = cyruntime.cudaNvSciSyncAttrSignal
-
-#: When /p flags of :py:obj:`~.cudaDeviceGetNvSciSyncAttributes` is set to
-#: this, it indicates that application need waiter specific NvSciSyncAttr
-#: to be filled by :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`.
-cudaNvSciSyncAttrWait = cyruntime.cudaNvSciSyncAttrWait
-
-#: This port activates when the kernel has finished executing.
-cudaGraphKernelNodePortDefault = cyruntime.cudaGraphKernelNodePortDefault
-
-#: This port activates when all blocks of the kernel have performed
-#: cudaTriggerProgrammaticLaunchCompletion() or have terminated. It must be
-#: used with edge type :py:obj:`~.cudaGraphDependencyTypeProgrammatic`. See
-#: also :py:obj:`~.cudaLaunchAttributeProgrammaticEvent`.
-cudaGraphKernelNodePortProgrammatic = cyruntime.cudaGraphKernelNodePortProgrammatic
-
-#: This port activates when all blocks of the kernel have begun execution.
-#: See also :py:obj:`~.cudaLaunchAttributeLaunchCompletionEvent`.
-cudaGraphKernelNodePortLaunchCompletion = cyruntime.cudaGraphKernelNodePortLaunchCompletion
-
-cudaStreamAttributeAccessPolicyWindow = cyruntime.cudaStreamAttributeAccessPolicyWindow
-
-cudaStreamAttributeSynchronizationPolicy = cyruntime.cudaStreamAttributeSynchronizationPolicy
-
-cudaStreamAttributeMemSyncDomainMap = cyruntime.cudaStreamAttributeMemSyncDomainMap
-
-cudaStreamAttributeMemSyncDomain = cyruntime.cudaStreamAttributeMemSyncDomain
-
-cudaStreamAttributePriority = cyruntime.cudaStreamAttributePriority
-
-cudaKernelNodeAttributeAccessPolicyWindow = cyruntime.cudaKernelNodeAttributeAccessPolicyWindow
-
-cudaKernelNodeAttributeCooperative = cyruntime.cudaKernelNodeAttributeCooperative
-
-cudaKernelNodeAttributePriority = cyruntime.cudaKernelNodeAttributePriority
-
-cudaKernelNodeAttributeClusterDimension = cyruntime.cudaKernelNodeAttributeClusterDimension
-
-cudaKernelNodeAttributeClusterSchedulingPolicyPreference = cyruntime.cudaKernelNodeAttributeClusterSchedulingPolicyPreference
-
-cudaKernelNodeAttributeMemSyncDomainMap = cyruntime.cudaKernelNodeAttributeMemSyncDomainMap
-
-cudaKernelNodeAttributeMemSyncDomain = cyruntime.cudaKernelNodeAttributeMemSyncDomain
-
-cudaKernelNodeAttributePreferredSharedMemoryCarveout = cyruntime.cudaKernelNodeAttributePreferredSharedMemoryCarveout
-
-cudaKernelNodeAttributeDeviceUpdatableKernelNode = cyruntime.cudaKernelNodeAttributeDeviceUpdatableKernelNode
-
-cudaSurfaceType1D = cyruntime.cudaSurfaceType1D
-
-cudaSurfaceType2D = cyruntime.cudaSurfaceType2D
-
-cudaSurfaceType3D = cyruntime.cudaSurfaceType3D
-
-cudaSurfaceTypeCubemap = cyruntime.cudaSurfaceTypeCubemap
-
-cudaSurfaceType1DLayered = cyruntime.cudaSurfaceType1DLayered
-
-cudaSurfaceType2DLayered = cyruntime.cudaSurfaceType2DLayered
-
-cudaSurfaceTypeCubemapLayered = cyruntime.cudaSurfaceTypeCubemapLayered
-
-cudaTextureType1D = cyruntime.cudaTextureType1D
-
-cudaTextureType2D = cyruntime.cudaTextureType2D
-
-cudaTextureType3D = cyruntime.cudaTextureType3D
-
-cudaTextureTypeCubemap = cyruntime.cudaTextureTypeCubemap
-
-cudaTextureType1DLayered = cyruntime.cudaTextureType1DLayered
-
-cudaTextureType2DLayered = cyruntime.cudaTextureType2DLayered
-
-cudaTextureTypeCubemapLayered = cyruntime.cudaTextureTypeCubemapLayered
-
-#: CUDA Runtime API Version
-CUDART_VERSION = cyruntime.CUDART_VERSION
-
-__CUDART_API_VERSION = cyruntime.__CUDART_API_VERSION
-
-#: Maximum number of planes per frame
-CUDA_EGL_MAX_PLANES = cyruntime.CUDA_EGL_MAX_PLANES
-
-{{if 'cudaError' in found_types}}
-
-class cudaError_t(IntEnum):
-    """
-    impl_private CUDA error types
-    """
-    {{if 'cudaSuccess' in found_values}}
-
-    #: The API call returned with no errors. In the case of query calls,
-    #: this also means that the operation being queried is complete (see
-    #: :py:obj:`~.cudaEventQuery()` and :py:obj:`~.cudaStreamQuery()`).
-    cudaSuccess = cyruntime.cudaError.cudaSuccess{{endif}}
-    {{if 'cudaErrorInvalidValue' in found_values}}
-
-    #: This indicates that one or more of the parameters passed to the API
-    #: call is not within an acceptable range of values.
-    cudaErrorInvalidValue = cyruntime.cudaError.cudaErrorInvalidValue{{endif}}
-    {{if 'cudaErrorMemoryAllocation' in found_values}}
-
-    #: The API call failed because it was unable to allocate enough memory
-    #: or other resources to perform the requested operation.
-    cudaErrorMemoryAllocation = cyruntime.cudaError.cudaErrorMemoryAllocation{{endif}}
-    {{if 'cudaErrorInitializationError' in found_values}}
-
-    #: The API call failed because the CUDA driver and runtime could not be
-    #: initialized.
-    cudaErrorInitializationError = cyruntime.cudaError.cudaErrorInitializationError{{endif}}
-    {{if 'cudaErrorCudartUnloading' in found_values}}
-
-    #: This indicates that a CUDA Runtime API call cannot be executed
-    #: because it is being called during process shut down, at a point in
-    #: time after CUDA driver has been unloaded.
-    cudaErrorCudartUnloading = cyruntime.cudaError.cudaErrorCudartUnloading{{endif}}
-    {{if 'cudaErrorProfilerDisabled' in found_values}}
-
-    #: This indicates profiler is not initialized for this run. This can
-    #: happen when the application is running with external profiling tools
-    #: like visual profiler.
-    cudaErrorProfilerDisabled = cyruntime.cudaError.cudaErrorProfilerDisabled{{endif}}
-    {{if 'cudaErrorProfilerNotInitialized' in found_values}}
-
-    #: [Deprecated]
-    cudaErrorProfilerNotInitialized = cyruntime.cudaError.cudaErrorProfilerNotInitialized{{endif}}
-    {{if 'cudaErrorProfilerAlreadyStarted' in found_values}}
-
-    #: [Deprecated]
-    cudaErrorProfilerAlreadyStarted = cyruntime.cudaError.cudaErrorProfilerAlreadyStarted{{endif}}
-    {{if 'cudaErrorProfilerAlreadyStopped' in found_values}}
-
-    #: [Deprecated]
-    cudaErrorProfilerAlreadyStopped = cyruntime.cudaError.cudaErrorProfilerAlreadyStopped{{endif}}
-    {{if 'cudaErrorInvalidConfiguration' in found_values}}
-
-    #: This indicates that a kernel launch is requesting resources that can
-    #: never be satisfied by the current device. Requesting more shared
-    #: memory per block than the device supports will trigger this error,
-    #: as will requesting too many threads or blocks. See
-    #: :py:obj:`~.cudaDeviceProp` for more device limitations.
-    cudaErrorInvalidConfiguration = cyruntime.cudaError.cudaErrorInvalidConfiguration{{endif}}
-    {{if 'cudaErrorInvalidPitchValue' in found_values}}
-
-    #: This indicates that one or more of the pitch-related parameters
-    #: passed to the API call is not within the acceptable range for pitch.
-    cudaErrorInvalidPitchValue = cyruntime.cudaError.cudaErrorInvalidPitchValue{{endif}}
-    {{if 'cudaErrorInvalidSymbol' in found_values}}
-
-    #: This indicates that the symbol name/identifier passed to the API
-    #: call is not a valid name or identifier.
-    cudaErrorInvalidSymbol = cyruntime.cudaError.cudaErrorInvalidSymbol{{endif}}
-    {{if 'cudaErrorInvalidHostPointer' in found_values}}
-
-    #: This indicates that at least one host pointer passed to the API call
-    #: is not a valid host pointer. [Deprecated]
-    cudaErrorInvalidHostPointer = cyruntime.cudaError.cudaErrorInvalidHostPointer{{endif}}
-    {{if 'cudaErrorInvalidDevicePointer' in found_values}}
-
-    #: This indicates that at least one device pointer passed to the API
-    #: call is not a valid device pointer. [Deprecated]
-    cudaErrorInvalidDevicePointer = cyruntime.cudaError.cudaErrorInvalidDevicePointer{{endif}}
-    {{if 'cudaErrorInvalidTexture' in found_values}}
-
-    #: This indicates that the texture passed to the API call is not a
-    #: valid texture.
-    cudaErrorInvalidTexture = cyruntime.cudaError.cudaErrorInvalidTexture{{endif}}
-    {{if 'cudaErrorInvalidTextureBinding' in found_values}}
-
-    #: This indicates that the texture binding is not valid. This occurs if
-    #: you call :py:obj:`~.cudaGetTextureAlignmentOffset()` with an unbound
-    #: texture.
-    cudaErrorInvalidTextureBinding = cyruntime.cudaError.cudaErrorInvalidTextureBinding{{endif}}
-    {{if 'cudaErrorInvalidChannelDescriptor' in found_values}}
-
-    #: This indicates that the channel descriptor passed to the API call is
-    #: not valid. This occurs if the format is not one of the formats
-    #: specified by :py:obj:`~.cudaChannelFormatKind`, or if one of the
-    #: dimensions is invalid.
-    cudaErrorInvalidChannelDescriptor = cyruntime.cudaError.cudaErrorInvalidChannelDescriptor{{endif}}
-    {{if 'cudaErrorInvalidMemcpyDirection' in found_values}}
-
-    #: This indicates that the direction of the memcpy passed to the API
-    #: call is not one of the types specified by
-    #: :py:obj:`~.cudaMemcpyKind`.
-    cudaErrorInvalidMemcpyDirection = cyruntime.cudaError.cudaErrorInvalidMemcpyDirection{{endif}}
-    {{if 'cudaErrorAddressOfConstant' in found_values}}
-
-    #: This indicated that the user has taken the address of a constant
-    #: variable, which was forbidden up until the CUDA 3.1 release.
-    #: [Deprecated]
-    cudaErrorAddressOfConstant = cyruntime.cudaError.cudaErrorAddressOfConstant{{endif}}
-    {{if 'cudaErrorTextureFetchFailed' in found_values}}
-
-    #: This indicated that a texture fetch was not able to be performed.
-    #: This was previously used for device emulation of texture operations.
-    #: [Deprecated]
-    cudaErrorTextureFetchFailed = cyruntime.cudaError.cudaErrorTextureFetchFailed{{endif}}
-    {{if 'cudaErrorTextureNotBound' in found_values}}
-
-    #: This indicated that a texture was not bound for access. This was
-    #: previously used for device emulation of texture operations.
-    #: [Deprecated]
-    cudaErrorTextureNotBound = cyruntime.cudaError.cudaErrorTextureNotBound{{endif}}
-    {{if 'cudaErrorSynchronizationError' in found_values}}
-
-    #: This indicated that a synchronization operation had failed. This was
-    #: previously used for some device emulation functions. [Deprecated]
-    cudaErrorSynchronizationError = cyruntime.cudaError.cudaErrorSynchronizationError{{endif}}
-    {{if 'cudaErrorInvalidFilterSetting' in found_values}}
-
-    #: This indicates that a non-float texture was being accessed with
-    #: linear filtering. This is not supported by CUDA.
-    cudaErrorInvalidFilterSetting = cyruntime.cudaError.cudaErrorInvalidFilterSetting{{endif}}
-    {{if 'cudaErrorInvalidNormSetting' in found_values}}
-
-    #: This indicates that an attempt was made to read a non-float texture
-    #: as a normalized float. This is not supported by CUDA.
-    cudaErrorInvalidNormSetting = cyruntime.cudaError.cudaErrorInvalidNormSetting{{endif}}
-    {{if 'cudaErrorMixedDeviceExecution' in found_values}}
-
-    #: Mixing of device and device emulation code was not allowed.
-    #: [Deprecated]
-    cudaErrorMixedDeviceExecution = cyruntime.cudaError.cudaErrorMixedDeviceExecution{{endif}}
-    {{if 'cudaErrorNotYetImplemented' in found_values}}
-
-    #: This indicates that the API call is not yet implemented. Production
-    #: releases of CUDA will never return this error. [Deprecated]
-    cudaErrorNotYetImplemented = cyruntime.cudaError.cudaErrorNotYetImplemented{{endif}}
-    {{if 'cudaErrorMemoryValueTooLarge' in found_values}}
-
-    #: This indicated that an emulated device pointer exceeded the 32-bit
-    #: address range. [Deprecated]
-    cudaErrorMemoryValueTooLarge = cyruntime.cudaError.cudaErrorMemoryValueTooLarge{{endif}}
-    {{if 'cudaErrorStubLibrary' in found_values}}
-
-    #: This indicates that the CUDA driver that the application has loaded
-    #: is a stub library. Applications that run with the stub rather than a
-    #: real driver loaded will result in CUDA API returning this error.
-    cudaErrorStubLibrary = cyruntime.cudaError.cudaErrorStubLibrary{{endif}}
-    {{if 'cudaErrorInsufficientDriver' in found_values}}
-
-    #: This indicates that the installed NVIDIA CUDA driver is older than
-    #: the CUDA runtime library. This is not a supported configuration.
-    #: Users should install an updated NVIDIA display driver to allow the
-    #: application to run.
-    cudaErrorInsufficientDriver = cyruntime.cudaError.cudaErrorInsufficientDriver{{endif}}
-    {{if 'cudaErrorCallRequiresNewerDriver' in found_values}}
-
-    #: This indicates that the API call requires a newer CUDA driver than
-    #: the one currently installed. Users should install an updated NVIDIA
-    #: CUDA driver to allow the API call to succeed.
-    cudaErrorCallRequiresNewerDriver = cyruntime.cudaError.cudaErrorCallRequiresNewerDriver{{endif}}
-    {{if 'cudaErrorInvalidSurface' in found_values}}
-
-    #: This indicates that the surface passed to the API call is not a
-    #: valid surface.
-    cudaErrorInvalidSurface = cyruntime.cudaError.cudaErrorInvalidSurface{{endif}}
-    {{if 'cudaErrorDuplicateVariableName' in found_values}}
-
-    #: This indicates that multiple global or constant variables (across
-    #: separate CUDA source files in the application) share the same string
-    #: name.
-    cudaErrorDuplicateVariableName = cyruntime.cudaError.cudaErrorDuplicateVariableName{{endif}}
-    {{if 'cudaErrorDuplicateTextureName' in found_values}}
-
-    #: This indicates that multiple textures (across separate CUDA source
-    #: files in the application) share the same string name.
-    cudaErrorDuplicateTextureName = cyruntime.cudaError.cudaErrorDuplicateTextureName{{endif}}
-    {{if 'cudaErrorDuplicateSurfaceName' in found_values}}
-
-    #: This indicates that multiple surfaces (across separate CUDA source
-    #: files in the application) share the same string name.
-    cudaErrorDuplicateSurfaceName = cyruntime.cudaError.cudaErrorDuplicateSurfaceName{{endif}}
-    {{if 'cudaErrorDevicesUnavailable' in found_values}}
-
-    #: This indicates that all CUDA devices are busy or unavailable at the
-    #: current time. Devices are often busy/unavailable due to use of
-    #: :py:obj:`~.cudaComputeModeProhibited`,
-    #: :py:obj:`~.cudaComputeModeExclusiveProcess`, or when long running
-    #: CUDA kernels have filled up the GPU and are blocking new work from
-    #: starting. They can also be unavailable due to memory constraints on
-    #: a device that already has active CUDA work being performed.
-    cudaErrorDevicesUnavailable = cyruntime.cudaError.cudaErrorDevicesUnavailable{{endif}}
-    {{if 'cudaErrorIncompatibleDriverContext' in found_values}}
-
-    #: This indicates that the current context is not compatible with this
-    #: the CUDA Runtime. This can only occur if you are using CUDA
-    #: Runtime/Driver interoperability and have created an existing Driver
-    #: context using the driver API. The Driver context may be incompatible
-    #: either because the Driver context was created using an older version
-    #: of the API, because the Runtime API call expects a primary driver
-    #: context and the Driver context is not primary, or because the Driver
-    #: context has been destroyed. Please see :py:obj:`~.Interactions`with
-    #: the CUDA Driver API" for more information.
-    cudaErrorIncompatibleDriverContext = cyruntime.cudaError.cudaErrorIncompatibleDriverContext{{endif}}
-    {{if 'cudaErrorMissingConfiguration' in found_values}}
-
-    #: The device function being invoked (usually via
-    #: :py:obj:`~.cudaLaunchKernel()`) was not previously configured via
-    #: the :py:obj:`~.cudaConfigureCall()` function.
-    cudaErrorMissingConfiguration = cyruntime.cudaError.cudaErrorMissingConfiguration{{endif}}
-    {{if 'cudaErrorPriorLaunchFailure' in found_values}}
-
-    #: This indicated that a previous kernel launch failed. This was
-    #: previously used for device emulation of kernel launches.
-    #: [Deprecated]
-    cudaErrorPriorLaunchFailure = cyruntime.cudaError.cudaErrorPriorLaunchFailure{{endif}}
-    {{if 'cudaErrorLaunchMaxDepthExceeded' in found_values}}
-
-    #: This error indicates that a device runtime grid launch did not occur
-    #: because the depth of the child grid would exceed the maximum
-    #: supported number of nested grid launches.
-    cudaErrorLaunchMaxDepthExceeded = cyruntime.cudaError.cudaErrorLaunchMaxDepthExceeded{{endif}}
-    {{if 'cudaErrorLaunchFileScopedTex' in found_values}}
-
-    #: This error indicates that a grid launch did not occur because the
-    #: kernel uses file-scoped textures which are unsupported by the device
-    #: runtime. Kernels launched via the device runtime only support
-    #: textures created with the Texture Object API's.
-    cudaErrorLaunchFileScopedTex = cyruntime.cudaError.cudaErrorLaunchFileScopedTex{{endif}}
-    {{if 'cudaErrorLaunchFileScopedSurf' in found_values}}
-
-    #: This error indicates that a grid launch did not occur because the
-    #: kernel uses file-scoped surfaces which are unsupported by the device
-    #: runtime. Kernels launched via the device runtime only support
-    #: surfaces created with the Surface Object API's.
-    cudaErrorLaunchFileScopedSurf = cyruntime.cudaError.cudaErrorLaunchFileScopedSurf{{endif}}
-    {{if 'cudaErrorSyncDepthExceeded' in found_values}}
-
-    #: This error indicates that a call to
-    #: :py:obj:`~.cudaDeviceSynchronize` made from the device runtime
-    #: failed because the call was made at grid depth greater than than
-    #: either the default (2 levels of grids) or user specified device
-    #: limit :py:obj:`~.cudaLimitDevRuntimeSyncDepth`. To be able to
-    #: synchronize on launched grids at a greater depth successfully, the
-    #: maximum nested depth at which :py:obj:`~.cudaDeviceSynchronize` will
-    #: be called must be specified with the
-    #: :py:obj:`~.cudaLimitDevRuntimeSyncDepth` limit to the
-    #: :py:obj:`~.cudaDeviceSetLimit` api before the host-side launch of a
-    #: kernel using the device runtime. Keep in mind that additional levels
-    #: of sync depth require the runtime to reserve large amounts of device
-    #: memory that cannot be used for user allocations. Note that
-    #: :py:obj:`~.cudaDeviceSynchronize` made from device runtime is only
-    #: supported on devices of compute capability < 9.0.
-    cudaErrorSyncDepthExceeded = cyruntime.cudaError.cudaErrorSyncDepthExceeded{{endif}}
-    {{if 'cudaErrorLaunchPendingCountExceeded' in found_values}}
-
-    #: This error indicates that a device runtime grid launch failed
-    #: because the launch would exceed the limit
-    #: :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount`. For this launch
-    #: to proceed successfully, :py:obj:`~.cudaDeviceSetLimit` must be
-    #: called to set the :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount`
-    #: to be higher than the upper bound of outstanding launches that can
-    #: be issued to the device runtime. Keep in mind that raising the limit
-    #: of pending device runtime launches will require the runtime to
-    #: reserve device memory that cannot be used for user allocations.
-    cudaErrorLaunchPendingCountExceeded = cyruntime.cudaError.cudaErrorLaunchPendingCountExceeded{{endif}}
-    {{if 'cudaErrorInvalidDeviceFunction' in found_values}}
-
-    #: The requested device function does not exist or is not compiled for
-    #: the proper device architecture.
-    cudaErrorInvalidDeviceFunction = cyruntime.cudaError.cudaErrorInvalidDeviceFunction{{endif}}
-    {{if 'cudaErrorNoDevice' in found_values}}
-
-    #: This indicates that no CUDA-capable devices were detected by the
-    #: installed CUDA driver.
-    cudaErrorNoDevice = cyruntime.cudaError.cudaErrorNoDevice{{endif}}
-    {{if 'cudaErrorInvalidDevice' in found_values}}
-
-    #: This indicates that the device ordinal supplied by the user does not
-    #: correspond to a valid CUDA device or that the action requested is
-    #: invalid for the specified device.
-    cudaErrorInvalidDevice = cyruntime.cudaError.cudaErrorInvalidDevice{{endif}}
-    {{if 'cudaErrorDeviceNotLicensed' in found_values}}
-
-    #: This indicates that the device doesn't have a valid Grid License.
-    cudaErrorDeviceNotLicensed = cyruntime.cudaError.cudaErrorDeviceNotLicensed{{endif}}
-    {{if 'cudaErrorSoftwareValidityNotEstablished' in found_values}}
-
-    #: By default, the CUDA runtime may perform a minimal set of self-
-    #: tests, as well as CUDA driver tests, to establish the validity of
-    #: both. Introduced in CUDA 11.2, this error return indicates that at
-    #: least one of these tests has failed and the validity of either the
-    #: runtime or the driver could not be established.
-    cudaErrorSoftwareValidityNotEstablished = cyruntime.cudaError.cudaErrorSoftwareValidityNotEstablished{{endif}}
-    {{if 'cudaErrorStartupFailure' in found_values}}
-
-    #: This indicates an internal startup failure in the CUDA runtime.
-    cudaErrorStartupFailure = cyruntime.cudaError.cudaErrorStartupFailure{{endif}}
-    {{if 'cudaErrorInvalidKernelImage' in found_values}}
-
-    #: This indicates that the device kernel image is invalid.
-    cudaErrorInvalidKernelImage = cyruntime.cudaError.cudaErrorInvalidKernelImage{{endif}}
-    {{if 'cudaErrorDeviceUninitialized' in found_values}}
-
-    #: This most frequently indicates that there is no context bound to the
-    #: current thread. This can also be returned if the context passed to
-    #: an API call is not a valid handle (such as a context that has had
-    #: :py:obj:`~.cuCtxDestroy()` invoked on it). This can also be returned
-    #: if a user mixes different API versions (i.e. 3010 context with 3020
-    #: API calls). See :py:obj:`~.cuCtxGetApiVersion()` for more details.
-    cudaErrorDeviceUninitialized = cyruntime.cudaError.cudaErrorDeviceUninitialized{{endif}}
-    {{if 'cudaErrorMapBufferObjectFailed' in found_values}}
-
-    #: This indicates that the buffer object could not be mapped.
-    cudaErrorMapBufferObjectFailed = cyruntime.cudaError.cudaErrorMapBufferObjectFailed{{endif}}
-    {{if 'cudaErrorUnmapBufferObjectFailed' in found_values}}
-
-    #: This indicates that the buffer object could not be unmapped.
-    cudaErrorUnmapBufferObjectFailed = cyruntime.cudaError.cudaErrorUnmapBufferObjectFailed{{endif}}
-    {{if 'cudaErrorArrayIsMapped' in found_values}}
-
-    #: This indicates that the specified array is currently mapped and thus
-    #: cannot be destroyed.
-    cudaErrorArrayIsMapped = cyruntime.cudaError.cudaErrorArrayIsMapped{{endif}}
-    {{if 'cudaErrorAlreadyMapped' in found_values}}
-
-    #: This indicates that the resource is already mapped.
-    cudaErrorAlreadyMapped = cyruntime.cudaError.cudaErrorAlreadyMapped{{endif}}
-    {{if 'cudaErrorNoKernelImageForDevice' in found_values}}
-
-    #: This indicates that there is no kernel image available that is
-    #: suitable for the device. This can occur when a user specifies code
-    #: generation options for a particular CUDA source file that do not
-    #: include the corresponding device configuration.
-    cudaErrorNoKernelImageForDevice = cyruntime.cudaError.cudaErrorNoKernelImageForDevice{{endif}}
-    {{if 'cudaErrorAlreadyAcquired' in found_values}}
-
-    #: This indicates that a resource has already been acquired.
-    cudaErrorAlreadyAcquired = cyruntime.cudaError.cudaErrorAlreadyAcquired{{endif}}
-    {{if 'cudaErrorNotMapped' in found_values}}
-
-    #: This indicates that a resource is not mapped.
-    cudaErrorNotMapped = cyruntime.cudaError.cudaErrorNotMapped{{endif}}
-    {{if 'cudaErrorNotMappedAsArray' in found_values}}
-
-    #: This indicates that a mapped resource is not available for access as
-    #: an array.
-    cudaErrorNotMappedAsArray = cyruntime.cudaError.cudaErrorNotMappedAsArray{{endif}}
-    {{if 'cudaErrorNotMappedAsPointer' in found_values}}
-
-    #: This indicates that a mapped resource is not available for access as
-    #: a pointer.
-    cudaErrorNotMappedAsPointer = cyruntime.cudaError.cudaErrorNotMappedAsPointer{{endif}}
-    {{if 'cudaErrorECCUncorrectable' in found_values}}
-
-    #: This indicates that an uncorrectable ECC error was detected during
-    #: execution.
-    cudaErrorECCUncorrectable = cyruntime.cudaError.cudaErrorECCUncorrectable{{endif}}
-    {{if 'cudaErrorUnsupportedLimit' in found_values}}
-
-    #: This indicates that the :py:obj:`~.cudaLimit` passed to the API call
-    #: is not supported by the active device.
-    cudaErrorUnsupportedLimit = cyruntime.cudaError.cudaErrorUnsupportedLimit{{endif}}
-    {{if 'cudaErrorDeviceAlreadyInUse' in found_values}}
-
-    #: This indicates that a call tried to access an exclusive-thread
-    #: device that is already in use by a different thread.
-    cudaErrorDeviceAlreadyInUse = cyruntime.cudaError.cudaErrorDeviceAlreadyInUse{{endif}}
-    {{if 'cudaErrorPeerAccessUnsupported' in found_values}}
-
-    #: This error indicates that P2P access is not supported across the
-    #: given devices.
-    cudaErrorPeerAccessUnsupported = cyruntime.cudaError.cudaErrorPeerAccessUnsupported{{endif}}
-    {{if 'cudaErrorInvalidPtx' in found_values}}
-
-    #: A PTX compilation failed. The runtime may fall back to compiling PTX
-    #: if an application does not contain a suitable binary for the current
-    #: device.
-    cudaErrorInvalidPtx = cyruntime.cudaError.cudaErrorInvalidPtx{{endif}}
-    {{if 'cudaErrorInvalidGraphicsContext' in found_values}}
-
-    #: This indicates an error with the OpenGL or DirectX context.
-    cudaErrorInvalidGraphicsContext = cyruntime.cudaError.cudaErrorInvalidGraphicsContext{{endif}}
-    {{if 'cudaErrorNvlinkUncorrectable' in found_values}}
-
-    #: This indicates that an uncorrectable NVLink error was detected
-    #: during the execution.
-    cudaErrorNvlinkUncorrectable = cyruntime.cudaError.cudaErrorNvlinkUncorrectable{{endif}}
-    {{if 'cudaErrorJitCompilerNotFound' in found_values}}
-
-    #: This indicates that the PTX JIT compiler library was not found. The
-    #: JIT Compiler library is used for PTX compilation. The runtime may
-    #: fall back to compiling PTX if an application does not contain a
-    #: suitable binary for the current device.
-    cudaErrorJitCompilerNotFound = cyruntime.cudaError.cudaErrorJitCompilerNotFound{{endif}}
-    {{if 'cudaErrorUnsupportedPtxVersion' in found_values}}
-
-    #: This indicates that the provided PTX was compiled with an
-    #: unsupported toolchain. The most common reason for this, is the PTX
-    #: was generated by a compiler newer than what is supported by the CUDA
-    #: driver and PTX JIT compiler.
-    cudaErrorUnsupportedPtxVersion = cyruntime.cudaError.cudaErrorUnsupportedPtxVersion{{endif}}
-    {{if 'cudaErrorJitCompilationDisabled' in found_values}}
-
-    #: This indicates that the JIT compilation was disabled. The JIT
-    #: compilation compiles PTX. The runtime may fall back to compiling PTX
-    #: if an application does not contain a suitable binary for the current
-    #: device.
-    cudaErrorJitCompilationDisabled = cyruntime.cudaError.cudaErrorJitCompilationDisabled{{endif}}
-    {{if 'cudaErrorUnsupportedExecAffinity' in found_values}}
-
-    #: This indicates that the provided execution affinity is not supported
-    #: by the device.
-    cudaErrorUnsupportedExecAffinity = cyruntime.cudaError.cudaErrorUnsupportedExecAffinity{{endif}}
-    {{if 'cudaErrorUnsupportedDevSideSync' in found_values}}
-
-    #: This indicates that the code to be compiled by the PTX JIT contains
-    #: unsupported call to cudaDeviceSynchronize.
-    cudaErrorUnsupportedDevSideSync = cyruntime.cudaError.cudaErrorUnsupportedDevSideSync{{endif}}
-    {{if 'cudaErrorInvalidSource' in found_values}}
-
-    #: This indicates that the device kernel source is invalid.
-    cudaErrorInvalidSource = cyruntime.cudaError.cudaErrorInvalidSource{{endif}}
-    {{if 'cudaErrorFileNotFound' in found_values}}
-
-    #: This indicates that the file specified was not found.
-    cudaErrorFileNotFound = cyruntime.cudaError.cudaErrorFileNotFound{{endif}}
-    {{if 'cudaErrorSharedObjectSymbolNotFound' in found_values}}
-
-    #: This indicates that a link to a shared object failed to resolve.
-    cudaErrorSharedObjectSymbolNotFound = cyruntime.cudaError.cudaErrorSharedObjectSymbolNotFound{{endif}}
-    {{if 'cudaErrorSharedObjectInitFailed' in found_values}}
-
-    #: This indicates that initialization of a shared object failed.
-    cudaErrorSharedObjectInitFailed = cyruntime.cudaError.cudaErrorSharedObjectInitFailed{{endif}}
-    {{if 'cudaErrorOperatingSystem' in found_values}}
-
-    #: This error indicates that an OS call failed.
-    cudaErrorOperatingSystem = cyruntime.cudaError.cudaErrorOperatingSystem{{endif}}
-    {{if 'cudaErrorInvalidResourceHandle' in found_values}}
-
-    #: This indicates that a resource handle passed to the API call was not
-    #: valid. Resource handles are opaque types like
-    #: :py:obj:`~.cudaStream_t` and :py:obj:`~.cudaEvent_t`.
-    cudaErrorInvalidResourceHandle = cyruntime.cudaError.cudaErrorInvalidResourceHandle{{endif}}
-    {{if 'cudaErrorIllegalState' in found_values}}
-
-    #: This indicates that a resource required by the API call is not in a
-    #: valid state to perform the requested operation.
-    cudaErrorIllegalState = cyruntime.cudaError.cudaErrorIllegalState{{endif}}
-    {{if 'cudaErrorLossyQuery' in found_values}}
-
-    #: This indicates an attempt was made to introspect an object in a way
-    #: that would discard semantically important information. This is
-    #: either due to the object using funtionality newer than the API
-    #: version used to introspect it or omission of optional return
-    #: arguments.
-    cudaErrorLossyQuery = cyruntime.cudaError.cudaErrorLossyQuery{{endif}}
-    {{if 'cudaErrorSymbolNotFound' in found_values}}
-
-    #: This indicates that a named symbol was not found. Examples of
-    #: symbols are global/constant variable names, driver function names,
-    #: texture names, and surface names.
-    cudaErrorSymbolNotFound = cyruntime.cudaError.cudaErrorSymbolNotFound{{endif}}
-    {{if 'cudaErrorNotReady' in found_values}}
-
-    #: This indicates that asynchronous operations issued previously have
-    #: not completed yet. This result is not actually an error, but must be
-    #: indicated differently than :py:obj:`~.cudaSuccess` (which indicates
-    #: completion). Calls that may return this value include
-    #: :py:obj:`~.cudaEventQuery()` and :py:obj:`~.cudaStreamQuery()`.
-    cudaErrorNotReady = cyruntime.cudaError.cudaErrorNotReady{{endif}}
-    {{if 'cudaErrorIllegalAddress' in found_values}}
-
-    #: The device encountered a load or store instruction on an invalid
-    #: memory address. This leaves the process in an inconsistent state and
-    #: any further CUDA work will return the same error. To continue using
-    #: CUDA, the process must be terminated and relaunched.
-    cudaErrorIllegalAddress = cyruntime.cudaError.cudaErrorIllegalAddress{{endif}}
-    {{if 'cudaErrorLaunchOutOfResources' in found_values}}
-
-    #: This indicates that a launch did not occur because it did not have
-    #: appropriate resources. Although this error is similar to
-    #: :py:obj:`~.cudaErrorInvalidConfiguration`, this error usually
-    #: indicates that the user has attempted to pass too many arguments to
-    #: the device kernel, or the kernel launch specifies too many threads
-    #: for the kernel's register count.
-    cudaErrorLaunchOutOfResources = cyruntime.cudaError.cudaErrorLaunchOutOfResources{{endif}}
-    {{if 'cudaErrorLaunchTimeout' in found_values}}
-
-    #: This indicates that the device kernel took too long to execute. This
-    #: can only occur if timeouts are enabled - see the device property
-    #: :py:obj:`~.kernelExecTimeoutEnabled` for more information. This
-    #: leaves the process in an inconsistent state and any further CUDA
-    #: work will return the same error. To continue using CUDA, the process
-    #: must be terminated and relaunched.
-    cudaErrorLaunchTimeout = cyruntime.cudaError.cudaErrorLaunchTimeout{{endif}}
-    {{if 'cudaErrorLaunchIncompatibleTexturing' in found_values}}
-
-    #: This error indicates a kernel launch that uses an incompatible
-    #: texturing mode.
-    cudaErrorLaunchIncompatibleTexturing = cyruntime.cudaError.cudaErrorLaunchIncompatibleTexturing{{endif}}
-    {{if 'cudaErrorPeerAccessAlreadyEnabled' in found_values}}
-
-    #: This error indicates that a call to
-    #: :py:obj:`~.cudaDeviceEnablePeerAccess()` is trying to re-enable peer
-    #: addressing on from a context which has already had peer addressing
-    #: enabled.
-    cudaErrorPeerAccessAlreadyEnabled = cyruntime.cudaError.cudaErrorPeerAccessAlreadyEnabled{{endif}}
-    {{if 'cudaErrorPeerAccessNotEnabled' in found_values}}
-
-    #: This error indicates that :py:obj:`~.cudaDeviceDisablePeerAccess()`
-    #: is trying to disable peer addressing which has not been enabled yet
-    #: via :py:obj:`~.cudaDeviceEnablePeerAccess()`.
-    cudaErrorPeerAccessNotEnabled = cyruntime.cudaError.cudaErrorPeerAccessNotEnabled{{endif}}
-    {{if 'cudaErrorSetOnActiveProcess' in found_values}}
-
-    #: This indicates that the user has called
-    #: :py:obj:`~.cudaSetValidDevices()`, :py:obj:`~.cudaSetDeviceFlags()`,
-    #: :py:obj:`~.cudaD3D9SetDirect3DDevice()`,
-    #: :py:obj:`~.cudaD3D10SetDirect3DDevice`,
-    #: :py:obj:`~.cudaD3D11SetDirect3DDevice()`, or
-    #: :py:obj:`~.cudaVDPAUSetVDPAUDevice()` after initializing the CUDA
-    #: runtime by calling non-device management operations (allocating
-    #: memory and launching kernels are examples of non-device management
-    #: operations). This error can also be returned if using runtime/driver
-    #: interoperability and there is an existing :py:obj:`~.CUcontext`
-    #: active on the host thread.
-    cudaErrorSetOnActiveProcess = cyruntime.cudaError.cudaErrorSetOnActiveProcess{{endif}}
-    {{if 'cudaErrorContextIsDestroyed' in found_values}}
-
-    #: This error indicates that the context current to the calling thread
-    #: has been destroyed using :py:obj:`~.cuCtxDestroy`, or is a primary
-    #: context which has not yet been initialized.
-    cudaErrorContextIsDestroyed = cyruntime.cudaError.cudaErrorContextIsDestroyed{{endif}}
-    {{if 'cudaErrorAssert' in found_values}}
-
-    #: An assert triggered in device code during kernel execution. The
-    #: device cannot be used again. All existing allocations are invalid.
-    #: To continue using CUDA, the process must be terminated and
-    #: relaunched.
-    cudaErrorAssert = cyruntime.cudaError.cudaErrorAssert{{endif}}
-    {{if 'cudaErrorTooManyPeers' in found_values}}
-
-    #: This error indicates that the hardware resources required to enable
-    #: peer access have been exhausted for one or more of the devices
-    #: passed to :py:obj:`~.cudaEnablePeerAccess()`.
-    cudaErrorTooManyPeers = cyruntime.cudaError.cudaErrorTooManyPeers{{endif}}
-    {{if 'cudaErrorHostMemoryAlreadyRegistered' in found_values}}
-
-    #: This error indicates that the memory range passed to
-    #: :py:obj:`~.cudaHostRegister()` has already been registered.
-    cudaErrorHostMemoryAlreadyRegistered = cyruntime.cudaError.cudaErrorHostMemoryAlreadyRegistered{{endif}}
-    {{if 'cudaErrorHostMemoryNotRegistered' in found_values}}
-
-    #: This error indicates that the pointer passed to
-    #: :py:obj:`~.cudaHostUnregister()` does not correspond to any
-    #: currently registered memory region.
-    cudaErrorHostMemoryNotRegistered = cyruntime.cudaError.cudaErrorHostMemoryNotRegistered{{endif}}
-    {{if 'cudaErrorHardwareStackError' in found_values}}
-
-    #: Device encountered an error in the call stack during kernel
-    #: execution, possibly due to stack corruption or exceeding the stack
-    #: size limit. This leaves the process in an inconsistent state and any
-    #: further CUDA work will return the same error. To continue using
-    #: CUDA, the process must be terminated and relaunched.
-    cudaErrorHardwareStackError = cyruntime.cudaError.cudaErrorHardwareStackError{{endif}}
-    {{if 'cudaErrorIllegalInstruction' in found_values}}
-
-    #: The device encountered an illegal instruction during kernel
-    #: execution This leaves the process in an inconsistent state and any
-    #: further CUDA work will return the same error. To continue using
-    #: CUDA, the process must be terminated and relaunched.
-    cudaErrorIllegalInstruction = cyruntime.cudaError.cudaErrorIllegalInstruction{{endif}}
-    {{if 'cudaErrorMisalignedAddress' in found_values}}
-
-    #: The device encountered a load or store instruction on a memory
-    #: address which is not aligned. This leaves the process in an
-    #: inconsistent state and any further CUDA work will return the same
-    #: error. To continue using CUDA, the process must be terminated and
-    #: relaunched.
-    cudaErrorMisalignedAddress = cyruntime.cudaError.cudaErrorMisalignedAddress{{endif}}
-    {{if 'cudaErrorInvalidAddressSpace' in found_values}}
-
-    #: While executing a kernel, the device encountered an instruction
-    #: which can only operate on memory locations in certain address spaces
-    #: (global, shared, or local), but was supplied a memory address not
-    #: belonging to an allowed address space. This leaves the process in an
-    #: inconsistent state and any further CUDA work will return the same
-    #: error. To continue using CUDA, the process must be terminated and
-    #: relaunched.
-    cudaErrorInvalidAddressSpace = cyruntime.cudaError.cudaErrorInvalidAddressSpace{{endif}}
-    {{if 'cudaErrorInvalidPc' in found_values}}
-
-    #: The device encountered an invalid program counter. This leaves the
-    #: process in an inconsistent state and any further CUDA work will
-    #: return the same error. To continue using CUDA, the process must be
-    #: terminated and relaunched.
-    cudaErrorInvalidPc = cyruntime.cudaError.cudaErrorInvalidPc{{endif}}
-    {{if 'cudaErrorLaunchFailure' in found_values}}
-
-    #: An exception occurred on the device while executing a kernel. Common
-    #: causes include dereferencing an invalid device pointer and accessing
-    #: out of bounds shared memory. Less common cases can be system
-    #: specific - more information about these cases can be found in the
-    #: system specific user guide. This leaves the process in an
-    #: inconsistent state and any further CUDA work will return the same
-    #: error. To continue using CUDA, the process must be terminated and
-    #: relaunched.
-    cudaErrorLaunchFailure = cyruntime.cudaError.cudaErrorLaunchFailure{{endif}}
-    {{if 'cudaErrorCooperativeLaunchTooLarge' in found_values}}
-
-    #: This error indicates that the number of blocks launched per grid for
-    #: a kernel that was launched via either
-    #: :py:obj:`~.cudaLaunchCooperativeKernel` or
-    #: :py:obj:`~.cudaLaunchCooperativeKernelMultiDevice` exceeds the
-    #: maximum number of blocks as allowed by
-    #: :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor` or
-    #: :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`
-    #: times the number of multiprocessors as specified by the device
-    #: attribute :py:obj:`~.cudaDevAttrMultiProcessorCount`.
-    cudaErrorCooperativeLaunchTooLarge = cyruntime.cudaError.cudaErrorCooperativeLaunchTooLarge{{endif}}
-    {{if 'cudaErrorNotPermitted' in found_values}}
-
-    #: This error indicates the attempted operation is not permitted.
-    cudaErrorNotPermitted = cyruntime.cudaError.cudaErrorNotPermitted{{endif}}
-    {{if 'cudaErrorNotSupported' in found_values}}
-
-    #: This error indicates the attempted operation is not supported on the
-    #: current system or device.
-    cudaErrorNotSupported = cyruntime.cudaError.cudaErrorNotSupported{{endif}}
-    {{if 'cudaErrorSystemNotReady' in found_values}}
-
-    #: This error indicates that the system is not yet ready to start any
-    #: CUDA work. To continue using CUDA, verify the system configuration
-    #: is in a valid state and all required driver daemons are actively
-    #: running. More information about this error can be found in the
-    #: system specific user guide.
-    cudaErrorSystemNotReady = cyruntime.cudaError.cudaErrorSystemNotReady{{endif}}
-    {{if 'cudaErrorSystemDriverMismatch' in found_values}}
-
-    #: This error indicates that there is a mismatch between the versions
-    #: of the display driver and the CUDA driver. Refer to the
-    #: compatibility documentation for supported versions.
-    cudaErrorSystemDriverMismatch = cyruntime.cudaError.cudaErrorSystemDriverMismatch{{endif}}
-    {{if 'cudaErrorCompatNotSupportedOnDevice' in found_values}}
-
-    #: This error indicates that the system was upgraded to run with
-    #: forward compatibility but the visible hardware detected by CUDA does
-    #: not support this configuration. Refer to the compatibility
-    #: documentation for the supported hardware matrix or ensure that only
-    #: supported hardware is visible during initialization via the
-    #: CUDA_VISIBLE_DEVICES environment variable.
-    cudaErrorCompatNotSupportedOnDevice = cyruntime.cudaError.cudaErrorCompatNotSupportedOnDevice{{endif}}
-    {{if 'cudaErrorMpsConnectionFailed' in found_values}}
-
-    #: This error indicates that the MPS client failed to connect to the
-    #: MPS control daemon or the MPS server.
-    cudaErrorMpsConnectionFailed = cyruntime.cudaError.cudaErrorMpsConnectionFailed{{endif}}
-    {{if 'cudaErrorMpsRpcFailure' in found_values}}
-
-    #: This error indicates that the remote procedural call between the MPS
-    #: server and the MPS client failed.
-    cudaErrorMpsRpcFailure = cyruntime.cudaError.cudaErrorMpsRpcFailure{{endif}}
-    {{if 'cudaErrorMpsServerNotReady' in found_values}}
-
-    #: This error indicates that the MPS server is not ready to accept new
-    #: MPS client requests. This error can be returned when the MPS server
-    #: is in the process of recovering from a fatal failure.
-    cudaErrorMpsServerNotReady = cyruntime.cudaError.cudaErrorMpsServerNotReady{{endif}}
-    {{if 'cudaErrorMpsMaxClientsReached' in found_values}}
-
-    #: This error indicates that the hardware resources required to create
-    #: MPS client have been exhausted.
-    cudaErrorMpsMaxClientsReached = cyruntime.cudaError.cudaErrorMpsMaxClientsReached{{endif}}
-    {{if 'cudaErrorMpsMaxConnectionsReached' in found_values}}
-
-    #: This error indicates the the hardware resources required to device
-    #: connections have been exhausted.
-    cudaErrorMpsMaxConnectionsReached = cyruntime.cudaError.cudaErrorMpsMaxConnectionsReached{{endif}}
-    {{if 'cudaErrorMpsClientTerminated' in found_values}}
-
-    #: This error indicates that the MPS client has been terminated by the
-    #: server. To continue using CUDA, the process must be terminated and
-    #: relaunched.
-    cudaErrorMpsClientTerminated = cyruntime.cudaError.cudaErrorMpsClientTerminated{{endif}}
-    {{if 'cudaErrorCdpNotSupported' in found_values}}
-
-    #: This error indicates, that the program is using CUDA Dynamic
-    #: Parallelism, but the current configuration, like MPS, does not
-    #: support it.
-    cudaErrorCdpNotSupported = cyruntime.cudaError.cudaErrorCdpNotSupported{{endif}}
-    {{if 'cudaErrorCdpVersionMismatch' in found_values}}
-
-    #: This error indicates, that the program contains an unsupported
-    #: interaction between different versions of CUDA Dynamic Parallelism.
-    cudaErrorCdpVersionMismatch = cyruntime.cudaError.cudaErrorCdpVersionMismatch{{endif}}
-    {{if 'cudaErrorStreamCaptureUnsupported' in found_values}}
-
-    #: The operation is not permitted when the stream is capturing.
-    cudaErrorStreamCaptureUnsupported = cyruntime.cudaError.cudaErrorStreamCaptureUnsupported{{endif}}
-    {{if 'cudaErrorStreamCaptureInvalidated' in found_values}}
-
-    #: The current capture sequence on the stream has been invalidated due
-    #: to a previous error.
-    cudaErrorStreamCaptureInvalidated = cyruntime.cudaError.cudaErrorStreamCaptureInvalidated{{endif}}
-    {{if 'cudaErrorStreamCaptureMerge' in found_values}}
-
-    #: The operation would have resulted in a merge of two independent
-    #: capture sequences.
-    cudaErrorStreamCaptureMerge = cyruntime.cudaError.cudaErrorStreamCaptureMerge{{endif}}
-    {{if 'cudaErrorStreamCaptureUnmatched' in found_values}}
-
-    #: The capture was not initiated in this stream.
-    cudaErrorStreamCaptureUnmatched = cyruntime.cudaError.cudaErrorStreamCaptureUnmatched{{endif}}
-    {{if 'cudaErrorStreamCaptureUnjoined' in found_values}}
-
-    #: The capture sequence contains a fork that was not joined to the
-    #: primary stream.
-    cudaErrorStreamCaptureUnjoined = cyruntime.cudaError.cudaErrorStreamCaptureUnjoined{{endif}}
-    {{if 'cudaErrorStreamCaptureIsolation' in found_values}}
-
-    #: A dependency would have been created which crosses the capture
-    #: sequence boundary. Only implicit in-stream ordering dependencies are
-    #: allowed to cross the boundary.
-    cudaErrorStreamCaptureIsolation = cyruntime.cudaError.cudaErrorStreamCaptureIsolation{{endif}}
-    {{if 'cudaErrorStreamCaptureImplicit' in found_values}}
-
-    #: The operation would have resulted in a disallowed implicit
-    #: dependency on a current capture sequence from cudaStreamLegacy.
-    cudaErrorStreamCaptureImplicit = cyruntime.cudaError.cudaErrorStreamCaptureImplicit{{endif}}
-    {{if 'cudaErrorCapturedEvent' in found_values}}
-
-    #: The operation is not permitted on an event which was last recorded
-    #: in a capturing stream.
-    cudaErrorCapturedEvent = cyruntime.cudaError.cudaErrorCapturedEvent{{endif}}
-    {{if 'cudaErrorStreamCaptureWrongThread' in found_values}}
-
-    #: A stream capture sequence not initiated with the
-    #: :py:obj:`~.cudaStreamCaptureModeRelaxed` argument to
-    #: :py:obj:`~.cudaStreamBeginCapture` was passed to
-    #: :py:obj:`~.cudaStreamEndCapture` in a different thread.
-    cudaErrorStreamCaptureWrongThread = cyruntime.cudaError.cudaErrorStreamCaptureWrongThread{{endif}}
-    {{if 'cudaErrorTimeout' in found_values}}
-
-    #: This indicates that the wait operation has timed out.
-    cudaErrorTimeout = cyruntime.cudaError.cudaErrorTimeout{{endif}}
-    {{if 'cudaErrorGraphExecUpdateFailure' in found_values}}
-
-    #: This error indicates that the graph update was not performed because
-    #: it included changes which violated constraints specific to
-    #: instantiated graph update.
-    cudaErrorGraphExecUpdateFailure = cyruntime.cudaError.cudaErrorGraphExecUpdateFailure{{endif}}
-    {{if 'cudaErrorExternalDevice' in found_values}}
-
-    #: This indicates that an async error has occurred in a device outside
-    #: of CUDA. If CUDA was waiting for an external device's signal before
-    #: consuming shared data, the external device signaled an error
-    #: indicating that the data is not valid for consumption. This leaves
-    #: the process in an inconsistent state and any further CUDA work will
-    #: return the same error. To continue using CUDA, the process must be
-    #: terminated and relaunched.
-    cudaErrorExternalDevice = cyruntime.cudaError.cudaErrorExternalDevice{{endif}}
-    {{if 'cudaErrorInvalidClusterSize' in found_values}}
-
-    #: This indicates that a kernel launch error has occurred due to
-    #: cluster misconfiguration.
-    cudaErrorInvalidClusterSize = cyruntime.cudaError.cudaErrorInvalidClusterSize{{endif}}
-    {{if 'cudaErrorFunctionNotLoaded' in found_values}}
-
-    #: Indiciates a function handle is not loaded when calling an API that
-    #: requires a loaded function.
-    cudaErrorFunctionNotLoaded = cyruntime.cudaError.cudaErrorFunctionNotLoaded{{endif}}
-    {{if 'cudaErrorInvalidResourceType' in found_values}}
-
-    #: This error indicates one or more resources passed in are not valid
-    #: resource types for the operation.
-    cudaErrorInvalidResourceType = cyruntime.cudaError.cudaErrorInvalidResourceType{{endif}}
-    {{if 'cudaErrorInvalidResourceConfiguration' in found_values}}
-
-    #: This error indicates one or more resources are insufficient or non-
-    #: applicable for the operation.
-    cudaErrorInvalidResourceConfiguration = cyruntime.cudaError.cudaErrorInvalidResourceConfiguration{{endif}}
-    {{if 'cudaErrorUnknown' in found_values}}
-
-    #: This indicates that an unknown internal error has occurred.
-    cudaErrorUnknown = cyruntime.cudaError.cudaErrorUnknown{{endif}}
-    {{if 'cudaErrorApiFailureBase' in found_values}}
-    cudaErrorApiFailureBase = cyruntime.cudaError.cudaErrorApiFailureBase{{endif}}
-{{endif}}
-{{if 'cudaGraphDependencyType_enum' in found_types}}
-
-class cudaGraphDependencyType(IntEnum):
-    """
-    Type annotations that can be applied to graph edges as part of
-    :py:obj:`~.cudaGraphEdgeData`.
-    """
-    {{if 'cudaGraphDependencyTypeDefault' in found_values}}
-
-    #: This is an ordinary dependency.
-    cudaGraphDependencyTypeDefault = cyruntime.cudaGraphDependencyType_enum.cudaGraphDependencyTypeDefault{{endif}}
-    {{if 'cudaGraphDependencyTypeProgrammatic' in found_values}}
-
-    #: This dependency type allows the downstream node to use
-    #: `cudaGridDependencySynchronize()`. It may only be used between
-    #: kernel nodes, and must be used with either the
-    #: :py:obj:`~.cudaGraphKernelNodePortProgrammatic` or
-    #: :py:obj:`~.cudaGraphKernelNodePortLaunchCompletion` outgoing port.
-    cudaGraphDependencyTypeProgrammatic = cyruntime.cudaGraphDependencyType_enum.cudaGraphDependencyTypeProgrammatic{{endif}}
-{{endif}}
-{{if 'cudaGraphInstantiateResult' in found_types}}
-
-class cudaGraphInstantiateResult(IntEnum):
-    """
-    Graph instantiation results
-    """
-    {{if 'cudaGraphInstantiateSuccess' in found_values}}
-
-    #: Instantiation succeeded
-    cudaGraphInstantiateSuccess = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateSuccess{{endif}}
-    {{if 'cudaGraphInstantiateError' in found_values}}
-
-    #: Instantiation failed for an unexpected reason which is described in
-    #: the return value of the function
-    cudaGraphInstantiateError = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateError{{endif}}
-    {{if 'cudaGraphInstantiateInvalidStructure' in found_values}}
-
-    #: Instantiation failed due to invalid structure, such as cycles
-    cudaGraphInstantiateInvalidStructure = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateInvalidStructure{{endif}}
-    {{if 'cudaGraphInstantiateNodeOperationNotSupported' in found_values}}
-
-    #: Instantiation for device launch failed because the graph contained
-    #: an unsupported operation
-    cudaGraphInstantiateNodeOperationNotSupported = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateNodeOperationNotSupported{{endif}}
-    {{if 'cudaGraphInstantiateMultipleDevicesNotSupported' in found_values}}
-
-    #: Instantiation for device launch failed due to the nodes belonging to
-    #: different contexts
-    cudaGraphInstantiateMultipleDevicesNotSupported = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateMultipleDevicesNotSupported{{endif}}
-{{endif}}
-{{if 'cudaLaunchMemSyncDomain' in found_types}}
-
-class cudaLaunchMemSyncDomain(IntEnum):
-    """
-    Memory Synchronization Domain  A kernel can be launched in a
-    specified memory synchronization domain that affects all memory
-    operations issued by that kernel. A memory barrier issued in one
-    domain will only order memory operations in that domain, thus
-    eliminating latency increase from memory barriers ordering
-    unrelated traffic.  By default, kernels are launched in domain 0.
-    Kernel launched with :py:obj:`~.cudaLaunchMemSyncDomainRemote` will
-    have a different domain ID. User may also alter the domain ID with
-    :py:obj:`~.cudaLaunchMemSyncDomainMap` for a specific stream /
-    graph node / kernel launch. See
-    :py:obj:`~.cudaLaunchAttributeMemSyncDomain`,
-    :py:obj:`~.cudaStreamSetAttribute`, :py:obj:`~.cudaLaunchKernelEx`,
-    :py:obj:`~.cudaGraphKernelNodeSetAttribute`.  Memory operations
-    done in kernels launched in different domains are considered
-    system-scope distanced. In other words, a GPU scoped memory
-    synchronization is not sufficient for memory order to be observed
-    by kernels in another memory synchronization domain even if they
-    are on the same GPU.
-    """
-    {{if 'cudaLaunchMemSyncDomainDefault' in found_values}}
-
-    #: Launch kernels in the default domain
-    cudaLaunchMemSyncDomainDefault = cyruntime.cudaLaunchMemSyncDomain.cudaLaunchMemSyncDomainDefault{{endif}}
-    {{if 'cudaLaunchMemSyncDomainRemote' in found_values}}
-
-    #: Launch kernels in the remote domain
-    cudaLaunchMemSyncDomainRemote = cyruntime.cudaLaunchMemSyncDomain.cudaLaunchMemSyncDomainRemote{{endif}}
-{{endif}}
-{{if 'cudaLaunchAttributeID' in found_types}}
-
-class cudaLaunchAttributeID(IntEnum):
-    """
-    Launch attributes enum; used as id field of
-    :py:obj:`~.cudaLaunchAttribute`
-    """
-    {{if 'cudaLaunchAttributeIgnore' in found_values}}
-
-    #: Ignored entry, for convenient composition
-    cudaLaunchAttributeIgnore = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeIgnore{{endif}}
-    {{if 'cudaLaunchAttributeAccessPolicyWindow' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.accessPolicyWindow`.
-    cudaLaunchAttributeAccessPolicyWindow = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeAccessPolicyWindow{{endif}}
-    {{if 'cudaLaunchAttributeCooperative' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.cooperative`.
-    cudaLaunchAttributeCooperative = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeCooperative{{endif}}
-    {{if 'cudaLaunchAttributeSynchronizationPolicy' in found_values}}
-
-    #: Valid for streams. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.syncPolicy`.
-    cudaLaunchAttributeSynchronizationPolicy = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeSynchronizationPolicy{{endif}}
-    {{if 'cudaLaunchAttributeClusterDimension' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.clusterDim`.
-    cudaLaunchAttributeClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterDimension{{endif}}
-    {{if 'cudaLaunchAttributeClusterSchedulingPolicyPreference' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.clusterSchedulingPolicyPreference`.
-    cudaLaunchAttributeClusterSchedulingPolicyPreference = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterSchedulingPolicyPreference{{endif}}
-    {{if 'cudaLaunchAttributeProgrammaticStreamSerialization' in found_values}}
-
-    #: Valid for launches. Setting
-    #: :py:obj:`~.cudaLaunchAttributeValue.programmaticStreamSerializationAllowed`
-    #: to non-0 signals that the kernel will use programmatic means to
-    #: resolve its stream dependency, so that the CUDA runtime should
-    #: opportunistically allow the grid's execution to overlap with the
-    #: previous kernel in the stream, if that kernel requests the overlap.
-    #: The dependent launches can choose to wait on the dependency using
-    #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
-    #: PTX instructions).
-    cudaLaunchAttributeProgrammaticStreamSerialization = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticStreamSerialization{{endif}}
-    {{if 'cudaLaunchAttributeProgrammaticEvent' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.cudaLaunchAttributeValue.programmaticEvent` to record the
-    #: event. Event recorded through this launch attribute is guaranteed to
-    #: only trigger after all block in the associated kernel trigger the
-    #: event. A block can trigger the event programmatically in a future
-    #: CUDA release. A trigger can also be inserted at the beginning of
-    #: each block's execution if triggerAtBlockStart is set to non-0. The
-    #: dependent launches can choose to wait on the dependency using the
-    #: programmatic sync (cudaGridDependencySynchronize() or equivalent PTX
-    #: instructions). Note that dependents (including the CPU thread
-    #: calling :py:obj:`~.cudaEventSynchronize()`) are not guaranteed to
-    #: observe the release precisely when it is released. For example,
-    #: :py:obj:`~.cudaEventSynchronize()` may only observe the event
-    #: trigger long after the associated kernel has completed. This
-    #: recording type is primarily meant for establishing programmatic
-    #: dependency between device tasks. Note also this type of dependency
-    #: allows, but does not guarantee, concurrent execution of tasks.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.cudaEventDisableTiming` flag set).
-    cudaLaunchAttributeProgrammaticEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticEvent{{endif}}
-    {{if 'cudaLaunchAttributePriority' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.priority`.
-    cudaLaunchAttributePriority = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePriority{{endif}}
-    {{if 'cudaLaunchAttributeMemSyncDomainMap' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomainMap`.
-    cudaLaunchAttributeMemSyncDomainMap = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap{{endif}}
-    {{if 'cudaLaunchAttributeMemSyncDomain' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomain`.
-    cudaLaunchAttributeMemSyncDomain = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomain{{endif}}
-    {{if 'cudaLaunchAttributeLaunchCompletionEvent' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.cudaLaunchAttributeValue.launchCompletionEvent` to record
-    #: the event.
-    #:  Nominally, the event is triggered once all blocks of the kernel
-    #: have begun execution. Currently this is a best effort. If a kernel B
-    #: has a launch completion dependency on a kernel A, B may wait until A
-    #: is complete. Alternatively, blocks of B may begin before all blocks
-    #: of A have begun, for example if B can claim execution resources
-    #: unavailable to A (e.g. they run on different GPUs) or if B is a
-    #: higher priority than A. Exercise caution if such an ordering
-    #: inversion could lead to deadlock.
-    #:  A launch completion event is nominally similar to a programmatic
-    #: event with `triggerAtBlockStart` set except that it is not visible
-    #: to `cudaGridDependencySynchronize()` and can be used with compute
-    #: capability less than 9.0.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.cudaEventDisableTiming` flag set).
-    cudaLaunchAttributeLaunchCompletionEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeLaunchCompletionEvent{{endif}}
-    {{if 'cudaLaunchAttributeDeviceUpdatableKernelNode' in found_values}}
-
-    #: Valid for graph nodes, launches. This attribute is graphs-only, and
-    #: passing it to a launch in a non-capturing stream will result in an
-    #: error.
-    #: :cudaLaunchAttributeValue::deviceUpdatableKernelNode::deviceUpdatable
-    #: can only be set to 0 or 1. Setting the field to 1 indicates that the
-    #: corresponding kernel node should be device-updatable. On success, a
-    #: handle will be returned via
-    #: :py:obj:`~.cudaLaunchAttributeValue`::deviceUpdatableKernelNode::devNode
-    #: which can be passed to the various device-side update functions to
-    #: update the node's kernel parameters from within another kernel. For
-    #: more information on the types of device updates that can be made, as
-    #: well as the relevant limitations thereof, see
-    #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
-    #:  Nodes which are device-updatable have additional restrictions
-    #: compared to regular kernel nodes. Firstly, device-updatable nodes
-    #: cannot be removed from their graph via
-    #: :py:obj:`~.cudaGraphDestroyNode`. Additionally, once opted-in to
-    #: this functionality, a node cannot opt out, and any attempt to set
-    #: the deviceUpdatable attribute to 0 will result in an error. Device-
-    #: updatable kernel nodes also cannot have their attributes copied
-    #: to/from another kernel node via
-    #: :py:obj:`~.cudaGraphKernelNodeCopyAttributes`. Graphs containing one
-    #: or more device-updatable nodes also do not allow multiple
-    #: instantiation, and neither the graph nor its instantiated version
-    #: can be passed to :py:obj:`~.cudaGraphExecUpdate`.
-    #:  If a graph contains device-updatable nodes and updates those nodes
-    #: from the device from within the graph, the graph must be uploaded
-    #: with :py:obj:`~.cuGraphUpload` before it is launched. For such a
-    #: graph, if host-side executable graph updates are made to the device-
-    #: updatable nodes, the graph must be uploaded before it is launched
-    #: again.
-    cudaLaunchAttributeDeviceUpdatableKernelNode = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode{{endif}}
-    {{if 'cudaLaunchAttributePreferredSharedMemoryCarveout' in found_values}}
-
-    #: Valid for launches. On devices where the L1 cache and shared memory
-    #: use the same hardware resources, setting
-    #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a
-    #: percentage between 0-100 signals sets the shared memory carveout
-    #: preference in percent of the total shared memory for that kernel
-    #: launch. This attribute takes precedence over
-    #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is
-    #: only a hint, and the driver can choose a different configuration if
-    #: required for the launch.
-    cudaLaunchAttributePreferredSharedMemoryCarveout = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout{{endif}}
-{{endif}}
-{{if 'cudaAsyncNotificationType_enum' in found_types}}
-
-class cudaAsyncNotificationType(IntEnum):
-    """
-    Types of async notification that can occur
-    """
-    {{if 'cudaAsyncNotificationTypeOverBudget' in found_values}}
-    cudaAsyncNotificationTypeOverBudget = cyruntime.cudaAsyncNotificationType_enum.cudaAsyncNotificationTypeOverBudget{{endif}}
-{{endif}}
-{{if 'cudaDataType_t' in found_types}}
-
-class cudaDataType(IntEnum):
-    """"""
-    {{if 'CUDA_R_32F' in found_values}}
-    CUDA_R_32F = cyruntime.cudaDataType_t.CUDA_R_32F{{endif}}
-    {{if 'CUDA_R_64F' in found_values}}
-    CUDA_R_64F = cyruntime.cudaDataType_t.CUDA_R_64F{{endif}}
-    {{if 'CUDA_R_16F' in found_values}}
-    CUDA_R_16F = cyruntime.cudaDataType_t.CUDA_R_16F{{endif}}
-    {{if 'CUDA_R_8I' in found_values}}
-    CUDA_R_8I = cyruntime.cudaDataType_t.CUDA_R_8I{{endif}}
-    {{if 'CUDA_C_32F' in found_values}}
-    CUDA_C_32F = cyruntime.cudaDataType_t.CUDA_C_32F{{endif}}
-    {{if 'CUDA_C_64F' in found_values}}
-    CUDA_C_64F = cyruntime.cudaDataType_t.CUDA_C_64F{{endif}}
-    {{if 'CUDA_C_16F' in found_values}}
-    CUDA_C_16F = cyruntime.cudaDataType_t.CUDA_C_16F{{endif}}
-    {{if 'CUDA_C_8I' in found_values}}
-    CUDA_C_8I = cyruntime.cudaDataType_t.CUDA_C_8I{{endif}}
-    {{if 'CUDA_R_8U' in found_values}}
-    CUDA_R_8U = cyruntime.cudaDataType_t.CUDA_R_8U{{endif}}
-    {{if 'CUDA_C_8U' in found_values}}
-    CUDA_C_8U = cyruntime.cudaDataType_t.CUDA_C_8U{{endif}}
-    {{if 'CUDA_R_32I' in found_values}}
-    CUDA_R_32I = cyruntime.cudaDataType_t.CUDA_R_32I{{endif}}
-    {{if 'CUDA_C_32I' in found_values}}
-    CUDA_C_32I = cyruntime.cudaDataType_t.CUDA_C_32I{{endif}}
-    {{if 'CUDA_R_32U' in found_values}}
-    CUDA_R_32U = cyruntime.cudaDataType_t.CUDA_R_32U{{endif}}
-    {{if 'CUDA_C_32U' in found_values}}
-    CUDA_C_32U = cyruntime.cudaDataType_t.CUDA_C_32U{{endif}}
-    {{if 'CUDA_R_16BF' in found_values}}
-    CUDA_R_16BF = cyruntime.cudaDataType_t.CUDA_R_16BF{{endif}}
-    {{if 'CUDA_C_16BF' in found_values}}
-    CUDA_C_16BF = cyruntime.cudaDataType_t.CUDA_C_16BF{{endif}}
-    {{if 'CUDA_R_4I' in found_values}}
-    CUDA_R_4I = cyruntime.cudaDataType_t.CUDA_R_4I{{endif}}
-    {{if 'CUDA_C_4I' in found_values}}
-    CUDA_C_4I = cyruntime.cudaDataType_t.CUDA_C_4I{{endif}}
-    {{if 'CUDA_R_4U' in found_values}}
-    CUDA_R_4U = cyruntime.cudaDataType_t.CUDA_R_4U{{endif}}
-    {{if 'CUDA_C_4U' in found_values}}
-    CUDA_C_4U = cyruntime.cudaDataType_t.CUDA_C_4U{{endif}}
-    {{if 'CUDA_R_16I' in found_values}}
-    CUDA_R_16I = cyruntime.cudaDataType_t.CUDA_R_16I{{endif}}
-    {{if 'CUDA_C_16I' in found_values}}
-    CUDA_C_16I = cyruntime.cudaDataType_t.CUDA_C_16I{{endif}}
-    {{if 'CUDA_R_16U' in found_values}}
-    CUDA_R_16U = cyruntime.cudaDataType_t.CUDA_R_16U{{endif}}
-    {{if 'CUDA_C_16U' in found_values}}
-    CUDA_C_16U = cyruntime.cudaDataType_t.CUDA_C_16U{{endif}}
-    {{if 'CUDA_R_64I' in found_values}}
-    CUDA_R_64I = cyruntime.cudaDataType_t.CUDA_R_64I{{endif}}
-    {{if 'CUDA_C_64I' in found_values}}
-    CUDA_C_64I = cyruntime.cudaDataType_t.CUDA_C_64I{{endif}}
-    {{if 'CUDA_R_64U' in found_values}}
-    CUDA_R_64U = cyruntime.cudaDataType_t.CUDA_R_64U{{endif}}
-    {{if 'CUDA_C_64U' in found_values}}
-    CUDA_C_64U = cyruntime.cudaDataType_t.CUDA_C_64U{{endif}}
-    {{if 'CUDA_R_8F_E4M3' in found_values}}
-    CUDA_R_8F_E4M3 = cyruntime.cudaDataType_t.CUDA_R_8F_E4M3{{endif}}
-    {{if 'CUDA_R_8F_E5M2' in found_values}}
-    CUDA_R_8F_E5M2 = cyruntime.cudaDataType_t.CUDA_R_8F_E5M2{{endif}}
-{{endif}}
-{{if 'libraryPropertyType_t' in found_types}}
-
-class libraryPropertyType(IntEnum):
-    """"""
-    {{if 'MAJOR_VERSION' in found_values}}
-    MAJOR_VERSION = cyruntime.libraryPropertyType_t.MAJOR_VERSION{{endif}}
-    {{if 'MINOR_VERSION' in found_values}}
-    MINOR_VERSION = cyruntime.libraryPropertyType_t.MINOR_VERSION{{endif}}
-    {{if 'PATCH_LEVEL' in found_values}}
-    PATCH_LEVEL = cyruntime.libraryPropertyType_t.PATCH_LEVEL{{endif}}
-{{endif}}
-{{if True}}
-
-class cudaEglFrameType(IntEnum):
-    """
-    CUDA EglFrame type - array or pointer
-    """
-    {{if True}}
-
-    #: Frame type CUDA array
-    cudaEglFrameTypeArray = cyruntime.cudaEglFrameType_enum.cudaEglFrameTypeArray{{endif}}
-    {{if True}}
-
-    #: Frame type CUDA pointer
-    cudaEglFrameTypePitch = cyruntime.cudaEglFrameType_enum.cudaEglFrameTypePitch{{endif}}
-{{endif}}
-{{if True}}
-
-class cudaEglResourceLocationFlags(IntEnum):
-    """
-    Resource location flags- sysmem or vidmem  For CUDA context on
-    iGPU, since video and system memory are equivalent - these flags
-    will not have an effect on the execution.  For CUDA context on
-    dGPU, applications can use the flag
-    :py:obj:`~.cudaEglResourceLocationFlags` to give a hint about the
-    desired location.  :py:obj:`~.cudaEglResourceLocationSysmem` - the
-    frame data is made resident on the system memory to be accessed by
-    CUDA.  :py:obj:`~.cudaEglResourceLocationVidmem` - the frame data
-    is made resident on the dedicated video memory to be accessed by
-    CUDA.  There may be an additional latency due to new allocation and
-    data migration, if the frame is produced on a different memory.
-    """
-    {{if True}}
-
-    #: Resource location sysmem
-    cudaEglResourceLocationSysmem = cyruntime.cudaEglResourceLocationFlags_enum.cudaEglResourceLocationSysmem{{endif}}
-    {{if True}}
-
-    #: Resource location vidmem
-    cudaEglResourceLocationVidmem = cyruntime.cudaEglResourceLocationFlags_enum.cudaEglResourceLocationVidmem{{endif}}
-{{endif}}
-{{if True}}
-
-class cudaEglColorFormat(IntEnum):
-    """
-    CUDA EGL Color Format - The different planar and multiplanar
-    formats currently supported for CUDA_EGL interops.
-    """
-    {{if True}}
-
-    #: Y, U, V in three surfaces, each in a separate surface, U/V width =
-    #: 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatYUV420Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar{{endif}}
-    {{if True}}
-
-    #: Y, UV in two surfaces (UV as one surface) with VU byte ordering,
-    #: width, height ratio same as YUV420Planar.
-    cudaEglColorFormatYUV420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar{{endif}}
-    {{if True}}
-
-    #: Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V
-    #: height = Y height.
-    cudaEglColorFormatYUV422Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422Planar{{endif}}
-    {{if True}}
-
-    #: Y, UV in two surfaces with VU byte ordering, width, height ratio
-    #: same as YUV422Planar.
-    cudaEglColorFormatYUV422SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422SemiPlanar{{endif}}
-    {{if True}}
-
-    #: R/G/B/A four channels in one surface with BGRA byte ordering.
-    cudaEglColorFormatARGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatARGB{{endif}}
-    {{if True}}
-
-    #: R/G/B/A four channels in one surface with ABGR byte ordering.
-    cudaEglColorFormatRGBA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatRGBA{{endif}}
-    {{if True}}
-
-    #: single luminance channel in one surface.
-    cudaEglColorFormatL = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatL{{endif}}
-    {{if True}}
-
-    #: single color channel in one surface.
-    cudaEglColorFormatR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatR{{endif}}
-    {{if True}}
-
-    #: Y, U, V in three surfaces, each in a separate surface, U/V width = Y
-    #: width, U/V height = Y height.
-    cudaEglColorFormatYUV444Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444Planar{{endif}}
-    {{if True}}
-
-    #: Y, UV in two surfaces (UV as one surface) with VU byte ordering,
-    #: width, height ratio same as YUV444Planar.
-    cudaEglColorFormatYUV444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444SemiPlanar{{endif}}
-    {{if True}}
-
-    #: Y, U, V in one surface, interleaved as UYVY in one channel.
-    cudaEglColorFormatYUYV422 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUYV422{{endif}}
-    {{if True}}
-
-    #: Y, U, V in one surface, interleaved as YUYV in one channel.
-    cudaEglColorFormatUYVY422 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY422{{endif}}
-    {{if True}}
-
-    #: R/G/B/A four channels in one surface with RGBA byte ordering.
-    cudaEglColorFormatABGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatABGR{{endif}}
-    {{if True}}
-
-    #: R/G/B/A four channels in one surface with ARGB byte ordering.
-    cudaEglColorFormatBGRA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBGRA{{endif}}
-    {{if True}}
-
-    #: Alpha color format - one channel in one surface.
-    cudaEglColorFormatA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatA{{endif}}
-    {{if True}}
-
-    #: R/G color format - two channels in one surface with GR byte ordering
-    cudaEglColorFormatRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatRG{{endif}}
-    {{if True}}
-
-    #: Y, U, V, A four channels in one surface, interleaved as VUYA.
-    cudaEglColorFormatAYUV = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatAYUV{{endif}}
-    {{if True}}
-
-    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
-    #: width = Y width, U/V height = Y height.
-    cudaEglColorFormatYVU444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444SemiPlanar{{endif}}
-    {{if True}}
-
-    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
-    #: width = 1/2 Y width, U/V height = Y height.
-    cudaEglColorFormatYVU422SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422SemiPlanar{{endif}}
-    {{if True}}
-
-    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
-    #: width = 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatYVU420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces (VU as one surface) with UV byte
-    #: ordering, U/V width = Y width, U/V height = Y height.
-    cudaEglColorFormatY10V10U10_444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_444SemiPlanar{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces (VU as one surface) with UV byte
-    #: ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatY10V10U10_420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar{{endif}}
-    {{if True}}
-
-    #: Y12, V12U12 in two surfaces (VU as one surface) with UV byte
-    #: ordering, U/V width = Y width, U/V height = Y height.
-    cudaEglColorFormatY12V12U12_444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_444SemiPlanar{{endif}}
-    {{if True}}
-
-    #: Y12, V12U12 in two surfaces (VU as one surface) with UV byte
-    #: ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatY12V12U12_420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_420SemiPlanar{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in one surface, interleaved as YVYU in one
-    #: channel.
-    cudaEglColorFormatVYUY_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatVYUY_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in one surface, interleaved as YUYV in one
-    #: channel.
-    cudaEglColorFormatUYVY_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in one surface, interleaved as UYVY in one
-    #: channel.
-    cudaEglColorFormatYUYV_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUYV_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in one surface, interleaved as VYUY in one
-    #: channel.
-    cudaEglColorFormatYVYU_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVYU_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V, A four channels in one surface, interleaved
-    #: as AVUY.
-    cudaEglColorFormatYUVA_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUVA_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V, A four channels in one surface, interleaved
-    #: as VUYA.
-    cudaEglColorFormatAYUV_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatAYUV_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in three surfaces, U/V width = Y width, U/V
-    #: height = Y height.
-    cudaEglColorFormatYUV444Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444Planar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width,
-    #: U/V height = Y height.
-    cudaEglColorFormatYUV422Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422Planar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    cudaEglColorFormatYUV420Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
-    #: byte ordering, U/V width = Y width, U/V height = Y height.
-    cudaEglColorFormatYUV444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444SemiPlanar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
-    #: byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
-    cudaEglColorFormatYUV422SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422SemiPlanar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
-    #: byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatYUV420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, V, U in three surfaces, U/V width = Y width, U/V
-    #: height = Y height.
-    cudaEglColorFormatYVU444Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444Planar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width,
-    #: U/V height = Y height.
-    cudaEglColorFormatYVU422Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422Planar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    cudaEglColorFormatYVU420Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
-    #: byte ordering, U/V width = Y width, U/V height = Y height.
-    cudaEglColorFormatYVU444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444SemiPlanar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
-    #: byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
-    cudaEglColorFormatYVU422SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422SemiPlanar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
-    #: byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatYVU420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar_ER{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved RGGB
-    #: ordering.
-    cudaEglColorFormatBayerRGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerRGGB{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved BGGR
-    #: ordering.
-    cudaEglColorFormatBayerBGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerBGGR{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved GRBG
-    #: ordering.
-    cudaEglColorFormatBayerGRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerGRBG{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved GBRG
-    #: ordering.
-    cudaEglColorFormatBayerGBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerGBRG{{endif}}
-    {{if True}}
-
-    #: Bayer10 format - one channel in one surface with interleaved RGGB
-    #: ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-    cudaEglColorFormatBayer10RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10RGGB{{endif}}
-    {{if True}}
-
-    #: Bayer10 format - one channel in one surface with interleaved BGGR
-    #: ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-    cudaEglColorFormatBayer10BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10BGGR{{endif}}
-    {{if True}}
-
-    #: Bayer10 format - one channel in one surface with interleaved GRBG
-    #: ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-    cudaEglColorFormatBayer10GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10GRBG{{endif}}
-    {{if True}}
-
-    #: Bayer10 format - one channel in one surface with interleaved GBRG
-    #: ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-    cudaEglColorFormatBayer10GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10GBRG{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved RGGB
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    cudaEglColorFormatBayer12RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12RGGB{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved BGGR
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    cudaEglColorFormatBayer12BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12BGGR{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved GRBG
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    cudaEglColorFormatBayer12GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12GRBG{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved GBRG
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    cudaEglColorFormatBayer12GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12GBRG{{endif}}
-    {{if True}}
-
-    #: Bayer14 format - one channel in one surface with interleaved RGGB
-    #: ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-    cudaEglColorFormatBayer14RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14RGGB{{endif}}
-    {{if True}}
-
-    #: Bayer14 format - one channel in one surface with interleaved BGGR
-    #: ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-    cudaEglColorFormatBayer14BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14BGGR{{endif}}
-    {{if True}}
-
-    #: Bayer14 format - one channel in one surface with interleaved GRBG
-    #: ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-    cudaEglColorFormatBayer14GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14GRBG{{endif}}
-    {{if True}}
-
-    #: Bayer14 format - one channel in one surface with interleaved GBRG
-    #: ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-    cudaEglColorFormatBayer14GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14GBRG{{endif}}
-    {{if True}}
-
-    #: Bayer20 format - one channel in one surface with interleaved RGGB
-    #: ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-    cudaEglColorFormatBayer20RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20RGGB{{endif}}
-    {{if True}}
-
-    #: Bayer20 format - one channel in one surface with interleaved BGGR
-    #: ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-    cudaEglColorFormatBayer20BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20BGGR{{endif}}
-    {{if True}}
-
-    #: Bayer20 format - one channel in one surface with interleaved GRBG
-    #: ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-    cudaEglColorFormatBayer20GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20GRBG{{endif}}
-    {{if True}}
-
-    #: Bayer20 format - one channel in one surface with interleaved GBRG
-    #: ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-    cudaEglColorFormatBayer20GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20GBRG{{endif}}
-    {{if True}}
-
-    #: Y, V, U in three surfaces, each in a separate surface, U/V width = Y
-    #: width, U/V height = Y height.
-    cudaEglColorFormatYVU444Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444Planar{{endif}}
-    {{if True}}
-
-    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
-    #: 1/2 Y width, U/V height = Y height.
-    cudaEglColorFormatYVU422Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422Planar{{endif}}
-    {{if True}}
-
-    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
-    #: 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatYVU420Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar{{endif}}
-    {{if True}}
-
-    #: Nvidia proprietary Bayer ISP format - one channel in one surface
-    #: with interleaved RGGB ordering and mapped to opaque integer
-    #: datatype.
-    cudaEglColorFormatBayerIspRGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspRGGB{{endif}}
-    {{if True}}
-
-    #: Nvidia proprietary Bayer ISP format - one channel in one surface
-    #: with interleaved BGGR ordering and mapped to opaque integer
-    #: datatype.
-    cudaEglColorFormatBayerIspBGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspBGGR{{endif}}
-    {{if True}}
-
-    #: Nvidia proprietary Bayer ISP format - one channel in one surface
-    #: with interleaved GRBG ordering and mapped to opaque integer
-    #: datatype.
-    cudaEglColorFormatBayerIspGRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspGRBG{{endif}}
-    {{if True}}
-
-    #: Nvidia proprietary Bayer ISP format - one channel in one surface
-    #: with interleaved GBRG ordering and mapped to opaque integer
-    #: datatype.
-    cudaEglColorFormatBayerIspGBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspGBRG{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved BCCR
-    #: ordering.
-    cudaEglColorFormatBayerBCCR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerBCCR{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved RCCB
-    #: ordering.
-    cudaEglColorFormatBayerRCCB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerRCCB{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved CRBC
-    #: ordering.
-    cudaEglColorFormatBayerCRBC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerCRBC{{endif}}
-    {{if True}}
-
-    #: Bayer format - one channel in one surface with interleaved CBRC
-    #: ordering.
-    cudaEglColorFormatBayerCBRC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerCBRC{{endif}}
-    {{if True}}
-
-    #: Bayer10 format - one channel in one surface with interleaved CCCC
-    #: ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-    cudaEglColorFormatBayer10CCCC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10CCCC{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved BCCR
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    cudaEglColorFormatBayer12BCCR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12BCCR{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved RCCB
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    cudaEglColorFormatBayer12RCCB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12RCCB{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved CRBC
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    cudaEglColorFormatBayer12CRBC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12CRBC{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved CBRC
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    cudaEglColorFormatBayer12CBRC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12CBRC{{endif}}
-    {{if True}}
-
-    #: Bayer12 format - one channel in one surface with interleaved CCCC
-    #: ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-    cudaEglColorFormatBayer12CCCC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12CCCC{{endif}}
-    {{if True}}
-
-    #: Color format for single Y plane.
-    cudaEglColorFormatY = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY{{endif}}
-    {{if True}}
-
-    #: Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    cudaEglColorFormatYUV420SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar_2020{{endif}}
-    {{if True}}
-
-    #: Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    cudaEglColorFormatYVU420SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar_2020{{endif}}
-    {{if True}}
-
-    #: Y, U, V in three surfaces, each in a separate surface, U/V width =
-    #: 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatYUV420Planar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar_2020{{endif}}
-    {{if True}}
-
-    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
-    #: 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatYVU420Planar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar_2020{{endif}}
-    {{if True}}
-
-    #: Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    cudaEglColorFormatYUV420SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar_709{{endif}}
-    {{if True}}
-
-    #: Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width,
-    #: U/V height = 1/2 Y height.
-    cudaEglColorFormatYVU420SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar_709{{endif}}
-    {{if True}}
-
-    #: Y, U, V in three surfaces, each in a separate surface, U/V width =
-    #: 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatYUV420Planar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar_709{{endif}}
-    {{if True}}
-
-    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
-    #: 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatYVU420Planar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar_709{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
-    #: width, U/V height = 1/2 Y height.
-    cudaEglColorFormatY10V10U10_420SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_709{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
-    #: width, U/V height = 1/2 Y height.
-    cudaEglColorFormatY10V10U10_420SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_2020{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
-    #: width, U/V height = Y height.
-    cudaEglColorFormatY10V10U10_422SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_422SemiPlanar_2020{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
-    #: width, U/V height = Y height.
-    cudaEglColorFormatY10V10U10_422SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_422SemiPlanar{{endif}}
-    {{if True}}
-
-    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
-    #: width, U/V height = Y height.
-    cudaEglColorFormatY10V10U10_422SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_422SemiPlanar_709{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y plane.
-    cudaEglColorFormatY_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y plane.
-    cudaEglColorFormatY_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY_709_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y10 plane.
-    cudaEglColorFormatY10_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y10 plane.
-    cudaEglColorFormatY10_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10_709_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y12 plane.
-    cudaEglColorFormatY12_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Color format for single Y12 plane.
-    cudaEglColorFormatY12_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12_709_ER{{endif}}
-    {{if True}}
-
-    #: Y, U, V, A four channels in one surface, interleaved as AVUY.
-    cudaEglColorFormatYUVA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUVA{{endif}}
-    {{if True}}
-
-    #: Y, U, V in one surface, interleaved as YVYU in one channel.
-    cudaEglColorFormatYVYU = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVYU{{endif}}
-    {{if True}}
-
-    #: Y, U, V in one surface, interleaved as VYUY in one channel.
-    cudaEglColorFormatVYUY = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatVYUY{{endif}}
-    {{if True}}
-
-    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
-    #: width = 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatY10V10U10_420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
-    #: width = 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
-    #: width = Y width, U/V height = Y height.
-    cudaEglColorFormatY10V10U10_444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_444SemiPlanar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
-    #: width = Y width, U/V height = Y height.
-    cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
-    #: width = 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatY12V12U12_420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_420SemiPlanar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
-    #: width = 1/2 Y width, U/V height = 1/2 Y height.
-    cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
-    #: width = Y width, U/V height = Y height.
-    cudaEglColorFormatY12V12U12_444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_444SemiPlanar_ER{{endif}}
-    {{if True}}
-
-    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
-    #: width = Y width, U/V height = Y height.
-    cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER{{endif}}
-{{endif}}
-{{if 'cudaChannelFormatKind' in found_types}}
-
-class cudaChannelFormatKind(IntEnum):
-    """
-    Channel format kind
-    """
-    {{if 'cudaChannelFormatKindSigned' in found_values}}
-
-    #: Signed channel format
-    cudaChannelFormatKindSigned = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSigned{{endif}}
-    {{if 'cudaChannelFormatKindUnsigned' in found_values}}
-
-    #: Unsigned channel format
-    cudaChannelFormatKindUnsigned = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsigned{{endif}}
-    {{if 'cudaChannelFormatKindFloat' in found_values}}
-
-    #: Float channel format
-    cudaChannelFormatKindFloat = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindFloat{{endif}}
-    {{if 'cudaChannelFormatKindNone' in found_values}}
-
-    #: No channel format
-    cudaChannelFormatKindNone = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindNone{{endif}}
-    {{if 'cudaChannelFormatKindNV12' in found_values}}
-
-    #: Unsigned 8-bit integers, planar 4:2:0 YUV format
-    cudaChannelFormatKindNV12 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindNV12{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedNormalized8X1' in found_values}}
-
-    #: 1 channel unsigned 8-bit normalized integer
-    cudaChannelFormatKindUnsignedNormalized8X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X1{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedNormalized8X2' in found_values}}
-
-    #: 2 channel unsigned 8-bit normalized integer
-    cudaChannelFormatKindUnsignedNormalized8X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X2{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedNormalized8X4' in found_values}}
-
-    #: 4 channel unsigned 8-bit normalized integer
-    cudaChannelFormatKindUnsignedNormalized8X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X4{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedNormalized16X1' in found_values}}
-
-    #: 1 channel unsigned 16-bit normalized integer
-    cudaChannelFormatKindUnsignedNormalized16X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X1{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedNormalized16X2' in found_values}}
-
-    #: 2 channel unsigned 16-bit normalized integer
-    cudaChannelFormatKindUnsignedNormalized16X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X2{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedNormalized16X4' in found_values}}
-
-    #: 4 channel unsigned 16-bit normalized integer
-    cudaChannelFormatKindUnsignedNormalized16X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X4{{endif}}
-    {{if 'cudaChannelFormatKindSignedNormalized8X1' in found_values}}
-
-    #: 1 channel signed 8-bit normalized integer
-    cudaChannelFormatKindSignedNormalized8X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X1{{endif}}
-    {{if 'cudaChannelFormatKindSignedNormalized8X2' in found_values}}
-
-    #: 2 channel signed 8-bit normalized integer
-    cudaChannelFormatKindSignedNormalized8X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X2{{endif}}
-    {{if 'cudaChannelFormatKindSignedNormalized8X4' in found_values}}
-
-    #: 4 channel signed 8-bit normalized integer
-    cudaChannelFormatKindSignedNormalized8X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X4{{endif}}
-    {{if 'cudaChannelFormatKindSignedNormalized16X1' in found_values}}
-
-    #: 1 channel signed 16-bit normalized integer
-    cudaChannelFormatKindSignedNormalized16X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X1{{endif}}
-    {{if 'cudaChannelFormatKindSignedNormalized16X2' in found_values}}
-
-    #: 2 channel signed 16-bit normalized integer
-    cudaChannelFormatKindSignedNormalized16X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X2{{endif}}
-    {{if 'cudaChannelFormatKindSignedNormalized16X4' in found_values}}
-
-    #: 4 channel signed 16-bit normalized integer
-    cudaChannelFormatKindSignedNormalized16X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X4{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedBlockCompressed1' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC1 compression)
-    #: format
-    cudaChannelFormatKindUnsignedBlockCompressed1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedBlockCompressed1SRGB' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC1 compression)
-    #: format with sRGB encoding
-    cudaChannelFormatKindUnsignedBlockCompressed1SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1SRGB{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedBlockCompressed2' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC2 compression)
-    #: format
-    cudaChannelFormatKindUnsignedBlockCompressed2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedBlockCompressed2SRGB' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC2 compression)
-    #: format with sRGB encoding
-    cudaChannelFormatKindUnsignedBlockCompressed2SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2SRGB{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedBlockCompressed3' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC3 compression)
-    #: format
-    cudaChannelFormatKindUnsignedBlockCompressed3 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedBlockCompressed3SRGB' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC3 compression)
-    #: format with sRGB encoding
-    cudaChannelFormatKindUnsignedBlockCompressed3SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3SRGB{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedBlockCompressed4' in found_values}}
-
-    #: 1 channel unsigned normalized block-compressed (BC4 compression)
-    #: format
-    cudaChannelFormatKindUnsignedBlockCompressed4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed4{{endif}}
-    {{if 'cudaChannelFormatKindSignedBlockCompressed4' in found_values}}
-
-    #: 1 channel signed normalized block-compressed (BC4 compression)
-    #: format
-    cudaChannelFormatKindSignedBlockCompressed4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed4{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedBlockCompressed5' in found_values}}
-
-    #: 2 channel unsigned normalized block-compressed (BC5 compression)
-    #: format
-    cudaChannelFormatKindUnsignedBlockCompressed5 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed5{{endif}}
-    {{if 'cudaChannelFormatKindSignedBlockCompressed5' in found_values}}
-
-    #: 2 channel signed normalized block-compressed (BC5 compression)
-    #: format
-    cudaChannelFormatKindSignedBlockCompressed5 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed5{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedBlockCompressed6H' in found_values}}
-
-    #: 3 channel unsigned half-float block-compressed (BC6H compression)
-    #: format
-    cudaChannelFormatKindUnsignedBlockCompressed6H = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed6H{{endif}}
-    {{if 'cudaChannelFormatKindSignedBlockCompressed6H' in found_values}}
-
-    #: 3 channel signed half-float block-compressed (BC6H compression)
-    #: format
-    cudaChannelFormatKindSignedBlockCompressed6H = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed6H{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedBlockCompressed7' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC7 compression)
-    #: format
-    cudaChannelFormatKindUnsignedBlockCompressed7 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7{{endif}}
-    {{if 'cudaChannelFormatKindUnsignedBlockCompressed7SRGB' in found_values}}
-
-    #: 4 channel unsigned normalized block-compressed (BC7 compression)
-    #: format with sRGB encoding
-    cudaChannelFormatKindUnsignedBlockCompressed7SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7SRGB{{endif}}
-{{endif}}
-{{if 'cudaMemoryType' in found_types}}
-
-class cudaMemoryType(IntEnum):
-    """
-    CUDA memory types
-    """
-    {{if 'cudaMemoryTypeUnregistered' in found_values}}
-
-    #: Unregistered memory
-    cudaMemoryTypeUnregistered = cyruntime.cudaMemoryType.cudaMemoryTypeUnregistered{{endif}}
-    {{if 'cudaMemoryTypeHost' in found_values}}
-
-    #: Host memory
-    cudaMemoryTypeHost = cyruntime.cudaMemoryType.cudaMemoryTypeHost{{endif}}
-    {{if 'cudaMemoryTypeDevice' in found_values}}
-
-    #: Device memory
-    cudaMemoryTypeDevice = cyruntime.cudaMemoryType.cudaMemoryTypeDevice{{endif}}
-    {{if 'cudaMemoryTypeManaged' in found_values}}
-
-    #: Managed memory
-    cudaMemoryTypeManaged = cyruntime.cudaMemoryType.cudaMemoryTypeManaged{{endif}}
-{{endif}}
-{{if 'cudaMemcpyKind' in found_types}}
-
-class cudaMemcpyKind(IntEnum):
-    """
-    CUDA memory copy types
-    """
-    {{if 'cudaMemcpyHostToHost' in found_values}}
-
-    #: Host -> Host
-    cudaMemcpyHostToHost = cyruntime.cudaMemcpyKind.cudaMemcpyHostToHost{{endif}}
-    {{if 'cudaMemcpyHostToDevice' in found_values}}
-
-    #: Host -> Device
-    cudaMemcpyHostToDevice = cyruntime.cudaMemcpyKind.cudaMemcpyHostToDevice{{endif}}
-    {{if 'cudaMemcpyDeviceToHost' in found_values}}
-
-    #: Device -> Host
-    cudaMemcpyDeviceToHost = cyruntime.cudaMemcpyKind.cudaMemcpyDeviceToHost{{endif}}
-    {{if 'cudaMemcpyDeviceToDevice' in found_values}}
-
-    #: Device -> Device
-    cudaMemcpyDeviceToDevice = cyruntime.cudaMemcpyKind.cudaMemcpyDeviceToDevice{{endif}}
-    {{if 'cudaMemcpyDefault' in found_values}}
-
-    #: Direction of the transfer is inferred from the pointer values.
-    #: Requires unified virtual addressing
-    cudaMemcpyDefault = cyruntime.cudaMemcpyKind.cudaMemcpyDefault{{endif}}
-{{endif}}
-{{if 'cudaAccessProperty' in found_types}}
-
-class cudaAccessProperty(IntEnum):
-    """
-    Specifies performance hint with :py:obj:`~.cudaAccessPolicyWindow`
-    for hitProp and missProp members.
-    """
-    {{if 'cudaAccessPropertyNormal' in found_values}}
-
-    #: Normal cache persistence.
-    cudaAccessPropertyNormal = cyruntime.cudaAccessProperty.cudaAccessPropertyNormal{{endif}}
-    {{if 'cudaAccessPropertyStreaming' in found_values}}
-
-    #: Streaming access is less likely to persit from cache.
-    cudaAccessPropertyStreaming = cyruntime.cudaAccessProperty.cudaAccessPropertyStreaming{{endif}}
-    {{if 'cudaAccessPropertyPersisting' in found_values}}
-
-    #: Persisting access is more likely to persist in cache.
-    cudaAccessPropertyPersisting = cyruntime.cudaAccessProperty.cudaAccessPropertyPersisting{{endif}}
-{{endif}}
-{{if 'cudaStreamCaptureStatus' in found_types}}
-
-class cudaStreamCaptureStatus(IntEnum):
-    """
-    Possible stream capture statuses returned by
-    :py:obj:`~.cudaStreamIsCapturing`
-    """
-    {{if 'cudaStreamCaptureStatusNone' in found_values}}
-
-    #: Stream is not capturing
-    cudaStreamCaptureStatusNone = cyruntime.cudaStreamCaptureStatus.cudaStreamCaptureStatusNone{{endif}}
-    {{if 'cudaStreamCaptureStatusActive' in found_values}}
-
-    #: Stream is actively capturing
-    cudaStreamCaptureStatusActive = cyruntime.cudaStreamCaptureStatus.cudaStreamCaptureStatusActive{{endif}}
-    {{if 'cudaStreamCaptureStatusInvalidated' in found_values}}
-
-    #: Stream is part of a capture sequence that has been invalidated, but
-    #: not terminated
-    cudaStreamCaptureStatusInvalidated = cyruntime.cudaStreamCaptureStatus.cudaStreamCaptureStatusInvalidated{{endif}}
-{{endif}}
-{{if 'cudaStreamCaptureMode' in found_types}}
-
-class cudaStreamCaptureMode(IntEnum):
-    """
-    Possible modes for stream capture thread interactions. For more
-    details see :py:obj:`~.cudaStreamBeginCapture` and
-    :py:obj:`~.cudaThreadExchangeStreamCaptureMode`
-    """
-    {{if 'cudaStreamCaptureModeGlobal' in found_values}}
-    cudaStreamCaptureModeGlobal = cyruntime.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal{{endif}}
-    {{if 'cudaStreamCaptureModeThreadLocal' in found_values}}
-    cudaStreamCaptureModeThreadLocal = cyruntime.cudaStreamCaptureMode.cudaStreamCaptureModeThreadLocal{{endif}}
-    {{if 'cudaStreamCaptureModeRelaxed' in found_values}}
-    cudaStreamCaptureModeRelaxed = cyruntime.cudaStreamCaptureMode.cudaStreamCaptureModeRelaxed{{endif}}
-{{endif}}
-{{if 'cudaSynchronizationPolicy' in found_types}}
-
-class cudaSynchronizationPolicy(IntEnum):
-    """
-
-    """
-    {{if 'cudaSyncPolicyAuto' in found_values}}
-    cudaSyncPolicyAuto = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicyAuto{{endif}}
-    {{if 'cudaSyncPolicySpin' in found_values}}
-    cudaSyncPolicySpin = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicySpin{{endif}}
-    {{if 'cudaSyncPolicyYield' in found_values}}
-    cudaSyncPolicyYield = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicyYield{{endif}}
-    {{if 'cudaSyncPolicyBlockingSync' in found_values}}
-    cudaSyncPolicyBlockingSync = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicyBlockingSync{{endif}}
-{{endif}}
-{{if 'cudaClusterSchedulingPolicy' in found_types}}
-
-class cudaClusterSchedulingPolicy(IntEnum):
-    """
-    Cluster scheduling policies. These may be passed to
-    :py:obj:`~.cudaFuncSetAttribute`
-    """
-    {{if 'cudaClusterSchedulingPolicyDefault' in found_values}}
-
-    #: the default policy
-    cudaClusterSchedulingPolicyDefault = cyruntime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicyDefault{{endif}}
-    {{if 'cudaClusterSchedulingPolicySpread' in found_values}}
-
-    #: spread the blocks within a cluster to the SMs
-    cudaClusterSchedulingPolicySpread = cyruntime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicySpread{{endif}}
-    {{if 'cudaClusterSchedulingPolicyLoadBalancing' in found_values}}
-
-    #: allow the hardware to load-balance the blocks in a cluster to the
-    #: SMs
-    cudaClusterSchedulingPolicyLoadBalancing = cyruntime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicyLoadBalancing{{endif}}
-{{endif}}
-{{if 'cudaStreamUpdateCaptureDependenciesFlags' in found_types}}
-
-class cudaStreamUpdateCaptureDependenciesFlags(IntEnum):
-    """
-    Flags for :py:obj:`~.cudaStreamUpdateCaptureDependencies`
-    """
-    {{if 'cudaStreamAddCaptureDependencies' in found_values}}
-
-    #: Add new nodes to the dependency set
-    cudaStreamAddCaptureDependencies = cyruntime.cudaStreamUpdateCaptureDependenciesFlags.cudaStreamAddCaptureDependencies{{endif}}
-    {{if 'cudaStreamSetCaptureDependencies' in found_values}}
-
-    #: Replace the dependency set with the new nodes
-    cudaStreamSetCaptureDependencies = cyruntime.cudaStreamUpdateCaptureDependenciesFlags.cudaStreamSetCaptureDependencies{{endif}}
-{{endif}}
-{{if 'cudaUserObjectFlags' in found_types}}
-
-class cudaUserObjectFlags(IntEnum):
-    """
-    Flags for user objects for graphs
-    """
-    {{if 'cudaUserObjectNoDestructorSync' in found_values}}
-
-    #: Indicates the destructor execution is not synchronized by any CUDA
-    #: handle.
-    cudaUserObjectNoDestructorSync = cyruntime.cudaUserObjectFlags.cudaUserObjectNoDestructorSync{{endif}}
-{{endif}}
-{{if 'cudaUserObjectRetainFlags' in found_types}}
-
-class cudaUserObjectRetainFlags(IntEnum):
-    """
-    Flags for retaining user object references for graphs
-    """
-    {{if 'cudaGraphUserObjectMove' in found_values}}
-
-    #: Transfer references from the caller rather than creating new
-    #: references.
-    cudaGraphUserObjectMove = cyruntime.cudaUserObjectRetainFlags.cudaGraphUserObjectMove{{endif}}
-{{endif}}
-{{if 'cudaGraphicsRegisterFlags' in found_types}}
-
-class cudaGraphicsRegisterFlags(IntEnum):
-    """
-    CUDA graphics interop register flags
-    """
-    {{if 'cudaGraphicsRegisterFlagsNone' in found_values}}
-
-    #: Default
-    cudaGraphicsRegisterFlagsNone = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsNone{{endif}}
-    {{if 'cudaGraphicsRegisterFlagsReadOnly' in found_values}}
-
-    #: CUDA will not write to this resource
-    cudaGraphicsRegisterFlagsReadOnly = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsReadOnly{{endif}}
-    {{if 'cudaGraphicsRegisterFlagsWriteDiscard' in found_values}}
-
-    #: CUDA will only write to and will not read from this resource
-    cudaGraphicsRegisterFlagsWriteDiscard = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsWriteDiscard{{endif}}
-    {{if 'cudaGraphicsRegisterFlagsSurfaceLoadStore' in found_values}}
-
-    #: CUDA will bind this resource to a surface reference
-    cudaGraphicsRegisterFlagsSurfaceLoadStore = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsSurfaceLoadStore{{endif}}
-    {{if 'cudaGraphicsRegisterFlagsTextureGather' in found_values}}
-
-    #: CUDA will perform texture gather operations on this resource
-    cudaGraphicsRegisterFlagsTextureGather = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsTextureGather{{endif}}
-{{endif}}
-{{if 'cudaGraphicsMapFlags' in found_types}}
-
-class cudaGraphicsMapFlags(IntEnum):
-    """
-    CUDA graphics interop map flags
-    """
-    {{if 'cudaGraphicsMapFlagsNone' in found_values}}
-
-    #: Default; Assume resource can be read/written
-    cudaGraphicsMapFlagsNone = cyruntime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsNone{{endif}}
-    {{if 'cudaGraphicsMapFlagsReadOnly' in found_values}}
-
-    #: CUDA will not write to this resource
-    cudaGraphicsMapFlagsReadOnly = cyruntime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsReadOnly{{endif}}
-    {{if 'cudaGraphicsMapFlagsWriteDiscard' in found_values}}
-
-    #: CUDA will only write to and will not read from this resource
-    cudaGraphicsMapFlagsWriteDiscard = cyruntime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsWriteDiscard{{endif}}
-{{endif}}
-{{if 'cudaGraphicsCubeFace' in found_types}}
-
-class cudaGraphicsCubeFace(IntEnum):
-    """
-    CUDA graphics interop array indices for cube maps
-    """
-    {{if 'cudaGraphicsCubeFacePositiveX' in found_values}}
-
-    #: Positive X face of cubemap
-    cudaGraphicsCubeFacePositiveX = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveX{{endif}}
-    {{if 'cudaGraphicsCubeFaceNegativeX' in found_values}}
-
-    #: Negative X face of cubemap
-    cudaGraphicsCubeFaceNegativeX = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeX{{endif}}
-    {{if 'cudaGraphicsCubeFacePositiveY' in found_values}}
-
-    #: Positive Y face of cubemap
-    cudaGraphicsCubeFacePositiveY = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveY{{endif}}
-    {{if 'cudaGraphicsCubeFaceNegativeY' in found_values}}
-
-    #: Negative Y face of cubemap
-    cudaGraphicsCubeFaceNegativeY = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeY{{endif}}
-    {{if 'cudaGraphicsCubeFacePositiveZ' in found_values}}
-
-    #: Positive Z face of cubemap
-    cudaGraphicsCubeFacePositiveZ = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveZ{{endif}}
-    {{if 'cudaGraphicsCubeFaceNegativeZ' in found_values}}
-
-    #: Negative Z face of cubemap
-    cudaGraphicsCubeFaceNegativeZ = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeZ{{endif}}
-{{endif}}
-{{if 'cudaResourceType' in found_types}}
-
-class cudaResourceType(IntEnum):
-    """
-    CUDA resource types
-    """
-    {{if 'cudaResourceTypeArray' in found_values}}
-
-    #: Array resource
-    cudaResourceTypeArray = cyruntime.cudaResourceType.cudaResourceTypeArray{{endif}}
-    {{if 'cudaResourceTypeMipmappedArray' in found_values}}
-
-    #: Mipmapped array resource
-    cudaResourceTypeMipmappedArray = cyruntime.cudaResourceType.cudaResourceTypeMipmappedArray{{endif}}
-    {{if 'cudaResourceTypeLinear' in found_values}}
-
-    #: Linear resource
-    cudaResourceTypeLinear = cyruntime.cudaResourceType.cudaResourceTypeLinear{{endif}}
-    {{if 'cudaResourceTypePitch2D' in found_values}}
-
-    #: Pitch 2D resource
-    cudaResourceTypePitch2D = cyruntime.cudaResourceType.cudaResourceTypePitch2D{{endif}}
-{{endif}}
-{{if 'cudaResourceViewFormat' in found_types}}
-
-class cudaResourceViewFormat(IntEnum):
-    """
-    CUDA texture resource view formats
-    """
-    {{if 'cudaResViewFormatNone' in found_values}}
-
-    #: No resource view format (use underlying resource format)
-    cudaResViewFormatNone = cyruntime.cudaResourceViewFormat.cudaResViewFormatNone{{endif}}
-    {{if 'cudaResViewFormatUnsignedChar1' in found_values}}
-
-    #: 1 channel unsigned 8-bit integers
-    cudaResViewFormatUnsignedChar1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar1{{endif}}
-    {{if 'cudaResViewFormatUnsignedChar2' in found_values}}
-
-    #: 2 channel unsigned 8-bit integers
-    cudaResViewFormatUnsignedChar2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar2{{endif}}
-    {{if 'cudaResViewFormatUnsignedChar4' in found_values}}
-
-    #: 4 channel unsigned 8-bit integers
-    cudaResViewFormatUnsignedChar4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar4{{endif}}
-    {{if 'cudaResViewFormatSignedChar1' in found_values}}
-
-    #: 1 channel signed 8-bit integers
-    cudaResViewFormatSignedChar1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedChar1{{endif}}
-    {{if 'cudaResViewFormatSignedChar2' in found_values}}
-
-    #: 2 channel signed 8-bit integers
-    cudaResViewFormatSignedChar2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedChar2{{endif}}
-    {{if 'cudaResViewFormatSignedChar4' in found_values}}
-
-    #: 4 channel signed 8-bit integers
-    cudaResViewFormatSignedChar4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedChar4{{endif}}
-    {{if 'cudaResViewFormatUnsignedShort1' in found_values}}
-
-    #: 1 channel unsigned 16-bit integers
-    cudaResViewFormatUnsignedShort1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort1{{endif}}
-    {{if 'cudaResViewFormatUnsignedShort2' in found_values}}
-
-    #: 2 channel unsigned 16-bit integers
-    cudaResViewFormatUnsignedShort2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort2{{endif}}
-    {{if 'cudaResViewFormatUnsignedShort4' in found_values}}
-
-    #: 4 channel unsigned 16-bit integers
-    cudaResViewFormatUnsignedShort4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort4{{endif}}
-    {{if 'cudaResViewFormatSignedShort1' in found_values}}
-
-    #: 1 channel signed 16-bit integers
-    cudaResViewFormatSignedShort1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedShort1{{endif}}
-    {{if 'cudaResViewFormatSignedShort2' in found_values}}
-
-    #: 2 channel signed 16-bit integers
-    cudaResViewFormatSignedShort2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedShort2{{endif}}
-    {{if 'cudaResViewFormatSignedShort4' in found_values}}
-
-    #: 4 channel signed 16-bit integers
-    cudaResViewFormatSignedShort4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedShort4{{endif}}
-    {{if 'cudaResViewFormatUnsignedInt1' in found_values}}
-
-    #: 1 channel unsigned 32-bit integers
-    cudaResViewFormatUnsignedInt1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt1{{endif}}
-    {{if 'cudaResViewFormatUnsignedInt2' in found_values}}
-
-    #: 2 channel unsigned 32-bit integers
-    cudaResViewFormatUnsignedInt2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt2{{endif}}
-    {{if 'cudaResViewFormatUnsignedInt4' in found_values}}
-
-    #: 4 channel unsigned 32-bit integers
-    cudaResViewFormatUnsignedInt4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt4{{endif}}
-    {{if 'cudaResViewFormatSignedInt1' in found_values}}
-
-    #: 1 channel signed 32-bit integers
-    cudaResViewFormatSignedInt1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedInt1{{endif}}
-    {{if 'cudaResViewFormatSignedInt2' in found_values}}
-
-    #: 2 channel signed 32-bit integers
-    cudaResViewFormatSignedInt2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedInt2{{endif}}
-    {{if 'cudaResViewFormatSignedInt4' in found_values}}
-
-    #: 4 channel signed 32-bit integers
-    cudaResViewFormatSignedInt4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedInt4{{endif}}
-    {{if 'cudaResViewFormatHalf1' in found_values}}
-
-    #: 1 channel 16-bit floating point
-    cudaResViewFormatHalf1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatHalf1{{endif}}
-    {{if 'cudaResViewFormatHalf2' in found_values}}
-
-    #: 2 channel 16-bit floating point
-    cudaResViewFormatHalf2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatHalf2{{endif}}
-    {{if 'cudaResViewFormatHalf4' in found_values}}
-
-    #: 4 channel 16-bit floating point
-    cudaResViewFormatHalf4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatHalf4{{endif}}
-    {{if 'cudaResViewFormatFloat1' in found_values}}
-
-    #: 1 channel 32-bit floating point
-    cudaResViewFormatFloat1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatFloat1{{endif}}
-    {{if 'cudaResViewFormatFloat2' in found_values}}
-
-    #: 2 channel 32-bit floating point
-    cudaResViewFormatFloat2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatFloat2{{endif}}
-    {{if 'cudaResViewFormatFloat4' in found_values}}
-
-    #: 4 channel 32-bit floating point
-    cudaResViewFormatFloat4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatFloat4{{endif}}
-    {{if 'cudaResViewFormatUnsignedBlockCompressed1' in found_values}}
-
-    #: Block compressed 1
-    cudaResViewFormatUnsignedBlockCompressed1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed1{{endif}}
-    {{if 'cudaResViewFormatUnsignedBlockCompressed2' in found_values}}
-
-    #: Block compressed 2
-    cudaResViewFormatUnsignedBlockCompressed2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed2{{endif}}
-    {{if 'cudaResViewFormatUnsignedBlockCompressed3' in found_values}}
-
-    #: Block compressed 3
-    cudaResViewFormatUnsignedBlockCompressed3 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed3{{endif}}
-    {{if 'cudaResViewFormatUnsignedBlockCompressed4' in found_values}}
-
-    #: Block compressed 4 unsigned
-    cudaResViewFormatUnsignedBlockCompressed4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed4{{endif}}
-    {{if 'cudaResViewFormatSignedBlockCompressed4' in found_values}}
-
-    #: Block compressed 4 signed
-    cudaResViewFormatSignedBlockCompressed4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed4{{endif}}
-    {{if 'cudaResViewFormatUnsignedBlockCompressed5' in found_values}}
-
-    #: Block compressed 5 unsigned
-    cudaResViewFormatUnsignedBlockCompressed5 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed5{{endif}}
-    {{if 'cudaResViewFormatSignedBlockCompressed5' in found_values}}
-
-    #: Block compressed 5 signed
-    cudaResViewFormatSignedBlockCompressed5 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed5{{endif}}
-    {{if 'cudaResViewFormatUnsignedBlockCompressed6H' in found_values}}
-
-    #: Block compressed 6 unsigned half-float
-    cudaResViewFormatUnsignedBlockCompressed6H = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed6H{{endif}}
-    {{if 'cudaResViewFormatSignedBlockCompressed6H' in found_values}}
-
-    #: Block compressed 6 signed half-float
-    cudaResViewFormatSignedBlockCompressed6H = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed6H{{endif}}
-    {{if 'cudaResViewFormatUnsignedBlockCompressed7' in found_values}}
-
-    #: Block compressed 7
-    cudaResViewFormatUnsignedBlockCompressed7 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed7{{endif}}
-{{endif}}
-{{if 'cudaFuncAttribute' in found_types}}
-
-class cudaFuncAttribute(IntEnum):
-    """
-    CUDA function attributes that can be set using
-    :py:obj:`~.cudaFuncSetAttribute`
-    """
-    {{if 'cudaFuncAttributeMaxDynamicSharedMemorySize' in found_values}}
-
-    #: Maximum dynamic shared memory size
-    cudaFuncAttributeMaxDynamicSharedMemorySize = cyruntime.cudaFuncAttribute.cudaFuncAttributeMaxDynamicSharedMemorySize{{endif}}
-    {{if 'cudaFuncAttributePreferredSharedMemoryCarveout' in found_values}}
-
-    #: Preferred shared memory-L1 cache split
-    cudaFuncAttributePreferredSharedMemoryCarveout = cyruntime.cudaFuncAttribute.cudaFuncAttributePreferredSharedMemoryCarveout{{endif}}
-    {{if 'cudaFuncAttributeClusterDimMustBeSet' in found_values}}
-
-    #: Indicator to enforce valid cluster dimension specification on kernel
-    #: launch
-    cudaFuncAttributeClusterDimMustBeSet = cyruntime.cudaFuncAttribute.cudaFuncAttributeClusterDimMustBeSet{{endif}}
-    {{if 'cudaFuncAttributeRequiredClusterWidth' in found_values}}
-
-    #: Required cluster width
-    cudaFuncAttributeRequiredClusterWidth = cyruntime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterWidth{{endif}}
-    {{if 'cudaFuncAttributeRequiredClusterHeight' in found_values}}
-
-    #: Required cluster height
-    cudaFuncAttributeRequiredClusterHeight = cyruntime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterHeight{{endif}}
-    {{if 'cudaFuncAttributeRequiredClusterDepth' in found_values}}
-
-    #: Required cluster depth
-    cudaFuncAttributeRequiredClusterDepth = cyruntime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterDepth{{endif}}
-    {{if 'cudaFuncAttributeNonPortableClusterSizeAllowed' in found_values}}
-
-    #: Whether non-portable cluster scheduling policy is supported
-    cudaFuncAttributeNonPortableClusterSizeAllowed = cyruntime.cudaFuncAttribute.cudaFuncAttributeNonPortableClusterSizeAllowed{{endif}}
-    {{if 'cudaFuncAttributeClusterSchedulingPolicyPreference' in found_values}}
-
-    #: Required cluster scheduling policy preference
-    cudaFuncAttributeClusterSchedulingPolicyPreference = cyruntime.cudaFuncAttribute.cudaFuncAttributeClusterSchedulingPolicyPreference{{endif}}
-    {{if 'cudaFuncAttributeMax' in found_values}}
-    cudaFuncAttributeMax = cyruntime.cudaFuncAttribute.cudaFuncAttributeMax{{endif}}
-{{endif}}
-{{if 'cudaFuncCache' in found_types}}
-
-class cudaFuncCache(IntEnum):
-    """
-    CUDA function cache configurations
-    """
-    {{if 'cudaFuncCachePreferNone' in found_values}}
-
-    #: Default function cache configuration, no preference
-    cudaFuncCachePreferNone = cyruntime.cudaFuncCache.cudaFuncCachePreferNone{{endif}}
-    {{if 'cudaFuncCachePreferShared' in found_values}}
-
-    #: Prefer larger shared memory and smaller L1 cache
-    cudaFuncCachePreferShared = cyruntime.cudaFuncCache.cudaFuncCachePreferShared{{endif}}
-    {{if 'cudaFuncCachePreferL1' in found_values}}
-
-    #: Prefer larger L1 cache and smaller shared memory
-    cudaFuncCachePreferL1 = cyruntime.cudaFuncCache.cudaFuncCachePreferL1{{endif}}
-    {{if 'cudaFuncCachePreferEqual' in found_values}}
-
-    #: Prefer equal size L1 cache and shared memory
-    cudaFuncCachePreferEqual = cyruntime.cudaFuncCache.cudaFuncCachePreferEqual{{endif}}
-{{endif}}
-{{if 'cudaSharedMemConfig' in found_types}}
-
-class cudaSharedMemConfig(IntEnum):
-    """
-    CUDA shared memory configuration [Deprecated]
-    """
-    {{if 'cudaSharedMemBankSizeDefault' in found_values}}
-    cudaSharedMemBankSizeDefault = cyruntime.cudaSharedMemConfig.cudaSharedMemBankSizeDefault{{endif}}
-    {{if 'cudaSharedMemBankSizeFourByte' in found_values}}
-    cudaSharedMemBankSizeFourByte = cyruntime.cudaSharedMemConfig.cudaSharedMemBankSizeFourByte{{endif}}
-    {{if 'cudaSharedMemBankSizeEightByte' in found_values}}
-    cudaSharedMemBankSizeEightByte = cyruntime.cudaSharedMemConfig.cudaSharedMemBankSizeEightByte{{endif}}
-{{endif}}
-{{if 'cudaSharedCarveout' in found_types}}
-
-class cudaSharedCarveout(IntEnum):
-    """
-    Shared memory carveout configurations. These may be passed to
-    cudaFuncSetAttribute
-    """
-    {{if 'cudaSharedmemCarveoutDefault' in found_values}}
-
-    #: No preference for shared memory or L1 (default)
-    cudaSharedmemCarveoutDefault = cyruntime.cudaSharedCarveout.cudaSharedmemCarveoutDefault{{endif}}
-    {{if 'cudaSharedmemCarveoutMaxL1' in found_values}}
-
-    #: Prefer maximum available L1 cache, minimum shared memory
-    cudaSharedmemCarveoutMaxL1 = cyruntime.cudaSharedCarveout.cudaSharedmemCarveoutMaxL1{{endif}}
-    {{if 'cudaSharedmemCarveoutMaxShared' in found_values}}
-
-    #: Prefer maximum available shared memory, minimum L1 cache
-    cudaSharedmemCarveoutMaxShared = cyruntime.cudaSharedCarveout.cudaSharedmemCarveoutMaxShared{{endif}}
-{{endif}}
-{{if 'cudaComputeMode' in found_types}}
-
-class cudaComputeMode(IntEnum):
-    """
-    CUDA device compute modes
-    """
-    {{if 'cudaComputeModeDefault' in found_values}}
-
-    #: Default compute mode (Multiple threads can use
-    #: :py:obj:`~.cudaSetDevice()` with this device)
-    cudaComputeModeDefault = cyruntime.cudaComputeMode.cudaComputeModeDefault{{endif}}
-    {{if 'cudaComputeModeExclusive' in found_values}}
-
-    #: Compute-exclusive-thread mode (Only one thread in one process will
-    #: be able to use :py:obj:`~.cudaSetDevice()` with this device)
-    cudaComputeModeExclusive = cyruntime.cudaComputeMode.cudaComputeModeExclusive{{endif}}
-    {{if 'cudaComputeModeProhibited' in found_values}}
-
-    #: Compute-prohibited mode (No threads can use
-    #: :py:obj:`~.cudaSetDevice()` with this device)
-    cudaComputeModeProhibited = cyruntime.cudaComputeMode.cudaComputeModeProhibited{{endif}}
-    {{if 'cudaComputeModeExclusiveProcess' in found_values}}
-
-    #: Compute-exclusive-process mode (Many threads in one process will be
-    #: able to use :py:obj:`~.cudaSetDevice()` with this device)
-    cudaComputeModeExclusiveProcess = cyruntime.cudaComputeMode.cudaComputeModeExclusiveProcess{{endif}}
-{{endif}}
-{{if 'cudaLimit' in found_types}}
-
-class cudaLimit(IntEnum):
-    """
-    CUDA Limits
-    """
-    {{if 'cudaLimitStackSize' in found_values}}
-
-    #: GPU thread stack size
-    cudaLimitStackSize = cyruntime.cudaLimit.cudaLimitStackSize{{endif}}
-    {{if 'cudaLimitPrintfFifoSize' in found_values}}
-
-    #: GPU printf FIFO size
-    cudaLimitPrintfFifoSize = cyruntime.cudaLimit.cudaLimitPrintfFifoSize{{endif}}
-    {{if 'cudaLimitMallocHeapSize' in found_values}}
-
-    #: GPU malloc heap size
-    cudaLimitMallocHeapSize = cyruntime.cudaLimit.cudaLimitMallocHeapSize{{endif}}
-    {{if 'cudaLimitDevRuntimeSyncDepth' in found_values}}
-
-    #: GPU device runtime synchronize depth
-    cudaLimitDevRuntimeSyncDepth = cyruntime.cudaLimit.cudaLimitDevRuntimeSyncDepth{{endif}}
-    {{if 'cudaLimitDevRuntimePendingLaunchCount' in found_values}}
-
-    #: GPU device runtime pending launch count
-    cudaLimitDevRuntimePendingLaunchCount = cyruntime.cudaLimit.cudaLimitDevRuntimePendingLaunchCount{{endif}}
-    {{if 'cudaLimitMaxL2FetchGranularity' in found_values}}
-
-    #: A value between 0 and 128 that indicates the maximum fetch
-    #: granularity of L2 (in Bytes). This is a hint
-    cudaLimitMaxL2FetchGranularity = cyruntime.cudaLimit.cudaLimitMaxL2FetchGranularity{{endif}}
-    {{if 'cudaLimitPersistingL2CacheSize' in found_values}}
-
-    #: A size in bytes for L2 persisting lines cache size
-    cudaLimitPersistingL2CacheSize = cyruntime.cudaLimit.cudaLimitPersistingL2CacheSize{{endif}}
-{{endif}}
-{{if 'cudaMemoryAdvise' in found_types}}
-
-class cudaMemoryAdvise(IntEnum):
-    """
-    CUDA Memory Advise values
-    """
-    {{if 'cudaMemAdviseSetReadMostly' in found_values}}
-
-    #: Data will mostly be read and only occassionally be written to
-    cudaMemAdviseSetReadMostly = cyruntime.cudaMemoryAdvise.cudaMemAdviseSetReadMostly{{endif}}
-    {{if 'cudaMemAdviseUnsetReadMostly' in found_values}}
-
-    #: Undo the effect of :py:obj:`~.cudaMemAdviseSetReadMostly`
-    cudaMemAdviseUnsetReadMostly = cyruntime.cudaMemoryAdvise.cudaMemAdviseUnsetReadMostly{{endif}}
-    {{if 'cudaMemAdviseSetPreferredLocation' in found_values}}
-
-    #: Set the preferred location for the data as the specified device
-    cudaMemAdviseSetPreferredLocation = cyruntime.cudaMemoryAdvise.cudaMemAdviseSetPreferredLocation{{endif}}
-    {{if 'cudaMemAdviseUnsetPreferredLocation' in found_values}}
-
-    #: Clear the preferred location for the data
-    cudaMemAdviseUnsetPreferredLocation = cyruntime.cudaMemoryAdvise.cudaMemAdviseUnsetPreferredLocation{{endif}}
-    {{if 'cudaMemAdviseSetAccessedBy' in found_values}}
-
-    #: Data will be accessed by the specified device, so prevent page
-    #: faults as much as possible
-    cudaMemAdviseSetAccessedBy = cyruntime.cudaMemoryAdvise.cudaMemAdviseSetAccessedBy{{endif}}
-    {{if 'cudaMemAdviseUnsetAccessedBy' in found_values}}
-
-    #: Let the Unified Memory subsystem decide on the page faulting policy
-    #: for the specified device
-    cudaMemAdviseUnsetAccessedBy = cyruntime.cudaMemoryAdvise.cudaMemAdviseUnsetAccessedBy{{endif}}
-{{endif}}
-{{if 'cudaMemRangeAttribute' in found_types}}
-
-class cudaMemRangeAttribute(IntEnum):
-    """
-    CUDA range attributes
-    """
-    {{if 'cudaMemRangeAttributeReadMostly' in found_values}}
-
-    #: Whether the range will mostly be read and only occassionally be
-    #: written to
-    cudaMemRangeAttributeReadMostly = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeReadMostly{{endif}}
-    {{if 'cudaMemRangeAttributePreferredLocation' in found_values}}
-
-    #: The preferred location of the range
-    cudaMemRangeAttributePreferredLocation = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocation{{endif}}
-    {{if 'cudaMemRangeAttributeAccessedBy' in found_values}}
-
-    #: Memory range has :py:obj:`~.cudaMemAdviseSetAccessedBy` set for
-    #: specified device
-    cudaMemRangeAttributeAccessedBy = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeAccessedBy{{endif}}
-    {{if 'cudaMemRangeAttributeLastPrefetchLocation' in found_values}}
-
-    #: The last location to which the range was prefetched
-    cudaMemRangeAttributeLastPrefetchLocation = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocation{{endif}}
-    {{if 'cudaMemRangeAttributePreferredLocationType' in found_values}}
-
-    #: The preferred location type of the range
-    cudaMemRangeAttributePreferredLocationType = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocationType{{endif}}
-    {{if 'cudaMemRangeAttributePreferredLocationId' in found_values}}
-
-    #: The preferred location id of the range
-    cudaMemRangeAttributePreferredLocationId = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocationId{{endif}}
-    {{if 'cudaMemRangeAttributeLastPrefetchLocationType' in found_values}}
-
-    #: The last location type to which the range was prefetched
-    cudaMemRangeAttributeLastPrefetchLocationType = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocationType{{endif}}
-    {{if 'cudaMemRangeAttributeLastPrefetchLocationId' in found_values}}
-
-    #: The last location id to which the range was prefetched
-    cudaMemRangeAttributeLastPrefetchLocationId = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocationId{{endif}}
-{{endif}}
-{{if 'cudaFlushGPUDirectRDMAWritesOptions' in found_types}}
-
-class cudaFlushGPUDirectRDMAWritesOptions(IntEnum):
-    """
-    CUDA GPUDirect RDMA flush writes APIs supported on the device
-    """
-    {{if 'cudaFlushGPUDirectRDMAWritesOptionHost' in found_values}}
-
-    #: :py:obj:`~.cudaDeviceFlushGPUDirectRDMAWrites()` and its CUDA Driver
-    #: API counterpart are supported on the device.
-    cudaFlushGPUDirectRDMAWritesOptionHost = cyruntime.cudaFlushGPUDirectRDMAWritesOptions.cudaFlushGPUDirectRDMAWritesOptionHost{{endif}}
-    {{if 'cudaFlushGPUDirectRDMAWritesOptionMemOps' in found_values}}
-
-    #: The :py:obj:`~.CU_STREAM_WAIT_VALUE_FLUSH` flag and the
-    #: :py:obj:`~.CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES` MemOp are supported
-    #: on the CUDA device.
-    cudaFlushGPUDirectRDMAWritesOptionMemOps = cyruntime.cudaFlushGPUDirectRDMAWritesOptions.cudaFlushGPUDirectRDMAWritesOptionMemOps{{endif}}
-{{endif}}
-{{if 'cudaGPUDirectRDMAWritesOrdering' in found_types}}
-
-class cudaGPUDirectRDMAWritesOrdering(IntEnum):
-    """
-    CUDA GPUDirect RDMA flush writes ordering features of the device
-    """
-    {{if 'cudaGPUDirectRDMAWritesOrderingNone' in found_values}}
-
-    #: The device does not natively support ordering of GPUDirect RDMA
-    #: writes. :py:obj:`~.cudaFlushGPUDirectRDMAWrites()` can be leveraged
-    #: if supported.
-    cudaGPUDirectRDMAWritesOrderingNone = cyruntime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingNone{{endif}}
-    {{if 'cudaGPUDirectRDMAWritesOrderingOwner' in found_values}}
-
-    #: Natively, the device can consistently consume GPUDirect RDMA writes,
-    #: although other CUDA devices may not.
-    cudaGPUDirectRDMAWritesOrderingOwner = cyruntime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingOwner{{endif}}
-    {{if 'cudaGPUDirectRDMAWritesOrderingAllDevices' in found_values}}
-
-    #: Any CUDA device in the system can consistently consume GPUDirect
-    #: RDMA writes to this device.
-    cudaGPUDirectRDMAWritesOrderingAllDevices = cyruntime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingAllDevices{{endif}}
-{{endif}}
-{{if 'cudaFlushGPUDirectRDMAWritesScope' in found_types}}
-
-class cudaFlushGPUDirectRDMAWritesScope(IntEnum):
-    """
-    CUDA GPUDirect RDMA flush writes scopes
-    """
-    {{if 'cudaFlushGPUDirectRDMAWritesToOwner' in found_values}}
-
-    #: Blocks until remote writes are visible to the CUDA device context
-    #: owning the data.
-    cudaFlushGPUDirectRDMAWritesToOwner = cyruntime.cudaFlushGPUDirectRDMAWritesScope.cudaFlushGPUDirectRDMAWritesToOwner{{endif}}
-    {{if 'cudaFlushGPUDirectRDMAWritesToAllDevices' in found_values}}
-
-    #: Blocks until remote writes are visible to all CUDA device contexts.
-    cudaFlushGPUDirectRDMAWritesToAllDevices = cyruntime.cudaFlushGPUDirectRDMAWritesScope.cudaFlushGPUDirectRDMAWritesToAllDevices{{endif}}
-{{endif}}
-{{if 'cudaFlushGPUDirectRDMAWritesTarget' in found_types}}
-
-class cudaFlushGPUDirectRDMAWritesTarget(IntEnum):
-    """
-    CUDA GPUDirect RDMA flush writes targets
-    """
-    {{if 'cudaFlushGPUDirectRDMAWritesTargetCurrentDevice' in found_values}}
-
-    #: Sets the target for :py:obj:`~.cudaDeviceFlushGPUDirectRDMAWrites()`
-    #: to the currently active CUDA device context.
-    cudaFlushGPUDirectRDMAWritesTargetCurrentDevice = cyruntime.cudaFlushGPUDirectRDMAWritesTarget.cudaFlushGPUDirectRDMAWritesTargetCurrentDevice{{endif}}
-{{endif}}
-{{if 'cudaDeviceAttr' in found_types}}
-
-class cudaDeviceAttr(IntEnum):
-    """
-    CUDA device attributes
-    """
-    {{if 'cudaDevAttrMaxThreadsPerBlock' in found_values}}
-
-    #: Maximum number of threads per block
-    cudaDevAttrMaxThreadsPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrMaxThreadsPerBlock{{endif}}
-    {{if 'cudaDevAttrMaxBlockDimX' in found_values}}
-
-    #: Maximum block dimension X
-    cudaDevAttrMaxBlockDimX = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlockDimX{{endif}}
-    {{if 'cudaDevAttrMaxBlockDimY' in found_values}}
-
-    #: Maximum block dimension Y
-    cudaDevAttrMaxBlockDimY = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlockDimY{{endif}}
-    {{if 'cudaDevAttrMaxBlockDimZ' in found_values}}
-
-    #: Maximum block dimension Z
-    cudaDevAttrMaxBlockDimZ = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlockDimZ{{endif}}
-    {{if 'cudaDevAttrMaxGridDimX' in found_values}}
-
-    #: Maximum grid dimension X
-    cudaDevAttrMaxGridDimX = cyruntime.cudaDeviceAttr.cudaDevAttrMaxGridDimX{{endif}}
-    {{if 'cudaDevAttrMaxGridDimY' in found_values}}
-
-    #: Maximum grid dimension Y
-    cudaDevAttrMaxGridDimY = cyruntime.cudaDeviceAttr.cudaDevAttrMaxGridDimY{{endif}}
-    {{if 'cudaDevAttrMaxGridDimZ' in found_values}}
-
-    #: Maximum grid dimension Z
-    cudaDevAttrMaxGridDimZ = cyruntime.cudaDeviceAttr.cudaDevAttrMaxGridDimZ{{endif}}
-    {{if 'cudaDevAttrMaxSharedMemoryPerBlock' in found_values}}
-
-    #: Maximum shared memory available per block in bytes
-    cudaDevAttrMaxSharedMemoryPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerBlock{{endif}}
-    {{if 'cudaDevAttrTotalConstantMemory' in found_values}}
-
-    #: Memory available on device for constant variables in a CUDA C kernel
-    #: in bytes
-    cudaDevAttrTotalConstantMemory = cyruntime.cudaDeviceAttr.cudaDevAttrTotalConstantMemory{{endif}}
-    {{if 'cudaDevAttrWarpSize' in found_values}}
-
-    #: Warp size in threads
-    cudaDevAttrWarpSize = cyruntime.cudaDeviceAttr.cudaDevAttrWarpSize{{endif}}
-    {{if 'cudaDevAttrMaxPitch' in found_values}}
-
-    #: Maximum pitch in bytes allowed by memory copies
-    cudaDevAttrMaxPitch = cyruntime.cudaDeviceAttr.cudaDevAttrMaxPitch{{endif}}
-    {{if 'cudaDevAttrMaxRegistersPerBlock' in found_values}}
-
-    #: Maximum number of 32-bit registers available per block
-    cudaDevAttrMaxRegistersPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrMaxRegistersPerBlock{{endif}}
-    {{if 'cudaDevAttrClockRate' in found_values}}
-
-    #: Peak clock frequency in kilohertz
-    cudaDevAttrClockRate = cyruntime.cudaDeviceAttr.cudaDevAttrClockRate{{endif}}
-    {{if 'cudaDevAttrTextureAlignment' in found_values}}
-
-    #: Alignment requirement for textures
-    cudaDevAttrTextureAlignment = cyruntime.cudaDeviceAttr.cudaDevAttrTextureAlignment{{endif}}
-    {{if 'cudaDevAttrGpuOverlap' in found_values}}
-
-    #: Device can possibly copy memory and execute a kernel concurrently
-    cudaDevAttrGpuOverlap = cyruntime.cudaDeviceAttr.cudaDevAttrGpuOverlap{{endif}}
-    {{if 'cudaDevAttrMultiProcessorCount' in found_values}}
-
-    #: Number of multiprocessors on device
-    cudaDevAttrMultiProcessorCount = cyruntime.cudaDeviceAttr.cudaDevAttrMultiProcessorCount{{endif}}
-    {{if 'cudaDevAttrKernelExecTimeout' in found_values}}
-
-    #: Specifies whether there is a run time limit on kernels
-    cudaDevAttrKernelExecTimeout = cyruntime.cudaDeviceAttr.cudaDevAttrKernelExecTimeout{{endif}}
-    {{if 'cudaDevAttrIntegrated' in found_values}}
-
-    #: Device is integrated with host memory
-    cudaDevAttrIntegrated = cyruntime.cudaDeviceAttr.cudaDevAttrIntegrated{{endif}}
-    {{if 'cudaDevAttrCanMapHostMemory' in found_values}}
-
-    #: Device can map host memory into CUDA address space
-    cudaDevAttrCanMapHostMemory = cyruntime.cudaDeviceAttr.cudaDevAttrCanMapHostMemory{{endif}}
-    {{if 'cudaDevAttrComputeMode' in found_values}}
-
-    #: Compute mode (See :py:obj:`~.cudaComputeMode` for details)
-    cudaDevAttrComputeMode = cyruntime.cudaDeviceAttr.cudaDevAttrComputeMode{{endif}}
-    {{if 'cudaDevAttrMaxTexture1DWidth' in found_values}}
-
-    #: Maximum 1D texture width
-    cudaDevAttrMaxTexture1DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DWidth{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DWidth' in found_values}}
-
-    #: Maximum 2D texture width
-    cudaDevAttrMaxTexture2DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DWidth{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DHeight' in found_values}}
-
-    #: Maximum 2D texture height
-    cudaDevAttrMaxTexture2DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DHeight{{endif}}
-    {{if 'cudaDevAttrMaxTexture3DWidth' in found_values}}
-
-    #: Maximum 3D texture width
-    cudaDevAttrMaxTexture3DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DWidth{{endif}}
-    {{if 'cudaDevAttrMaxTexture3DHeight' in found_values}}
-
-    #: Maximum 3D texture height
-    cudaDevAttrMaxTexture3DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DHeight{{endif}}
-    {{if 'cudaDevAttrMaxTexture3DDepth' in found_values}}
-
-    #: Maximum 3D texture depth
-    cudaDevAttrMaxTexture3DDepth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DDepth{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DLayeredWidth' in found_values}}
-
-    #: Maximum 2D layered texture width
-    cudaDevAttrMaxTexture2DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredWidth{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DLayeredHeight' in found_values}}
-
-    #: Maximum 2D layered texture height
-    cudaDevAttrMaxTexture2DLayeredHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredHeight{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DLayeredLayers' in found_values}}
-
-    #: Maximum layers in a 2D layered texture
-    cudaDevAttrMaxTexture2DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredLayers{{endif}}
-    {{if 'cudaDevAttrSurfaceAlignment' in found_values}}
-
-    #: Alignment requirement for surfaces
-    cudaDevAttrSurfaceAlignment = cyruntime.cudaDeviceAttr.cudaDevAttrSurfaceAlignment{{endif}}
-    {{if 'cudaDevAttrConcurrentKernels' in found_values}}
-
-    #: Device can possibly execute multiple kernels concurrently
-    cudaDevAttrConcurrentKernels = cyruntime.cudaDeviceAttr.cudaDevAttrConcurrentKernels{{endif}}
-    {{if 'cudaDevAttrEccEnabled' in found_values}}
-
-    #: Device has ECC support enabled
-    cudaDevAttrEccEnabled = cyruntime.cudaDeviceAttr.cudaDevAttrEccEnabled{{endif}}
-    {{if 'cudaDevAttrPciBusId' in found_values}}
-
-    #: PCI bus ID of the device
-    cudaDevAttrPciBusId = cyruntime.cudaDeviceAttr.cudaDevAttrPciBusId{{endif}}
-    {{if 'cudaDevAttrPciDeviceId' in found_values}}
-
-    #: PCI device ID of the device
-    cudaDevAttrPciDeviceId = cyruntime.cudaDeviceAttr.cudaDevAttrPciDeviceId{{endif}}
-    {{if 'cudaDevAttrTccDriver' in found_values}}
-
-    #: Device is using TCC driver model
-    cudaDevAttrTccDriver = cyruntime.cudaDeviceAttr.cudaDevAttrTccDriver{{endif}}
-    {{if 'cudaDevAttrMemoryClockRate' in found_values}}
-
-    #: Peak memory clock frequency in kilohertz
-    cudaDevAttrMemoryClockRate = cyruntime.cudaDeviceAttr.cudaDevAttrMemoryClockRate{{endif}}
-    {{if 'cudaDevAttrGlobalMemoryBusWidth' in found_values}}
-
-    #: Global memory bus width in bits
-    cudaDevAttrGlobalMemoryBusWidth = cyruntime.cudaDeviceAttr.cudaDevAttrGlobalMemoryBusWidth{{endif}}
-    {{if 'cudaDevAttrL2CacheSize' in found_values}}
-
-    #: Size of L2 cache in bytes
-    cudaDevAttrL2CacheSize = cyruntime.cudaDeviceAttr.cudaDevAttrL2CacheSize{{endif}}
-    {{if 'cudaDevAttrMaxThreadsPerMultiProcessor' in found_values}}
-
-    #: Maximum resident threads per multiprocessor
-    cudaDevAttrMaxThreadsPerMultiProcessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxThreadsPerMultiProcessor{{endif}}
-    {{if 'cudaDevAttrAsyncEngineCount' in found_values}}
-
-    #: Number of asynchronous engines
-    cudaDevAttrAsyncEngineCount = cyruntime.cudaDeviceAttr.cudaDevAttrAsyncEngineCount{{endif}}
-    {{if 'cudaDevAttrUnifiedAddressing' in found_values}}
-
-    #: Device shares a unified address space with the host
-    cudaDevAttrUnifiedAddressing = cyruntime.cudaDeviceAttr.cudaDevAttrUnifiedAddressing{{endif}}
-    {{if 'cudaDevAttrMaxTexture1DLayeredWidth' in found_values}}
-
-    #: Maximum 1D layered texture width
-    cudaDevAttrMaxTexture1DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLayeredWidth{{endif}}
-    {{if 'cudaDevAttrMaxTexture1DLayeredLayers' in found_values}}
-
-    #: Maximum layers in a 1D layered texture
-    cudaDevAttrMaxTexture1DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLayeredLayers{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DGatherWidth' in found_values}}
-
-    #: Maximum 2D texture width if cudaArrayTextureGather is set
-    cudaDevAttrMaxTexture2DGatherWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DGatherWidth{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DGatherHeight' in found_values}}
-
-    #: Maximum 2D texture height if cudaArrayTextureGather is set
-    cudaDevAttrMaxTexture2DGatherHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DGatherHeight{{endif}}
-    {{if 'cudaDevAttrMaxTexture3DWidthAlt' in found_values}}
-
-    #: Alternate maximum 3D texture width
-    cudaDevAttrMaxTexture3DWidthAlt = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DWidthAlt{{endif}}
-    {{if 'cudaDevAttrMaxTexture3DHeightAlt' in found_values}}
-
-    #: Alternate maximum 3D texture height
-    cudaDevAttrMaxTexture3DHeightAlt = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DHeightAlt{{endif}}
-    {{if 'cudaDevAttrMaxTexture3DDepthAlt' in found_values}}
-
-    #: Alternate maximum 3D texture depth
-    cudaDevAttrMaxTexture3DDepthAlt = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DDepthAlt{{endif}}
-    {{if 'cudaDevAttrPciDomainId' in found_values}}
-
-    #: PCI domain ID of the device
-    cudaDevAttrPciDomainId = cyruntime.cudaDeviceAttr.cudaDevAttrPciDomainId{{endif}}
-    {{if 'cudaDevAttrTexturePitchAlignment' in found_values}}
-
-    #: Pitch alignment requirement for textures
-    cudaDevAttrTexturePitchAlignment = cyruntime.cudaDeviceAttr.cudaDevAttrTexturePitchAlignment{{endif}}
-    {{if 'cudaDevAttrMaxTextureCubemapWidth' in found_values}}
-
-    #: Maximum cubemap texture width/height
-    cudaDevAttrMaxTextureCubemapWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapWidth{{endif}}
-    {{if 'cudaDevAttrMaxTextureCubemapLayeredWidth' in found_values}}
-
-    #: Maximum cubemap layered texture width/height
-    cudaDevAttrMaxTextureCubemapLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapLayeredWidth{{endif}}
-    {{if 'cudaDevAttrMaxTextureCubemapLayeredLayers' in found_values}}
-
-    #: Maximum layers in a cubemap layered texture
-    cudaDevAttrMaxTextureCubemapLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapLayeredLayers{{endif}}
-    {{if 'cudaDevAttrMaxSurface1DWidth' in found_values}}
-
-    #: Maximum 1D surface width
-    cudaDevAttrMaxSurface1DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface1DWidth{{endif}}
-    {{if 'cudaDevAttrMaxSurface2DWidth' in found_values}}
-
-    #: Maximum 2D surface width
-    cudaDevAttrMaxSurface2DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DWidth{{endif}}
-    {{if 'cudaDevAttrMaxSurface2DHeight' in found_values}}
-
-    #: Maximum 2D surface height
-    cudaDevAttrMaxSurface2DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DHeight{{endif}}
-    {{if 'cudaDevAttrMaxSurface3DWidth' in found_values}}
-
-    #: Maximum 3D surface width
-    cudaDevAttrMaxSurface3DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface3DWidth{{endif}}
-    {{if 'cudaDevAttrMaxSurface3DHeight' in found_values}}
-
-    #: Maximum 3D surface height
-    cudaDevAttrMaxSurface3DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface3DHeight{{endif}}
-    {{if 'cudaDevAttrMaxSurface3DDepth' in found_values}}
-
-    #: Maximum 3D surface depth
-    cudaDevAttrMaxSurface3DDepth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface3DDepth{{endif}}
-    {{if 'cudaDevAttrMaxSurface1DLayeredWidth' in found_values}}
-
-    #: Maximum 1D layered surface width
-    cudaDevAttrMaxSurface1DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface1DLayeredWidth{{endif}}
-    {{if 'cudaDevAttrMaxSurface1DLayeredLayers' in found_values}}
-
-    #: Maximum layers in a 1D layered surface
-    cudaDevAttrMaxSurface1DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface1DLayeredLayers{{endif}}
-    {{if 'cudaDevAttrMaxSurface2DLayeredWidth' in found_values}}
-
-    #: Maximum 2D layered surface width
-    cudaDevAttrMaxSurface2DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredWidth{{endif}}
-    {{if 'cudaDevAttrMaxSurface2DLayeredHeight' in found_values}}
-
-    #: Maximum 2D layered surface height
-    cudaDevAttrMaxSurface2DLayeredHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredHeight{{endif}}
-    {{if 'cudaDevAttrMaxSurface2DLayeredLayers' in found_values}}
-
-    #: Maximum layers in a 2D layered surface
-    cudaDevAttrMaxSurface2DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredLayers{{endif}}
-    {{if 'cudaDevAttrMaxSurfaceCubemapWidth' in found_values}}
-
-    #: Maximum cubemap surface width
-    cudaDevAttrMaxSurfaceCubemapWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapWidth{{endif}}
-    {{if 'cudaDevAttrMaxSurfaceCubemapLayeredWidth' in found_values}}
-
-    #: Maximum cubemap layered surface width
-    cudaDevAttrMaxSurfaceCubemapLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapLayeredWidth{{endif}}
-    {{if 'cudaDevAttrMaxSurfaceCubemapLayeredLayers' in found_values}}
-
-    #: Maximum layers in a cubemap layered surface
-    cudaDevAttrMaxSurfaceCubemapLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapLayeredLayers{{endif}}
-    {{if 'cudaDevAttrMaxTexture1DLinearWidth' in found_values}}
-
-    #: Maximum 1D linear texture width
-    cudaDevAttrMaxTexture1DLinearWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLinearWidth{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DLinearWidth' in found_values}}
-
-    #: Maximum 2D linear texture width
-    cudaDevAttrMaxTexture2DLinearWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearWidth{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DLinearHeight' in found_values}}
-
-    #: Maximum 2D linear texture height
-    cudaDevAttrMaxTexture2DLinearHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearHeight{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DLinearPitch' in found_values}}
-
-    #: Maximum 2D linear texture pitch in bytes
-    cudaDevAttrMaxTexture2DLinearPitch = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearPitch{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DMipmappedWidth' in found_values}}
-
-    #: Maximum mipmapped 2D texture width
-    cudaDevAttrMaxTexture2DMipmappedWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DMipmappedWidth{{endif}}
-    {{if 'cudaDevAttrMaxTexture2DMipmappedHeight' in found_values}}
-
-    #: Maximum mipmapped 2D texture height
-    cudaDevAttrMaxTexture2DMipmappedHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DMipmappedHeight{{endif}}
-    {{if 'cudaDevAttrComputeCapabilityMajor' in found_values}}
-
-    #: Major compute capability version number
-    cudaDevAttrComputeCapabilityMajor = cyruntime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor{{endif}}
-    {{if 'cudaDevAttrComputeCapabilityMinor' in found_values}}
-
-    #: Minor compute capability version number
-    cudaDevAttrComputeCapabilityMinor = cyruntime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor{{endif}}
-    {{if 'cudaDevAttrMaxTexture1DMipmappedWidth' in found_values}}
-
-    #: Maximum mipmapped 1D texture width
-    cudaDevAttrMaxTexture1DMipmappedWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DMipmappedWidth{{endif}}
-    {{if 'cudaDevAttrStreamPrioritiesSupported' in found_values}}
-
-    #: Device supports stream priorities
-    cudaDevAttrStreamPrioritiesSupported = cyruntime.cudaDeviceAttr.cudaDevAttrStreamPrioritiesSupported{{endif}}
-    {{if 'cudaDevAttrGlobalL1CacheSupported' in found_values}}
-
-    #: Device supports caching globals in L1
-    cudaDevAttrGlobalL1CacheSupported = cyruntime.cudaDeviceAttr.cudaDevAttrGlobalL1CacheSupported{{endif}}
-    {{if 'cudaDevAttrLocalL1CacheSupported' in found_values}}
-
-    #: Device supports caching locals in L1
-    cudaDevAttrLocalL1CacheSupported = cyruntime.cudaDeviceAttr.cudaDevAttrLocalL1CacheSupported{{endif}}
-    {{if 'cudaDevAttrMaxSharedMemoryPerMultiprocessor' in found_values}}
-
-    #: Maximum shared memory available per multiprocessor in bytes
-    cudaDevAttrMaxSharedMemoryPerMultiprocessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerMultiprocessor{{endif}}
-    {{if 'cudaDevAttrMaxRegistersPerMultiprocessor' in found_values}}
-
-    #: Maximum number of 32-bit registers available per multiprocessor
-    cudaDevAttrMaxRegistersPerMultiprocessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxRegistersPerMultiprocessor{{endif}}
-    {{if 'cudaDevAttrManagedMemory' in found_values}}
-
-    #: Device can allocate managed memory on this system
-    cudaDevAttrManagedMemory = cyruntime.cudaDeviceAttr.cudaDevAttrManagedMemory{{endif}}
-    {{if 'cudaDevAttrIsMultiGpuBoard' in found_values}}
-
-    #: Device is on a multi-GPU board
-    cudaDevAttrIsMultiGpuBoard = cyruntime.cudaDeviceAttr.cudaDevAttrIsMultiGpuBoard{{endif}}
-    {{if 'cudaDevAttrMultiGpuBoardGroupID' in found_values}}
-
-    #: Unique identifier for a group of devices on the same multi-GPU board
-    cudaDevAttrMultiGpuBoardGroupID = cyruntime.cudaDeviceAttr.cudaDevAttrMultiGpuBoardGroupID{{endif}}
-    {{if 'cudaDevAttrHostNativeAtomicSupported' in found_values}}
-
-    #: Link between the device and the host supports native atomic
-    #: operations
-    cudaDevAttrHostNativeAtomicSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostNativeAtomicSupported{{endif}}
-    {{if 'cudaDevAttrSingleToDoublePrecisionPerfRatio' in found_values}}
-
-    #: Ratio of single precision performance (in floating-point operations
-    #: per second) to double precision performance
-    cudaDevAttrSingleToDoublePrecisionPerfRatio = cyruntime.cudaDeviceAttr.cudaDevAttrSingleToDoublePrecisionPerfRatio{{endif}}
-    {{if 'cudaDevAttrPageableMemoryAccess' in found_values}}
-
-    #: Device supports coherently accessing pageable memory without calling
-    #: cudaHostRegister on it
-    cudaDevAttrPageableMemoryAccess = cyruntime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess{{endif}}
-    {{if 'cudaDevAttrConcurrentManagedAccess' in found_values}}
-
-    #: Device can coherently access managed memory concurrently with the
-    #: CPU
-    cudaDevAttrConcurrentManagedAccess = cyruntime.cudaDeviceAttr.cudaDevAttrConcurrentManagedAccess{{endif}}
-    {{if 'cudaDevAttrComputePreemptionSupported' in found_values}}
-
-    #: Device supports Compute Preemption
-    cudaDevAttrComputePreemptionSupported = cyruntime.cudaDeviceAttr.cudaDevAttrComputePreemptionSupported{{endif}}
-    {{if 'cudaDevAttrCanUseHostPointerForRegisteredMem' in found_values}}
-
-    #: Device can access host registered memory at the same virtual address
-    #: as the CPU
-    cudaDevAttrCanUseHostPointerForRegisteredMem = cyruntime.cudaDeviceAttr.cudaDevAttrCanUseHostPointerForRegisteredMem{{endif}}
-    {{if 'cudaDevAttrReserved92' in found_values}}
-    cudaDevAttrReserved92 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved92{{endif}}
-    {{if 'cudaDevAttrReserved93' in found_values}}
-    cudaDevAttrReserved93 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved93{{endif}}
-    {{if 'cudaDevAttrReserved94' in found_values}}
-    cudaDevAttrReserved94 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved94{{endif}}
-    {{if 'cudaDevAttrCooperativeLaunch' in found_values}}
-
-    #: Device supports launching cooperative kernels via
-    #: :py:obj:`~.cudaLaunchCooperativeKernel`
-    cudaDevAttrCooperativeLaunch = cyruntime.cudaDeviceAttr.cudaDevAttrCooperativeLaunch{{endif}}
-    {{if 'cudaDevAttrCooperativeMultiDeviceLaunch' in found_values}}
-
-    #: Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated.
-    cudaDevAttrCooperativeMultiDeviceLaunch = cyruntime.cudaDeviceAttr.cudaDevAttrCooperativeMultiDeviceLaunch{{endif}}
-    {{if 'cudaDevAttrMaxSharedMemoryPerBlockOptin' in found_values}}
-
-    #: The maximum optin shared memory per block. This value may vary by
-    #: chip. See :py:obj:`~.cudaFuncSetAttribute`
-    cudaDevAttrMaxSharedMemoryPerBlockOptin = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerBlockOptin{{endif}}
-    {{if 'cudaDevAttrCanFlushRemoteWrites' in found_values}}
-
-    #: Device supports flushing of outstanding remote writes.
-    cudaDevAttrCanFlushRemoteWrites = cyruntime.cudaDeviceAttr.cudaDevAttrCanFlushRemoteWrites{{endif}}
-    {{if 'cudaDevAttrHostRegisterSupported' in found_values}}
-
-    #: Device supports host memory registration via
-    #: :py:obj:`~.cudaHostRegister`.
-    cudaDevAttrHostRegisterSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostRegisterSupported{{endif}}
-    {{if 'cudaDevAttrPageableMemoryAccessUsesHostPageTables' in found_values}}
-
-    #: Device accesses pageable memory via the host's page tables.
-    cudaDevAttrPageableMemoryAccessUsesHostPageTables = cyruntime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccessUsesHostPageTables{{endif}}
-    {{if 'cudaDevAttrDirectManagedMemAccessFromHost' in found_values}}
-
-    #: Host can directly access managed memory on the device without
-    #: migration.
-    cudaDevAttrDirectManagedMemAccessFromHost = cyruntime.cudaDeviceAttr.cudaDevAttrDirectManagedMemAccessFromHost{{endif}}
-    {{if 'cudaDevAttrMaxBlocksPerMultiprocessor' in found_values}}
-
-    #: Maximum number of blocks per multiprocessor
-    cudaDevAttrMaxBlocksPerMultiprocessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlocksPerMultiprocessor{{endif}}
-    {{if 'cudaDevAttrMaxPersistingL2CacheSize' in found_values}}
-
-    #: Maximum L2 persisting lines capacity setting in bytes.
-    cudaDevAttrMaxPersistingL2CacheSize = cyruntime.cudaDeviceAttr.cudaDevAttrMaxPersistingL2CacheSize{{endif}}
-    {{if 'cudaDevAttrMaxAccessPolicyWindowSize' in found_values}}
-
-    #: Maximum value of :py:obj:`~.cudaAccessPolicyWindow.num_bytes`.
-    cudaDevAttrMaxAccessPolicyWindowSize = cyruntime.cudaDeviceAttr.cudaDevAttrMaxAccessPolicyWindowSize{{endif}}
-    {{if 'cudaDevAttrReservedSharedMemoryPerBlock' in found_values}}
-
-    #: Shared memory reserved by CUDA driver per block in bytes
-    cudaDevAttrReservedSharedMemoryPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrReservedSharedMemoryPerBlock{{endif}}
-    {{if 'cudaDevAttrSparseCudaArraySupported' in found_values}}
-
-    #: Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays
-    cudaDevAttrSparseCudaArraySupported = cyruntime.cudaDeviceAttr.cudaDevAttrSparseCudaArraySupported{{endif}}
-    {{if 'cudaDevAttrHostRegisterReadOnlySupported' in found_values}}
-
-    #: Device supports using the :py:obj:`~.cudaHostRegister` flag
-    #: cudaHostRegisterReadOnly to register memory that must be mapped as
-    #: read-only to the GPU
-    cudaDevAttrHostRegisterReadOnlySupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostRegisterReadOnlySupported{{endif}}
-    {{if 'cudaDevAttrTimelineSemaphoreInteropSupported' in found_values}}
-
-    #: External timeline semaphore interop is supported on the device
-    cudaDevAttrTimelineSemaphoreInteropSupported = cyruntime.cudaDeviceAttr.cudaDevAttrTimelineSemaphoreInteropSupported{{endif}}
-    {{if 'cudaDevAttrMaxTimelineSemaphoreInteropSupported' in found_values}}
-
-    #: Deprecated, External timeline semaphore interop is supported on the
-    #: device
-    cudaDevAttrMaxTimelineSemaphoreInteropSupported = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTimelineSemaphoreInteropSupported{{endif}}
-    {{if 'cudaDevAttrMemoryPoolsSupported' in found_values}}
-
-    #: Device supports using the :py:obj:`~.cudaMallocAsync` and
-    #: :py:obj:`~.cudaMemPool` family of APIs
-    cudaDevAttrMemoryPoolsSupported = cyruntime.cudaDeviceAttr.cudaDevAttrMemoryPoolsSupported{{endif}}
-    {{if 'cudaDevAttrGPUDirectRDMASupported' in found_values}}
-
-    #: Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see
-    #: https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
-    cudaDevAttrGPUDirectRDMASupported = cyruntime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMASupported{{endif}}
-    {{if 'cudaDevAttrGPUDirectRDMAFlushWritesOptions' in found_values}}
-
-    #: The returned attribute shall be interpreted as a bitmask, where the
-    #: individual bits are listed in the
-    #: :py:obj:`~.cudaFlushGPUDirectRDMAWritesOptions` enum
-    cudaDevAttrGPUDirectRDMAFlushWritesOptions = cyruntime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMAFlushWritesOptions{{endif}}
-    {{if 'cudaDevAttrGPUDirectRDMAWritesOrdering' in found_values}}
-
-    #: GPUDirect RDMA writes to the device do not need to be flushed for
-    #: consumers within the scope indicated by the returned attribute. See
-    #: :py:obj:`~.cudaGPUDirectRDMAWritesOrdering` for the numerical values
-    #: returned here.
-    cudaDevAttrGPUDirectRDMAWritesOrdering = cyruntime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMAWritesOrdering{{endif}}
-    {{if 'cudaDevAttrMemoryPoolSupportedHandleTypes' in found_values}}
-
-    #: Handle types supported with mempool based IPC
-    cudaDevAttrMemoryPoolSupportedHandleTypes = cyruntime.cudaDeviceAttr.cudaDevAttrMemoryPoolSupportedHandleTypes{{endif}}
-    {{if 'cudaDevAttrClusterLaunch' in found_values}}
-
-    #: Indicates device supports cluster launch
-    cudaDevAttrClusterLaunch = cyruntime.cudaDeviceAttr.cudaDevAttrClusterLaunch{{endif}}
-    {{if 'cudaDevAttrDeferredMappingCudaArraySupported' in found_values}}
-
-    #: Device supports deferred mapping CUDA arrays and CUDA mipmapped
-    #: arrays
-    cudaDevAttrDeferredMappingCudaArraySupported = cyruntime.cudaDeviceAttr.cudaDevAttrDeferredMappingCudaArraySupported{{endif}}
-    {{if 'cudaDevAttrReserved122' in found_values}}
-    cudaDevAttrReserved122 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved122{{endif}}
-    {{if 'cudaDevAttrReserved123' in found_values}}
-    cudaDevAttrReserved123 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved123{{endif}}
-    {{if 'cudaDevAttrReserved124' in found_values}}
-    cudaDevAttrReserved124 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved124{{endif}}
-    {{if 'cudaDevAttrIpcEventSupport' in found_values}}
-
-    #: Device supports IPC Events.
-    cudaDevAttrIpcEventSupport = cyruntime.cudaDeviceAttr.cudaDevAttrIpcEventSupport{{endif}}
-    {{if 'cudaDevAttrMemSyncDomainCount' in found_values}}
-
-    #: Number of memory synchronization domains the device supports.
-    cudaDevAttrMemSyncDomainCount = cyruntime.cudaDeviceAttr.cudaDevAttrMemSyncDomainCount{{endif}}
-    {{if 'cudaDevAttrReserved127' in found_values}}
-    cudaDevAttrReserved127 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved127{{endif}}
-    {{if 'cudaDevAttrReserved128' in found_values}}
-    cudaDevAttrReserved128 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved128{{endif}}
-    {{if 'cudaDevAttrReserved129' in found_values}}
-    cudaDevAttrReserved129 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved129{{endif}}
-    {{if 'cudaDevAttrNumaConfig' in found_values}}
-
-    #: NUMA configuration of a device: value is of type
-    #: :py:obj:`~.cudaDeviceNumaConfig` enum
-    cudaDevAttrNumaConfig = cyruntime.cudaDeviceAttr.cudaDevAttrNumaConfig{{endif}}
-    {{if 'cudaDevAttrNumaId' in found_values}}
-
-    #: NUMA node ID of the GPU memory
-    cudaDevAttrNumaId = cyruntime.cudaDeviceAttr.cudaDevAttrNumaId{{endif}}
-    {{if 'cudaDevAttrReserved132' in found_values}}
-    cudaDevAttrReserved132 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved132{{endif}}
-    {{if 'cudaDevAttrMpsEnabled' in found_values}}
-
-    #: Contexts created on this device will be shared via MPS
-    cudaDevAttrMpsEnabled = cyruntime.cudaDeviceAttr.cudaDevAttrMpsEnabled{{endif}}
-    {{if 'cudaDevAttrHostNumaId' in found_values}}
-
-    #: NUMA ID of the host node closest to the device. Returns -1 when
-    #: system does not support NUMA.
-    cudaDevAttrHostNumaId = cyruntime.cudaDeviceAttr.cudaDevAttrHostNumaId{{endif}}
-    {{if 'cudaDevAttrD3D12CigSupported' in found_values}}
-
-    #: Device supports CIG with D3D12.
-    cudaDevAttrD3D12CigSupported = cyruntime.cudaDeviceAttr.cudaDevAttrD3D12CigSupported{{endif}}
-    {{if 'cudaDevAttrMax' in found_values}}
-    cudaDevAttrMax = cyruntime.cudaDeviceAttr.cudaDevAttrMax{{endif}}
-{{endif}}
-{{if 'cudaMemPoolAttr' in found_types}}
-
-class cudaMemPoolAttr(IntEnum):
-    """
-    CUDA memory pool attributes
-    """
-    {{if 'cudaMemPoolReuseFollowEventDependencies' in found_values}}
-
-    #: (value type = int) Allow cuMemAllocAsync to use memory
-    #: asynchronously freed in another streams as long as a stream ordering
-    #: dependency of the allocating stream on the free action exists. Cuda
-    #: events and null stream interactions can create the required stream
-    #: ordered dependencies. (default enabled)
-    cudaMemPoolReuseFollowEventDependencies = cyruntime.cudaMemPoolAttr.cudaMemPoolReuseFollowEventDependencies{{endif}}
-    {{if 'cudaMemPoolReuseAllowOpportunistic' in found_values}}
-
-    #: (value type = int) Allow reuse of already completed frees when there
-    #: is no dependency between the free and allocation. (default enabled)
-    cudaMemPoolReuseAllowOpportunistic = cyruntime.cudaMemPoolAttr.cudaMemPoolReuseAllowOpportunistic{{endif}}
-    {{if 'cudaMemPoolReuseAllowInternalDependencies' in found_values}}
-
-    #: (value type = int) Allow cuMemAllocAsync to insert new stream
-    #: dependencies in order to establish the stream ordering required to
-    #: reuse a piece of memory released by cuFreeAsync (default enabled).
-    cudaMemPoolReuseAllowInternalDependencies = cyruntime.cudaMemPoolAttr.cudaMemPoolReuseAllowInternalDependencies{{endif}}
-    {{if 'cudaMemPoolAttrReleaseThreshold' in found_values}}
-
-    #: (value type = cuuint64_t) Amount of reserved memory in bytes to hold
-    #: onto before trying to release memory back to the OS. When more than
-    #: the release threshold bytes of memory are held by the memory pool,
-    #: the allocator will try to release memory back to the OS on the next
-    #: call to stream, event or context synchronize. (default 0)
-    cudaMemPoolAttrReleaseThreshold = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold{{endif}}
-    {{if 'cudaMemPoolAttrReservedMemCurrent' in found_values}}
-
-    #: (value type = cuuint64_t) Amount of backing memory currently
-    #: allocated for the mempool.
-    cudaMemPoolAttrReservedMemCurrent = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrReservedMemCurrent{{endif}}
-    {{if 'cudaMemPoolAttrReservedMemHigh' in found_values}}
-
-    #: (value type = cuuint64_t) High watermark of backing memory allocated
-    #: for the mempool since the last time it was reset. High watermark can
-    #: only be reset to zero.
-    cudaMemPoolAttrReservedMemHigh = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrReservedMemHigh{{endif}}
-    {{if 'cudaMemPoolAttrUsedMemCurrent' in found_values}}
-
-    #: (value type = cuuint64_t) Amount of memory from the pool that is
-    #: currently in use by the application.
-    cudaMemPoolAttrUsedMemCurrent = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrUsedMemCurrent{{endif}}
-    {{if 'cudaMemPoolAttrUsedMemHigh' in found_values}}
-
-    #: (value type = cuuint64_t) High watermark of the amount of memory
-    #: from the pool that was in use by the application since the last time
-    #: it was reset. High watermark can only be reset to zero.
-    cudaMemPoolAttrUsedMemHigh = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrUsedMemHigh{{endif}}
-{{endif}}
-{{if 'cudaMemLocationType' in found_types}}
-
-class cudaMemLocationType(IntEnum):
-    """
-    Specifies the type of location
-    """
-    {{if 'cudaMemLocationTypeInvalid' in found_values}}
-    cudaMemLocationTypeInvalid = cyruntime.cudaMemLocationType.cudaMemLocationTypeInvalid{{endif}}
-    {{if 'cudaMemLocationTypeDevice' in found_values}}
-
-    #: Location is a device location, thus id is a device ordinal
-    cudaMemLocationTypeDevice = cyruntime.cudaMemLocationType.cudaMemLocationTypeDevice{{endif}}
-    {{if 'cudaMemLocationTypeHost' in found_values}}
-
-    #: Location is host, id is ignored
-    cudaMemLocationTypeHost = cyruntime.cudaMemLocationType.cudaMemLocationTypeHost{{endif}}
-    {{if 'cudaMemLocationTypeHostNuma' in found_values}}
-
-    #: Location is a host NUMA node, thus id is a host NUMA node id
-    cudaMemLocationTypeHostNuma = cyruntime.cudaMemLocationType.cudaMemLocationTypeHostNuma{{endif}}
-    {{if 'cudaMemLocationTypeHostNumaCurrent' in found_values}}
-
-    #: Location is the host NUMA node closest to the current thread's CPU,
-    #: id is ignored
-    cudaMemLocationTypeHostNumaCurrent = cyruntime.cudaMemLocationType.cudaMemLocationTypeHostNumaCurrent{{endif}}
-{{endif}}
-{{if 'cudaMemAccessFlags' in found_types}}
-
-class cudaMemAccessFlags(IntEnum):
-    """
-    Specifies the memory protection flags for mapping.
-    """
-    {{if 'cudaMemAccessFlagsProtNone' in found_values}}
-
-    #: Default, make the address range not accessible
-    cudaMemAccessFlagsProtNone = cyruntime.cudaMemAccessFlags.cudaMemAccessFlagsProtNone{{endif}}
-    {{if 'cudaMemAccessFlagsProtRead' in found_values}}
-
-    #: Make the address range read accessible
-    cudaMemAccessFlagsProtRead = cyruntime.cudaMemAccessFlags.cudaMemAccessFlagsProtRead{{endif}}
-    {{if 'cudaMemAccessFlagsProtReadWrite' in found_values}}
-
-    #: Make the address range read-write accessible
-    cudaMemAccessFlagsProtReadWrite = cyruntime.cudaMemAccessFlags.cudaMemAccessFlagsProtReadWrite{{endif}}
-{{endif}}
-{{if 'cudaMemAllocationType' in found_types}}
-
-class cudaMemAllocationType(IntEnum):
-    """
-    Defines the allocation types available
-    """
-    {{if 'cudaMemAllocationTypeInvalid' in found_values}}
-    cudaMemAllocationTypeInvalid = cyruntime.cudaMemAllocationType.cudaMemAllocationTypeInvalid{{endif}}
-    {{if 'cudaMemAllocationTypePinned' in found_values}}
-
-    #: This allocation type is 'pinned', i.e. cannot migrate from its
-    #: current location while the application is actively using it
-    cudaMemAllocationTypePinned = cyruntime.cudaMemAllocationType.cudaMemAllocationTypePinned{{endif}}
-    {{if 'cudaMemAllocationTypeMax' in found_values}}
-    cudaMemAllocationTypeMax = cyruntime.cudaMemAllocationType.cudaMemAllocationTypeMax{{endif}}
-{{endif}}
-{{if 'cudaMemAllocationHandleType' in found_types}}
-
-class cudaMemAllocationHandleType(IntEnum):
-    """
-    Flags for specifying particular handle types
-    """
-    {{if 'cudaMemHandleTypeNone' in found_values}}
-
-    #: Does not allow any export mechanism. >
-    cudaMemHandleTypeNone = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeNone{{endif}}
-    {{if 'cudaMemHandleTypePosixFileDescriptor' in found_values}}
-
-    #: Allows a file descriptor to be used for exporting. Permitted only on
-    #: POSIX systems. (int)
-    cudaMemHandleTypePosixFileDescriptor = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypePosixFileDescriptor{{endif}}
-    {{if 'cudaMemHandleTypeWin32' in found_values}}
-
-    #: Allows a Win32 NT handle to be used for exporting. (HANDLE)
-    cudaMemHandleTypeWin32 = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeWin32{{endif}}
-    {{if 'cudaMemHandleTypeWin32Kmt' in found_values}}
-
-    #: Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE)
-    cudaMemHandleTypeWin32Kmt = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeWin32Kmt{{endif}}
-    {{if 'cudaMemHandleTypeFabric' in found_values}}
-
-    #: Allows a fabric handle to be used for exporting.
-    #: (cudaMemFabricHandle_t)
-    cudaMemHandleTypeFabric = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeFabric{{endif}}
-{{endif}}
-{{if 'cudaGraphMemAttributeType' in found_types}}
-
-class cudaGraphMemAttributeType(IntEnum):
-    """
-    Graph memory attributes
-    """
-    {{if 'cudaGraphMemAttrUsedMemCurrent' in found_values}}
-
-    #: (value type = cuuint64_t) Amount of memory, in bytes, currently
-    #: associated with graphs.
-    cudaGraphMemAttrUsedMemCurrent = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemCurrent{{endif}}
-    {{if 'cudaGraphMemAttrUsedMemHigh' in found_values}}
-
-    #: (value type = cuuint64_t) High watermark of memory, in bytes,
-    #: associated with graphs since the last time it was reset. High
-    #: watermark can only be reset to zero.
-    cudaGraphMemAttrUsedMemHigh = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemHigh{{endif}}
-    {{if 'cudaGraphMemAttrReservedMemCurrent' in found_values}}
-
-    #: (value type = cuuint64_t) Amount of memory, in bytes, currently
-    #: allocated for use by the CUDA graphs asynchronous allocator.
-    cudaGraphMemAttrReservedMemCurrent = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrReservedMemCurrent{{endif}}
-    {{if 'cudaGraphMemAttrReservedMemHigh' in found_values}}
-
-    #: (value type = cuuint64_t) High watermark of memory, in bytes,
-    #: currently allocated for use by the CUDA graphs asynchronous
-    #: allocator.
-    cudaGraphMemAttrReservedMemHigh = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrReservedMemHigh{{endif}}
-{{endif}}
-{{if 'cudaDeviceP2PAttr' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaDeviceP2PAttr. The class is emitted only
-# when its name is in found_types; each member only when its own name is in
-# found_values.
-class cudaDeviceP2PAttr(IntEnum):
-    """
-    CUDA device P2P attributes
-    """
-    {{if 'cudaDevP2PAttrPerformanceRank' in found_values}}
-
-    #: A relative value indicating the performance of the link between two
-    #: devices
-    cudaDevP2PAttrPerformanceRank = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrPerformanceRank{{endif}}
-    {{if 'cudaDevP2PAttrAccessSupported' in found_values}}
-
-    #: Peer access is enabled
-    cudaDevP2PAttrAccessSupported = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrAccessSupported{{endif}}
-    {{if 'cudaDevP2PAttrNativeAtomicSupported' in found_values}}
-
-    #: Native atomic operation over the link supported
-    cudaDevP2PAttrNativeAtomicSupported = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrNativeAtomicSupported{{endif}}
-    {{if 'cudaDevP2PAttrCudaArrayAccessSupported' in found_values}}
-
-    #: Accessing CUDA arrays over the link supported
-    cudaDevP2PAttrCudaArrayAccessSupported = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrCudaArrayAccessSupported{{endif}}
-{{endif}}
-{{if 'cudaExternalMemoryHandleType' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaExternalMemoryHandleType. The class is
-# emitted only when its name is in found_types; each member only when its own
-# name is in found_values.
-class cudaExternalMemoryHandleType(IntEnum):
-    """
-    External memory handle types
-    """
-    {{if 'cudaExternalMemoryHandleTypeOpaqueFd' in found_values}}
-
-    #: Handle is an opaque file descriptor
-    cudaExternalMemoryHandleTypeOpaqueFd = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueFd{{endif}}
-    {{if 'cudaExternalMemoryHandleTypeOpaqueWin32' in found_values}}
-
-    #: Handle is an opaque shared NT handle
-    cudaExternalMemoryHandleTypeOpaqueWin32 = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32{{endif}}
-    {{if 'cudaExternalMemoryHandleTypeOpaqueWin32Kmt' in found_values}}
-
-    #: Handle is an opaque, globally shared handle
-    cudaExternalMemoryHandleTypeOpaqueWin32Kmt = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32Kmt{{endif}}
-    {{if 'cudaExternalMemoryHandleTypeD3D12Heap' in found_values}}
-
-    #: Handle is a D3D12 heap object
-    cudaExternalMemoryHandleTypeD3D12Heap = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Heap{{endif}}
-    {{if 'cudaExternalMemoryHandleTypeD3D12Resource' in found_values}}
-
-    #: Handle is a D3D12 committed resource
-    cudaExternalMemoryHandleTypeD3D12Resource = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Resource{{endif}}
-    {{if 'cudaExternalMemoryHandleTypeD3D11Resource' in found_values}}
-
-    #: Handle is a shared NT handle to a D3D11 resource
-    cudaExternalMemoryHandleTypeD3D11Resource = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11Resource{{endif}}
-    {{if 'cudaExternalMemoryHandleTypeD3D11ResourceKmt' in found_values}}
-
-    #: Handle is a globally shared handle to a D3D11 resource
-    cudaExternalMemoryHandleTypeD3D11ResourceKmt = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11ResourceKmt{{endif}}
-    {{if 'cudaExternalMemoryHandleTypeNvSciBuf' in found_values}}
-
-    #: Handle is an NvSciBuf object
-    cudaExternalMemoryHandleTypeNvSciBuf = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeNvSciBuf{{endif}}
-{{endif}}
-{{if 'cudaExternalSemaphoreHandleType' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaExternalSemaphoreHandleType. The class is
-# emitted only when its name is in found_types; each member only when its own
-# name is in found_values.
-class cudaExternalSemaphoreHandleType(IntEnum):
-    """
-    External semaphore handle types
-    """
-    {{if 'cudaExternalSemaphoreHandleTypeOpaqueFd' in found_values}}
-
-    #: Handle is an opaque file descriptor
-    cudaExternalSemaphoreHandleTypeOpaqueFd = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueFd{{endif}}
-    {{if 'cudaExternalSemaphoreHandleTypeOpaqueWin32' in found_values}}
-
-    #: Handle is an opaque shared NT handle
-    cudaExternalSemaphoreHandleTypeOpaqueWin32 = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32{{endif}}
-    {{if 'cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt' in found_values}}
-
-    #: Handle is an opaque, globally shared handle
-    cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt{{endif}}
-    {{if 'cudaExternalSemaphoreHandleTypeD3D12Fence' in found_values}}
-
-    #: Handle is a shared NT handle referencing a D3D12 fence object
-    cudaExternalSemaphoreHandleTypeD3D12Fence = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D12Fence{{endif}}
-    {{if 'cudaExternalSemaphoreHandleTypeD3D11Fence' in found_values}}
-
-    #: Handle is a shared NT handle referencing a D3D11 fence object
-    cudaExternalSemaphoreHandleTypeD3D11Fence = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D11Fence{{endif}}
-    {{if 'cudaExternalSemaphoreHandleTypeNvSciSync' in found_values}}
-
-    #: Opaque handle to NvSciSync Object
-    cudaExternalSemaphoreHandleTypeNvSciSync = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeNvSciSync{{endif}}
-    {{if 'cudaExternalSemaphoreHandleTypeKeyedMutex' in found_values}}
-
-    #: Handle is a shared NT handle referencing a D3D11 keyed mutex object
-    cudaExternalSemaphoreHandleTypeKeyedMutex = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutex{{endif}}
-    {{if 'cudaExternalSemaphoreHandleTypeKeyedMutexKmt' in found_values}}
-
-    #: Handle is a shared KMT handle referencing a D3D11 keyed mutex object
-    cudaExternalSemaphoreHandleTypeKeyedMutexKmt = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutexKmt{{endif}}
-    {{if 'cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd' in found_values}}
-
-    #: Handle is an opaque handle file descriptor referencing a timeline
-    #: semaphore
-    cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd{{endif}}
-    {{if 'cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32' in found_values}}
-
-    # NOTE(review): the published description below is word-for-word the
-    # same as the TimelineSemaphoreFd entry above. The Win32 variant
-    # presumably refers to a shared NT handle rather than a file descriptor;
-    # confirm against the CUDA headers before changing the published text.
-    #: Handle is an opaque handle file descriptor referencing a timeline
-    #: semaphore
-    cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32 = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32{{endif}}
-{{endif}}
-{{if 'cudaCGScope' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaCGScope. The class is emitted only when its
-# name is in found_types; each member only when its own name is in
-# found_values.
-class cudaCGScope(IntEnum):
-    """
-    CUDA cooperative group scope
-    """
-    {{if 'cudaCGScopeInvalid' in found_values}}
-
-    #: Invalid cooperative group scope
-    cudaCGScopeInvalid = cyruntime.cudaCGScope.cudaCGScopeInvalid{{endif}}
-    {{if 'cudaCGScopeGrid' in found_values}}
-
-    #: Scope represented by a grid_group
-    cudaCGScopeGrid = cyruntime.cudaCGScope.cudaCGScopeGrid{{endif}}
-    {{if 'cudaCGScopeMultiGrid' in found_values}}
-
-    #: Scope represented by a multi_grid_group
-    cudaCGScopeMultiGrid = cyruntime.cudaCGScope.cudaCGScopeMultiGrid{{endif}}
-{{endif}}
-{{if 'cudaGraphConditionalHandleFlags' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaGraphConditionalHandleFlags. The class is
-# emitted only when its name is in found_types; its single member only when
-# that member's name is in found_values.
-class cudaGraphConditionalHandleFlags(IntEnum):
-    """
-    Flags for CUDA graph conditional handles
-    """
-    {{if 'cudaGraphCondAssignDefault' in found_values}}
-
-    #: Apply default handle value when graph is launched.
-    cudaGraphCondAssignDefault = cyruntime.cudaGraphConditionalHandleFlags.cudaGraphCondAssignDefault{{endif}}
-{{endif}}
-{{if 'cudaGraphConditionalNodeType' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaGraphConditionalNodeType. The class is
-# emitted only when its name is in found_types; each member only when its own
-# name is in found_values.
-class cudaGraphConditionalNodeType(IntEnum):
-    """
-    CUDA conditional node types
-    """
-    {{if 'cudaGraphCondTypeIf' in found_values}}
-
-    #: Conditional 'if' Node. Body executed once if condition value is non-
-    #: zero.
-    cudaGraphCondTypeIf = cyruntime.cudaGraphConditionalNodeType.cudaGraphCondTypeIf{{endif}}
-    {{if 'cudaGraphCondTypeWhile' in found_values}}
-
-    #: Conditional 'while' Node. Body executed repeatedly while condition
-    #: value is non-zero.
-    cudaGraphCondTypeWhile = cyruntime.cudaGraphConditionalNodeType.cudaGraphCondTypeWhile{{endif}}
-{{endif}}
-{{if 'cudaGraphNodeType' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaGraphNodeType. The class is emitted only
-# when its name is in found_types; each member only when its own name is in
-# found_values.
-class cudaGraphNodeType(IntEnum):
-    """
-    CUDA Graph node types
-    """
-    {{if 'cudaGraphNodeTypeKernel' in found_values}}
-
-    #: GPU kernel node
-    cudaGraphNodeTypeKernel = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeKernel{{endif}}
-    {{if 'cudaGraphNodeTypeMemcpy' in found_values}}
-
-    #: Memcpy node
-    cudaGraphNodeTypeMemcpy = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemcpy{{endif}}
-    {{if 'cudaGraphNodeTypeMemset' in found_values}}
-
-    #: Memset node
-    cudaGraphNodeTypeMemset = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemset{{endif}}
-    {{if 'cudaGraphNodeTypeHost' in found_values}}
-
-    #: Host (executable) node
-    cudaGraphNodeTypeHost = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeHost{{endif}}
-    {{if 'cudaGraphNodeTypeGraph' in found_values}}
-
-    #: Node which executes an embedded graph
-    cudaGraphNodeTypeGraph = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeGraph{{endif}}
-    {{if 'cudaGraphNodeTypeEmpty' in found_values}}
-
-    #: Empty (no-op) node
-    cudaGraphNodeTypeEmpty = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeEmpty{{endif}}
-    {{if 'cudaGraphNodeTypeWaitEvent' in found_values}}
-
-    #: External event wait node
-    cudaGraphNodeTypeWaitEvent = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeWaitEvent{{endif}}
-    {{if 'cudaGraphNodeTypeEventRecord' in found_values}}
-
-    #: External event record node
-    cudaGraphNodeTypeEventRecord = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeEventRecord{{endif}}
-    {{if 'cudaGraphNodeTypeExtSemaphoreSignal' in found_values}}
-
-    #: External semaphore signal node
-    cudaGraphNodeTypeExtSemaphoreSignal = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeExtSemaphoreSignal{{endif}}
-    {{if 'cudaGraphNodeTypeExtSemaphoreWait' in found_values}}
-
-    #: External semaphore wait node
-    cudaGraphNodeTypeExtSemaphoreWait = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeExtSemaphoreWait{{endif}}
-    {{if 'cudaGraphNodeTypeMemAlloc' in found_values}}
-
-    #: Memory allocation node
-    cudaGraphNodeTypeMemAlloc = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemAlloc{{endif}}
-    {{if 'cudaGraphNodeTypeMemFree' in found_values}}
-
-    #: Memory free node
-    cudaGraphNodeTypeMemFree = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemFree{{endif}}
-    {{if 'cudaGraphNodeTypeConditional' in found_values}}
-
-    #: Conditional node. May be used to implement a conditional execution
-    #: path or loop inside of a graph. The graph(s) contained within the
-    #: body of the conditional node can be selectively executed or iterated
-    #: upon based on the value of a conditional variable.
-    #:
-    #: Handles must be created in advance of creating the node using
-    #: :py:obj:`~.cudaGraphConditionalHandleCreate`.
-    #:
-    #: The following restrictions apply to graphs which contain conditional
-    #: nodes:
-    #:
-    #: - The graph cannot be used in a child node.
-    #: - Only one instantiation of the graph may exist at any point in time.
-    #: - The graph cannot be cloned.
-    #:
-    #: To set the control value, supply a default value when creating the
-    #: handle and/or call :py:obj:`~.cudaGraphSetConditional` from device
-    #: code.
-    cudaGraphNodeTypeConditional = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeConditional{{endif}}
-    {{if 'cudaGraphNodeTypeCount' in found_values}}
-    # NOTE(review): this member carries no description here; presumably it is
-    # a sentinel holding the number of node types rather than a real node
-    # type - confirm against the CUDA headers.
-    cudaGraphNodeTypeCount = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeCount{{endif}}
-{{endif}}
-{{if 'cudaGraphExecUpdateResult' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaGraphExecUpdateResult. The class is emitted
-# only when its name is in found_types; each member only when its own name is
-# in found_values.
-class cudaGraphExecUpdateResult(IntEnum):
-    """
-    CUDA Graph Update error types
-    """
-    {{if 'cudaGraphExecUpdateSuccess' in found_values}}
-
-    #: The update succeeded
-    cudaGraphExecUpdateSuccess = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateSuccess{{endif}}
-    {{if 'cudaGraphExecUpdateError' in found_values}}
-
-    #: The update failed for an unexpected reason which is described in the
-    #: return value of the function
-    cudaGraphExecUpdateError = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateError{{endif}}
-    {{if 'cudaGraphExecUpdateErrorTopologyChanged' in found_values}}
-
-    #: The update failed because the topology changed
-    cudaGraphExecUpdateErrorTopologyChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorTopologyChanged{{endif}}
-    {{if 'cudaGraphExecUpdateErrorNodeTypeChanged' in found_values}}
-
-    #: The update failed because a node type changed
-    cudaGraphExecUpdateErrorNodeTypeChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorNodeTypeChanged{{endif}}
-    {{if 'cudaGraphExecUpdateErrorFunctionChanged' in found_values}}
-
-    #: The update failed because the function of a kernel node changed
-    #: (CUDA driver < 11.2)
-    cudaGraphExecUpdateErrorFunctionChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorFunctionChanged{{endif}}
-    {{if 'cudaGraphExecUpdateErrorParametersChanged' in found_values}}
-
-    #: The update failed because the parameters changed in a way that is
-    #: not supported
-    cudaGraphExecUpdateErrorParametersChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorParametersChanged{{endif}}
-    {{if 'cudaGraphExecUpdateErrorNotSupported' in found_values}}
-
-    #: The update failed because something about the node is not supported
-    cudaGraphExecUpdateErrorNotSupported = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorNotSupported{{endif}}
-    {{if 'cudaGraphExecUpdateErrorUnsupportedFunctionChange' in found_values}}
-
-    #: The update failed because the function of a kernel node changed in
-    #: an unsupported way
-    cudaGraphExecUpdateErrorUnsupportedFunctionChange = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorUnsupportedFunctionChange{{endif}}
-    {{if 'cudaGraphExecUpdateErrorAttributesChanged' in found_values}}
-
-    #: The update failed because the node attributes changed in a way that
-    #: is not supported
-    cudaGraphExecUpdateErrorAttributesChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorAttributesChanged{{endif}}
-{{endif}}
-{{if 'cudaGraphKernelNodeField' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaGraphKernelNodeField. The class is emitted
-# only when its name is in found_types; each member only when its own name is
-# in found_values.
-class cudaGraphKernelNodeField(IntEnum):
-    """
-    Specifies the field to update when performing multiple node updates
-    from the device
-    """
-    {{if 'cudaGraphKernelNodeFieldInvalid' in found_values}}
-
-    #: Invalid field
-    cudaGraphKernelNodeFieldInvalid = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldInvalid{{endif}}
-    {{if 'cudaGraphKernelNodeFieldGridDim' in found_values}}
-
-    #: Grid dimension update
-    cudaGraphKernelNodeFieldGridDim = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldGridDim{{endif}}
-    {{if 'cudaGraphKernelNodeFieldParam' in found_values}}
-
-    #: Kernel parameter update
-    cudaGraphKernelNodeFieldParam = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldParam{{endif}}
-    {{if 'cudaGraphKernelNodeFieldEnabled' in found_values}}
-
-    #: Node enable/disable
-    cudaGraphKernelNodeFieldEnabled = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldEnabled{{endif}}
-{{endif}}
-{{if 'cudaGetDriverEntryPointFlags' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaGetDriverEntryPointFlags. The class is
-# emitted only when its name is in found_types; each member only when its own
-# name is in found_values.
-class cudaGetDriverEntryPointFlags(IntEnum):
-    """
-    Flags to specify search options to be used with
-    :py:obj:`~.cudaGetDriverEntryPoint`. For more details see
-    :py:obj:`~.cuGetProcAddress`.
-    """
-    {{if 'cudaEnableDefault' in found_values}}
-
-    #: Default search mode for driver symbols.
-    cudaEnableDefault = cyruntime.cudaGetDriverEntryPointFlags.cudaEnableDefault{{endif}}
-    {{if 'cudaEnableLegacyStream' in found_values}}
-
-    #: Search for legacy versions of driver symbols.
-    cudaEnableLegacyStream = cyruntime.cudaGetDriverEntryPointFlags.cudaEnableLegacyStream{{endif}}
-    {{if 'cudaEnablePerThreadDefaultStream' in found_values}}
-
-    #: Search for per-thread versions of driver symbols.
-    cudaEnablePerThreadDefaultStream = cyruntime.cudaGetDriverEntryPointFlags.cudaEnablePerThreadDefaultStream{{endif}}
-{{endif}}
-{{if 'cudaDriverEntryPointQueryResult' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaDriverEntryPointQueryResult. The class is
-# emitted only when its name is in found_types; each member only when its own
-# name is in found_values.
-# NOTE(review): the docstring cross-references cudaApiGetDriverEntryPoint,
-# while the neighbouring cudaGetDriverEntryPointFlags enum refers to
-# cudaGetDriverEntryPoint - confirm which target is intended.
-class cudaDriverEntryPointQueryResult(IntEnum):
-    """
-    Enum for status from obtaining driver entry points, used with
-    :py:obj:`~.cudaApiGetDriverEntryPoint`
-    """
-    {{if 'cudaDriverEntryPointSuccess' in found_values}}
-
-    #: Search for symbol found a match
-    cudaDriverEntryPointSuccess = cyruntime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointSuccess{{endif}}
-    {{if 'cudaDriverEntryPointSymbolNotFound' in found_values}}
-
-    #: Search for symbol was not found
-    cudaDriverEntryPointSymbolNotFound = cyruntime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointSymbolNotFound{{endif}}
-    {{if 'cudaDriverEntryPointVersionNotSufficent' in found_values}}
-
-    # The "Sufficent" spelling is part of the identifier looked up on
-    # cyruntime and in found_values; it must not be corrected here alone.
-    #: Search for symbol was found but version wasn't great enough
-    cudaDriverEntryPointVersionNotSufficent = cyruntime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointVersionNotSufficent{{endif}}
-{{endif}}
-{{if 'cudaGraphDebugDotFlags' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaGraphDebugDotFlags. The class is emitted
-# only when its name is in found_types; each member only when its own name is
-# in found_values.
-class cudaGraphDebugDotFlags(IntEnum):
-    """
-    CUDA Graph debug write options
-    """
-    {{if 'cudaGraphDebugDotFlagsVerbose' in found_values}}
-
-    #: Output all debug data as if every debug flag is enabled
-    cudaGraphDebugDotFlagsVerbose = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsVerbose{{endif}}
-    {{if 'cudaGraphDebugDotFlagsKernelNodeParams' in found_values}}
-
-    #: Adds :py:obj:`~.cudaKernelNodeParams` to output
-    cudaGraphDebugDotFlagsKernelNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsKernelNodeParams{{endif}}
-    {{if 'cudaGraphDebugDotFlagsMemcpyNodeParams' in found_values}}
-
-    #: Adds :py:obj:`~.cudaMemcpy3DParms` to output
-    cudaGraphDebugDotFlagsMemcpyNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsMemcpyNodeParams{{endif}}
-    {{if 'cudaGraphDebugDotFlagsMemsetNodeParams' in found_values}}
-
-    #: Adds :py:obj:`~.cudaMemsetParams` to output
-    cudaGraphDebugDotFlagsMemsetNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsMemsetNodeParams{{endif}}
-    {{if 'cudaGraphDebugDotFlagsHostNodeParams' in found_values}}
-
-    #: Adds :py:obj:`~.cudaHostNodeParams` to output
-    cudaGraphDebugDotFlagsHostNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsHostNodeParams{{endif}}
-    {{if 'cudaGraphDebugDotFlagsEventNodeParams' in found_values}}
-
-    #: Adds cudaEvent_t handle from record and wait nodes to output
-    cudaGraphDebugDotFlagsEventNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsEventNodeParams{{endif}}
-    {{if 'cudaGraphDebugDotFlagsExtSemasSignalNodeParams' in found_values}}
-
-    #: Adds :py:obj:`~.cudaExternalSemaphoreSignalNodeParams` values to
-    #: output
-    cudaGraphDebugDotFlagsExtSemasSignalNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsExtSemasSignalNodeParams{{endif}}
-    {{if 'cudaGraphDebugDotFlagsExtSemasWaitNodeParams' in found_values}}
-
-    #: Adds :py:obj:`~.cudaExternalSemaphoreWaitNodeParams` to output
-    cudaGraphDebugDotFlagsExtSemasWaitNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsExtSemasWaitNodeParams{{endif}}
-    {{if 'cudaGraphDebugDotFlagsKernelNodeAttributes' in found_values}}
-
-    #: Adds cudaKernelNodeAttrID values to output
-    cudaGraphDebugDotFlagsKernelNodeAttributes = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsKernelNodeAttributes{{endif}}
-    {{if 'cudaGraphDebugDotFlagsHandles' in found_values}}
-
-    #: Adds node handles and every kernel function handle to output
-    cudaGraphDebugDotFlagsHandles = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsHandles{{endif}}
-    {{if 'cudaGraphDebugDotFlagsConditionalNodeParams' in found_values}}
-
-    #: Adds :py:obj:`~.cudaConditionalNodeParams` to output
-    cudaGraphDebugDotFlagsConditionalNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsConditionalNodeParams{{endif}}
-{{endif}}
-{{if 'cudaGraphInstantiateFlags' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaGraphInstantiateFlags. The class is emitted
-# only when its name is in found_types; each member only when its own name is
-# in found_values.
-class cudaGraphInstantiateFlags(IntEnum):
-    """
-    Flags for instantiating a graph
-    """
-    {{if 'cudaGraphInstantiateFlagAutoFreeOnLaunch' in found_values}}
-
-    #: Automatically free memory allocated in a graph before relaunching.
-    cudaGraphInstantiateFlagAutoFreeOnLaunch = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagAutoFreeOnLaunch{{endif}}
-    {{if 'cudaGraphInstantiateFlagUpload' in found_values}}
-
-    #: Automatically upload the graph after instantiation. Only supported
-    #: by :py:obj:`~.cudaGraphInstantiateWithParams`. The upload will be
-    #: performed using the stream provided in `instantiateParams`.
-    cudaGraphInstantiateFlagUpload = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagUpload{{endif}}
-    {{if 'cudaGraphInstantiateFlagDeviceLaunch' in found_values}}
-
-    #: Instantiate the graph to be launchable from the device. This flag
-    #: can only be used on platforms which support unified addressing. This
-    #: flag cannot be used in conjunction with
-    #: cudaGraphInstantiateFlagAutoFreeOnLaunch.
-    cudaGraphInstantiateFlagDeviceLaunch = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagDeviceLaunch{{endif}}
-    {{if 'cudaGraphInstantiateFlagUseNodePriority' in found_values}}
-
-    #: Run the graph using the per-node priority attributes rather than the
-    #: priority of the stream it is launched into.
-    cudaGraphInstantiateFlagUseNodePriority = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagUseNodePriority{{endif}}
-{{endif}}
-{{if 'cudaDeviceNumaConfig' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaDeviceNumaConfig. The class is emitted only
-# when its name is in found_types; each member only when its own name is in
-# found_values.
-class cudaDeviceNumaConfig(IntEnum):
-    """
-    CUDA device NUMA config
-    """
-    {{if 'cudaDeviceNumaConfigNone' in found_values}}
-
-    #: The GPU is not a NUMA node
-    cudaDeviceNumaConfigNone = cyruntime.cudaDeviceNumaConfig.cudaDeviceNumaConfigNone{{endif}}
-    {{if 'cudaDeviceNumaConfigNumaNode' in found_values}}
-
-    #: The GPU is a NUMA node, cudaDevAttrNumaId contains its NUMA ID
-    cudaDeviceNumaConfigNumaNode = cyruntime.cudaDeviceNumaConfig.cudaDeviceNumaConfigNumaNode{{endif}}
-{{endif}}
-{{if 'cudaSurfaceBoundaryMode' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaSurfaceBoundaryMode. The class is emitted
-# only when its name is in found_types; each member only when its own name is
-# in found_values.
-class cudaSurfaceBoundaryMode(IntEnum):
-    """
-    CUDA Surface boundary modes
-    """
-    {{if 'cudaBoundaryModeZero' in found_values}}
-
-    #: Zero boundary mode
-    cudaBoundaryModeZero = cyruntime.cudaSurfaceBoundaryMode.cudaBoundaryModeZero{{endif}}
-    {{if 'cudaBoundaryModeClamp' in found_values}}
-
-    #: Clamp boundary mode
-    cudaBoundaryModeClamp = cyruntime.cudaSurfaceBoundaryMode.cudaBoundaryModeClamp{{endif}}
-    {{if 'cudaBoundaryModeTrap' in found_values}}
-
-    #: Trap boundary mode
-    cudaBoundaryModeTrap = cyruntime.cudaSurfaceBoundaryMode.cudaBoundaryModeTrap{{endif}}
-{{endif}}
-{{if 'cudaSurfaceFormatMode' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaSurfaceFormatMode. The class is emitted
-# only when its name is in found_types; each member only when its own name is
-# in found_values.
-class cudaSurfaceFormatMode(IntEnum):
-    """
-    CUDA Surface format modes
-    """
-    {{if 'cudaFormatModeForced' in found_values}}
-
-    #: Forced format mode
-    cudaFormatModeForced = cyruntime.cudaSurfaceFormatMode.cudaFormatModeForced{{endif}}
-    {{if 'cudaFormatModeAuto' in found_values}}
-
-    #: Auto format mode
-    cudaFormatModeAuto = cyruntime.cudaSurfaceFormatMode.cudaFormatModeAuto{{endif}}
-{{endif}}
-{{if 'cudaTextureAddressMode' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaTextureAddressMode. The class is emitted
-# only when its name is in found_types; each member only when its own name is
-# in found_values.
-class cudaTextureAddressMode(IntEnum):
-    """
-    CUDA texture address modes
-    """
-    {{if 'cudaAddressModeWrap' in found_values}}
-
-    #: Wrapping address mode
-    cudaAddressModeWrap = cyruntime.cudaTextureAddressMode.cudaAddressModeWrap{{endif}}
-    {{if 'cudaAddressModeClamp' in found_values}}
-
-    #: Clamp to edge address mode
-    cudaAddressModeClamp = cyruntime.cudaTextureAddressMode.cudaAddressModeClamp{{endif}}
-    {{if 'cudaAddressModeMirror' in found_values}}
-
-    #: Mirror address mode
-    cudaAddressModeMirror = cyruntime.cudaTextureAddressMode.cudaAddressModeMirror{{endif}}
-    {{if 'cudaAddressModeBorder' in found_values}}
-
-    #: Border address mode
-    cudaAddressModeBorder = cyruntime.cudaTextureAddressMode.cudaAddressModeBorder{{endif}}
-{{endif}}
-{{if 'cudaTextureFilterMode' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaTextureFilterMode. The class is emitted
-# only when its name is in found_types; each member only when its own name is
-# in found_values.
-class cudaTextureFilterMode(IntEnum):
-    """
-    CUDA texture filter modes
-    """
-    {{if 'cudaFilterModePoint' in found_values}}
-
-    #: Point filter mode
-    cudaFilterModePoint = cyruntime.cudaTextureFilterMode.cudaFilterModePoint{{endif}}
-    {{if 'cudaFilterModeLinear' in found_values}}
-
-    #: Linear filter mode
-    cudaFilterModeLinear = cyruntime.cudaTextureFilterMode.cudaFilterModeLinear{{endif}}
-{{endif}}
-{{if 'cudaTextureReadMode' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaTextureReadMode. The class is emitted only
-# when its name is in found_types; each member only when its own name is in
-# found_values.
-class cudaTextureReadMode(IntEnum):
-    """
-    CUDA texture read modes
-    """
-    {{if 'cudaReadModeElementType' in found_values}}
-
-    #: Read texture as specified element type
-    cudaReadModeElementType = cyruntime.cudaTextureReadMode.cudaReadModeElementType{{endif}}
-    {{if 'cudaReadModeNormalizedFloat' in found_values}}
-
-    #: Read texture as normalized float
-    cudaReadModeNormalizedFloat = cyruntime.cudaTextureReadMode.cudaReadModeNormalizedFloat{{endif}}
-{{endif}}
-{{if 'cudaRoundMode' in found_types}}
-
-# IntEnum mirror of cyruntime.cudaRoundMode. The class is emitted only when
-# its name is in found_types; each member only when its own name is in
-# found_values.
-class cudaRoundMode(IntEnum):
-    """
-    CUDA rounding modes
-    """
-    # NOTE(review): the members carry no descriptions here. Judging by the
-    # names they presumably select round-to-nearest, toward zero, toward
-    # +infinity and toward -infinity respectively - confirm against the CUDA
-    # headers before publishing per-member text.
-    {{if 'cudaRoundNearest' in found_values}}
-    cudaRoundNearest = cyruntime.cudaRoundMode.cudaRoundNearest{{endif}}
-    {{if 'cudaRoundZero' in found_values}}
-    cudaRoundZero = cyruntime.cudaRoundMode.cudaRoundZero{{endif}}
-    {{if 'cudaRoundPosInf' in found_values}}
-    cudaRoundPosInf = cyruntime.cudaRoundMode.cudaRoundPosInf{{endif}}
-    {{if 'cudaRoundMinInf' in found_values}}
-    cudaRoundMinInf = cyruntime.cudaRoundMode.cudaRoundMinInf{{endif}}
-{{endif}}
-{{if True}}
-
-# IntEnum mirror of cyruntime.cudaGLDeviceList. Unlike the enums guarded by
-# found_types / found_values lookups, every guard in this block is the
-# constant True, so the class and all of its members are always emitted.
-class cudaGLDeviceList(IntEnum):
-    """
-    CUDA devices corresponding to the current OpenGL context
-    """
-    {{if True}}
-
-    #: The CUDA devices for all GPUs used by the current OpenGL context
-    cudaGLDeviceListAll = cyruntime.cudaGLDeviceList.cudaGLDeviceListAll{{endif}}
-    {{if True}}
-
-    #: The CUDA devices for the GPUs used by the current OpenGL context in
-    #: its currently rendering frame
-    cudaGLDeviceListCurrentFrame = cyruntime.cudaGLDeviceList.cudaGLDeviceListCurrentFrame{{endif}}
-    {{if True}}
-
-    #: The CUDA devices for the GPUs to be used by the current OpenGL
-    #: context in the next frame
-    cudaGLDeviceListNextFrame = cyruntime.cudaGLDeviceList.cudaGLDeviceListNextFrame{{endif}}
-{{endif}}
-{{if True}}
-
-# IntEnum mirror of cyruntime.cudaGLMapFlags. Every guard in this block is
-# the constant True rather than a found_types / found_values lookup, so the
-# class and all of its members are always emitted.
-class cudaGLMapFlags(IntEnum):
-    """
-    CUDA GL Map Flags
-    """
-    {{if True}}
-
-    #: Default; Assume resource can be read/written
-    cudaGLMapFlagsNone = cyruntime.cudaGLMapFlags.cudaGLMapFlagsNone{{endif}}
-    {{if True}}
-
-    #: CUDA kernels will not write to this resource
-    cudaGLMapFlagsReadOnly = cyruntime.cudaGLMapFlags.cudaGLMapFlagsReadOnly{{endif}}
-    {{if True}}
-
-    #: CUDA kernels will only write to and will not read from this resource
-    cudaGLMapFlagsWriteDiscard = cyruntime.cudaGLMapFlags.cudaGLMapFlagsWriteDiscard{{endif}}
-{{endif}}
-{{if 'cudaLaunchAttributeID' in found_types}}
-
-class cudaStreamAttrID(IntEnum):
-    """
-    Launch attributes enum; used as id field of
-    :py:obj:`~.cudaLaunchAttribute`
-    """
-    {{if 'cudaLaunchAttributeIgnore' in found_values}}
-
-    #: Ignored entry, for convenient composition
-    cudaLaunchAttributeIgnore = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeIgnore{{endif}}
-    {{if 'cudaLaunchAttributeAccessPolicyWindow' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.accessPolicyWindow`.
-    cudaLaunchAttributeAccessPolicyWindow = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeAccessPolicyWindow{{endif}}
-    {{if 'cudaLaunchAttributeCooperative' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.cooperative`.
-    cudaLaunchAttributeCooperative = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeCooperative{{endif}}
-    {{if 'cudaLaunchAttributeSynchronizationPolicy' in found_values}}
-
-    #: Valid for streams. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.syncPolicy`.
-    cudaLaunchAttributeSynchronizationPolicy = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeSynchronizationPolicy{{endif}}
-    {{if 'cudaLaunchAttributeClusterDimension' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.clusterDim`.
-    cudaLaunchAttributeClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterDimension{{endif}}
-    {{if 'cudaLaunchAttributeClusterSchedulingPolicyPreference' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.clusterSchedulingPolicyPreference`.
-    cudaLaunchAttributeClusterSchedulingPolicyPreference = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterSchedulingPolicyPreference{{endif}}
-    {{if 'cudaLaunchAttributeProgrammaticStreamSerialization' in found_values}}
-
-    #: Valid for launches. Setting
-    #: :py:obj:`~.cudaLaunchAttributeValue.programmaticStreamSerializationAllowed`
-    #: to non-0 signals that the kernel will use programmatic means to
-    #: resolve its stream dependency, so that the CUDA runtime should
-    #: opportunistically allow the grid's execution to overlap with the
-    #: previous kernel in the stream, if that kernel requests the overlap.
-    #: The dependent launches can choose to wait on the dependency using
-    #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
-    #: PTX instructions).
-    cudaLaunchAttributeProgrammaticStreamSerialization = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticStreamSerialization{{endif}}
-    {{if 'cudaLaunchAttributeProgrammaticEvent' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.cudaLaunchAttributeValue.programmaticEvent` to record the
-    #: event. Event recorded through this launch attribute is guaranteed to
-    #: only trigger after all block in the associated kernel trigger the
-    #: event. A block can trigger the event programmatically in a future
-    #: CUDA release. A trigger can also be inserted at the beginning of
-    #: each block's execution if triggerAtBlockStart is set to non-0. The
-    #: dependent launches can choose to wait on the dependency using the
-    #: programmatic sync (cudaGridDependencySynchronize() or equivalent PTX
-    #: instructions). Note that dependents (including the CPU thread
-    #: calling :py:obj:`~.cudaEventSynchronize()`) are not guaranteed to
-    #: observe the release precisely when it is released. For example,
-    #: :py:obj:`~.cudaEventSynchronize()` may only observe the event
-    #: trigger long after the associated kernel has completed. This
-    #: recording type is primarily meant for establishing programmatic
-    #: dependency between device tasks. Note also this type of dependency
-    #: allows, but does not guarantee, concurrent execution of tasks.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.cudaEventDisableTiming` flag set).
-    cudaLaunchAttributeProgrammaticEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticEvent{{endif}}
-    {{if 'cudaLaunchAttributePriority' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.priority`.
-    cudaLaunchAttributePriority = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePriority{{endif}}
-    {{if 'cudaLaunchAttributeMemSyncDomainMap' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomainMap`.
-    cudaLaunchAttributeMemSyncDomainMap = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap{{endif}}
-    {{if 'cudaLaunchAttributeMemSyncDomain' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomain`.
-    cudaLaunchAttributeMemSyncDomain = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomain{{endif}}
-    {{if 'cudaLaunchAttributeLaunchCompletionEvent' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.cudaLaunchAttributeValue.launchCompletionEvent` to record
-    #: the event.
-    #:  Nominally, the event is triggered once all blocks of the kernel
-    #: have begun execution. Currently this is a best effort. If a kernel B
-    #: has a launch completion dependency on a kernel A, B may wait until A
-    #: is complete. Alternatively, blocks of B may begin before all blocks
-    #: of A have begun, for example if B can claim execution resources
-    #: unavailable to A (e.g. they run on different GPUs) or if B is a
-    #: higher priority than A. Exercise caution if such an ordering
-    #: inversion could lead to deadlock.
-    #:  A launch completion event is nominally similar to a programmatic
-    #: event with `triggerAtBlockStart` set except that it is not visible
-    #: to `cudaGridDependencySynchronize()` and can be used with compute
-    #: capability less than 9.0.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.cudaEventDisableTiming` flag set).
-    cudaLaunchAttributeLaunchCompletionEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeLaunchCompletionEvent{{endif}}
-    {{if 'cudaLaunchAttributeDeviceUpdatableKernelNode' in found_values}}
-
-    #: Valid for graph nodes, launches. This attribute is graphs-only, and
-    #: passing it to a launch in a non-capturing stream will result in an
-    #: error.
-    #: :cudaLaunchAttributeValue::deviceUpdatableKernelNode::deviceUpdatable
-    #: can only be set to 0 or 1. Setting the field to 1 indicates that the
-    #: corresponding kernel node should be device-updatable. On success, a
-    #: handle will be returned via
-    #: :py:obj:`~.cudaLaunchAttributeValue`::deviceUpdatableKernelNode::devNode
-    #: which can be passed to the various device-side update functions to
-    #: update the node's kernel parameters from within another kernel. For
-    #: more information on the types of device updates that can be made, as
-    #: well as the relevant limitations thereof, see
-    #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
-    #:  Nodes which are device-updatable have additional restrictions
-    #: compared to regular kernel nodes. Firstly, device-updatable nodes
-    #: cannot be removed from their graph via
-    #: :py:obj:`~.cudaGraphDestroyNode`. Additionally, once opted-in to
-    #: this functionality, a node cannot opt out, and any attempt to set
-    #: the deviceUpdatable attribute to 0 will result in an error. Device-
-    #: updatable kernel nodes also cannot have their attributes copied
-    #: to/from another kernel node via
-    #: :py:obj:`~.cudaGraphKernelNodeCopyAttributes`. Graphs containing one
-    #: or more device-updatable nodes also do not allow multiple
-    #: instantiation, and neither the graph nor its instantiated version
-    #: can be passed to :py:obj:`~.cudaGraphExecUpdate`.
-    #:  If a graph contains device-updatable nodes and updates those nodes
-    #: from the device from within the graph, the graph must be uploaded
-    #: with :py:obj:`~.cuGraphUpload` before it is launched. For such a
-    #: graph, if host-side executable graph updates are made to the device-
-    #: updatable nodes, the graph must be uploaded before it is launched
-    #: again.
-    cudaLaunchAttributeDeviceUpdatableKernelNode = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode{{endif}}
-    {{if 'cudaLaunchAttributePreferredSharedMemoryCarveout' in found_values}}
-
-    #: Valid for launches. On devices where the L1 cache and shared memory
-    #: use the same hardware resources, setting
-    #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a
-    #: percentage between 0-100 signals sets the shared memory carveout
-    #: preference in percent of the total shared memory for that kernel
-    #: launch. This attribute takes precedence over
-    #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is
-    #: only a hint, and the driver can choose a different configuration if
-    #: required for the launch.
-    cudaLaunchAttributePreferredSharedMemoryCarveout = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout{{endif}}
-{{endif}}
-{{if 'cudaLaunchAttributeID' in found_types}}
-
-class cudaKernelNodeAttrID(IntEnum):
-    """
-    Launch attributes enum; used as id field of
-    :py:obj:`~.cudaLaunchAttribute`
-    """
-    {{if 'cudaLaunchAttributeIgnore' in found_values}}
-
-    #: Ignored entry, for convenient composition
-    cudaLaunchAttributeIgnore = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeIgnore{{endif}}
-    {{if 'cudaLaunchAttributeAccessPolicyWindow' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.accessPolicyWindow`.
-    cudaLaunchAttributeAccessPolicyWindow = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeAccessPolicyWindow{{endif}}
-    {{if 'cudaLaunchAttributeCooperative' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.cooperative`.
-    cudaLaunchAttributeCooperative = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeCooperative{{endif}}
-    {{if 'cudaLaunchAttributeSynchronizationPolicy' in found_values}}
-
-    #: Valid for streams. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.syncPolicy`.
-    cudaLaunchAttributeSynchronizationPolicy = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeSynchronizationPolicy{{endif}}
-    {{if 'cudaLaunchAttributeClusterDimension' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.clusterDim`.
-    cudaLaunchAttributeClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterDimension{{endif}}
-    {{if 'cudaLaunchAttributeClusterSchedulingPolicyPreference' in found_values}}
-
-    #: Valid for graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.clusterSchedulingPolicyPreference`.
-    cudaLaunchAttributeClusterSchedulingPolicyPreference = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterSchedulingPolicyPreference{{endif}}
-    {{if 'cudaLaunchAttributeProgrammaticStreamSerialization' in found_values}}
-
-    #: Valid for launches. Setting
-    #: :py:obj:`~.cudaLaunchAttributeValue.programmaticStreamSerializationAllowed`
-    #: to non-0 signals that the kernel will use programmatic means to
-    #: resolve its stream dependency, so that the CUDA runtime should
-    #: opportunistically allow the grid's execution to overlap with the
-    #: previous kernel in the stream, if that kernel requests the overlap.
-    #: The dependent launches can choose to wait on the dependency using
-    #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
-    #: PTX instructions).
-    cudaLaunchAttributeProgrammaticStreamSerialization = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticStreamSerialization{{endif}}
-    {{if 'cudaLaunchAttributeProgrammaticEvent' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.cudaLaunchAttributeValue.programmaticEvent` to record the
-    #: event. Event recorded through this launch attribute is guaranteed to
-    #: only trigger after all block in the associated kernel trigger the
-    #: event. A block can trigger the event programmatically in a future
-    #: CUDA release. A trigger can also be inserted at the beginning of
-    #: each block's execution if triggerAtBlockStart is set to non-0. The
-    #: dependent launches can choose to wait on the dependency using the
-    #: programmatic sync (cudaGridDependencySynchronize() or equivalent PTX
-    #: instructions). Note that dependents (including the CPU thread
-    #: calling :py:obj:`~.cudaEventSynchronize()`) are not guaranteed to
-    #: observe the release precisely when it is released. For example,
-    #: :py:obj:`~.cudaEventSynchronize()` may only observe the event
-    #: trigger long after the associated kernel has completed. This
-    #: recording type is primarily meant for establishing programmatic
-    #: dependency between device tasks. Note also this type of dependency
-    #: allows, but does not guarantee, concurrent execution of tasks.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.cudaEventDisableTiming` flag set).
-    cudaLaunchAttributeProgrammaticEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticEvent{{endif}}
-    {{if 'cudaLaunchAttributePriority' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.priority`.
-    cudaLaunchAttributePriority = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePriority{{endif}}
-    {{if 'cudaLaunchAttributeMemSyncDomainMap' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomainMap`.
-    cudaLaunchAttributeMemSyncDomainMap = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap{{endif}}
-    {{if 'cudaLaunchAttributeMemSyncDomain' in found_values}}
-
-    #: Valid for streams, graph nodes, launches. See
-    #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomain`.
-    cudaLaunchAttributeMemSyncDomain = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomain{{endif}}
-    {{if 'cudaLaunchAttributeLaunchCompletionEvent' in found_values}}
-
-    #: Valid for launches. Set
-    #: :py:obj:`~.cudaLaunchAttributeValue.launchCompletionEvent` to record
-    #: the event.
-    #:  Nominally, the event is triggered once all blocks of the kernel
-    #: have begun execution. Currently this is a best effort. If a kernel B
-    #: has a launch completion dependency on a kernel A, B may wait until A
-    #: is complete. Alternatively, blocks of B may begin before all blocks
-    #: of A have begun, for example if B can claim execution resources
-    #: unavailable to A (e.g. they run on different GPUs) or if B is a
-    #: higher priority than A. Exercise caution if such an ordering
-    #: inversion could lead to deadlock.
-    #:  A launch completion event is nominally similar to a programmatic
-    #: event with `triggerAtBlockStart` set except that it is not visible
-    #: to `cudaGridDependencySynchronize()` and can be used with compute
-    #: capability less than 9.0.
-    #:  The event supplied must not be an interprocess or interop event.
-    #: The event must disable timing (i.e. must be created with the
-    #: :py:obj:`~.cudaEventDisableTiming` flag set).
-    cudaLaunchAttributeLaunchCompletionEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeLaunchCompletionEvent{{endif}}
-    {{if 'cudaLaunchAttributeDeviceUpdatableKernelNode' in found_values}}
-
-    #: Valid for graph nodes, launches. This attribute is graphs-only, and
-    #: passing it to a launch in a non-capturing stream will result in an
-    #: error.
-    #: :cudaLaunchAttributeValue::deviceUpdatableKernelNode::deviceUpdatable
-    #: can only be set to 0 or 1. Setting the field to 1 indicates that the
-    #: corresponding kernel node should be device-updatable. On success, a
-    #: handle will be returned via
-    #: :py:obj:`~.cudaLaunchAttributeValue`::deviceUpdatableKernelNode::devNode
-    #: which can be passed to the various device-side update functions to
-    #: update the node's kernel parameters from within another kernel. For
-    #: more information on the types of device updates that can be made, as
-    #: well as the relevant limitations thereof, see
-    #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
-    #:  Nodes which are device-updatable have additional restrictions
-    #: compared to regular kernel nodes. Firstly, device-updatable nodes
-    #: cannot be removed from their graph via
-    #: :py:obj:`~.cudaGraphDestroyNode`. Additionally, once opted-in to
-    #: this functionality, a node cannot opt out, and any attempt to set
-    #: the deviceUpdatable attribute to 0 will result in an error. Device-
-    #: updatable kernel nodes also cannot have their attributes copied
-    #: to/from another kernel node via
-    #: :py:obj:`~.cudaGraphKernelNodeCopyAttributes`. Graphs containing one
-    #: or more device-updatable nodes also do not allow multiple
-    #: instantiation, and neither the graph nor its instantiated version
-    #: can be passed to :py:obj:`~.cudaGraphExecUpdate`.
-    #:  If a graph contains device-updatable nodes and updates those nodes
-    #: from the device from within the graph, the graph must be uploaded
-    #: with :py:obj:`~.cuGraphUpload` before it is launched. For such a
-    #: graph, if host-side executable graph updates are made to the device-
-    #: updatable nodes, the graph must be uploaded before it is launched
-    #: again.
-    cudaLaunchAttributeDeviceUpdatableKernelNode = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode{{endif}}
-    {{if 'cudaLaunchAttributePreferredSharedMemoryCarveout' in found_values}}
-
-    #: Valid for launches. On devices where the L1 cache and shared memory
-    #: use the same hardware resources, setting
-    #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a
-    #: percentage between 0-100 signals sets the shared memory carveout
-    #: preference in percent of the total shared memory for that kernel
-    #: launch. This attribute takes precedence over
-    #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is
-    #: only a hint, and the driver can choose a different configuration if
-    #: required for the launch.
-    cudaLaunchAttributePreferredSharedMemoryCarveout = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout{{endif}}
-{{endif}}
-{{if 'cudaArray_t' in found_types}}
-
-cdef class cudaArray_t:
-    """
-
-    CUDA array
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaArray_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaArray_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaArray_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaArray_const_t' in found_types}}
-
-cdef class cudaArray_const_t:
-    """
-
-    CUDA array (as source copy argument)
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaArray_const_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaArray_const_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaArray_const_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaMipmappedArray_t' in found_types}}
-
-cdef class cudaMipmappedArray_t:
-    """
-
-    CUDA mipmapped array
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaMipmappedArray_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaMipmappedArray_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaMipmappedArray_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaMipmappedArray_const_t' in found_types}}
-
-cdef class cudaMipmappedArray_const_t:
-    """
-
-    CUDA mipmapped array (as source argument)
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaMipmappedArray_const_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaMipmappedArray_const_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaMipmappedArray_const_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaGraphicsResource_t' in found_types}}
-
-cdef class cudaGraphicsResource_t:
-    """
-
-    CUDA graphics resource types
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaGraphicsResource_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaGraphicsResource_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaGraphicsResource_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaExternalMemory_t' in found_types}}
-
-cdef class cudaExternalMemory_t:
-    """
-
-    CUDA external memory
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaExternalMemory_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaExternalMemory_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaExternalMemory_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaExternalSemaphore_t' in found_types}}
-
-cdef class cudaExternalSemaphore_t:
-    """
-
-    CUDA external semaphore
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaExternalSemaphore_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaExternalSemaphore_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaExternalSemaphore_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaKernel_t' in found_types}}
-
-cdef class cudaKernel_t:
-    """
-
-    CUDA kernel
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaKernel_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaKernel_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaKernel_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaGraphDeviceNode_t' in found_types}}
-
-cdef class cudaGraphDeviceNode_t:
-    """
-
-    CUDA device node handle for device-side node update
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaGraphDeviceNode_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaGraphDeviceNode_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaGraphDeviceNode_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaAsyncCallbackHandle_t' in found_types}}
-
-cdef class cudaAsyncCallbackHandle_t:
-    """
-
-    CUDA async callback handle
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaAsyncCallbackHandle_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaAsyncCallbackHandle_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaAsyncCallbackHandle_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLImageKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.EGLImageKHR>init_value
-        else:
-            self._ptr = <cyruntime.EGLImageKHR *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<EGLImageKHR ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLStreamKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.EGLStreamKHR>init_value
-        else:
-            self._ptr = <cyruntime.EGLStreamKHR *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<EGLStreamKHR ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLSyncKHR:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.EGLSyncKHR>init_value
-        else:
-            self._ptr = <cyruntime.EGLSyncKHR *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<EGLSyncKHR ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaHostFn_t' in found_types}}
-
-cdef class cudaHostFn_t:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaHostFn_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaHostFn_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaHostFn_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaAsyncCallback' in found_types}}
-
-cdef class cudaAsyncCallback:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaAsyncCallback>init_value
-        else:
-            self._ptr = <cyruntime.cudaAsyncCallback *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaAsyncCallback ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaStreamCallback_t' in found_types}}
-
-cdef class cudaStreamCallback_t:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-            self._ptr[0] = <cyruntime.cudaStreamCallback_t>init_value
-        else:
-            self._ptr = <cyruntime.cudaStreamCallback_t *>_ptr
-    def __init__(self, *args, **kwargs):
-        pass
-    def __repr__(self):
-        return '<cudaStreamCallback_t ' + str(hex(self.__int__())) + '>'
-    def __index__(self):
-        return self.__int__()
-    def __int__(self):
-        return <void_ptr>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'struct dim3' in found_types}}
-
-cdef class dim3:
-    """
-    Attributes
-    ----------
-    x : unsigned int
-
-    y : unsigned int
-
-    z : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.dim3 *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['x : ' + str(self.x)]
-            except ValueError:
-                str_list += ['x : <ValueError>']
-            try:
-                str_list += ['y : ' + str(self.y)]
-            except ValueError:
-                str_list += ['y : <ValueError>']
-            try:
-                str_list += ['z : ' + str(self.z)]
-            except ValueError:
-                str_list += ['z : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def x(self):
-        return self._ptr[0].x
-    @x.setter
-    def x(self, unsigned int x):
-        self._ptr[0].x = x
-    @property
-    def y(self):
-        return self._ptr[0].y
-    @y.setter
-    def y(self, unsigned int y):
-        self._ptr[0].y = y
-    @property
-    def z(self):
-        return self._ptr[0].z
-    @z.setter
-    def z(self, unsigned int z):
-        self._ptr[0].z = z
-{{endif}}
-{{if 'struct cudaChannelFormatDesc' in found_types}}
-
-cdef class cudaChannelFormatDesc:
-    """
-    CUDA Channel format descriptor
-
-    Attributes
-    ----------
-    x : int
-        x
-    y : int
-        y
-    z : int
-        z
-    w : int
-        w
-    f : cudaChannelFormatKind
-        Channel format kind
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaChannelFormatDesc *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['x : ' + str(self.x)]
-            except ValueError:
-                str_list += ['x : <ValueError>']
-            try:
-                str_list += ['y : ' + str(self.y)]
-            except ValueError:
-                str_list += ['y : <ValueError>']
-            try:
-                str_list += ['z : ' + str(self.z)]
-            except ValueError:
-                str_list += ['z : <ValueError>']
-            try:
-                str_list += ['w : ' + str(self.w)]
-            except ValueError:
-                str_list += ['w : <ValueError>']
-            try:
-                str_list += ['f : ' + str(self.f)]
-            except ValueError:
-                str_list += ['f : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def x(self):
-        return self._ptr[0].x
-    @x.setter
-    def x(self, int x):
-        self._ptr[0].x = x
-    @property
-    def y(self):
-        return self._ptr[0].y
-    @y.setter
-    def y(self, int y):
-        self._ptr[0].y = y
-    @property
-    def z(self):
-        return self._ptr[0].z
-    @z.setter
-    def z(self, int z):
-        self._ptr[0].z = z
-    @property
-    def w(self):
-        return self._ptr[0].w
-    @w.setter
-    def w(self, int w):
-        self._ptr[0].w = w
-    @property
-    def f(self):
-        return cudaChannelFormatKind(self._ptr[0].f)
-    @f.setter
-    def f(self, f not None : cudaChannelFormatKind):
-        self._ptr[0].f = f.value
-{{endif}}
-{{if 'struct cudaArraySparseProperties' in found_types}}
-
-cdef class anon_struct0:
-    """
-    Attributes
-    ----------
-    width : unsigned int
-
-    height : unsigned int
-
-    depth : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaArraySparseProperties *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].tileExtent
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            try:
-                str_list += ['depth : ' + str(self.depth)]
-            except ValueError:
-                str_list += ['depth : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def width(self):
-        return self._ptr[0].tileExtent.width
-    @width.setter
-    def width(self, unsigned int width):
-        self._ptr[0].tileExtent.width = width
-    @property
-    def height(self):
-        return self._ptr[0].tileExtent.height
-    @height.setter
-    def height(self, unsigned int height):
-        self._ptr[0].tileExtent.height = height
-    @property
-    def depth(self):
-        return self._ptr[0].tileExtent.depth
-    @depth.setter
-    def depth(self, unsigned int depth):
-        self._ptr[0].tileExtent.depth = depth
-{{endif}}
-{{if 'struct cudaArraySparseProperties' in found_types}}
-
-cdef class cudaArraySparseProperties:
-    """
-    Sparse CUDA array and CUDA mipmapped array properties
-
-    Attributes
-    ----------
-    tileExtent : anon_struct0
-
-    miptailFirstLevel : unsigned int
-        First mip level at which the mip tail begins
-    miptailSize : unsigned long long
-        Total size of the mip tail.
-    flags : unsigned int
-        Flags will either be zero or cudaArraySparsePropertiesSingleMipTail
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaArraySparseProperties *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._tileExtent = anon_struct0(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['tileExtent :\n' + '\n'.join(['    ' + line for line in str(self.tileExtent).splitlines()])]
-            except ValueError:
-                str_list += ['tileExtent : <ValueError>']
-            try:
-                str_list += ['miptailFirstLevel : ' + str(self.miptailFirstLevel)]
-            except ValueError:
-                str_list += ['miptailFirstLevel : <ValueError>']
-            try:
-                str_list += ['miptailSize : ' + str(self.miptailSize)]
-            except ValueError:
-                str_list += ['miptailSize : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def tileExtent(self):
-        return self._tileExtent
-    @tileExtent.setter
-    def tileExtent(self, tileExtent not None : anon_struct0):
-        string.memcpy(&self._ptr[0].tileExtent, <cyruntime.anon_struct0*><void_ptr>tileExtent.getPtr(), sizeof(self._ptr[0].tileExtent))
-    @property
-    def miptailFirstLevel(self):
-        return self._ptr[0].miptailFirstLevel
-    @miptailFirstLevel.setter
-    def miptailFirstLevel(self, unsigned int miptailFirstLevel):
-        self._ptr[0].miptailFirstLevel = miptailFirstLevel
-    @property
-    def miptailSize(self):
-        return self._ptr[0].miptailSize
-    @miptailSize.setter
-    def miptailSize(self, unsigned long long miptailSize):
-        self._ptr[0].miptailSize = miptailSize
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct cudaArrayMemoryRequirements' in found_types}}
-
-cdef class cudaArrayMemoryRequirements:
-    """
-    CUDA array and CUDA mipmapped array memory requirements
-
-    Attributes
-    ----------
-    size : size_t
-        Total size of the array.
-    alignment : size_t
-        Alignment necessary for mapping the array.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaArrayMemoryRequirements *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['size : ' + str(self.size)]
-            except ValueError:
-                str_list += ['size : <ValueError>']
-            try:
-                str_list += ['alignment : ' + str(self.alignment)]
-            except ValueError:
-                str_list += ['alignment : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def size(self):
-        return self._ptr[0].size
-    @size.setter
-    def size(self, size_t size):
-        self._ptr[0].size = size
-    @property
-    def alignment(self):
-        return self._ptr[0].alignment
-    @alignment.setter
-    def alignment(self, size_t alignment):
-        self._ptr[0].alignment = alignment
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct cudaPitchedPtr' in found_types}}
-
-cdef class cudaPitchedPtr:
-    """
-    CUDA Pitched memory pointer  ::make_cudaPitchedPtr
-
-    Attributes
-    ----------
-    ptr : Any
-        Pointer to allocated memory
-    pitch : size_t
-        Pitch of allocated memory in bytes
-    xsize : size_t
-        Logical width of allocation in elements
-    ysize : size_t
-        Logical height of allocation in elements
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaPitchedPtr *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['ptr : ' + hex(self.ptr)]
-            except ValueError:
-                str_list += ['ptr : <ValueError>']
-            try:
-                str_list += ['pitch : ' + str(self.pitch)]
-            except ValueError:
-                str_list += ['pitch : <ValueError>']
-            try:
-                str_list += ['xsize : ' + str(self.xsize)]
-            except ValueError:
-                str_list += ['xsize : <ValueError>']
-            try:
-                str_list += ['ysize : ' + str(self.ysize)]
-            except ValueError:
-                str_list += ['ysize : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def ptr(self):
-        return <void_ptr>self._ptr[0].ptr
-    @ptr.setter
-    def ptr(self, ptr):
-        _cyptr = utils.HelperInputVoidPtr(ptr)
-        self._ptr[0].ptr = <void*><void_ptr>_cyptr.cptr
-    @property
-    def pitch(self):
-        return self._ptr[0].pitch
-    @pitch.setter
-    def pitch(self, size_t pitch):
-        self._ptr[0].pitch = pitch
-    @property
-    def xsize(self):
-        return self._ptr[0].xsize
-    @xsize.setter
-    def xsize(self, size_t xsize):
-        self._ptr[0].xsize = xsize
-    @property
-    def ysize(self):
-        return self._ptr[0].ysize
-    @ysize.setter
-    def ysize(self, size_t ysize):
-        self._ptr[0].ysize = ysize
-{{endif}}
-{{if 'struct cudaExtent' in found_types}}
-
-cdef class cudaExtent:
-    """
-    CUDA extent  ::make_cudaExtent
-
-    Attributes
-    ----------
-    width : size_t
-        Width in elements when referring to array memory, in bytes when
-        referring to linear memory
-    height : size_t
-        Height in elements
-    depth : size_t
-        Depth in elements
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaExtent *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            try:
-                str_list += ['depth : ' + str(self.depth)]
-            except ValueError:
-                str_list += ['depth : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def width(self):
-        return self._ptr[0].width
-    @width.setter
-    def width(self, size_t width):
-        self._ptr[0].width = width
-    @property
-    def height(self):
-        return self._ptr[0].height
-    @height.setter
-    def height(self, size_t height):
-        self._ptr[0].height = height
-    @property
-    def depth(self):
-        return self._ptr[0].depth
-    @depth.setter
-    def depth(self, size_t depth):
-        self._ptr[0].depth = depth
-{{endif}}
-{{if 'struct cudaPos' in found_types}}
-
-cdef class cudaPos:
-    """
-    CUDA 3D position  ::make_cudaPos
-
-    Attributes
-    ----------
-    x : size_t
-        x
-    y : size_t
-        y
-    z : size_t
-        z
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaPos *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['x : ' + str(self.x)]
-            except ValueError:
-                str_list += ['x : <ValueError>']
-            try:
-                str_list += ['y : ' + str(self.y)]
-            except ValueError:
-                str_list += ['y : <ValueError>']
-            try:
-                str_list += ['z : ' + str(self.z)]
-            except ValueError:
-                str_list += ['z : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def x(self):
-        return self._ptr[0].x
-    @x.setter
-    def x(self, size_t x):
-        self._ptr[0].x = x
-    @property
-    def y(self):
-        return self._ptr[0].y
-    @y.setter
-    def y(self, size_t y):
-        self._ptr[0].y = y
-    @property
-    def z(self):
-        return self._ptr[0].z
-    @z.setter
-    def z(self, size_t z):
-        self._ptr[0].z = z
-{{endif}}
-{{if 'struct cudaMemcpy3DParms' in found_types}}
-
-cdef class cudaMemcpy3DParms:
-    """
-    CUDA 3D memory copying parameters
-
-    Attributes
-    ----------
-    srcArray : cudaArray_t
-        Source memory address
-    srcPos : cudaPos
-        Source position offset
-    srcPtr : cudaPitchedPtr
-        Pitched source memory address
-    dstArray : cudaArray_t
-        Destination memory address
-    dstPos : cudaPos
-        Destination position offset
-    dstPtr : cudaPitchedPtr
-        Pitched destination memory address
-    extent : cudaExtent
-        Requested memory copy size
-    kind : cudaMemcpyKind
-        Type of transfer
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemcpy3DParms *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._srcArray = cudaArray_t(_ptr=<void_ptr>&self._ptr[0].srcArray)
-        self._srcPos = cudaPos(_ptr=<void_ptr>&self._ptr[0].srcPos)
-        self._srcPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._ptr[0].srcPtr)
-        self._dstArray = cudaArray_t(_ptr=<void_ptr>&self._ptr[0].dstArray)
-        self._dstPos = cudaPos(_ptr=<void_ptr>&self._ptr[0].dstPos)
-        self._dstPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._ptr[0].dstPtr)
-        self._extent = cudaExtent(_ptr=<void_ptr>&self._ptr[0].extent)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['srcArray : ' + str(self.srcArray)]
-            except ValueError:
-                str_list += ['srcArray : <ValueError>']
-            try:
-                str_list += ['srcPos :\n' + '\n'.join(['    ' + line for line in str(self.srcPos).splitlines()])]
-            except ValueError:
-                str_list += ['srcPos : <ValueError>']
-            try:
-                str_list += ['srcPtr :\n' + '\n'.join(['    ' + line for line in str(self.srcPtr).splitlines()])]
-            except ValueError:
-                str_list += ['srcPtr : <ValueError>']
-            try:
-                str_list += ['dstArray : ' + str(self.dstArray)]
-            except ValueError:
-                str_list += ['dstArray : <ValueError>']
-            try:
-                str_list += ['dstPos :\n' + '\n'.join(['    ' + line for line in str(self.dstPos).splitlines()])]
-            except ValueError:
-                str_list += ['dstPos : <ValueError>']
-            try:
-                str_list += ['dstPtr :\n' + '\n'.join(['    ' + line for line in str(self.dstPtr).splitlines()])]
-            except ValueError:
-                str_list += ['dstPtr : <ValueError>']
-            try:
-                str_list += ['extent :\n' + '\n'.join(['    ' + line for line in str(self.extent).splitlines()])]
-            except ValueError:
-                str_list += ['extent : <ValueError>']
-            try:
-                str_list += ['kind : ' + str(self.kind)]
-            except ValueError:
-                str_list += ['kind : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def srcArray(self):
-        return self._srcArray
-    @srcArray.setter
-    def srcArray(self, srcArray):
-        cdef cyruntime.cudaArray_t cysrcArray
-        if srcArray is None:
-            cysrcArray = <cyruntime.cudaArray_t><void_ptr>0
-        elif isinstance(srcArray, (cudaArray_t,)):
-            psrcArray = int(srcArray)
-            cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
-        else:
-            psrcArray = int(cudaArray_t(srcArray))
-            cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
-        self._srcArray._ptr[0] = cysrcArray
-    @property
-    def srcPos(self):
-        return self._srcPos
-    @srcPos.setter
-    def srcPos(self, srcPos not None : cudaPos):
-        string.memcpy(&self._ptr[0].srcPos, <cyruntime.cudaPos*><void_ptr>srcPos.getPtr(), sizeof(self._ptr[0].srcPos))
-    @property
-    def srcPtr(self):
-        return self._srcPtr
-    @srcPtr.setter
-    def srcPtr(self, srcPtr not None : cudaPitchedPtr):
-        string.memcpy(&self._ptr[0].srcPtr, <cyruntime.cudaPitchedPtr*><void_ptr>srcPtr.getPtr(), sizeof(self._ptr[0].srcPtr))
-    @property
-    def dstArray(self):
-        return self._dstArray
-    @dstArray.setter
-    def dstArray(self, dstArray):
-        cdef cyruntime.cudaArray_t cydstArray
-        if dstArray is None:
-            cydstArray = <cyruntime.cudaArray_t><void_ptr>0
-        elif isinstance(dstArray, (cudaArray_t,)):
-            pdstArray = int(dstArray)
-            cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
-        else:
-            pdstArray = int(cudaArray_t(dstArray))
-            cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
-        self._dstArray._ptr[0] = cydstArray
-    @property
-    def dstPos(self):
-        return self._dstPos
-    @dstPos.setter
-    def dstPos(self, dstPos not None : cudaPos):
-        string.memcpy(&self._ptr[0].dstPos, <cyruntime.cudaPos*><void_ptr>dstPos.getPtr(), sizeof(self._ptr[0].dstPos))
-    @property
-    def dstPtr(self):
-        return self._dstPtr
-    @dstPtr.setter
-    def dstPtr(self, dstPtr not None : cudaPitchedPtr):
-        string.memcpy(&self._ptr[0].dstPtr, <cyruntime.cudaPitchedPtr*><void_ptr>dstPtr.getPtr(), sizeof(self._ptr[0].dstPtr))
-    @property
-    def extent(self):
-        return self._extent
-    @extent.setter
-    def extent(self, extent not None : cudaExtent):
-        string.memcpy(&self._ptr[0].extent, <cyruntime.cudaExtent*><void_ptr>extent.getPtr(), sizeof(self._ptr[0].extent))
-    @property
-    def kind(self):
-        return cudaMemcpyKind(self._ptr[0].kind)
-    @kind.setter
-    def kind(self, kind not None : cudaMemcpyKind):
-        self._ptr[0].kind = kind.value
-{{endif}}
-{{if 'struct cudaMemcpyNodeParams' in found_types}}
-
-cdef class cudaMemcpyNodeParams:
-    """
-    Memcpy node parameters
-
-    Attributes
-    ----------
-    flags : int
-        Must be zero
-    reserved : List[int]
-        Must be zero
-    copyParams : cudaMemcpy3DParms
-        Parameters for the memory copy
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemcpyNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._copyParams = cudaMemcpy3DParms(_ptr=<void_ptr>&self._ptr[0].copyParams)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            try:
-                str_list += ['copyParams :\n' + '\n'.join(['    ' + line for line in str(self.copyParams).splitlines()])]
-            except ValueError:
-                str_list += ['copyParams : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-    @property
-    def copyParams(self):
-        return self._copyParams
-    @copyParams.setter
-    def copyParams(self, copyParams not None : cudaMemcpy3DParms):
-        string.memcpy(&self._ptr[0].copyParams, <cyruntime.cudaMemcpy3DParms*><void_ptr>copyParams.getPtr(), sizeof(self._ptr[0].copyParams))
-{{endif}}
-{{if 'struct cudaMemcpy3DPeerParms' in found_types}}
-
-cdef class cudaMemcpy3DPeerParms:
-    """
-    CUDA 3D cross-device memory copying parameters
-
-    Attributes
-    ----------
-    srcArray : cudaArray_t
-        Source memory address
-    srcPos : cudaPos
-        Source position offset
-    srcPtr : cudaPitchedPtr
-        Pitched source memory address
-    srcDevice : int
-        Source device
-    dstArray : cudaArray_t
-        Destination memory address
-    dstPos : cudaPos
-        Destination position offset
-    dstPtr : cudaPitchedPtr
-        Pitched destination memory address
-    dstDevice : int
-        Destination device
-    extent : cudaExtent
-        Requested memory copy size
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemcpy3DPeerParms *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._srcArray = cudaArray_t(_ptr=<void_ptr>&self._ptr[0].srcArray)
-        self._srcPos = cudaPos(_ptr=<void_ptr>&self._ptr[0].srcPos)
-        self._srcPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._ptr[0].srcPtr)
-        self._dstArray = cudaArray_t(_ptr=<void_ptr>&self._ptr[0].dstArray)
-        self._dstPos = cudaPos(_ptr=<void_ptr>&self._ptr[0].dstPos)
-        self._dstPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._ptr[0].dstPtr)
-        self._extent = cudaExtent(_ptr=<void_ptr>&self._ptr[0].extent)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['srcArray : ' + str(self.srcArray)]
-            except ValueError:
-                str_list += ['srcArray : <ValueError>']
-            try:
-                str_list += ['srcPos :\n' + '\n'.join(['    ' + line for line in str(self.srcPos).splitlines()])]
-            except ValueError:
-                str_list += ['srcPos : <ValueError>']
-            try:
-                str_list += ['srcPtr :\n' + '\n'.join(['    ' + line for line in str(self.srcPtr).splitlines()])]
-            except ValueError:
-                str_list += ['srcPtr : <ValueError>']
-            try:
-                str_list += ['srcDevice : ' + str(self.srcDevice)]
-            except ValueError:
-                str_list += ['srcDevice : <ValueError>']
-            try:
-                str_list += ['dstArray : ' + str(self.dstArray)]
-            except ValueError:
-                str_list += ['dstArray : <ValueError>']
-            try:
-                str_list += ['dstPos :\n' + '\n'.join(['    ' + line for line in str(self.dstPos).splitlines()])]
-            except ValueError:
-                str_list += ['dstPos : <ValueError>']
-            try:
-                str_list += ['dstPtr :\n' + '\n'.join(['    ' + line for line in str(self.dstPtr).splitlines()])]
-            except ValueError:
-                str_list += ['dstPtr : <ValueError>']
-            try:
-                str_list += ['dstDevice : ' + str(self.dstDevice)]
-            except ValueError:
-                str_list += ['dstDevice : <ValueError>']
-            try:
-                str_list += ['extent :\n' + '\n'.join(['    ' + line for line in str(self.extent).splitlines()])]
-            except ValueError:
-                str_list += ['extent : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def srcArray(self):
-        return self._srcArray
-    @srcArray.setter
-    def srcArray(self, srcArray):
-        cdef cyruntime.cudaArray_t cysrcArray
-        if srcArray is None:
-            cysrcArray = <cyruntime.cudaArray_t><void_ptr>0
-        elif isinstance(srcArray, (cudaArray_t,)):
-            psrcArray = int(srcArray)
-            cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
-        else:
-            psrcArray = int(cudaArray_t(srcArray))
-            cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
-        self._srcArray._ptr[0] = cysrcArray
-    @property
-    def srcPos(self):
-        return self._srcPos
-    @srcPos.setter
-    def srcPos(self, srcPos not None : cudaPos):
-        string.memcpy(&self._ptr[0].srcPos, <cyruntime.cudaPos*><void_ptr>srcPos.getPtr(), sizeof(self._ptr[0].srcPos))
-    @property
-    def srcPtr(self):
-        return self._srcPtr
-    @srcPtr.setter
-    def srcPtr(self, srcPtr not None : cudaPitchedPtr):
-        string.memcpy(&self._ptr[0].srcPtr, <cyruntime.cudaPitchedPtr*><void_ptr>srcPtr.getPtr(), sizeof(self._ptr[0].srcPtr))
-    @property
-    def srcDevice(self):
-        return self._ptr[0].srcDevice
-    @srcDevice.setter
-    def srcDevice(self, int srcDevice):
-        self._ptr[0].srcDevice = srcDevice
-    @property
-    def dstArray(self):
-        return self._dstArray
-    @dstArray.setter
-    def dstArray(self, dstArray):
-        cdef cyruntime.cudaArray_t cydstArray
-        if dstArray is None:
-            cydstArray = <cyruntime.cudaArray_t><void_ptr>0
-        elif isinstance(dstArray, (cudaArray_t,)):
-            pdstArray = int(dstArray)
-            cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
-        else:
-            pdstArray = int(cudaArray_t(dstArray))
-            cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
-        self._dstArray._ptr[0] = cydstArray
-    @property
-    def dstPos(self):
-        return self._dstPos
-    @dstPos.setter
-    def dstPos(self, dstPos not None : cudaPos):
-        string.memcpy(&self._ptr[0].dstPos, <cyruntime.cudaPos*><void_ptr>dstPos.getPtr(), sizeof(self._ptr[0].dstPos))
-    @property
-    def dstPtr(self):
-        return self._dstPtr
-    @dstPtr.setter
-    def dstPtr(self, dstPtr not None : cudaPitchedPtr):
-        string.memcpy(&self._ptr[0].dstPtr, <cyruntime.cudaPitchedPtr*><void_ptr>dstPtr.getPtr(), sizeof(self._ptr[0].dstPtr))
-    @property
-    def dstDevice(self):
-        return self._ptr[0].dstDevice
-    @dstDevice.setter
-    def dstDevice(self, int dstDevice):
-        self._ptr[0].dstDevice = dstDevice
-    @property
-    def extent(self):
-        return self._extent
-    @extent.setter
-    def extent(self, extent not None : cudaExtent):
-        string.memcpy(&self._ptr[0].extent, <cyruntime.cudaExtent*><void_ptr>extent.getPtr(), sizeof(self._ptr[0].extent))
-{{endif}}
-{{if 'struct cudaMemsetParams' in found_types}}
-
-cdef class cudaMemsetParams:
-    """
-    CUDA Memset node parameters
-
-    Attributes
-    ----------
-    dst : Any
-        Destination device pointer
-    pitch : size_t
-        Pitch of destination device pointer. Unused if height is 1
-    value : unsigned int
-        Value to be set
-    elementSize : unsigned int
-        Size of each element in bytes. Must be 1, 2, or 4.
-    width : size_t
-        Width of the row in elements
-    height : size_t
-        Number of rows
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemsetParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['dst : ' + hex(self.dst)]
-            except ValueError:
-                str_list += ['dst : <ValueError>']
-            try:
-                str_list += ['pitch : ' + str(self.pitch)]
-            except ValueError:
-                str_list += ['pitch : <ValueError>']
-            try:
-                str_list += ['value : ' + str(self.value)]
-            except ValueError:
-                str_list += ['value : <ValueError>']
-            try:
-                str_list += ['elementSize : ' + str(self.elementSize)]
-            except ValueError:
-                str_list += ['elementSize : <ValueError>']
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def dst(self):
-        return <void_ptr>self._ptr[0].dst
-    @dst.setter
-    def dst(self, dst):
-        _cydst = utils.HelperInputVoidPtr(dst)
-        self._ptr[0].dst = <void*><void_ptr>_cydst.cptr
-    @property
-    def pitch(self):
-        return self._ptr[0].pitch
-    @pitch.setter
-    def pitch(self, size_t pitch):
-        self._ptr[0].pitch = pitch
-    @property
-    def value(self):
-        return self._ptr[0].value
-    @value.setter
-    def value(self, unsigned int value):
-        self._ptr[0].value = value
-    @property
-    def elementSize(self):
-        return self._ptr[0].elementSize
-    @elementSize.setter
-    def elementSize(self, unsigned int elementSize):
-        self._ptr[0].elementSize = elementSize
-    @property
-    def width(self):
-        return self._ptr[0].width
-    @width.setter
-    def width(self, size_t width):
-        self._ptr[0].width = width
-    @property
-    def height(self):
-        return self._ptr[0].height
-    @height.setter
-    def height(self, size_t height):
-        self._ptr[0].height = height
-{{endif}}
-{{if 'struct cudaMemsetParamsV2' in found_types}}
-
-cdef class cudaMemsetParamsV2:
-    """
-    CUDA Memset node parameters
-
-    Attributes
-    ----------
-    dst : Any
-        Destination device pointer
-    pitch : size_t
-        Pitch of destination device pointer. Unused if height is 1
-    value : unsigned int
-        Value to be set
-    elementSize : unsigned int
-        Size of each element in bytes. Must be 1, 2, or 4.
-    width : size_t
-        Width of the row in elements
-    height : size_t
-        Number of rows
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemsetParamsV2 *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['dst : ' + hex(self.dst)]
-            except ValueError:
-                str_list += ['dst : <ValueError>']
-            try:
-                str_list += ['pitch : ' + str(self.pitch)]
-            except ValueError:
-                str_list += ['pitch : <ValueError>']
-            try:
-                str_list += ['value : ' + str(self.value)]
-            except ValueError:
-                str_list += ['value : <ValueError>']
-            try:
-                str_list += ['elementSize : ' + str(self.elementSize)]
-            except ValueError:
-                str_list += ['elementSize : <ValueError>']
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def dst(self):
-        return <void_ptr>self._ptr[0].dst
-    @dst.setter
-    def dst(self, dst):
-        _cydst = utils.HelperInputVoidPtr(dst)
-        self._ptr[0].dst = <void*><void_ptr>_cydst.cptr
-    @property
-    def pitch(self):
-        return self._ptr[0].pitch
-    @pitch.setter
-    def pitch(self, size_t pitch):
-        self._ptr[0].pitch = pitch
-    @property
-    def value(self):
-        return self._ptr[0].value
-    @value.setter
-    def value(self, unsigned int value):
-        self._ptr[0].value = value
-    @property
-    def elementSize(self):
-        return self._ptr[0].elementSize
-    @elementSize.setter
-    def elementSize(self, unsigned int elementSize):
-        self._ptr[0].elementSize = elementSize
-    @property
-    def width(self):
-        return self._ptr[0].width
-    @width.setter
-    def width(self, size_t width):
-        self._ptr[0].width = width
-    @property
-    def height(self):
-        return self._ptr[0].height
-    @height.setter
-    def height(self, size_t height):
-        self._ptr[0].height = height
-{{endif}}
-{{if 'struct cudaAccessPolicyWindow' in found_types}}
-
-cdef class cudaAccessPolicyWindow:
-    """
-    Specifies an access policy for a window, a contiguous extent of
-    memory beginning at base_ptr and ending at base_ptr + num_bytes.
-    Partition into many segments and assign segments such that. sum of
-    "hit segments" / window == approx. ratio. sum of "miss segments" /
-    window == approx 1-ratio. Segments and ratio specifications are
-    fitted to the capabilities of the architecture. Accesses in a hit
-    segment apply the hitProp access policy. Accesses in a miss segment
-    apply the missProp access policy.
-
-    Attributes
-    ----------
-    base_ptr : Any
-        Starting address of the access policy window. CUDA driver may align
-        it.
-    num_bytes : size_t
-        Size in bytes of the window policy. CUDA driver may restrict the
-        maximum size and alignment.
-    hitRatio : float
-        hitRatio specifies percentage of lines assigned hitProp, rest are
-        assigned missProp.
-    hitProp : cudaAccessProperty
-        ::CUaccessProperty set for hit.
-    missProp : cudaAccessProperty
-        ::CUaccessProperty set for miss. Must be either NORMAL or
-        STREAMING.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaAccessPolicyWindow *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['base_ptr : ' + hex(self.base_ptr)]
-            except ValueError:
-                str_list += ['base_ptr : <ValueError>']
-            try:
-                str_list += ['num_bytes : ' + str(self.num_bytes)]
-            except ValueError:
-                str_list += ['num_bytes : <ValueError>']
-            try:
-                str_list += ['hitRatio : ' + str(self.hitRatio)]
-            except ValueError:
-                str_list += ['hitRatio : <ValueError>']
-            try:
-                str_list += ['hitProp : ' + str(self.hitProp)]
-            except ValueError:
-                str_list += ['hitProp : <ValueError>']
-            try:
-                str_list += ['missProp : ' + str(self.missProp)]
-            except ValueError:
-                str_list += ['missProp : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def base_ptr(self):
-        return <void_ptr>self._ptr[0].base_ptr
-    @base_ptr.setter
-    def base_ptr(self, base_ptr):
-        _cybase_ptr = utils.HelperInputVoidPtr(base_ptr)
-        self._ptr[0].base_ptr = <void*><void_ptr>_cybase_ptr.cptr
-    @property
-    def num_bytes(self):
-        return self._ptr[0].num_bytes
-    @num_bytes.setter
-    def num_bytes(self, size_t num_bytes):
-        self._ptr[0].num_bytes = num_bytes
-    @property
-    def hitRatio(self):
-        return self._ptr[0].hitRatio
-    @hitRatio.setter
-    def hitRatio(self, float hitRatio):
-        self._ptr[0].hitRatio = hitRatio
-    @property
-    def hitProp(self):
-        return cudaAccessProperty(self._ptr[0].hitProp)
-    @hitProp.setter
-    def hitProp(self, hitProp not None : cudaAccessProperty):
-        self._ptr[0].hitProp = hitProp.value
-    @property
-    def missProp(self):
-        return cudaAccessProperty(self._ptr[0].missProp)
-    @missProp.setter
-    def missProp(self, missProp not None : cudaAccessProperty):
-        self._ptr[0].missProp = missProp.value
-{{endif}}
-{{if 'struct cudaHostNodeParams' in found_types}}
-
-cdef class cudaHostNodeParams:
-    """
-    CUDA host node parameters
-
-    Attributes
-    ----------
-    fn : cudaHostFn_t
-        The function to call when the node executes
-    userData : Any
-        Argument to pass to the function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaHostNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._fn = cudaHostFn_t(_ptr=<void_ptr>&self._ptr[0].fn)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fn : ' + str(self.fn)]
-            except ValueError:
-                str_list += ['fn : <ValueError>']
-            try:
-                str_list += ['userData : ' + hex(self.userData)]
-            except ValueError:
-                str_list += ['userData : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fn(self):
-        return self._fn
-    @fn.setter
-    def fn(self, fn):
-        cdef cyruntime.cudaHostFn_t cyfn
-        if fn is None:
-            cyfn = <cyruntime.cudaHostFn_t><void_ptr>0
-        elif isinstance(fn, (cudaHostFn_t)):
-            pfn = int(fn)
-            cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
-        else:
-            pfn = int(cudaHostFn_t(fn))
-            cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
-        self._fn._ptr[0] = cyfn
-    @property
-    def userData(self):
-        return <void_ptr>self._ptr[0].userData
-    @userData.setter
-    def userData(self, userData):
-        _cyuserData = utils.HelperInputVoidPtr(userData)
-        self._ptr[0].userData = <void*><void_ptr>_cyuserData.cptr
-{{endif}}
-{{if 'struct cudaHostNodeParamsV2' in found_types}}
-
-cdef class cudaHostNodeParamsV2:
-    """
-    CUDA host node parameters
-
-    Attributes
-    ----------
-    fn : cudaHostFn_t
-        The function to call when the node executes
-    userData : Any
-        Argument to pass to the function
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaHostNodeParamsV2 *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._fn = cudaHostFn_t(_ptr=<void_ptr>&self._ptr[0].fn)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fn : ' + str(self.fn)]
-            except ValueError:
-                str_list += ['fn : <ValueError>']
-            try:
-                str_list += ['userData : ' + hex(self.userData)]
-            except ValueError:
-                str_list += ['userData : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fn(self):
-        return self._fn
-    @fn.setter
-    def fn(self, fn):
-        cdef cyruntime.cudaHostFn_t cyfn
-        if fn is None:
-            cyfn = <cyruntime.cudaHostFn_t><void_ptr>0
-        elif isinstance(fn, (cudaHostFn_t)):
-            pfn = int(fn)
-            cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
-        else:
-            pfn = int(cudaHostFn_t(fn))
-            cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
-        self._fn._ptr[0] = cyfn
-    @property
-    def userData(self):
-        return <void_ptr>self._ptr[0].userData
-    @userData.setter
-    def userData(self, userData):
-        _cyuserData = utils.HelperInputVoidPtr(userData)
-        self._ptr[0].userData = <void*><void_ptr>_cyuserData.cptr
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class anon_struct1:
-    """
-    Attributes
-    ----------
-    array : cudaArray_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaResourceDesc *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._array = cudaArray_t(_ptr=<void_ptr>&self._ptr[0].res.array.array)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].res.array
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['array : ' + str(self.array)]
-            except ValueError:
-                str_list += ['array : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def array(self):
-        return self._array
-    @array.setter
-    def array(self, array):
-        cdef cyruntime.cudaArray_t cyarray
-        if array is None:
-            cyarray = <cyruntime.cudaArray_t><void_ptr>0
-        elif isinstance(array, (cudaArray_t,)):
-            parray = int(array)
-            cyarray = <cyruntime.cudaArray_t><void_ptr>parray
-        else:
-            parray = int(cudaArray_t(array))
-            cyarray = <cyruntime.cudaArray_t><void_ptr>parray
-        self._array._ptr[0] = cyarray
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class anon_struct2:
-    """
-    Attributes
-    ----------
-    mipmap : cudaMipmappedArray_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaResourceDesc *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._mipmap = cudaMipmappedArray_t(_ptr=<void_ptr>&self._ptr[0].res.mipmap.mipmap)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].res.mipmap
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['mipmap : ' + str(self.mipmap)]
-            except ValueError:
-                str_list += ['mipmap : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def mipmap(self):
-        return self._mipmap
-    @mipmap.setter
-    def mipmap(self, mipmap):
-        cdef cyruntime.cudaMipmappedArray_t cymipmap
-        if mipmap is None:
-            cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>0
-        elif isinstance(mipmap, (cudaMipmappedArray_t,)):
-            pmipmap = int(mipmap)
-            cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
-        else:
-            pmipmap = int(cudaMipmappedArray_t(mipmap))
-            cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
-        self._mipmap._ptr[0] = cymipmap
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class anon_struct3:
-    """
-    Attributes
-    ----------
-    devPtr : Any
-
-    desc : cudaChannelFormatDesc
-
-    sizeInBytes : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaResourceDesc *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._desc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._ptr[0].res.linear.desc)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].res.linear
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['devPtr : ' + hex(self.devPtr)]
-            except ValueError:
-                str_list += ['devPtr : <ValueError>']
-            try:
-                str_list += ['desc :\n' + '\n'.join(['    ' + line for line in str(self.desc).splitlines()])]
-            except ValueError:
-                str_list += ['desc : <ValueError>']
-            try:
-                str_list += ['sizeInBytes : ' + str(self.sizeInBytes)]
-            except ValueError:
-                str_list += ['sizeInBytes : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def devPtr(self):
-        return <void_ptr>self._ptr[0].res.linear.devPtr
-    @devPtr.setter
-    def devPtr(self, devPtr):
-        _cydevPtr = utils.HelperInputVoidPtr(devPtr)
-        self._ptr[0].res.linear.devPtr = <void*><void_ptr>_cydevPtr.cptr
-    @property
-    def desc(self):
-        return self._desc
-    @desc.setter
-    def desc(self, desc not None : cudaChannelFormatDesc):
-        string.memcpy(&self._ptr[0].res.linear.desc, <cyruntime.cudaChannelFormatDesc*><void_ptr>desc.getPtr(), sizeof(self._ptr[0].res.linear.desc))
-    @property
-    def sizeInBytes(self):
-        return self._ptr[0].res.linear.sizeInBytes
-    @sizeInBytes.setter
-    def sizeInBytes(self, size_t sizeInBytes):
-        self._ptr[0].res.linear.sizeInBytes = sizeInBytes
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class anon_struct4:
-    """
-    Attributes
-    ----------
-    devPtr : Any
-
-    desc : cudaChannelFormatDesc
-
-    width : size_t
-
-    height : size_t
-
-    pitchInBytes : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaResourceDesc *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._desc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._ptr[0].res.pitch2D.desc)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].res.pitch2D
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['devPtr : ' + hex(self.devPtr)]
-            except ValueError:
-                str_list += ['devPtr : <ValueError>']
-            try:
-                str_list += ['desc :\n' + '\n'.join(['    ' + line for line in str(self.desc).splitlines()])]
-            except ValueError:
-                str_list += ['desc : <ValueError>']
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            try:
-                str_list += ['pitchInBytes : ' + str(self.pitchInBytes)]
-            except ValueError:
-                str_list += ['pitchInBytes : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def devPtr(self):
-        return <void_ptr>self._ptr[0].res.pitch2D.devPtr
-    @devPtr.setter
-    def devPtr(self, devPtr):
-        _cydevPtr = utils.HelperInputVoidPtr(devPtr)
-        self._ptr[0].res.pitch2D.devPtr = <void*><void_ptr>_cydevPtr.cptr
-    @property
-    def desc(self):
-        return self._desc
-    @desc.setter
-    def desc(self, desc not None : cudaChannelFormatDesc):
-        string.memcpy(&self._ptr[0].res.pitch2D.desc, <cyruntime.cudaChannelFormatDesc*><void_ptr>desc.getPtr(), sizeof(self._ptr[0].res.pitch2D.desc))
-    @property
-    def width(self):
-        return self._ptr[0].res.pitch2D.width
-    @width.setter
-    def width(self, size_t width):
-        self._ptr[0].res.pitch2D.width = width
-    @property
-    def height(self):
-        return self._ptr[0].res.pitch2D.height
-    @height.setter
-    def height(self, size_t height):
-        self._ptr[0].res.pitch2D.height = height
-    @property
-    def pitchInBytes(self):
-        return self._ptr[0].res.pitch2D.pitchInBytes
-    @pitchInBytes.setter
-    def pitchInBytes(self, size_t pitchInBytes):
-        self._ptr[0].res.pitch2D.pitchInBytes = pitchInBytes
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class anon_union0:
-    """
-    Attributes
-    ----------
-    array : anon_struct1
-
-    mipmap : anon_struct2
-
-    linear : anon_struct3
-
-    pitch2D : anon_struct4
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaResourceDesc *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._array = anon_struct1(_ptr=<void_ptr>self._ptr)
-        self._mipmap = anon_struct2(_ptr=<void_ptr>self._ptr)
-        self._linear = anon_struct3(_ptr=<void_ptr>self._ptr)
-        self._pitch2D = anon_struct4(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].res
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['array :\n' + '\n'.join(['    ' + line for line in str(self.array).splitlines()])]
-            except ValueError:
-                str_list += ['array : <ValueError>']
-            try:
-                str_list += ['mipmap :\n' + '\n'.join(['    ' + line for line in str(self.mipmap).splitlines()])]
-            except ValueError:
-                str_list += ['mipmap : <ValueError>']
-            try:
-                str_list += ['linear :\n' + '\n'.join(['    ' + line for line in str(self.linear).splitlines()])]
-            except ValueError:
-                str_list += ['linear : <ValueError>']
-            try:
-                str_list += ['pitch2D :\n' + '\n'.join(['    ' + line for line in str(self.pitch2D).splitlines()])]
-            except ValueError:
-                str_list += ['pitch2D : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def array(self):
-        return self._array
-    @array.setter
-    def array(self, array not None : anon_struct1):
-        string.memcpy(&self._ptr[0].res.array, <cyruntime.anon_struct1*><void_ptr>array.getPtr(), sizeof(self._ptr[0].res.array))
-    @property
-    def mipmap(self):
-        return self._mipmap
-    @mipmap.setter
-    def mipmap(self, mipmap not None : anon_struct2):
-        string.memcpy(&self._ptr[0].res.mipmap, <cyruntime.anon_struct2*><void_ptr>mipmap.getPtr(), sizeof(self._ptr[0].res.mipmap))
-    @property
-    def linear(self):
-        return self._linear
-    @linear.setter
-    def linear(self, linear not None : anon_struct3):
-        string.memcpy(&self._ptr[0].res.linear, <cyruntime.anon_struct3*><void_ptr>linear.getPtr(), sizeof(self._ptr[0].res.linear))
-    @property
-    def pitch2D(self):
-        return self._pitch2D
-    @pitch2D.setter
-    def pitch2D(self, pitch2D not None : anon_struct4):
-        string.memcpy(&self._ptr[0].res.pitch2D, <cyruntime.anon_struct4*><void_ptr>pitch2D.getPtr(), sizeof(self._ptr[0].res.pitch2D))
-{{endif}}
-{{if 'struct cudaResourceDesc' in found_types}}
-
-cdef class cudaResourceDesc:
-    """
-    CUDA resource descriptor
-
-    Attributes
-    ----------
-    resType : cudaResourceType
-        Resource type
-    res : anon_union0
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cyruntime.cudaResourceDesc *>calloc(1, sizeof(cyruntime.cudaResourceDesc))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cyruntime.cudaResourceDesc *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._res = anon_union0(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['resType : ' + str(self.resType)]
-            except ValueError:
-                str_list += ['resType : <ValueError>']
-            try:
-                str_list += ['res :\n' + '\n'.join(['    ' + line for line in str(self.res).splitlines()])]
-            except ValueError:
-                str_list += ['res : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def resType(self):
-        return cudaResourceType(self._ptr[0].resType)
-    @resType.setter
-    def resType(self, resType not None : cudaResourceType):
-        self._ptr[0].resType = resType.value
-    @property
-    def res(self):
-        return self._res
-    @res.setter
-    def res(self, res not None : anon_union0):
-        string.memcpy(&self._ptr[0].res, <cyruntime.anon_union0*><void_ptr>res.getPtr(), sizeof(self._ptr[0].res))
-{{endif}}
-{{if 'struct cudaResourceViewDesc' in found_types}}
-
-cdef class cudaResourceViewDesc:
-    """
-    CUDA resource view descriptor
-
-    Attributes
-    ----------
-    format : cudaResourceViewFormat
-        Resource view format
-    width : size_t
-        Width of the resource view
-    height : size_t
-        Height of the resource view
-    depth : size_t
-        Depth of the resource view
-    firstMipmapLevel : unsigned int
-        First defined mipmap level
-    lastMipmapLevel : unsigned int
-        Last defined mipmap level
-    firstLayer : unsigned int
-        First layer index
-    lastLayer : unsigned int
-        Last layer index
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaResourceViewDesc *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['format : ' + str(self.format)]
-            except ValueError:
-                str_list += ['format : <ValueError>']
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            try:
-                str_list += ['depth : ' + str(self.depth)]
-            except ValueError:
-                str_list += ['depth : <ValueError>']
-            try:
-                str_list += ['firstMipmapLevel : ' + str(self.firstMipmapLevel)]
-            except ValueError:
-                str_list += ['firstMipmapLevel : <ValueError>']
-            try:
-                str_list += ['lastMipmapLevel : ' + str(self.lastMipmapLevel)]
-            except ValueError:
-                str_list += ['lastMipmapLevel : <ValueError>']
-            try:
-                str_list += ['firstLayer : ' + str(self.firstLayer)]
-            except ValueError:
-                str_list += ['firstLayer : <ValueError>']
-            try:
-                str_list += ['lastLayer : ' + str(self.lastLayer)]
-            except ValueError:
-                str_list += ['lastLayer : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def format(self):
-        return cudaResourceViewFormat(self._ptr[0].format)
-    @format.setter
-    def format(self, format not None : cudaResourceViewFormat):
-        self._ptr[0].format = format.value
-    @property
-    def width(self):
-        return self._ptr[0].width
-    @width.setter
-    def width(self, size_t width):
-        self._ptr[0].width = width
-    @property
-    def height(self):
-        return self._ptr[0].height
-    @height.setter
-    def height(self, size_t height):
-        self._ptr[0].height = height
-    @property
-    def depth(self):
-        return self._ptr[0].depth
-    @depth.setter
-    def depth(self, size_t depth):
-        self._ptr[0].depth = depth
-    @property
-    def firstMipmapLevel(self):
-        return self._ptr[0].firstMipmapLevel
-    @firstMipmapLevel.setter
-    def firstMipmapLevel(self, unsigned int firstMipmapLevel):
-        self._ptr[0].firstMipmapLevel = firstMipmapLevel
-    @property
-    def lastMipmapLevel(self):
-        return self._ptr[0].lastMipmapLevel
-    @lastMipmapLevel.setter
-    def lastMipmapLevel(self, unsigned int lastMipmapLevel):
-        self._ptr[0].lastMipmapLevel = lastMipmapLevel
-    @property
-    def firstLayer(self):
-        return self._ptr[0].firstLayer
-    @firstLayer.setter
-    def firstLayer(self, unsigned int firstLayer):
-        self._ptr[0].firstLayer = firstLayer
-    @property
-    def lastLayer(self):
-        return self._ptr[0].lastLayer
-    @lastLayer.setter
-    def lastLayer(self, unsigned int lastLayer):
-        self._ptr[0].lastLayer = lastLayer
-{{endif}}
-{{if 'struct cudaPointerAttributes' in found_types}}
-
-cdef class cudaPointerAttributes:
-    """
-    CUDA pointer attributes
-
-    Attributes
-    ----------
-    type : cudaMemoryType
-        The type of memory - cudaMemoryTypeUnregistered,
-        cudaMemoryTypeHost, cudaMemoryTypeDevice or cudaMemoryTypeManaged.
-    device : int
-        The device against which the memory was allocated or registered. If
-        the memory type is cudaMemoryTypeDevice then this identifies the
-        device on which the memory referred physically resides. If the
-        memory type is cudaMemoryTypeHost or::cudaMemoryTypeManaged then
-        this identifies the device which was current when the memory was
-        allocated or registered (and if that device is deinitialized then
-        this allocation will vanish with that device's state).
-    devicePointer : Any
-        The address which may be dereferenced on the current device to
-        access the memory or NULL if no such address exists.
-    hostPointer : Any
-        The address which may be dereferenced on the host to access the
-        memory or NULL if no such address exists.  CUDA doesn't check if
-        unregistered memory is allocated so this field may contain invalid
-        pointer if an invalid pointer has been passed to CUDA.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaPointerAttributes *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['device : ' + str(self.device)]
-            except ValueError:
-                str_list += ['device : <ValueError>']
-            try:
-                str_list += ['devicePointer : ' + hex(self.devicePointer)]
-            except ValueError:
-                str_list += ['devicePointer : <ValueError>']
-            try:
-                str_list += ['hostPointer : ' + hex(self.hostPointer)]
-            except ValueError:
-                str_list += ['hostPointer : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return cudaMemoryType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : cudaMemoryType):
-        self._ptr[0].type = type.value
-    @property
-    def device(self):
-        return self._ptr[0].device
-    @device.setter
-    def device(self, int device):
-        self._ptr[0].device = device
-    @property
-    def devicePointer(self):
-        return <void_ptr>self._ptr[0].devicePointer
-    @devicePointer.setter
-    def devicePointer(self, devicePointer):
-        _cydevicePointer = utils.HelperInputVoidPtr(devicePointer)
-        self._ptr[0].devicePointer = <void*><void_ptr>_cydevicePointer.cptr
-    @property
-    def hostPointer(self):
-        return <void_ptr>self._ptr[0].hostPointer
-    @hostPointer.setter
-    def hostPointer(self, hostPointer):
-        _cyhostPointer = utils.HelperInputVoidPtr(hostPointer)
-        self._ptr[0].hostPointer = <void*><void_ptr>_cyhostPointer.cptr
-{{endif}}
-{{if 'struct cudaFuncAttributes' in found_types}}
-
-cdef class cudaFuncAttributes:
-    """
-    CUDA function attributes
-
-    Attributes
-    ----------
-    sharedSizeBytes : size_t
-        The size in bytes of statically-allocated shared memory per block
-        required by this function. This does not include dynamically-
-        allocated shared memory requested by the user at runtime.
-    constSizeBytes : size_t
-        The size in bytes of user-allocated constant memory required by
-        this function.
-    localSizeBytes : size_t
-        The size in bytes of local memory used by each thread of this
-        function.
-    maxThreadsPerBlock : int
-        The maximum number of threads per block, beyond which a launch of
-        the function would fail. This number depends on both the function
-        and the device on which the function is currently loaded.
-    numRegs : int
-        The number of registers used by each thread of this function.
-    ptxVersion : int
-        The PTX virtual architecture version for which the function was
-        compiled. This value is the major PTX version * 10 + the minor PTX
-        version, so a PTX version 1.3 function would return the value 13.
-    binaryVersion : int
-        The binary architecture version for which the function was
-        compiled. This value is the major binary version * 10 + the minor
-        binary version, so a binary version 1.3 function would return the
-        value 13.
-    cacheModeCA : int
-        The attribute to indicate whether the function has been compiled
-        with user specified option "-Xptxas --dlcm=ca" set.
-    maxDynamicSharedSizeBytes : int
-        The maximum size in bytes of dynamic shared memory per block for
-        this function. Any launch must have a dynamic shared memory size
-        smaller than this value.
-    preferredShmemCarveout : int
-        On devices where the L1 cache and shared memory use the same
-        hardware resources, this sets the shared memory carveout
-        preference, in percent of the maximum shared memory. Refer to
-        cudaDevAttrMaxSharedMemoryPerMultiprocessor. This is only a hint,
-        and the driver can choose a different ratio if required to execute
-        the function. See cudaFuncSetAttribute
-    clusterDimMustBeSet : int
-        If this attribute is set, the kernel must launch with a valid
-        cluster dimension specified.
-    requiredClusterWidth : int
-        The required cluster width/height/depth in blocks. The values must
-        either all be 0 or all be positive. The validity of the cluster
-        dimensions is otherwise checked at launch time.  If the value is
-        set during compile time, it cannot be set at runtime. Setting it at
-        runtime should return cudaErrorNotPermitted. See
-        cudaFuncSetAttribute
-    requiredClusterHeight : int
-
-    requiredClusterDepth : int
-
-    clusterSchedulingPolicyPreference : int
-        The block scheduling policy of a function. See cudaFuncSetAttribute
-    nonPortableClusterSizeAllowed : int
-        Whether the function can be launched with non-portable cluster
-        size. 1 is allowed, 0 is disallowed. A non-portable cluster size
-        may only function on the specific SKUs the program is tested on.
-        The launch might fail if the program is run on a different hardware
-        platform.  CUDA API provides cudaOccupancyMaxActiveClusters to
-        assist with checking whether the desired size can be launched on
-        the current device.  Portable Cluster Size  A portable cluster size
-        is guaranteed to be functional on all compute capabilities higher
-        than the target compute capability. The portable cluster size for
-        sm_90 is 8 blocks per cluster. This value may increase for future
-        compute capabilities.  The specific hardware unit may support
-        higher cluster sizes that’s not guaranteed to be portable. See
-        cudaFuncSetAttribute
-    reserved : List[int]
-        Reserved for future use.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaFuncAttributes *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['sharedSizeBytes : ' + str(self.sharedSizeBytes)]
-            except ValueError:
-                str_list += ['sharedSizeBytes : <ValueError>']
-            try:
-                str_list += ['constSizeBytes : ' + str(self.constSizeBytes)]
-            except ValueError:
-                str_list += ['constSizeBytes : <ValueError>']
-            try:
-                str_list += ['localSizeBytes : ' + str(self.localSizeBytes)]
-            except ValueError:
-                str_list += ['localSizeBytes : <ValueError>']
-            try:
-                str_list += ['maxThreadsPerBlock : ' + str(self.maxThreadsPerBlock)]
-            except ValueError:
-                str_list += ['maxThreadsPerBlock : <ValueError>']
-            try:
-                str_list += ['numRegs : ' + str(self.numRegs)]
-            except ValueError:
-                str_list += ['numRegs : <ValueError>']
-            try:
-                str_list += ['ptxVersion : ' + str(self.ptxVersion)]
-            except ValueError:
-                str_list += ['ptxVersion : <ValueError>']
-            try:
-                str_list += ['binaryVersion : ' + str(self.binaryVersion)]
-            except ValueError:
-                str_list += ['binaryVersion : <ValueError>']
-            try:
-                str_list += ['cacheModeCA : ' + str(self.cacheModeCA)]
-            except ValueError:
-                str_list += ['cacheModeCA : <ValueError>']
-            try:
-                str_list += ['maxDynamicSharedSizeBytes : ' + str(self.maxDynamicSharedSizeBytes)]
-            except ValueError:
-                str_list += ['maxDynamicSharedSizeBytes : <ValueError>']
-            try:
-                str_list += ['preferredShmemCarveout : ' + str(self.preferredShmemCarveout)]
-            except ValueError:
-                str_list += ['preferredShmemCarveout : <ValueError>']
-            try:
-                str_list += ['clusterDimMustBeSet : ' + str(self.clusterDimMustBeSet)]
-            except ValueError:
-                str_list += ['clusterDimMustBeSet : <ValueError>']
-            try:
-                str_list += ['requiredClusterWidth : ' + str(self.requiredClusterWidth)]
-            except ValueError:
-                str_list += ['requiredClusterWidth : <ValueError>']
-            try:
-                str_list += ['requiredClusterHeight : ' + str(self.requiredClusterHeight)]
-            except ValueError:
-                str_list += ['requiredClusterHeight : <ValueError>']
-            try:
-                str_list += ['requiredClusterDepth : ' + str(self.requiredClusterDepth)]
-            except ValueError:
-                str_list += ['requiredClusterDepth : <ValueError>']
-            try:
-                str_list += ['clusterSchedulingPolicyPreference : ' + str(self.clusterSchedulingPolicyPreference)]
-            except ValueError:
-                str_list += ['clusterSchedulingPolicyPreference : <ValueError>']
-            try:
-                str_list += ['nonPortableClusterSizeAllowed : ' + str(self.nonPortableClusterSizeAllowed)]
-            except ValueError:
-                str_list += ['nonPortableClusterSizeAllowed : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def sharedSizeBytes(self):
-        return self._ptr[0].sharedSizeBytes
-    @sharedSizeBytes.setter
-    def sharedSizeBytes(self, size_t sharedSizeBytes):
-        self._ptr[0].sharedSizeBytes = sharedSizeBytes
-    @property
-    def constSizeBytes(self):
-        return self._ptr[0].constSizeBytes
-    @constSizeBytes.setter
-    def constSizeBytes(self, size_t constSizeBytes):
-        self._ptr[0].constSizeBytes = constSizeBytes
-    @property
-    def localSizeBytes(self):
-        return self._ptr[0].localSizeBytes
-    @localSizeBytes.setter
-    def localSizeBytes(self, size_t localSizeBytes):
-        self._ptr[0].localSizeBytes = localSizeBytes
-    @property
-    def maxThreadsPerBlock(self):
-        return self._ptr[0].maxThreadsPerBlock
-    @maxThreadsPerBlock.setter
-    def maxThreadsPerBlock(self, int maxThreadsPerBlock):
-        self._ptr[0].maxThreadsPerBlock = maxThreadsPerBlock
-    @property
-    def numRegs(self):
-        return self._ptr[0].numRegs
-    @numRegs.setter
-    def numRegs(self, int numRegs):
-        self._ptr[0].numRegs = numRegs
-    @property
-    def ptxVersion(self):
-        return self._ptr[0].ptxVersion
-    @ptxVersion.setter
-    def ptxVersion(self, int ptxVersion):
-        self._ptr[0].ptxVersion = ptxVersion
-    @property
-    def binaryVersion(self):
-        return self._ptr[0].binaryVersion
-    @binaryVersion.setter
-    def binaryVersion(self, int binaryVersion):
-        self._ptr[0].binaryVersion = binaryVersion
-    @property
-    def cacheModeCA(self):
-        return self._ptr[0].cacheModeCA
-    @cacheModeCA.setter
-    def cacheModeCA(self, int cacheModeCA):
-        self._ptr[0].cacheModeCA = cacheModeCA
-    @property
-    def maxDynamicSharedSizeBytes(self):
-        return self._ptr[0].maxDynamicSharedSizeBytes
-    @maxDynamicSharedSizeBytes.setter
-    def maxDynamicSharedSizeBytes(self, int maxDynamicSharedSizeBytes):
-        self._ptr[0].maxDynamicSharedSizeBytes = maxDynamicSharedSizeBytes
-    @property
-    def preferredShmemCarveout(self):
-        return self._ptr[0].preferredShmemCarveout
-    @preferredShmemCarveout.setter
-    def preferredShmemCarveout(self, int preferredShmemCarveout):
-        self._ptr[0].preferredShmemCarveout = preferredShmemCarveout
-    @property
-    def clusterDimMustBeSet(self):
-        return self._ptr[0].clusterDimMustBeSet
-    @clusterDimMustBeSet.setter
-    def clusterDimMustBeSet(self, int clusterDimMustBeSet):
-        self._ptr[0].clusterDimMustBeSet = clusterDimMustBeSet
-    @property
-    def requiredClusterWidth(self):
-        return self._ptr[0].requiredClusterWidth
-    @requiredClusterWidth.setter
-    def requiredClusterWidth(self, int requiredClusterWidth):
-        self._ptr[0].requiredClusterWidth = requiredClusterWidth
-    @property
-    def requiredClusterHeight(self):
-        return self._ptr[0].requiredClusterHeight
-    @requiredClusterHeight.setter
-    def requiredClusterHeight(self, int requiredClusterHeight):
-        self._ptr[0].requiredClusterHeight = requiredClusterHeight
-    @property
-    def requiredClusterDepth(self):
-        return self._ptr[0].requiredClusterDepth
-    @requiredClusterDepth.setter
-    def requiredClusterDepth(self, int requiredClusterDepth):
-        self._ptr[0].requiredClusterDepth = requiredClusterDepth
-    @property
-    def clusterSchedulingPolicyPreference(self):
-        return self._ptr[0].clusterSchedulingPolicyPreference
-    @clusterSchedulingPolicyPreference.setter
-    def clusterSchedulingPolicyPreference(self, int clusterSchedulingPolicyPreference):
-        self._ptr[0].clusterSchedulingPolicyPreference = clusterSchedulingPolicyPreference
-    @property
-    def nonPortableClusterSizeAllowed(self):
-        return self._ptr[0].nonPortableClusterSizeAllowed
-    @nonPortableClusterSizeAllowed.setter
-    def nonPortableClusterSizeAllowed(self, int nonPortableClusterSizeAllowed):
-        self._ptr[0].nonPortableClusterSizeAllowed = nonPortableClusterSizeAllowed
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct cudaMemLocation' in found_types}}
-
-cdef class cudaMemLocation:
-    """
-    Specifies a memory location.  To specify a gpu, set type =
-    cudaMemLocationTypeDevice and set id = the gpu's device ordinal. To
-    specify a cpu NUMA node, set type = cudaMemLocationTypeHostNuma and
-    set id = host NUMA node id.
-
-    Attributes
-    ----------
-    type : cudaMemLocationType
-        Specifies the location type, which modifies the meaning of id.
-    id : int
-        identifier for a given this location's ::CUmemLocationType.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemLocation *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['id : ' + str(self.id)]
-            except ValueError:
-                str_list += ['id : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return cudaMemLocationType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : cudaMemLocationType):
-        self._ptr[0].type = type.value
-    @property
-    def id(self):
-        return self._ptr[0].id
-    @id.setter
-    def id(self, int id):
-        self._ptr[0].id = id
-{{endif}}
-{{if 'struct cudaMemAccessDesc' in found_types}}
-
-cdef class cudaMemAccessDesc:
-    """
-    Memory access descriptor
-
-    Attributes
-    ----------
-    location : cudaMemLocation
-        Location on which the request is to change it's accessibility
-    flags : cudaMemAccessFlags
-        ::CUmemProt accessibility flags to set on the request
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemAccessDesc *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._location = cudaMemLocation(_ptr=<void_ptr>&self._ptr[0].location)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['location :\n' + '\n'.join(['    ' + line for line in str(self.location).splitlines()])]
-            except ValueError:
-                str_list += ['location : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def location(self):
-        return self._location
-    @location.setter
-    def location(self, location not None : cudaMemLocation):
-        string.memcpy(&self._ptr[0].location, <cyruntime.cudaMemLocation*><void_ptr>location.getPtr(), sizeof(self._ptr[0].location))
-    @property
-    def flags(self):
-        return cudaMemAccessFlags(self._ptr[0].flags)
-    @flags.setter
-    def flags(self, flags not None : cudaMemAccessFlags):
-        self._ptr[0].flags = flags.value
-{{endif}}
-{{if 'struct cudaMemPoolProps' in found_types}}
-
-cdef class cudaMemPoolProps:
-    """
-    Specifies the properties of allocations made from the pool.
-
-    Attributes
-    ----------
-    allocType : cudaMemAllocationType
-        Allocation type. Currently must be specified as
-        cudaMemAllocationTypePinned
-    handleTypes : cudaMemAllocationHandleType
-        Handle types that will be supported by allocations from the pool.
-    location : cudaMemLocation
-        Location allocations should reside.
-    win32SecurityAttributes : Any
-        Windows-specific LPSECURITYATTRIBUTES required when
-        cudaMemHandleTypeWin32 is specified. This security attribute
-        defines the scope of which exported allocations may be tranferred
-        to other processes. In all other cases, this field is required to
-        be zero.
-    maxSize : size_t
-        Maximum pool size. When set to 0, defaults to a system dependent
-        value.
-    usage : unsigned short
-        Bitmask indicating intended usage for the pool.
-    reserved : bytes
-        reserved for future use, must be 0
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemPoolProps *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._location = cudaMemLocation(_ptr=<void_ptr>&self._ptr[0].location)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['allocType : ' + str(self.allocType)]
-            except ValueError:
-                str_list += ['allocType : <ValueError>']
-            try:
-                str_list += ['handleTypes : ' + str(self.handleTypes)]
-            except ValueError:
-                str_list += ['handleTypes : <ValueError>']
-            try:
-                str_list += ['location :\n' + '\n'.join(['    ' + line for line in str(self.location).splitlines()])]
-            except ValueError:
-                str_list += ['location : <ValueError>']
-            try:
-                str_list += ['win32SecurityAttributes : ' + hex(self.win32SecurityAttributes)]
-            except ValueError:
-                str_list += ['win32SecurityAttributes : <ValueError>']
-            try:
-                str_list += ['maxSize : ' + str(self.maxSize)]
-            except ValueError:
-                str_list += ['maxSize : <ValueError>']
-            try:
-                str_list += ['usage : ' + str(self.usage)]
-            except ValueError:
-                str_list += ['usage : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def allocType(self):
-        return cudaMemAllocationType(self._ptr[0].allocType)
-    @allocType.setter
-    def allocType(self, allocType not None : cudaMemAllocationType):
-        self._ptr[0].allocType = allocType.value
-    @property
-    def handleTypes(self):
-        return cudaMemAllocationHandleType(self._ptr[0].handleTypes)
-    @handleTypes.setter
-    def handleTypes(self, handleTypes not None : cudaMemAllocationHandleType):
-        self._ptr[0].handleTypes = handleTypes.value
-    @property
-    def location(self):
-        return self._location
-    @location.setter
-    def location(self, location not None : cudaMemLocation):
-        string.memcpy(&self._ptr[0].location, <cyruntime.cudaMemLocation*><void_ptr>location.getPtr(), sizeof(self._ptr[0].location))
-    @property
-    def win32SecurityAttributes(self):
-        return <void_ptr>self._ptr[0].win32SecurityAttributes
-    @win32SecurityAttributes.setter
-    def win32SecurityAttributes(self, win32SecurityAttributes):
-        _cywin32SecurityAttributes = utils.HelperInputVoidPtr(win32SecurityAttributes)
-        self._ptr[0].win32SecurityAttributes = <void*><void_ptr>_cywin32SecurityAttributes.cptr
-    @property
-    def maxSize(self):
-        return self._ptr[0].maxSize
-    @maxSize.setter
-    def maxSize(self, size_t maxSize):
-        self._ptr[0].maxSize = maxSize
-    @property
-    def usage(self):
-        return self._ptr[0].usage
-    @usage.setter
-    def usage(self, unsigned short usage):
-        self._ptr[0].usage = usage
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(<char*>self._ptr[0].reserved, 54)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 54:
-            raise ValueError("reserved length must be 54, is " + str(len(reserved)))
-        for i, b in enumerate(reserved):
-            self._ptr[0].reserved[i] = b
-{{endif}}
-{{if 'struct cudaMemPoolPtrExportData' in found_types}}
-
-cdef class cudaMemPoolPtrExportData:
-    """
-    Opaque data for exporting a pool allocation
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemPoolPtrExportData *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(<char*>self._ptr[0].reserved, 64)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 64:
-            raise ValueError("reserved length must be 64, is " + str(len(reserved)))
-        for i, b in enumerate(reserved):
-            self._ptr[0].reserved[i] = b
-{{endif}}
-{{if 'struct cudaMemAllocNodeParams' in found_types}}
-
-cdef class cudaMemAllocNodeParams:
-    """
-    Memory allocation node parameters
-
-    Attributes
-    ----------
-    poolProps : cudaMemPoolProps
-        in: location where the allocation should reside (specified in
-        ::location). ::handleTypes must be cudaMemHandleTypeNone. IPC is
-        not supported. in: array of memory access descriptors. Used to
-        describe peer GPU access
-    accessDescs : cudaMemAccessDesc
-        in: number of memory access descriptors. Must not exceed the number
-        of GPUs.
-    accessDescCount : size_t
-        in: Number of `accessDescs`s
-    bytesize : size_t
-        in: size in bytes of the requested allocation
-    dptr : Any
-        out: address of the allocation returned by CUDA
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemAllocNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._poolProps = cudaMemPoolProps(_ptr=<void_ptr>&self._ptr[0].poolProps)
-    def __dealloc__(self):
-        if self._accessDescs is not NULL:
-            free(self._accessDescs)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['poolProps :\n' + '\n'.join(['    ' + line for line in str(self.poolProps).splitlines()])]
-            except ValueError:
-                str_list += ['poolProps : <ValueError>']
-            try:
-                str_list += ['accessDescs : ' + str(self.accessDescs)]
-            except ValueError:
-                str_list += ['accessDescs : <ValueError>']
-            try:
-                str_list += ['accessDescCount : ' + str(self.accessDescCount)]
-            except ValueError:
-                str_list += ['accessDescCount : <ValueError>']
-            try:
-                str_list += ['bytesize : ' + str(self.bytesize)]
-            except ValueError:
-                str_list += ['bytesize : <ValueError>']
-            try:
-                str_list += ['dptr : ' + hex(self.dptr)]
-            except ValueError:
-                str_list += ['dptr : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def poolProps(self):
-        return self._poolProps
-    @poolProps.setter
-    def poolProps(self, poolProps not None : cudaMemPoolProps):
-        string.memcpy(&self._ptr[0].poolProps, <cyruntime.cudaMemPoolProps*><void_ptr>poolProps.getPtr(), sizeof(self._ptr[0].poolProps))
-    @property
-    def accessDescs(self):
-        arrs = [<void_ptr>self._ptr[0].accessDescs + x*sizeof(cyruntime.cudaMemAccessDesc) for x in range(self._accessDescs_length)]
-        return [cudaMemAccessDesc(_ptr=arr) for arr in arrs]
-    @accessDescs.setter
-    def accessDescs(self, val):
-        if len(val) == 0:
-            free(self._accessDescs)
-            self._accessDescs_length = 0
-            self._ptr[0].accessDescs = NULL
-        else:
-            if self._accessDescs_length != <size_t>len(val):
-                free(self._accessDescs)
-                self._accessDescs = <cyruntime.cudaMemAccessDesc*> calloc(len(val), sizeof(cyruntime.cudaMemAccessDesc))
-                if self._accessDescs is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc)))
-                self._accessDescs_length = <size_t>len(val)
-                self._ptr[0].accessDescs = self._accessDescs
-            for idx in range(len(val)):
-                string.memcpy(&self._accessDescs[idx], (<cudaMemAccessDesc>val[idx])._ptr, sizeof(cyruntime.cudaMemAccessDesc))
-
-    @property
-    def accessDescCount(self):
-        return self._ptr[0].accessDescCount
-    @accessDescCount.setter
-    def accessDescCount(self, size_t accessDescCount):
-        self._ptr[0].accessDescCount = accessDescCount
-    @property
-    def bytesize(self):
-        return self._ptr[0].bytesize
-    @bytesize.setter
-    def bytesize(self, size_t bytesize):
-        self._ptr[0].bytesize = bytesize
-    @property
-    def dptr(self):
-        return <void_ptr>self._ptr[0].dptr
-    @dptr.setter
-    def dptr(self, dptr):
-        _cydptr = utils.HelperInputVoidPtr(dptr)
-        self._ptr[0].dptr = <void*><void_ptr>_cydptr.cptr
-{{endif}}
-{{if 'struct cudaMemAllocNodeParamsV2' in found_types}}
-
-cdef class cudaMemAllocNodeParamsV2:
-    """
-    Memory allocation node parameters
-
-    Attributes
-    ----------
-    poolProps : cudaMemPoolProps
-        in: location where the allocation should reside (specified in
-        ::location). ::handleTypes must be cudaMemHandleTypeNone. IPC is
-        not supported. in: array of memory access descriptors. Used to
-        describe peer GPU access
-    accessDescs : cudaMemAccessDesc
-        in: number of memory access descriptors. Must not exceed the number
-        of GPUs.
-    accessDescCount : size_t
-        in: Number of `accessDescs`s
-    bytesize : size_t
-        in: size in bytes of the requested allocation
-    dptr : Any
-        out: address of the allocation returned by CUDA
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemAllocNodeParamsV2 *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._poolProps = cudaMemPoolProps(_ptr=<void_ptr>&self._ptr[0].poolProps)
-    def __dealloc__(self):
-        if self._accessDescs is not NULL:
-            free(self._accessDescs)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['poolProps :\n' + '\n'.join(['    ' + line for line in str(self.poolProps).splitlines()])]
-            except ValueError:
-                str_list += ['poolProps : <ValueError>']
-            try:
-                str_list += ['accessDescs : ' + str(self.accessDescs)]
-            except ValueError:
-                str_list += ['accessDescs : <ValueError>']
-            try:
-                str_list += ['accessDescCount : ' + str(self.accessDescCount)]
-            except ValueError:
-                str_list += ['accessDescCount : <ValueError>']
-            try:
-                str_list += ['bytesize : ' + str(self.bytesize)]
-            except ValueError:
-                str_list += ['bytesize : <ValueError>']
-            try:
-                str_list += ['dptr : ' + hex(self.dptr)]
-            except ValueError:
-                str_list += ['dptr : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def poolProps(self):
-        return self._poolProps
-    @poolProps.setter
-    def poolProps(self, poolProps not None : cudaMemPoolProps):
-        string.memcpy(&self._ptr[0].poolProps, <cyruntime.cudaMemPoolProps*><void_ptr>poolProps.getPtr(), sizeof(self._ptr[0].poolProps))
-    @property
-    def accessDescs(self):
-        arrs = [<void_ptr>self._ptr[0].accessDescs + x*sizeof(cyruntime.cudaMemAccessDesc) for x in range(self._accessDescs_length)]
-        return [cudaMemAccessDesc(_ptr=arr) for arr in arrs]
-    @accessDescs.setter
-    def accessDescs(self, val):
-        if len(val) == 0:
-            free(self._accessDescs)
-            self._accessDescs_length = 0
-            self._ptr[0].accessDescs = NULL
-        else:
-            if self._accessDescs_length != <size_t>len(val):
-                free(self._accessDescs)
-                self._accessDescs = <cyruntime.cudaMemAccessDesc*> calloc(len(val), sizeof(cyruntime.cudaMemAccessDesc))
-                if self._accessDescs is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc)))
-                self._accessDescs_length = <size_t>len(val)
-                self._ptr[0].accessDescs = self._accessDescs
-            for idx in range(len(val)):
-                string.memcpy(&self._accessDescs[idx], (<cudaMemAccessDesc>val[idx])._ptr, sizeof(cyruntime.cudaMemAccessDesc))
-
-    @property
-    def accessDescCount(self):
-        return self._ptr[0].accessDescCount
-    @accessDescCount.setter
-    def accessDescCount(self, size_t accessDescCount):
-        self._ptr[0].accessDescCount = accessDescCount
-    @property
-    def bytesize(self):
-        return self._ptr[0].bytesize
-    @bytesize.setter
-    def bytesize(self, size_t bytesize):
-        self._ptr[0].bytesize = bytesize
-    @property
-    def dptr(self):
-        return <void_ptr>self._ptr[0].dptr
-    @dptr.setter
-    def dptr(self, dptr):
-        _cydptr = utils.HelperInputVoidPtr(dptr)
-        self._ptr[0].dptr = <void*><void_ptr>_cydptr.cptr
-{{endif}}
-{{if 'struct cudaMemFreeNodeParams' in found_types}}
-
-cdef class cudaMemFreeNodeParams:
-    """
-    Memory free node parameters
-
-    Attributes
-    ----------
-    dptr : Any
-        in: the pointer to free
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemFreeNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['dptr : ' + hex(self.dptr)]
-            except ValueError:
-                str_list += ['dptr : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def dptr(self):
-        return <void_ptr>self._ptr[0].dptr
-    @dptr.setter
-    def dptr(self, dptr):
-        _cydptr = utils.HelperInputVoidPtr(dptr)
-        self._ptr[0].dptr = <void*><void_ptr>_cydptr.cptr
-{{endif}}
-{{if 'struct CUuuid_st' in found_types}}
-
-cdef class CUuuid_st:
-    """
-    Attributes
-    ----------
-    bytes : bytes
-        < CUDA definition of UUID
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.CUuuid_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['bytes : ' + str(self.bytes.hex())]
-            except ValueError:
-                str_list += ['bytes : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def bytes(self):
-        return PyBytes_FromStringAndSize(self._ptr[0].bytes, 16)
-
-{{endif}}
-{{if 'struct cudaDeviceProp' in found_types}}
-
-cdef class cudaDeviceProp:
-    """
-    CUDA device properties
-
-    Attributes
-    ----------
-    name : bytes
-        ASCII string identifying device
-    uuid : cudaUUID_t
-        16-byte unique identifier
-    luid : bytes
-        8-byte locally unique identifier. Value is undefined on TCC and
-        non-Windows platforms
-    luidDeviceNodeMask : unsigned int
-        LUID device node mask. Value is undefined on TCC and non-Windows
-        platforms
-    totalGlobalMem : size_t
-        Global memory available on device in bytes
-    sharedMemPerBlock : size_t
-        Shared memory available per block in bytes
-    regsPerBlock : int
-        32-bit registers available per block
-    warpSize : int
-        Warp size in threads
-    memPitch : size_t
-        Maximum pitch in bytes allowed by memory copies
-    maxThreadsPerBlock : int
-        Maximum number of threads per block
-    maxThreadsDim : List[int]
-        Maximum size of each dimension of a block
-    maxGridSize : List[int]
-        Maximum size of each dimension of a grid
-    clockRate : int
-        Deprecated, Clock frequency in kilohertz
-    totalConstMem : size_t
-        Constant memory available on device in bytes
-    major : int
-        Major compute capability
-    minor : int
-        Minor compute capability
-    textureAlignment : size_t
-        Alignment requirement for textures
-    texturePitchAlignment : size_t
-        Pitch alignment requirement for texture references bound to pitched
-        memory
-    deviceOverlap : int
-        Device can concurrently copy memory and execute a kernel.
-        Deprecated. Use instead asyncEngineCount.
-    multiProcessorCount : int
-        Number of multiprocessors on device
-    kernelExecTimeoutEnabled : int
-        Deprecated, Specified whether there is a run time limit on kernels
-    integrated : int
-        Device is integrated as opposed to discrete
-    canMapHostMemory : int
-        Device can map host memory with
-        cudaHostAlloc/cudaHostGetDevicePointer
-    computeMode : int
-        Deprecated, Compute mode (See cudaComputeMode)
-    maxTexture1D : int
-        Maximum 1D texture size
-    maxTexture1DMipmap : int
-        Maximum 1D mipmapped texture size
-    maxTexture1DLinear : int
-        Deprecated, do not use. Use cudaDeviceGetTexture1DLinearMaxWidth()
-        or cuDeviceGetTexture1DLinearMaxWidth() instead.
-    maxTexture2D : List[int]
-        Maximum 2D texture dimensions
-    maxTexture2DMipmap : List[int]
-        Maximum 2D mipmapped texture dimensions
-    maxTexture2DLinear : List[int]
-        Maximum dimensions (width, height, pitch) for 2D textures bound to
-        pitched memory
-    maxTexture2DGather : List[int]
-        Maximum 2D texture dimensions if texture gather operations have to
-        be performed
-    maxTexture3D : List[int]
-        Maximum 3D texture dimensions
-    maxTexture3DAlt : List[int]
-        Maximum alternate 3D texture dimensions
-    maxTextureCubemap : int
-        Maximum Cubemap texture dimensions
-    maxTexture1DLayered : List[int]
-        Maximum 1D layered texture dimensions
-    maxTexture2DLayered : List[int]
-        Maximum 2D layered texture dimensions
-    maxTextureCubemapLayered : List[int]
-        Maximum Cubemap layered texture dimensions
-    maxSurface1D : int
-        Maximum 1D surface size
-    maxSurface2D : List[int]
-        Maximum 2D surface dimensions
-    maxSurface3D : List[int]
-        Maximum 3D surface dimensions
-    maxSurface1DLayered : List[int]
-        Maximum 1D layered surface dimensions
-    maxSurface2DLayered : List[int]
-        Maximum 2D layered surface dimensions
-    maxSurfaceCubemap : int
-        Maximum Cubemap surface dimensions
-    maxSurfaceCubemapLayered : List[int]
-        Maximum Cubemap layered surface dimensions
-    surfaceAlignment : size_t
-        Alignment requirements for surfaces
-    concurrentKernels : int
-        Device can possibly execute multiple kernels concurrently
-    ECCEnabled : int
-        Device has ECC support enabled
-    pciBusID : int
-        PCI bus ID of the device
-    pciDeviceID : int
-        PCI device ID of the device
-    pciDomainID : int
-        PCI domain ID of the device
-    tccDriver : int
-        1 if device is a Tesla device using TCC driver, 0 otherwise
-    asyncEngineCount : int
-        Number of asynchronous engines
-    unifiedAddressing : int
-        Device shares a unified address space with the host
-    memoryClockRate : int
-        Deprecated, Peak memory clock frequency in kilohertz
-    memoryBusWidth : int
-        Global memory bus width in bits
-    l2CacheSize : int
-        Size of L2 cache in bytes
-    persistingL2CacheMaxSize : int
-        Device's maximum l2 persisting lines capacity setting in bytes
-    maxThreadsPerMultiProcessor : int
-        Maximum resident threads per multiprocessor
-    streamPrioritiesSupported : int
-        Device supports stream priorities
-    globalL1CacheSupported : int
-        Device supports caching globals in L1
-    localL1CacheSupported : int
-        Device supports caching locals in L1
-    sharedMemPerMultiprocessor : size_t
-        Shared memory available per multiprocessor in bytes
-    regsPerMultiprocessor : int
-        32-bit registers available per multiprocessor
-    managedMemory : int
-        Device supports allocating managed memory on this system
-    isMultiGpuBoard : int
-        Device is on a multi-GPU board
-    multiGpuBoardGroupID : int
-        Unique identifier for a group of devices on the same multi-GPU
-        board
-    hostNativeAtomicSupported : int
-        Link between the device and the host supports native atomic
-        operations
-    singleToDoublePrecisionPerfRatio : int
-        Deprecated, Ratio of single precision performance (in floating-
-        point operations per second) to double precision performance
-    pageableMemoryAccess : int
-        Device supports coherently accessing pageable memory without
-        calling cudaHostRegister on it
-    concurrentManagedAccess : int
-        Device can coherently access managed memory concurrently with the
-        CPU
-    computePreemptionSupported : int
-        Device supports Compute Preemption
-    canUseHostPointerForRegisteredMem : int
-        Device can access host registered memory at the same virtual
-        address as the CPU
-    cooperativeLaunch : int
-        Device supports launching cooperative kernels via
-        cudaLaunchCooperativeKernel
-    cooperativeMultiDeviceLaunch : int
-        Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated.
-    sharedMemPerBlockOptin : size_t
-        Per device maximum shared memory per block usable by special opt in
-    pageableMemoryAccessUsesHostPageTables : int
-        Device accesses pageable memory via the host's page tables
-    directManagedMemAccessFromHost : int
-        Host can directly access managed memory on the device without
-        migration.
-    maxBlocksPerMultiProcessor : int
-        Maximum number of resident blocks per multiprocessor
-    accessPolicyMaxWindowSize : int
-        The maximum value of cudaAccessPolicyWindow::num_bytes.
-    reservedSharedMemPerBlock : size_t
-        Shared memory reserved by CUDA driver per block in bytes
-    hostRegisterSupported : int
-        Device supports host memory registration via cudaHostRegister.
-    sparseCudaArraySupported : int
-        1 if the device supports sparse CUDA arrays and sparse CUDA
-        mipmapped arrays, 0 otherwise
-    hostRegisterReadOnlySupported : int
-        Device supports using the cudaHostRegister flag
-        cudaHostRegisterReadOnly to register memory that must be mapped as
-        read-only to the GPU
-    timelineSemaphoreInteropSupported : int
-        External timeline semaphore interop is supported on the device
-    memoryPoolsSupported : int
-        1 if the device supports using the cudaMallocAsync and cudaMemPool
-        family of APIs, 0 otherwise
-    gpuDirectRDMASupported : int
-        1 if the device supports GPUDirect RDMA APIs, 0 otherwise
-    gpuDirectRDMAFlushWritesOptions : unsigned int
-        Bitmask to be interpreted according to the
-        cudaFlushGPUDirectRDMAWritesOptions enum
-    gpuDirectRDMAWritesOrdering : int
-        See the cudaGPUDirectRDMAWritesOrdering enum for numerical values
-    memoryPoolSupportedHandleTypes : unsigned int
-        Bitmask of handle types supported with mempool-based IPC
-    deferredMappingCudaArraySupported : int
-        1 if the device supports deferred mapping CUDA arrays and CUDA
-        mipmapped arrays
-    ipcEventSupported : int
-        Device supports IPC Events.
-    clusterLaunch : int
-        Indicates device supports cluster launch
-    unifiedFunctionPointers : int
-        Indicates device supports unified pointers
-    reserved2 : List[int]
-
-    reserved1 : List[int]
-        Reserved for future use
-    reserved : List[int]
-        Reserved for future use
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaDeviceProp *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._uuid = cudaUUID_t(_ptr=<void_ptr>&self._ptr[0].uuid)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['name : ' + self.name.decode('utf-8')]
-            except ValueError:
-                str_list += ['name : <ValueError>']
-            try:
-                str_list += ['uuid :\n' + '\n'.join(['    ' + line for line in str(self.uuid).splitlines()])]
-            except ValueError:
-                str_list += ['uuid : <ValueError>']
-            try:
-                str_list += ['luid : ' + self.luid.hex()]
-            except ValueError:
-                str_list += ['luid : <ValueError>']
-            try:
-                str_list += ['luidDeviceNodeMask : ' + str(self.luidDeviceNodeMask)]
-            except ValueError:
-                str_list += ['luidDeviceNodeMask : <ValueError>']
-            try:
-                str_list += ['totalGlobalMem : ' + str(self.totalGlobalMem)]
-            except ValueError:
-                str_list += ['totalGlobalMem : <ValueError>']
-            try:
-                str_list += ['sharedMemPerBlock : ' + str(self.sharedMemPerBlock)]
-            except ValueError:
-                str_list += ['sharedMemPerBlock : <ValueError>']
-            try:
-                str_list += ['regsPerBlock : ' + str(self.regsPerBlock)]
-            except ValueError:
-                str_list += ['regsPerBlock : <ValueError>']
-            try:
-                str_list += ['warpSize : ' + str(self.warpSize)]
-            except ValueError:
-                str_list += ['warpSize : <ValueError>']
-            try:
-                str_list += ['memPitch : ' + str(self.memPitch)]
-            except ValueError:
-                str_list += ['memPitch : <ValueError>']
-            try:
-                str_list += ['maxThreadsPerBlock : ' + str(self.maxThreadsPerBlock)]
-            except ValueError:
-                str_list += ['maxThreadsPerBlock : <ValueError>']
-            try:
-                str_list += ['maxThreadsDim : ' + str(self.maxThreadsDim)]
-            except ValueError:
-                str_list += ['maxThreadsDim : <ValueError>']
-            try:
-                str_list += ['maxGridSize : ' + str(self.maxGridSize)]
-            except ValueError:
-                str_list += ['maxGridSize : <ValueError>']
-            try:
-                str_list += ['clockRate : ' + str(self.clockRate)]
-            except ValueError:
-                str_list += ['clockRate : <ValueError>']
-            try:
-                str_list += ['totalConstMem : ' + str(self.totalConstMem)]
-            except ValueError:
-                str_list += ['totalConstMem : <ValueError>']
-            try:
-                str_list += ['major : ' + str(self.major)]
-            except ValueError:
-                str_list += ['major : <ValueError>']
-            try:
-                str_list += ['minor : ' + str(self.minor)]
-            except ValueError:
-                str_list += ['minor : <ValueError>']
-            try:
-                str_list += ['textureAlignment : ' + str(self.textureAlignment)]
-            except ValueError:
-                str_list += ['textureAlignment : <ValueError>']
-            try:
-                str_list += ['texturePitchAlignment : ' + str(self.texturePitchAlignment)]
-            except ValueError:
-                str_list += ['texturePitchAlignment : <ValueError>']
-            try:
-                str_list += ['deviceOverlap : ' + str(self.deviceOverlap)]
-            except ValueError:
-                str_list += ['deviceOverlap : <ValueError>']
-            try:
-                str_list += ['multiProcessorCount : ' + str(self.multiProcessorCount)]
-            except ValueError:
-                str_list += ['multiProcessorCount : <ValueError>']
-            try:
-                str_list += ['kernelExecTimeoutEnabled : ' + str(self.kernelExecTimeoutEnabled)]
-            except ValueError:
-                str_list += ['kernelExecTimeoutEnabled : <ValueError>']
-            try:
-                str_list += ['integrated : ' + str(self.integrated)]
-            except ValueError:
-                str_list += ['integrated : <ValueError>']
-            try:
-                str_list += ['canMapHostMemory : ' + str(self.canMapHostMemory)]
-            except ValueError:
-                str_list += ['canMapHostMemory : <ValueError>']
-            try:
-                str_list += ['computeMode : ' + str(self.computeMode)]
-            except ValueError:
-                str_list += ['computeMode : <ValueError>']
-            try:
-                str_list += ['maxTexture1D : ' + str(self.maxTexture1D)]
-            except ValueError:
-                str_list += ['maxTexture1D : <ValueError>']
-            try:
-                str_list += ['maxTexture1DMipmap : ' + str(self.maxTexture1DMipmap)]
-            except ValueError:
-                str_list += ['maxTexture1DMipmap : <ValueError>']
-            try:
-                str_list += ['maxTexture1DLinear : ' + str(self.maxTexture1DLinear)]
-            except ValueError:
-                str_list += ['maxTexture1DLinear : <ValueError>']
-            try:
-                str_list += ['maxTexture2D : ' + str(self.maxTexture2D)]
-            except ValueError:
-                str_list += ['maxTexture2D : <ValueError>']
-            try:
-                str_list += ['maxTexture2DMipmap : ' + str(self.maxTexture2DMipmap)]
-            except ValueError:
-                str_list += ['maxTexture2DMipmap : <ValueError>']
-            try:
-                str_list += ['maxTexture2DLinear : ' + str(self.maxTexture2DLinear)]
-            except ValueError:
-                str_list += ['maxTexture2DLinear : <ValueError>']
-            try:
-                str_list += ['maxTexture2DGather : ' + str(self.maxTexture2DGather)]
-            except ValueError:
-                str_list += ['maxTexture2DGather : <ValueError>']
-            try:
-                str_list += ['maxTexture3D : ' + str(self.maxTexture3D)]
-            except ValueError:
-                str_list += ['maxTexture3D : <ValueError>']
-            try:
-                str_list += ['maxTexture3DAlt : ' + str(self.maxTexture3DAlt)]
-            except ValueError:
-                str_list += ['maxTexture3DAlt : <ValueError>']
-            try:
-                str_list += ['maxTextureCubemap : ' + str(self.maxTextureCubemap)]
-            except ValueError:
-                str_list += ['maxTextureCubemap : <ValueError>']
-            try:
-                str_list += ['maxTexture1DLayered : ' + str(self.maxTexture1DLayered)]
-            except ValueError:
-                str_list += ['maxTexture1DLayered : <ValueError>']
-            try:
-                str_list += ['maxTexture2DLayered : ' + str(self.maxTexture2DLayered)]
-            except ValueError:
-                str_list += ['maxTexture2DLayered : <ValueError>']
-            try:
-                str_list += ['maxTextureCubemapLayered : ' + str(self.maxTextureCubemapLayered)]
-            except ValueError:
-                str_list += ['maxTextureCubemapLayered : <ValueError>']
-            try:
-                str_list += ['maxSurface1D : ' + str(self.maxSurface1D)]
-            except ValueError:
-                str_list += ['maxSurface1D : <ValueError>']
-            try:
-                str_list += ['maxSurface2D : ' + str(self.maxSurface2D)]
-            except ValueError:
-                str_list += ['maxSurface2D : <ValueError>']
-            try:
-                str_list += ['maxSurface3D : ' + str(self.maxSurface3D)]
-            except ValueError:
-                str_list += ['maxSurface3D : <ValueError>']
-            try:
-                str_list += ['maxSurface1DLayered : ' + str(self.maxSurface1DLayered)]
-            except ValueError:
-                str_list += ['maxSurface1DLayered : <ValueError>']
-            try:
-                str_list += ['maxSurface2DLayered : ' + str(self.maxSurface2DLayered)]
-            except ValueError:
-                str_list += ['maxSurface2DLayered : <ValueError>']
-            try:
-                str_list += ['maxSurfaceCubemap : ' + str(self.maxSurfaceCubemap)]
-            except ValueError:
-                str_list += ['maxSurfaceCubemap : <ValueError>']
-            try:
-                str_list += ['maxSurfaceCubemapLayered : ' + str(self.maxSurfaceCubemapLayered)]
-            except ValueError:
-                str_list += ['maxSurfaceCubemapLayered : <ValueError>']
-            try:
-                str_list += ['surfaceAlignment : ' + str(self.surfaceAlignment)]
-            except ValueError:
-                str_list += ['surfaceAlignment : <ValueError>']
-            try:
-                str_list += ['concurrentKernels : ' + str(self.concurrentKernels)]
-            except ValueError:
-                str_list += ['concurrentKernels : <ValueError>']
-            try:
-                str_list += ['ECCEnabled : ' + str(self.ECCEnabled)]
-            except ValueError:
-                str_list += ['ECCEnabled : <ValueError>']
-            try:
-                str_list += ['pciBusID : ' + str(self.pciBusID)]
-            except ValueError:
-                str_list += ['pciBusID : <ValueError>']
-            try:
-                str_list += ['pciDeviceID : ' + str(self.pciDeviceID)]
-            except ValueError:
-                str_list += ['pciDeviceID : <ValueError>']
-            try:
-                str_list += ['pciDomainID : ' + str(self.pciDomainID)]
-            except ValueError:
-                str_list += ['pciDomainID : <ValueError>']
-            try:
-                str_list += ['tccDriver : ' + str(self.tccDriver)]
-            except ValueError:
-                str_list += ['tccDriver : <ValueError>']
-            try:
-                str_list += ['asyncEngineCount : ' + str(self.asyncEngineCount)]
-            except ValueError:
-                str_list += ['asyncEngineCount : <ValueError>']
-            try:
-                str_list += ['unifiedAddressing : ' + str(self.unifiedAddressing)]
-            except ValueError:
-                str_list += ['unifiedAddressing : <ValueError>']
-            try:
-                str_list += ['memoryClockRate : ' + str(self.memoryClockRate)]
-            except ValueError:
-                str_list += ['memoryClockRate : <ValueError>']
-            try:
-                str_list += ['memoryBusWidth : ' + str(self.memoryBusWidth)]
-            except ValueError:
-                str_list += ['memoryBusWidth : <ValueError>']
-            try:
-                str_list += ['l2CacheSize : ' + str(self.l2CacheSize)]
-            except ValueError:
-                str_list += ['l2CacheSize : <ValueError>']
-            try:
-                str_list += ['persistingL2CacheMaxSize : ' + str(self.persistingL2CacheMaxSize)]
-            except ValueError:
-                str_list += ['persistingL2CacheMaxSize : <ValueError>']
-            try:
-                str_list += ['maxThreadsPerMultiProcessor : ' + str(self.maxThreadsPerMultiProcessor)]
-            except ValueError:
-                str_list += ['maxThreadsPerMultiProcessor : <ValueError>']
-            try:
-                str_list += ['streamPrioritiesSupported : ' + str(self.streamPrioritiesSupported)]
-            except ValueError:
-                str_list += ['streamPrioritiesSupported : <ValueError>']
-            try:
-                str_list += ['globalL1CacheSupported : ' + str(self.globalL1CacheSupported)]
-            except ValueError:
-                str_list += ['globalL1CacheSupported : <ValueError>']
-            try:
-                str_list += ['localL1CacheSupported : ' + str(self.localL1CacheSupported)]
-            except ValueError:
-                str_list += ['localL1CacheSupported : <ValueError>']
-            try:
-                str_list += ['sharedMemPerMultiprocessor : ' + str(self.sharedMemPerMultiprocessor)]
-            except ValueError:
-                str_list += ['sharedMemPerMultiprocessor : <ValueError>']
-            try:
-                str_list += ['regsPerMultiprocessor : ' + str(self.regsPerMultiprocessor)]
-            except ValueError:
-                str_list += ['regsPerMultiprocessor : <ValueError>']
-            try:
-                str_list += ['managedMemory : ' + str(self.managedMemory)]
-            except ValueError:
-                str_list += ['managedMemory : <ValueError>']
-            try:
-                str_list += ['isMultiGpuBoard : ' + str(self.isMultiGpuBoard)]
-            except ValueError:
-                str_list += ['isMultiGpuBoard : <ValueError>']
-            try:
-                str_list += ['multiGpuBoardGroupID : ' + str(self.multiGpuBoardGroupID)]
-            except ValueError:
-                str_list += ['multiGpuBoardGroupID : <ValueError>']
-            try:
-                str_list += ['hostNativeAtomicSupported : ' + str(self.hostNativeAtomicSupported)]
-            except ValueError:
-                str_list += ['hostNativeAtomicSupported : <ValueError>']
-            try:
-                str_list += ['singleToDoublePrecisionPerfRatio : ' + str(self.singleToDoublePrecisionPerfRatio)]
-            except ValueError:
-                str_list += ['singleToDoublePrecisionPerfRatio : <ValueError>']
-            try:
-                str_list += ['pageableMemoryAccess : ' + str(self.pageableMemoryAccess)]
-            except ValueError:
-                str_list += ['pageableMemoryAccess : <ValueError>']
-            try:
-                str_list += ['concurrentManagedAccess : ' + str(self.concurrentManagedAccess)]
-            except ValueError:
-                str_list += ['concurrentManagedAccess : <ValueError>']
-            try:
-                str_list += ['computePreemptionSupported : ' + str(self.computePreemptionSupported)]
-            except ValueError:
-                str_list += ['computePreemptionSupported : <ValueError>']
-            try:
-                str_list += ['canUseHostPointerForRegisteredMem : ' + str(self.canUseHostPointerForRegisteredMem)]
-            except ValueError:
-                str_list += ['canUseHostPointerForRegisteredMem : <ValueError>']
-            try:
-                str_list += ['cooperativeLaunch : ' + str(self.cooperativeLaunch)]
-            except ValueError:
-                str_list += ['cooperativeLaunch : <ValueError>']
-            try:
-                str_list += ['cooperativeMultiDeviceLaunch : ' + str(self.cooperativeMultiDeviceLaunch)]
-            except ValueError:
-                str_list += ['cooperativeMultiDeviceLaunch : <ValueError>']
-            try:
-                str_list += ['sharedMemPerBlockOptin : ' + str(self.sharedMemPerBlockOptin)]
-            except ValueError:
-                str_list += ['sharedMemPerBlockOptin : <ValueError>']
-            try:
-                str_list += ['pageableMemoryAccessUsesHostPageTables : ' + str(self.pageableMemoryAccessUsesHostPageTables)]
-            except ValueError:
-                str_list += ['pageableMemoryAccessUsesHostPageTables : <ValueError>']
-            try:
-                str_list += ['directManagedMemAccessFromHost : ' + str(self.directManagedMemAccessFromHost)]
-            except ValueError:
-                str_list += ['directManagedMemAccessFromHost : <ValueError>']
-            try:
-                str_list += ['maxBlocksPerMultiProcessor : ' + str(self.maxBlocksPerMultiProcessor)]
-            except ValueError:
-                str_list += ['maxBlocksPerMultiProcessor : <ValueError>']
-            try:
-                str_list += ['accessPolicyMaxWindowSize : ' + str(self.accessPolicyMaxWindowSize)]
-            except ValueError:
-                str_list += ['accessPolicyMaxWindowSize : <ValueError>']
-            try:
-                str_list += ['reservedSharedMemPerBlock : ' + str(self.reservedSharedMemPerBlock)]
-            except ValueError:
-                str_list += ['reservedSharedMemPerBlock : <ValueError>']
-            try:
-                str_list += ['hostRegisterSupported : ' + str(self.hostRegisterSupported)]
-            except ValueError:
-                str_list += ['hostRegisterSupported : <ValueError>']
-            try:
-                str_list += ['sparseCudaArraySupported : ' + str(self.sparseCudaArraySupported)]
-            except ValueError:
-                str_list += ['sparseCudaArraySupported : <ValueError>']
-            try:
-                str_list += ['hostRegisterReadOnlySupported : ' + str(self.hostRegisterReadOnlySupported)]
-            except ValueError:
-                str_list += ['hostRegisterReadOnlySupported : <ValueError>']
-            try:
-                str_list += ['timelineSemaphoreInteropSupported : ' + str(self.timelineSemaphoreInteropSupported)]
-            except ValueError:
-                str_list += ['timelineSemaphoreInteropSupported : <ValueError>']
-            try:
-                str_list += ['memoryPoolsSupported : ' + str(self.memoryPoolsSupported)]
-            except ValueError:
-                str_list += ['memoryPoolsSupported : <ValueError>']
-            try:
-                str_list += ['gpuDirectRDMASupported : ' + str(self.gpuDirectRDMASupported)]
-            except ValueError:
-                str_list += ['gpuDirectRDMASupported : <ValueError>']
-            try:
-                str_list += ['gpuDirectRDMAFlushWritesOptions : ' + str(self.gpuDirectRDMAFlushWritesOptions)]
-            except ValueError:
-                str_list += ['gpuDirectRDMAFlushWritesOptions : <ValueError>']
-            try:
-                str_list += ['gpuDirectRDMAWritesOrdering : ' + str(self.gpuDirectRDMAWritesOrdering)]
-            except ValueError:
-                str_list += ['gpuDirectRDMAWritesOrdering : <ValueError>']
-            try:
-                str_list += ['memoryPoolSupportedHandleTypes : ' + str(self.memoryPoolSupportedHandleTypes)]
-            except ValueError:
-                str_list += ['memoryPoolSupportedHandleTypes : <ValueError>']
-            try:
-                str_list += ['deferredMappingCudaArraySupported : ' + str(self.deferredMappingCudaArraySupported)]
-            except ValueError:
-                str_list += ['deferredMappingCudaArraySupported : <ValueError>']
-            try:
-                str_list += ['ipcEventSupported : ' + str(self.ipcEventSupported)]
-            except ValueError:
-                str_list += ['ipcEventSupported : <ValueError>']
-            try:
-                str_list += ['clusterLaunch : ' + str(self.clusterLaunch)]
-            except ValueError:
-                str_list += ['clusterLaunch : <ValueError>']
-            try:
-                str_list += ['unifiedFunctionPointers : ' + str(self.unifiedFunctionPointers)]
-            except ValueError:
-                str_list += ['unifiedFunctionPointers : <ValueError>']
-            try:
-                str_list += ['reserved2 : ' + str(self.reserved2)]
-            except ValueError:
-                str_list += ['reserved2 : <ValueError>']
-            try:
-                str_list += ['reserved1 : ' + str(self.reserved1)]
-            except ValueError:
-                str_list += ['reserved1 : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def name(self):
-        return self._ptr[0].name
-    @name.setter
-    def name(self, name):
-        pass
-        self._ptr[0].name = name
-    @property
-    def uuid(self):
-        return self._uuid
-    @uuid.setter
-    def uuid(self, uuid not None : cudaUUID_t):
-        string.memcpy(&self._ptr[0].uuid, <cyruntime.cudaUUID_t*><void_ptr>uuid.getPtr(), sizeof(self._ptr[0].uuid))
-    @property
-    def luid(self):
-        return PyBytes_FromStringAndSize(self._ptr[0].luid, 8)
-    @luid.setter
-    def luid(self, luid):
-        if len(luid) != 8:
-            raise ValueError("luid length must be 8, is " + str(len(luid)))
-        if CHAR_MIN == 0:
-            for i, b in enumerate(luid):
-                if b < 0 and b > -129:
-                    b = b + 256
-                self._ptr[0].luid[i] = b
-        else:
-            for i, b in enumerate(luid):
-                if b > 127 and b < 256:
-                    b = b - 256
-                self._ptr[0].luid[i] = b
-    @property
-    def luidDeviceNodeMask(self):
-        return self._ptr[0].luidDeviceNodeMask
-    @luidDeviceNodeMask.setter
-    def luidDeviceNodeMask(self, unsigned int luidDeviceNodeMask):
-        self._ptr[0].luidDeviceNodeMask = luidDeviceNodeMask
-    @property
-    def totalGlobalMem(self):
-        return self._ptr[0].totalGlobalMem
-    @totalGlobalMem.setter
-    def totalGlobalMem(self, size_t totalGlobalMem):
-        self._ptr[0].totalGlobalMem = totalGlobalMem
-    @property
-    def sharedMemPerBlock(self):
-        return self._ptr[0].sharedMemPerBlock
-    @sharedMemPerBlock.setter
-    def sharedMemPerBlock(self, size_t sharedMemPerBlock):
-        self._ptr[0].sharedMemPerBlock = sharedMemPerBlock
-    @property
-    def regsPerBlock(self):
-        return self._ptr[0].regsPerBlock
-    @regsPerBlock.setter
-    def regsPerBlock(self, int regsPerBlock):
-        self._ptr[0].regsPerBlock = regsPerBlock
-    @property
-    def warpSize(self):
-        return self._ptr[0].warpSize
-    @warpSize.setter
-    def warpSize(self, int warpSize):
-        self._ptr[0].warpSize = warpSize
-    @property
-    def memPitch(self):
-        return self._ptr[0].memPitch
-    @memPitch.setter
-    def memPitch(self, size_t memPitch):
-        self._ptr[0].memPitch = memPitch
-    @property
-    def maxThreadsPerBlock(self):
-        return self._ptr[0].maxThreadsPerBlock
-    @maxThreadsPerBlock.setter
-    def maxThreadsPerBlock(self, int maxThreadsPerBlock):
-        self._ptr[0].maxThreadsPerBlock = maxThreadsPerBlock
-    @property
-    def maxThreadsDim(self):
-        return self._ptr[0].maxThreadsDim
-    @maxThreadsDim.setter
-    def maxThreadsDim(self, maxThreadsDim):
-        self._ptr[0].maxThreadsDim = maxThreadsDim
-    @property
-    def maxGridSize(self):
-        return self._ptr[0].maxGridSize
-    @maxGridSize.setter
-    def maxGridSize(self, maxGridSize):
-        self._ptr[0].maxGridSize = maxGridSize
-    @property
-    def clockRate(self):
-        return self._ptr[0].clockRate
-    @clockRate.setter
-    def clockRate(self, int clockRate):
-        self._ptr[0].clockRate = clockRate
-    @property
-    def totalConstMem(self):
-        return self._ptr[0].totalConstMem
-    @totalConstMem.setter
-    def totalConstMem(self, size_t totalConstMem):
-        self._ptr[0].totalConstMem = totalConstMem
-    @property
-    def major(self):
-        return self._ptr[0].major
-    @major.setter
-    def major(self, int major):
-        self._ptr[0].major = major
-    @property
-    def minor(self):
-        return self._ptr[0].minor
-    @minor.setter
-    def minor(self, int minor):
-        self._ptr[0].minor = minor
-    @property
-    def textureAlignment(self):
-        return self._ptr[0].textureAlignment
-    @textureAlignment.setter
-    def textureAlignment(self, size_t textureAlignment):
-        self._ptr[0].textureAlignment = textureAlignment
-    @property
-    def texturePitchAlignment(self):
-        return self._ptr[0].texturePitchAlignment
-    @texturePitchAlignment.setter
-    def texturePitchAlignment(self, size_t texturePitchAlignment):
-        self._ptr[0].texturePitchAlignment = texturePitchAlignment
-    @property
-    def deviceOverlap(self):
-        return self._ptr[0].deviceOverlap
-    @deviceOverlap.setter
-    def deviceOverlap(self, int deviceOverlap):
-        self._ptr[0].deviceOverlap = deviceOverlap
-    @property
-    def multiProcessorCount(self):
-        return self._ptr[0].multiProcessorCount
-    @multiProcessorCount.setter
-    def multiProcessorCount(self, int multiProcessorCount):
-        self._ptr[0].multiProcessorCount = multiProcessorCount
-    @property
-    def kernelExecTimeoutEnabled(self):
-        return self._ptr[0].kernelExecTimeoutEnabled
-    @kernelExecTimeoutEnabled.setter
-    def kernelExecTimeoutEnabled(self, int kernelExecTimeoutEnabled):
-        self._ptr[0].kernelExecTimeoutEnabled = kernelExecTimeoutEnabled
-    @property
-    def integrated(self):
-        return self._ptr[0].integrated
-    @integrated.setter
-    def integrated(self, int integrated):
-        self._ptr[0].integrated = integrated
-    @property
-    def canMapHostMemory(self):
-        return self._ptr[0].canMapHostMemory
-    @canMapHostMemory.setter
-    def canMapHostMemory(self, int canMapHostMemory):
-        self._ptr[0].canMapHostMemory = canMapHostMemory
-    @property
-    def computeMode(self):
-        return self._ptr[0].computeMode
-    @computeMode.setter
-    def computeMode(self, int computeMode):
-        self._ptr[0].computeMode = computeMode
-    @property
-    def maxTexture1D(self):
-        return self._ptr[0].maxTexture1D
-    @maxTexture1D.setter
-    def maxTexture1D(self, int maxTexture1D):
-        self._ptr[0].maxTexture1D = maxTexture1D
-    @property
-    def maxTexture1DMipmap(self):
-        return self._ptr[0].maxTexture1DMipmap
-    @maxTexture1DMipmap.setter
-    def maxTexture1DMipmap(self, int maxTexture1DMipmap):
-        self._ptr[0].maxTexture1DMipmap = maxTexture1DMipmap
-    @property
-    def maxTexture1DLinear(self):
-        return self._ptr[0].maxTexture1DLinear
-    @maxTexture1DLinear.setter
-    def maxTexture1DLinear(self, int maxTexture1DLinear):
-        self._ptr[0].maxTexture1DLinear = maxTexture1DLinear
-    @property
-    def maxTexture2D(self):
-        return self._ptr[0].maxTexture2D
-    @maxTexture2D.setter
-    def maxTexture2D(self, maxTexture2D):
-        self._ptr[0].maxTexture2D = maxTexture2D
-    @property
-    def maxTexture2DMipmap(self):
-        return self._ptr[0].maxTexture2DMipmap
-    @maxTexture2DMipmap.setter
-    def maxTexture2DMipmap(self, maxTexture2DMipmap):
-        self._ptr[0].maxTexture2DMipmap = maxTexture2DMipmap
-    @property
-    def maxTexture2DLinear(self):
-        return self._ptr[0].maxTexture2DLinear
-    @maxTexture2DLinear.setter
-    def maxTexture2DLinear(self, maxTexture2DLinear):
-        self._ptr[0].maxTexture2DLinear = maxTexture2DLinear
-    @property
-    def maxTexture2DGather(self):
-        return self._ptr[0].maxTexture2DGather
-    @maxTexture2DGather.setter
-    def maxTexture2DGather(self, maxTexture2DGather):
-        self._ptr[0].maxTexture2DGather = maxTexture2DGather
-    @property
-    def maxTexture3D(self):
-        return self._ptr[0].maxTexture3D
-    @maxTexture3D.setter
-    def maxTexture3D(self, maxTexture3D):
-        self._ptr[0].maxTexture3D = maxTexture3D
-    @property
-    def maxTexture3DAlt(self):
-        return self._ptr[0].maxTexture3DAlt
-    @maxTexture3DAlt.setter
-    def maxTexture3DAlt(self, maxTexture3DAlt):
-        self._ptr[0].maxTexture3DAlt = maxTexture3DAlt
-    @property
-    def maxTextureCubemap(self):
-        return self._ptr[0].maxTextureCubemap
-    @maxTextureCubemap.setter
-    def maxTextureCubemap(self, int maxTextureCubemap):
-        self._ptr[0].maxTextureCubemap = maxTextureCubemap
-    @property
-    def maxTexture1DLayered(self):
-        return self._ptr[0].maxTexture1DLayered
-    @maxTexture1DLayered.setter
-    def maxTexture1DLayered(self, maxTexture1DLayered):
-        self._ptr[0].maxTexture1DLayered = maxTexture1DLayered
-    @property
-    def maxTexture2DLayered(self):
-        return self._ptr[0].maxTexture2DLayered
-    @maxTexture2DLayered.setter
-    def maxTexture2DLayered(self, maxTexture2DLayered):
-        self._ptr[0].maxTexture2DLayered = maxTexture2DLayered
-    @property
-    def maxTextureCubemapLayered(self):
-        return self._ptr[0].maxTextureCubemapLayered
-    @maxTextureCubemapLayered.setter
-    def maxTextureCubemapLayered(self, maxTextureCubemapLayered):
-        self._ptr[0].maxTextureCubemapLayered = maxTextureCubemapLayered
-    @property
-    def maxSurface1D(self):
-        return self._ptr[0].maxSurface1D
-    @maxSurface1D.setter
-    def maxSurface1D(self, int maxSurface1D):
-        self._ptr[0].maxSurface1D = maxSurface1D
-    @property
-    def maxSurface2D(self):
-        return self._ptr[0].maxSurface2D
-    @maxSurface2D.setter
-    def maxSurface2D(self, maxSurface2D):
-        self._ptr[0].maxSurface2D = maxSurface2D
-    @property
-    def maxSurface3D(self):
-        return self._ptr[0].maxSurface3D
-    @maxSurface3D.setter
-    def maxSurface3D(self, maxSurface3D):
-        self._ptr[0].maxSurface3D = maxSurface3D
-    @property
-    def maxSurface1DLayered(self):
-        return self._ptr[0].maxSurface1DLayered
-    @maxSurface1DLayered.setter
-    def maxSurface1DLayered(self, maxSurface1DLayered):
-        self._ptr[0].maxSurface1DLayered = maxSurface1DLayered
-    @property
-    def maxSurface2DLayered(self):
-        return self._ptr[0].maxSurface2DLayered
-    @maxSurface2DLayered.setter
-    def maxSurface2DLayered(self, maxSurface2DLayered):
-        self._ptr[0].maxSurface2DLayered = maxSurface2DLayered
-    @property
-    def maxSurfaceCubemap(self):
-        return self._ptr[0].maxSurfaceCubemap
-    @maxSurfaceCubemap.setter
-    def maxSurfaceCubemap(self, int maxSurfaceCubemap):
-        self._ptr[0].maxSurfaceCubemap = maxSurfaceCubemap
-    @property
-    def maxSurfaceCubemapLayered(self):
-        return self._ptr[0].maxSurfaceCubemapLayered
-    @maxSurfaceCubemapLayered.setter
-    def maxSurfaceCubemapLayered(self, maxSurfaceCubemapLayered):
-        self._ptr[0].maxSurfaceCubemapLayered = maxSurfaceCubemapLayered
-    @property
-    def surfaceAlignment(self):
-        return self._ptr[0].surfaceAlignment
-    @surfaceAlignment.setter
-    def surfaceAlignment(self, size_t surfaceAlignment):
-        self._ptr[0].surfaceAlignment = surfaceAlignment
-    @property
-    def concurrentKernels(self):
-        return self._ptr[0].concurrentKernels
-    @concurrentKernels.setter
-    def concurrentKernels(self, int concurrentKernels):
-        self._ptr[0].concurrentKernels = concurrentKernels
-    @property
-    def ECCEnabled(self):
-        return self._ptr[0].ECCEnabled
-    @ECCEnabled.setter
-    def ECCEnabled(self, int ECCEnabled):
-        self._ptr[0].ECCEnabled = ECCEnabled
-    @property
-    def pciBusID(self):
-        return self._ptr[0].pciBusID
-    @pciBusID.setter
-    def pciBusID(self, int pciBusID):
-        self._ptr[0].pciBusID = pciBusID
-    @property
-    def pciDeviceID(self):
-        return self._ptr[0].pciDeviceID
-    @pciDeviceID.setter
-    def pciDeviceID(self, int pciDeviceID):
-        self._ptr[0].pciDeviceID = pciDeviceID
-    @property
-    def pciDomainID(self):
-        return self._ptr[0].pciDomainID
-    @pciDomainID.setter
-    def pciDomainID(self, int pciDomainID):
-        self._ptr[0].pciDomainID = pciDomainID
-    @property
-    def tccDriver(self):
-        return self._ptr[0].tccDriver
-    @tccDriver.setter
-    def tccDriver(self, int tccDriver):
-        self._ptr[0].tccDriver = tccDriver
-    @property
-    def asyncEngineCount(self):
-        return self._ptr[0].asyncEngineCount
-    @asyncEngineCount.setter
-    def asyncEngineCount(self, int asyncEngineCount):
-        self._ptr[0].asyncEngineCount = asyncEngineCount
-    @property
-    def unifiedAddressing(self):
-        return self._ptr[0].unifiedAddressing
-    @unifiedAddressing.setter
-    def unifiedAddressing(self, int unifiedAddressing):
-        self._ptr[0].unifiedAddressing = unifiedAddressing
-    @property
-    def memoryClockRate(self):
-        return self._ptr[0].memoryClockRate
-    @memoryClockRate.setter
-    def memoryClockRate(self, int memoryClockRate):
-        self._ptr[0].memoryClockRate = memoryClockRate
-    @property
-    def memoryBusWidth(self):
-        return self._ptr[0].memoryBusWidth
-    @memoryBusWidth.setter
-    def memoryBusWidth(self, int memoryBusWidth):
-        self._ptr[0].memoryBusWidth = memoryBusWidth
-    @property
-    def l2CacheSize(self):
-        return self._ptr[0].l2CacheSize
-    @l2CacheSize.setter
-    def l2CacheSize(self, int l2CacheSize):
-        self._ptr[0].l2CacheSize = l2CacheSize
-    @property
-    def persistingL2CacheMaxSize(self):
-        return self._ptr[0].persistingL2CacheMaxSize
-    @persistingL2CacheMaxSize.setter
-    def persistingL2CacheMaxSize(self, int persistingL2CacheMaxSize):
-        self._ptr[0].persistingL2CacheMaxSize = persistingL2CacheMaxSize
-    @property
-    def maxThreadsPerMultiProcessor(self):
-        return self._ptr[0].maxThreadsPerMultiProcessor
-    @maxThreadsPerMultiProcessor.setter
-    def maxThreadsPerMultiProcessor(self, int maxThreadsPerMultiProcessor):
-        self._ptr[0].maxThreadsPerMultiProcessor = maxThreadsPerMultiProcessor
-    @property
-    def streamPrioritiesSupported(self):
-        return self._ptr[0].streamPrioritiesSupported
-    @streamPrioritiesSupported.setter
-    def streamPrioritiesSupported(self, int streamPrioritiesSupported):
-        self._ptr[0].streamPrioritiesSupported = streamPrioritiesSupported
-    @property
-    def globalL1CacheSupported(self):
-        return self._ptr[0].globalL1CacheSupported
-    @globalL1CacheSupported.setter
-    def globalL1CacheSupported(self, int globalL1CacheSupported):
-        self._ptr[0].globalL1CacheSupported = globalL1CacheSupported
-    @property
-    def localL1CacheSupported(self):
-        return self._ptr[0].localL1CacheSupported
-    @localL1CacheSupported.setter
-    def localL1CacheSupported(self, int localL1CacheSupported):
-        self._ptr[0].localL1CacheSupported = localL1CacheSupported
-    @property
-    def sharedMemPerMultiprocessor(self):
-        return self._ptr[0].sharedMemPerMultiprocessor
-    @sharedMemPerMultiprocessor.setter
-    def sharedMemPerMultiprocessor(self, size_t sharedMemPerMultiprocessor):
-        self._ptr[0].sharedMemPerMultiprocessor = sharedMemPerMultiprocessor
-    @property
-    def regsPerMultiprocessor(self):
-        return self._ptr[0].regsPerMultiprocessor
-    @regsPerMultiprocessor.setter
-    def regsPerMultiprocessor(self, int regsPerMultiprocessor):
-        self._ptr[0].regsPerMultiprocessor = regsPerMultiprocessor
-    @property
-    def managedMemory(self):
-        return self._ptr[0].managedMemory
-    @managedMemory.setter
-    def managedMemory(self, int managedMemory):
-        self._ptr[0].managedMemory = managedMemory
-    @property
-    def isMultiGpuBoard(self):
-        return self._ptr[0].isMultiGpuBoard
-    @isMultiGpuBoard.setter
-    def isMultiGpuBoard(self, int isMultiGpuBoard):
-        self._ptr[0].isMultiGpuBoard = isMultiGpuBoard
-    @property
-    def multiGpuBoardGroupID(self):
-        return self._ptr[0].multiGpuBoardGroupID
-    @multiGpuBoardGroupID.setter
-    def multiGpuBoardGroupID(self, int multiGpuBoardGroupID):
-        self._ptr[0].multiGpuBoardGroupID = multiGpuBoardGroupID
-    @property
-    def hostNativeAtomicSupported(self):
-        return self._ptr[0].hostNativeAtomicSupported
-    @hostNativeAtomicSupported.setter
-    def hostNativeAtomicSupported(self, int hostNativeAtomicSupported):
-        self._ptr[0].hostNativeAtomicSupported = hostNativeAtomicSupported
-    @property
-    def singleToDoublePrecisionPerfRatio(self):
-        return self._ptr[0].singleToDoublePrecisionPerfRatio
-    @singleToDoublePrecisionPerfRatio.setter
-    def singleToDoublePrecisionPerfRatio(self, int singleToDoublePrecisionPerfRatio):
-        self._ptr[0].singleToDoublePrecisionPerfRatio = singleToDoublePrecisionPerfRatio
-    @property
-    def pageableMemoryAccess(self):
-        return self._ptr[0].pageableMemoryAccess
-    @pageableMemoryAccess.setter
-    def pageableMemoryAccess(self, int pageableMemoryAccess):
-        self._ptr[0].pageableMemoryAccess = pageableMemoryAccess
-    @property
-    def concurrentManagedAccess(self):
-        return self._ptr[0].concurrentManagedAccess
-    @concurrentManagedAccess.setter
-    def concurrentManagedAccess(self, int concurrentManagedAccess):
-        self._ptr[0].concurrentManagedAccess = concurrentManagedAccess
-    @property
-    def computePreemptionSupported(self):
-        return self._ptr[0].computePreemptionSupported
-    @computePreemptionSupported.setter
-    def computePreemptionSupported(self, int computePreemptionSupported):
-        self._ptr[0].computePreemptionSupported = computePreemptionSupported
-    @property
-    def canUseHostPointerForRegisteredMem(self):
-        return self._ptr[0].canUseHostPointerForRegisteredMem
-    @canUseHostPointerForRegisteredMem.setter
-    def canUseHostPointerForRegisteredMem(self, int canUseHostPointerForRegisteredMem):
-        self._ptr[0].canUseHostPointerForRegisteredMem = canUseHostPointerForRegisteredMem
-    @property
-    def cooperativeLaunch(self):
-        return self._ptr[0].cooperativeLaunch
-    @cooperativeLaunch.setter
-    def cooperativeLaunch(self, int cooperativeLaunch):
-        self._ptr[0].cooperativeLaunch = cooperativeLaunch
-    @property
-    def cooperativeMultiDeviceLaunch(self):
-        return self._ptr[0].cooperativeMultiDeviceLaunch
-    @cooperativeMultiDeviceLaunch.setter
-    def cooperativeMultiDeviceLaunch(self, int cooperativeMultiDeviceLaunch):
-        self._ptr[0].cooperativeMultiDeviceLaunch = cooperativeMultiDeviceLaunch
-    @property
-    def sharedMemPerBlockOptin(self):
-        return self._ptr[0].sharedMemPerBlockOptin
-    @sharedMemPerBlockOptin.setter
-    def sharedMemPerBlockOptin(self, size_t sharedMemPerBlockOptin):
-        self._ptr[0].sharedMemPerBlockOptin = sharedMemPerBlockOptin
-    @property
-    def pageableMemoryAccessUsesHostPageTables(self):
-        return self._ptr[0].pageableMemoryAccessUsesHostPageTables
-    @pageableMemoryAccessUsesHostPageTables.setter
-    def pageableMemoryAccessUsesHostPageTables(self, int pageableMemoryAccessUsesHostPageTables):
-        self._ptr[0].pageableMemoryAccessUsesHostPageTables = pageableMemoryAccessUsesHostPageTables
-    @property
-    def directManagedMemAccessFromHost(self):
-        return self._ptr[0].directManagedMemAccessFromHost
-    @directManagedMemAccessFromHost.setter
-    def directManagedMemAccessFromHost(self, int directManagedMemAccessFromHost):
-        self._ptr[0].directManagedMemAccessFromHost = directManagedMemAccessFromHost
-    @property
-    def maxBlocksPerMultiProcessor(self):
-        return self._ptr[0].maxBlocksPerMultiProcessor
-    @maxBlocksPerMultiProcessor.setter
-    def maxBlocksPerMultiProcessor(self, int maxBlocksPerMultiProcessor):
-        self._ptr[0].maxBlocksPerMultiProcessor = maxBlocksPerMultiProcessor
-    @property
-    def accessPolicyMaxWindowSize(self):
-        return self._ptr[0].accessPolicyMaxWindowSize
-    @accessPolicyMaxWindowSize.setter
-    def accessPolicyMaxWindowSize(self, int accessPolicyMaxWindowSize):
-        self._ptr[0].accessPolicyMaxWindowSize = accessPolicyMaxWindowSize
-    @property
-    def reservedSharedMemPerBlock(self):
-        return self._ptr[0].reservedSharedMemPerBlock
-    @reservedSharedMemPerBlock.setter
-    def reservedSharedMemPerBlock(self, size_t reservedSharedMemPerBlock):
-        self._ptr[0].reservedSharedMemPerBlock = reservedSharedMemPerBlock
-    @property
-    def hostRegisterSupported(self):
-        return self._ptr[0].hostRegisterSupported
-    @hostRegisterSupported.setter
-    def hostRegisterSupported(self, int hostRegisterSupported):
-        self._ptr[0].hostRegisterSupported = hostRegisterSupported
-    @property
-    def sparseCudaArraySupported(self):
-        return self._ptr[0].sparseCudaArraySupported
-    @sparseCudaArraySupported.setter
-    def sparseCudaArraySupported(self, int sparseCudaArraySupported):
-        self._ptr[0].sparseCudaArraySupported = sparseCudaArraySupported
-    @property
-    def hostRegisterReadOnlySupported(self):
-        return self._ptr[0].hostRegisterReadOnlySupported
-    @hostRegisterReadOnlySupported.setter
-    def hostRegisterReadOnlySupported(self, int hostRegisterReadOnlySupported):
-        self._ptr[0].hostRegisterReadOnlySupported = hostRegisterReadOnlySupported
-    @property
-    def timelineSemaphoreInteropSupported(self):
-        return self._ptr[0].timelineSemaphoreInteropSupported
-    @timelineSemaphoreInteropSupported.setter
-    def timelineSemaphoreInteropSupported(self, int timelineSemaphoreInteropSupported):
-        self._ptr[0].timelineSemaphoreInteropSupported = timelineSemaphoreInteropSupported
-    @property
-    def memoryPoolsSupported(self):
-        return self._ptr[0].memoryPoolsSupported
-    @memoryPoolsSupported.setter
-    def memoryPoolsSupported(self, int memoryPoolsSupported):
-        self._ptr[0].memoryPoolsSupported = memoryPoolsSupported
-    @property
-    def gpuDirectRDMASupported(self):
-        return self._ptr[0].gpuDirectRDMASupported
-    @gpuDirectRDMASupported.setter
-    def gpuDirectRDMASupported(self, int gpuDirectRDMASupported):
-        self._ptr[0].gpuDirectRDMASupported = gpuDirectRDMASupported
-    @property
-    def gpuDirectRDMAFlushWritesOptions(self):
-        return self._ptr[0].gpuDirectRDMAFlushWritesOptions
-    @gpuDirectRDMAFlushWritesOptions.setter
-    def gpuDirectRDMAFlushWritesOptions(self, unsigned int gpuDirectRDMAFlushWritesOptions):
-        self._ptr[0].gpuDirectRDMAFlushWritesOptions = gpuDirectRDMAFlushWritesOptions
-    @property
-    def gpuDirectRDMAWritesOrdering(self):
-        return self._ptr[0].gpuDirectRDMAWritesOrdering
-    @gpuDirectRDMAWritesOrdering.setter
-    def gpuDirectRDMAWritesOrdering(self, int gpuDirectRDMAWritesOrdering):
-        self._ptr[0].gpuDirectRDMAWritesOrdering = gpuDirectRDMAWritesOrdering
-    @property
-    def memoryPoolSupportedHandleTypes(self):
-        return self._ptr[0].memoryPoolSupportedHandleTypes
-    @memoryPoolSupportedHandleTypes.setter
-    def memoryPoolSupportedHandleTypes(self, unsigned int memoryPoolSupportedHandleTypes):
-        self._ptr[0].memoryPoolSupportedHandleTypes = memoryPoolSupportedHandleTypes
-    @property
-    def deferredMappingCudaArraySupported(self):
-        return self._ptr[0].deferredMappingCudaArraySupported
-    @deferredMappingCudaArraySupported.setter
-    def deferredMappingCudaArraySupported(self, int deferredMappingCudaArraySupported):
-        self._ptr[0].deferredMappingCudaArraySupported = deferredMappingCudaArraySupported
-    @property
-    def ipcEventSupported(self):
-        return self._ptr[0].ipcEventSupported
-    @ipcEventSupported.setter
-    def ipcEventSupported(self, int ipcEventSupported):
-        self._ptr[0].ipcEventSupported = ipcEventSupported
-    @property
-    def clusterLaunch(self):
-        return self._ptr[0].clusterLaunch
-    @clusterLaunch.setter
-    def clusterLaunch(self, int clusterLaunch):
-        self._ptr[0].clusterLaunch = clusterLaunch
-    @property
-    def unifiedFunctionPointers(self):
-        return self._ptr[0].unifiedFunctionPointers
-    @unifiedFunctionPointers.setter
-    def unifiedFunctionPointers(self, int unifiedFunctionPointers):
-        self._ptr[0].unifiedFunctionPointers = unifiedFunctionPointers
-    @property
-    def reserved2(self):
-        return self._ptr[0].reserved2
-    @reserved2.setter
-    def reserved2(self, reserved2):
-        self._ptr[0].reserved2 = reserved2
-    @property
-    def reserved1(self):
-        return self._ptr[0].reserved1
-    @reserved1.setter
-    def reserved1(self, reserved1):
-        self._ptr[0].reserved1 = reserved1
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct cudaIpcEventHandle_st' in found_types}}
-
-cdef class cudaIpcEventHandle_st:
-    """
-    CUDA IPC event handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaIpcEventHandle_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(self._ptr[0].reserved, 64)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 64:
-            raise ValueError("reserved length must be 64, is " + str(len(reserved)))
-        if CHAR_MIN == 0:
-            for i, b in enumerate(reserved):
-                if b < 0 and b > -129:
-                    b = b + 256
-                self._ptr[0].reserved[i] = b
-        else:
-            for i, b in enumerate(reserved):
-                if b > 127 and b < 256:
-                    b = b - 256
-                self._ptr[0].reserved[i] = b
-{{endif}}
-{{if 'struct cudaIpcMemHandle_st' in found_types}}
-
-cdef class cudaIpcMemHandle_st:
-    """
-    CUDA IPC memory handle
-
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaIpcMemHandle_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(self._ptr[0].reserved, 64)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 64:
-            raise ValueError("reserved length must be 64, is " + str(len(reserved)))
-        if CHAR_MIN == 0:
-            for i, b in enumerate(reserved):
-                if b < 0 and b > -129:
-                    b = b + 256
-                self._ptr[0].reserved[i] = b
-        else:
-            for i, b in enumerate(reserved):
-                if b > 127 and b < 256:
-                    b = b - 256
-                self._ptr[0].reserved[i] = b
-{{endif}}
-{{if 'struct cudaMemFabricHandle_st' in found_types}}
-
-cdef class cudaMemFabricHandle_st:
-    """
-    Attributes
-    ----------
-    reserved : bytes
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaMemFabricHandle_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(self._ptr[0].reserved, 64)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 64:
-            raise ValueError("reserved length must be 64, is " + str(len(reserved)))
-        if CHAR_MIN == 0:
-            for i, b in enumerate(reserved):
-                if b < 0 and b > -129:
-                    b = b + 256
-                self._ptr[0].reserved[i] = b
-        else:
-            for i, b in enumerate(reserved):
-                if b > 127 and b < 256:
-                    b = b - 256
-                self._ptr[0].reserved[i] = b
-{{endif}}
-{{if 'struct cudaExternalMemoryHandleDesc' in found_types}}
-
-cdef class anon_struct5:
-    """
-    Attributes
-    ----------
-    handle : Any
-
-    name : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalMemoryHandleDesc *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].handle.win32
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['handle : ' + hex(self.handle)]
-            except ValueError:
-                str_list += ['handle : <ValueError>']
-            try:
-                str_list += ['name : ' + hex(self.name)]
-            except ValueError:
-                str_list += ['name : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def handle(self):
-        return <void_ptr>self._ptr[0].handle.win32.handle
-    @handle.setter
-    def handle(self, handle):
-        _cyhandle = utils.HelperInputVoidPtr(handle)
-        self._ptr[0].handle.win32.handle = <void*><void_ptr>_cyhandle.cptr
-    @property
-    def name(self):
-        return <void_ptr>self._ptr[0].handle.win32.name
-    @name.setter
-    def name(self, name):
-        _cyname = utils.HelperInputVoidPtr(name)
-        self._ptr[0].handle.win32.name = <void*><void_ptr>_cyname.cptr
-{{endif}}
-{{if 'struct cudaExternalMemoryHandleDesc' in found_types}}
-
-cdef class anon_union1:
-    """
-    Attributes
-    ----------
-    fd : int
-
-    win32 : anon_struct5
-
-    nvSciBufObject : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalMemoryHandleDesc *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._win32 = anon_struct5(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].handle
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fd : ' + str(self.fd)]
-            except ValueError:
-                str_list += ['fd : <ValueError>']
-            try:
-                str_list += ['win32 :\n' + '\n'.join(['    ' + line for line in str(self.win32).splitlines()])]
-            except ValueError:
-                str_list += ['win32 : <ValueError>']
-            try:
-                str_list += ['nvSciBufObject : ' + hex(self.nvSciBufObject)]
-            except ValueError:
-                str_list += ['nvSciBufObject : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fd(self):
-        return self._ptr[0].handle.fd
-    @fd.setter
-    def fd(self, int fd):
-        self._ptr[0].handle.fd = fd
-    @property
-    def win32(self):
-        return self._win32
-    @win32.setter
-    def win32(self, win32 not None : anon_struct5):
-        string.memcpy(&self._ptr[0].handle.win32, <cyruntime.anon_struct5*><void_ptr>win32.getPtr(), sizeof(self._ptr[0].handle.win32))
-    @property
-    def nvSciBufObject(self):
-        return <void_ptr>self._ptr[0].handle.nvSciBufObject
-    @nvSciBufObject.setter
-    def nvSciBufObject(self, nvSciBufObject):
-        _cynvSciBufObject = utils.HelperInputVoidPtr(nvSciBufObject)
-        self._ptr[0].handle.nvSciBufObject = <void*><void_ptr>_cynvSciBufObject.cptr
-{{endif}}
-{{if 'struct cudaExternalMemoryHandleDesc' in found_types}}
-
-cdef class cudaExternalMemoryHandleDesc:
-    """
-    External memory handle descriptor
-
-    Attributes
-    ----------
-    type : cudaExternalMemoryHandleType
-        Type of the handle
-    handle : anon_union1
-
-    size : unsigned long long
-        Size of the memory allocation
-    flags : unsigned int
-        Flags must either be zero or cudaExternalMemoryDedicated
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cyruntime.cudaExternalMemoryHandleDesc *>calloc(1, sizeof(cyruntime.cudaExternalMemoryHandleDesc))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cyruntime.cudaExternalMemoryHandleDesc *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._handle = anon_union1(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['handle :\n' + '\n'.join(['    ' + line for line in str(self.handle).splitlines()])]
-            except ValueError:
-                str_list += ['handle : <ValueError>']
-            try:
-                str_list += ['size : ' + str(self.size)]
-            except ValueError:
-                str_list += ['size : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return cudaExternalMemoryHandleType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : cudaExternalMemoryHandleType):
-        self._ptr[0].type = type.value
-    @property
-    def handle(self):
-        return self._handle
-    @handle.setter
-    def handle(self, handle not None : anon_union1):
-        string.memcpy(&self._ptr[0].handle, <cyruntime.anon_union1*><void_ptr>handle.getPtr(), sizeof(self._ptr[0].handle))
-    @property
-    def size(self):
-        return self._ptr[0].size
-    @size.setter
-    def size(self, unsigned long long size):
-        self._ptr[0].size = size
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-{{endif}}
-{{if 'struct cudaExternalMemoryBufferDesc' in found_types}}
-
-cdef class cudaExternalMemoryBufferDesc:
-    """
-    External memory buffer descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the buffer's base is
-    size : unsigned long long
-        Size of the buffer
-    flags : unsigned int
-        Flags reserved for future use. Must be zero.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaExternalMemoryBufferDesc *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['offset : ' + str(self.offset)]
-            except ValueError:
-                str_list += ['offset : <ValueError>']
-            try:
-                str_list += ['size : ' + str(self.size)]
-            except ValueError:
-                str_list += ['size : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def offset(self):
-        return self._ptr[0].offset
-    @offset.setter
-    def offset(self, unsigned long long offset):
-        self._ptr[0].offset = offset
-    @property
-    def size(self):
-        return self._ptr[0].size
-    @size.setter
-    def size(self, unsigned long long size):
-        self._ptr[0].size = size
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-{{endif}}
-{{if 'struct cudaExternalMemoryMipmappedArrayDesc' in found_types}}
-
-cdef class cudaExternalMemoryMipmappedArrayDesc:
-    """
-    External memory mipmap descriptor
-
-    Attributes
-    ----------
-    offset : unsigned long long
-        Offset into the memory object where the base level of the mipmap
-        chain is.
-    formatDesc : cudaChannelFormatDesc
-        Format of base level of the mipmap chain
-    extent : cudaExtent
-        Dimensions of base level of the mipmap chain
-    flags : unsigned int
-        Flags associated with CUDA mipmapped arrays. See
-        cudaMallocMipmappedArray
-    numLevels : unsigned int
-        Total number of levels in the mipmap chain
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaExternalMemoryMipmappedArrayDesc *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._formatDesc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._ptr[0].formatDesc)
-        self._extent = cudaExtent(_ptr=<void_ptr>&self._ptr[0].extent)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['offset : ' + str(self.offset)]
-            except ValueError:
-                str_list += ['offset : <ValueError>']
-            try:
-                str_list += ['formatDesc :\n' + '\n'.join(['    ' + line for line in str(self.formatDesc).splitlines()])]
-            except ValueError:
-                str_list += ['formatDesc : <ValueError>']
-            try:
-                str_list += ['extent :\n' + '\n'.join(['    ' + line for line in str(self.extent).splitlines()])]
-            except ValueError:
-                str_list += ['extent : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['numLevels : ' + str(self.numLevels)]
-            except ValueError:
-                str_list += ['numLevels : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def offset(self):
-        return self._ptr[0].offset
-    @offset.setter
-    def offset(self, unsigned long long offset):
-        self._ptr[0].offset = offset
-    @property
-    def formatDesc(self):
-        return self._formatDesc
-    @formatDesc.setter
-    def formatDesc(self, formatDesc not None : cudaChannelFormatDesc):
-        string.memcpy(&self._ptr[0].formatDesc, <cyruntime.cudaChannelFormatDesc*><void_ptr>formatDesc.getPtr(), sizeof(self._ptr[0].formatDesc))
-    @property
-    def extent(self):
-        return self._extent
-    @extent.setter
-    def extent(self, extent not None : cudaExtent):
-        string.memcpy(&self._ptr[0].extent, <cyruntime.cudaExtent*><void_ptr>extent.getPtr(), sizeof(self._ptr[0].extent))
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def numLevels(self):
-        return self._ptr[0].numLevels
-    @numLevels.setter
-    def numLevels(self, unsigned int numLevels):
-        self._ptr[0].numLevels = numLevels
-{{endif}}
-{{if 'struct cudaExternalSemaphoreHandleDesc' in found_types}}
-
-cdef class anon_struct6:
-    """
-    Attributes
-    ----------
-    handle : Any
-
-    name : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].handle.win32
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['handle : ' + hex(self.handle)]
-            except ValueError:
-                str_list += ['handle : <ValueError>']
-            try:
-                str_list += ['name : ' + hex(self.name)]
-            except ValueError:
-                str_list += ['name : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def handle(self):
-        return <void_ptr>self._ptr[0].handle.win32.handle
-    @handle.setter
-    def handle(self, handle):
-        _cyhandle = utils.HelperInputVoidPtr(handle)
-        self._ptr[0].handle.win32.handle = <void*><void_ptr>_cyhandle.cptr
-    @property
-    def name(self):
-        return <void_ptr>self._ptr[0].handle.win32.name
-    @name.setter
-    def name(self, name):
-        _cyname = utils.HelperInputVoidPtr(name)
-        self._ptr[0].handle.win32.name = <void*><void_ptr>_cyname.cptr
-{{endif}}
-{{if 'struct cudaExternalSemaphoreHandleDesc' in found_types}}
-
-cdef class anon_union2:
-    """
-    Attributes
-    ----------
-    fd : int
-
-    win32 : anon_struct6
-
-    nvSciSyncObj : Any
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._win32 = anon_struct6(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].handle
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fd : ' + str(self.fd)]
-            except ValueError:
-                str_list += ['fd : <ValueError>']
-            try:
-                str_list += ['win32 :\n' + '\n'.join(['    ' + line for line in str(self.win32).splitlines()])]
-            except ValueError:
-                str_list += ['win32 : <ValueError>']
-            try:
-                str_list += ['nvSciSyncObj : ' + hex(self.nvSciSyncObj)]
-            except ValueError:
-                str_list += ['nvSciSyncObj : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fd(self):
-        return self._ptr[0].handle.fd
-    @fd.setter
-    def fd(self, int fd):
-        self._ptr[0].handle.fd = fd
-    @property
-    def win32(self):
-        return self._win32
-    @win32.setter
-    def win32(self, win32 not None : anon_struct6):
-        string.memcpy(&self._ptr[0].handle.win32, <cyruntime.anon_struct6*><void_ptr>win32.getPtr(), sizeof(self._ptr[0].handle.win32))
-    @property
-    def nvSciSyncObj(self):
-        return <void_ptr>self._ptr[0].handle.nvSciSyncObj
-    @nvSciSyncObj.setter
-    def nvSciSyncObj(self, nvSciSyncObj):
-        _cynvSciSyncObj = utils.HelperInputVoidPtr(nvSciSyncObj)
-        self._ptr[0].handle.nvSciSyncObj = <void*><void_ptr>_cynvSciSyncObj.cptr
-{{endif}}
-{{if 'struct cudaExternalSemaphoreHandleDesc' in found_types}}
-
-cdef class cudaExternalSemaphoreHandleDesc:
-    """
-    External semaphore handle descriptor
-
-    Attributes
-    ----------
-    type : cudaExternalSemaphoreHandleType
-        Type of the handle
-    handle : anon_union2
-
-    flags : unsigned int
-        Flags reserved for the future. Must be zero.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>calloc(1, sizeof(cyruntime.cudaExternalSemaphoreHandleDesc))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._handle = anon_union2(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['handle :\n' + '\n'.join(['    ' + line for line in str(self.handle).splitlines()])]
-            except ValueError:
-                str_list += ['handle : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return cudaExternalSemaphoreHandleType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : cudaExternalSemaphoreHandleType):
-        self._ptr[0].type = type.value
-    @property
-    def handle(self):
-        return self._handle
-    @handle.setter
-    def handle(self, handle not None : anon_union2):
-        string.memcpy(&self._ptr[0].handle, <cyruntime.anon_union2*><void_ptr>handle.getPtr(), sizeof(self._ptr[0].handle))
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalParams' in found_types}}
-
-cdef class anon_struct13:
-    """
-    Attributes
-    ----------
-    value : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.fence
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['value : ' + str(self.value)]
-            except ValueError:
-                str_list += ['value : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def value(self):
-        return self._ptr[0].params.fence.value
-    @value.setter
-    def value(self, unsigned long long value):
-        self._ptr[0].params.fence.value = value
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalParams' in found_types}}
-
-cdef class anon_union5:
-    """
-    Attributes
-    ----------
-    fence : Any
-
-    reserved : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.nvSciSync
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fence : ' + hex(self.fence)]
-            except ValueError:
-                str_list += ['fence : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fence(self):
-        return <void_ptr>self._ptr[0].params.nvSciSync.fence
-    @fence.setter
-    def fence(self, fence):
-        _cyfence = utils.HelperInputVoidPtr(fence)
-        self._ptr[0].params.nvSciSync.fence = <void*><void_ptr>_cyfence.cptr
-    @property
-    def reserved(self):
-        return self._ptr[0].params.nvSciSync.reserved
-    @reserved.setter
-    def reserved(self, unsigned long long reserved):
-        self._ptr[0].params.nvSciSync.reserved = reserved
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalParams' in found_types}}
-
-cdef class anon_struct14:
-    """
-    Attributes
-    ----------
-    key : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.keyedMutex
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['key : ' + str(self.key)]
-            except ValueError:
-                str_list += ['key : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def key(self):
-        return self._ptr[0].params.keyedMutex.key
-    @key.setter
-    def key(self, unsigned long long key):
-        self._ptr[0].params.keyedMutex.key = key
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalParams' in found_types}}
-
-cdef class anon_struct15:
-    """
-    Attributes
-    ----------
-    fence : anon_struct13
-
-    nvSciSync : anon_union5
-
-    keyedMutex : anon_struct14
-
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._fence = anon_struct13(_ptr=<void_ptr>self._ptr)
-        self._nvSciSync = anon_union5(_ptr=<void_ptr>self._ptr)
-        self._keyedMutex = anon_struct14(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fence :\n' + '\n'.join(['    ' + line for line in str(self.fence).splitlines()])]
-            except ValueError:
-                str_list += ['fence : <ValueError>']
-            try:
-                str_list += ['nvSciSync :\n' + '\n'.join(['    ' + line for line in str(self.nvSciSync).splitlines()])]
-            except ValueError:
-                str_list += ['nvSciSync : <ValueError>']
-            try:
-                str_list += ['keyedMutex :\n' + '\n'.join(['    ' + line for line in str(self.keyedMutex).splitlines()])]
-            except ValueError:
-                str_list += ['keyedMutex : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fence(self):
-        return self._fence
-    @fence.setter
-    def fence(self, fence not None : anon_struct13):
-        string.memcpy(&self._ptr[0].params.fence, <cyruntime.anon_struct13*><void_ptr>fence.getPtr(), sizeof(self._ptr[0].params.fence))
-    @property
-    def nvSciSync(self):
-        return self._nvSciSync
-    @nvSciSync.setter
-    def nvSciSync(self, nvSciSync not None : anon_union5):
-        string.memcpy(&self._ptr[0].params.nvSciSync, <cyruntime.anon_union5*><void_ptr>nvSciSync.getPtr(), sizeof(self._ptr[0].params.nvSciSync))
-    @property
-    def keyedMutex(self):
-        return self._keyedMutex
-    @keyedMutex.setter
-    def keyedMutex(self, keyedMutex not None : anon_struct14):
-        string.memcpy(&self._ptr[0].params.keyedMutex, <cyruntime.anon_struct14*><void_ptr>keyedMutex.getPtr(), sizeof(self._ptr[0].params.keyedMutex))
-    @property
-    def reserved(self):
-        return self._ptr[0].params.reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].params.reserved = reserved
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalParams' in found_types}}
-
-cdef class cudaExternalSemaphoreSignalParams:
-    """
-    External semaphore signal parameters, compatible with driver type
-
-    Attributes
-    ----------
-    params : anon_struct15
-
-    flags : unsigned int
-        Only when cudaExternalSemaphoreSignalParams is used to signal a
-        cudaExternalSemaphore_t of type
-        cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
-        cudaExternalSemaphoreSignalSkipNvSciBufMemSync: which indicates
-        that while signaling the cudaExternalSemaphore_t, no memory
-        synchronization operations should be performed for any external
-        memory object imported as cudaExternalMemoryHandleTypeNvSciBuf. For
-        all other types of cudaExternalSemaphore_t, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._params = anon_struct15(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['params :\n' + '\n'.join(['    ' + line for line in str(self.params).splitlines()])]
-            except ValueError:
-                str_list += ['params : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def params(self):
-        return self._params
-    @params.setter
-    def params(self, params not None : anon_struct15):
-        string.memcpy(&self._ptr[0].params, <cyruntime.anon_struct15*><void_ptr>params.getPtr(), sizeof(self._ptr[0].params))
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitParams' in found_types}}
-
-cdef class anon_struct16:
-    """
-    Attributes
-    ----------
-    value : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.fence
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['value : ' + str(self.value)]
-            except ValueError:
-                str_list += ['value : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def value(self):
-        return self._ptr[0].params.fence.value
-    @value.setter
-    def value(self, unsigned long long value):
-        self._ptr[0].params.fence.value = value
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitParams' in found_types}}
-
-cdef class anon_union6:
-    """
-    Attributes
-    ----------
-    fence : Any
-
-    reserved : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.nvSciSync
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fence : ' + hex(self.fence)]
-            except ValueError:
-                str_list += ['fence : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fence(self):
-        return <void_ptr>self._ptr[0].params.nvSciSync.fence
-    @fence.setter
-    def fence(self, fence):
-        _cyfence = utils.HelperInputVoidPtr(fence)
-        self._ptr[0].params.nvSciSync.fence = <void*><void_ptr>_cyfence.cptr
-    @property
-    def reserved(self):
-        return self._ptr[0].params.nvSciSync.reserved
-    @reserved.setter
-    def reserved(self, unsigned long long reserved):
-        self._ptr[0].params.nvSciSync.reserved = reserved
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitParams' in found_types}}
-
-cdef class anon_struct17:
-    """
-    Attributes
-    ----------
-    key : unsigned long long
-
-    timeoutMs : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params.keyedMutex
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['key : ' + str(self.key)]
-            except ValueError:
-                str_list += ['key : <ValueError>']
-            try:
-                str_list += ['timeoutMs : ' + str(self.timeoutMs)]
-            except ValueError:
-                str_list += ['timeoutMs : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def key(self):
-        return self._ptr[0].params.keyedMutex.key
-    @key.setter
-    def key(self, unsigned long long key):
-        self._ptr[0].params.keyedMutex.key = key
-    @property
-    def timeoutMs(self):
-        return self._ptr[0].params.keyedMutex.timeoutMs
-    @timeoutMs.setter
-    def timeoutMs(self, unsigned int timeoutMs):
-        self._ptr[0].params.keyedMutex.timeoutMs = timeoutMs
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitParams' in found_types}}
-
-cdef class anon_struct18:
-    """
-    Attributes
-    ----------
-    fence : anon_struct16
-
-    nvSciSync : anon_union6
-
-    keyedMutex : anon_struct17
-
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._fence = anon_struct16(_ptr=<void_ptr>self._ptr)
-        self._nvSciSync = anon_union6(_ptr=<void_ptr>self._ptr)
-        self._keyedMutex = anon_struct17(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].params
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['fence :\n' + '\n'.join(['    ' + line for line in str(self.fence).splitlines()])]
-            except ValueError:
-                str_list += ['fence : <ValueError>']
-            try:
-                str_list += ['nvSciSync :\n' + '\n'.join(['    ' + line for line in str(self.nvSciSync).splitlines()])]
-            except ValueError:
-                str_list += ['nvSciSync : <ValueError>']
-            try:
-                str_list += ['keyedMutex :\n' + '\n'.join(['    ' + line for line in str(self.keyedMutex).splitlines()])]
-            except ValueError:
-                str_list += ['keyedMutex : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def fence(self):
-        return self._fence
-    @fence.setter
-    def fence(self, fence not None : anon_struct16):
-        string.memcpy(&self._ptr[0].params.fence, <cyruntime.anon_struct16*><void_ptr>fence.getPtr(), sizeof(self._ptr[0].params.fence))
-    @property
-    def nvSciSync(self):
-        return self._nvSciSync
-    @nvSciSync.setter
-    def nvSciSync(self, nvSciSync not None : anon_union6):
-        string.memcpy(&self._ptr[0].params.nvSciSync, <cyruntime.anon_union6*><void_ptr>nvSciSync.getPtr(), sizeof(self._ptr[0].params.nvSciSync))
-    @property
-    def keyedMutex(self):
-        return self._keyedMutex
-    @keyedMutex.setter
-    def keyedMutex(self, keyedMutex not None : anon_struct17):
-        string.memcpy(&self._ptr[0].params.keyedMutex, <cyruntime.anon_struct17*><void_ptr>keyedMutex.getPtr(), sizeof(self._ptr[0].params.keyedMutex))
-    @property
-    def reserved(self):
-        return self._ptr[0].params.reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].params.reserved = reserved
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitParams' in found_types}}
-
-cdef class cudaExternalSemaphoreWaitParams:
-    """
-    External semaphore wait parameters, compatible with driver type
-
-    Attributes
-    ----------
-    params : anon_struct18
-
-    flags : unsigned int
-        Only when cudaExternalSemaphoreSignalParams is used to signal a
-        cudaExternalSemaphore_t of type
-        cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
-        cudaExternalSemaphoreSignalSkipNvSciBufMemSync: which indicates
-        that while waiting for the cudaExternalSemaphore_t, no memory
-        synchronization operations should be performed for any external
-        memory object imported as cudaExternalMemoryHandleTypeNvSciBuf. For
-        all other types of cudaExternalSemaphore_t, flags must be zero.
-    reserved : List[unsigned int]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._params = anon_struct18(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['params :\n' + '\n'.join(['    ' + line for line in str(self.params).splitlines()])]
-            except ValueError:
-                str_list += ['params : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def params(self):
-        return self._params
-    @params.setter
-    def params(self, params not None : anon_struct18):
-        string.memcpy(&self._ptr[0].params, <cyruntime.anon_struct18*><void_ptr>params.getPtr(), sizeof(self._ptr[0].params))
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned int flags):
-        self._ptr[0].flags = flags
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if 'struct cudaKernelNodeParams' in found_types}}
-
-cdef class cudaKernelNodeParams:
-    """
-    CUDA GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : Any
-        Kernel to launch
-    gridDim : dim3
-        Grid dimensions
-    blockDim : dim3
-        Block dimensions
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to individual kernel arguments
-    extra : Any
-        Pointer to kernel arguments in the "extra" format
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaKernelNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._gridDim = dim3(_ptr=<void_ptr>&self._ptr[0].gridDim)
-        self._blockDim = dim3(_ptr=<void_ptr>&self._ptr[0].blockDim)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['func : ' + hex(self.func)]
-            except ValueError:
-                str_list += ['func : <ValueError>']
-            try:
-                str_list += ['gridDim :\n' + '\n'.join(['    ' + line for line in str(self.gridDim).splitlines()])]
-            except ValueError:
-                str_list += ['gridDim : <ValueError>']
-            try:
-                str_list += ['blockDim :\n' + '\n'.join(['    ' + line for line in str(self.blockDim).splitlines()])]
-            except ValueError:
-                str_list += ['blockDim : <ValueError>']
-            try:
-                str_list += ['sharedMemBytes : ' + str(self.sharedMemBytes)]
-            except ValueError:
-                str_list += ['sharedMemBytes : <ValueError>']
-            try:
-                str_list += ['kernelParams : ' + str(self.kernelParams)]
-            except ValueError:
-                str_list += ['kernelParams : <ValueError>']
-            try:
-                str_list += ['extra : ' + str(self.extra)]
-            except ValueError:
-                str_list += ['extra : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def func(self):
-        return <void_ptr>self._ptr[0].func
-    @func.setter
-    def func(self, func):
-        _cyfunc = utils.HelperInputVoidPtr(func)
-        self._ptr[0].func = <void*><void_ptr>_cyfunc.cptr
-    @property
-    def gridDim(self):
-        return self._gridDim
-    @gridDim.setter
-    def gridDim(self, gridDim not None : dim3):
-        string.memcpy(&self._ptr[0].gridDim, <cyruntime.dim3*><void_ptr>gridDim.getPtr(), sizeof(self._ptr[0].gridDim))
-    @property
-    def blockDim(self):
-        return self._blockDim
-    @blockDim.setter
-    def blockDim(self, blockDim not None : dim3):
-        string.memcpy(&self._ptr[0].blockDim, <cyruntime.dim3*><void_ptr>blockDim.getPtr(), sizeof(self._ptr[0].blockDim))
-    @property
-    def sharedMemBytes(self):
-        return self._ptr[0].sharedMemBytes
-    @sharedMemBytes.setter
-    def sharedMemBytes(self, unsigned int sharedMemBytes):
-        self._ptr[0].sharedMemBytes = sharedMemBytes
-    @property
-    def kernelParams(self):
-        return <void_ptr>self._ptr[0].kernelParams
-    @kernelParams.setter
-    def kernelParams(self, kernelParams):
-        self._cykernelParams = utils.HelperKernelParams(kernelParams)
-        self._ptr[0].kernelParams = <void**><void_ptr>self._cykernelParams.ckernelParams
-    @property
-    def extra(self):
-        return <void_ptr>self._ptr[0].extra
-    @extra.setter
-    def extra(self, void_ptr extra):
-        self._ptr[0].extra = <void**>extra
-{{endif}}
-{{if 'struct cudaKernelNodeParamsV2' in found_types}}
-
-cdef class cudaKernelNodeParamsV2:
-    """
-    CUDA GPU kernel node parameters
-
-    Attributes
-    ----------
-    func : Any
-        Kernel to launch
-    gridDim : dim3
-        Grid dimensions
-    blockDim : dim3
-        Block dimensions
-    sharedMemBytes : unsigned int
-        Dynamic shared-memory size per thread block in bytes
-    kernelParams : Any
-        Array of pointers to individual kernel arguments
-    extra : Any
-        Pointer to kernel arguments in the "extra" format
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaKernelNodeParamsV2 *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._gridDim = dim3(_ptr=<void_ptr>&self._ptr[0].gridDim)
-        self._blockDim = dim3(_ptr=<void_ptr>&self._ptr[0].blockDim)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['func : ' + hex(self.func)]
-            except ValueError:
-                str_list += ['func : <ValueError>']
-            try:
-                str_list += ['gridDim :\n' + '\n'.join(['    ' + line for line in str(self.gridDim).splitlines()])]
-            except ValueError:
-                str_list += ['gridDim : <ValueError>']
-            try:
-                str_list += ['blockDim :\n' + '\n'.join(['    ' + line for line in str(self.blockDim).splitlines()])]
-            except ValueError:
-                str_list += ['blockDim : <ValueError>']
-            try:
-                str_list += ['sharedMemBytes : ' + str(self.sharedMemBytes)]
-            except ValueError:
-                str_list += ['sharedMemBytes : <ValueError>']
-            try:
-                str_list += ['kernelParams : ' + str(self.kernelParams)]
-            except ValueError:
-                str_list += ['kernelParams : <ValueError>']
-            try:
-                str_list += ['extra : ' + str(self.extra)]
-            except ValueError:
-                str_list += ['extra : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def func(self):
-        return <void_ptr>self._ptr[0].func
-    @func.setter
-    def func(self, func):
-        _cyfunc = utils.HelperInputVoidPtr(func)
-        self._ptr[0].func = <void*><void_ptr>_cyfunc.cptr
-    @property
-    def gridDim(self):
-        return self._gridDim
-    @gridDim.setter
-    def gridDim(self, gridDim not None : dim3):
-        string.memcpy(&self._ptr[0].gridDim, <cyruntime.dim3*><void_ptr>gridDim.getPtr(), sizeof(self._ptr[0].gridDim))
-    @property
-    def blockDim(self):
-        return self._blockDim
-    @blockDim.setter
-    def blockDim(self, blockDim not None : dim3):
-        string.memcpy(&self._ptr[0].blockDim, <cyruntime.dim3*><void_ptr>blockDim.getPtr(), sizeof(self._ptr[0].blockDim))
-    @property
-    def sharedMemBytes(self):
-        return self._ptr[0].sharedMemBytes
-    @sharedMemBytes.setter
-    def sharedMemBytes(self, unsigned int sharedMemBytes):
-        self._ptr[0].sharedMemBytes = sharedMemBytes
-    @property
-    def kernelParams(self):
-        return <void_ptr>self._ptr[0].kernelParams
-    @kernelParams.setter
-    def kernelParams(self, kernelParams):
-        self._cykernelParams = utils.HelperKernelParams(kernelParams)
-        self._ptr[0].kernelParams = <void**><void_ptr>self._cykernelParams.ckernelParams
-    @property
-    def extra(self):
-        return <void_ptr>self._ptr[0].extra
-    @extra.setter
-    def extra(self, void_ptr extra):
-        self._ptr[0].extra = <void**>extra
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalNodeParams' in found_types}}
-
-cdef class cudaExternalSemaphoreSignalNodeParams:
-    """
-    External semaphore signal node parameters
-
-    Attributes
-    ----------
-    extSemArray : cudaExternalSemaphore_t
-        Array of external semaphore handles.
-    paramsArray : cudaExternalSemaphoreSignalParams
-        Array of external semaphore signal parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaExternalSemaphoreSignalNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        if self._extSemArray is not NULL:
-            free(self._extSemArray)
-        if self._paramsArray is not NULL:
-            free(self._paramsArray)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['extSemArray : ' + str(self.extSemArray)]
-            except ValueError:
-                str_list += ['extSemArray : <ValueError>']
-            try:
-                str_list += ['paramsArray : ' + str(self.paramsArray)]
-            except ValueError:
-                str_list += ['paramsArray : <ValueError>']
-            try:
-                str_list += ['numExtSems : ' + str(self.numExtSems)]
-            except ValueError:
-                str_list += ['numExtSems : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def extSemArray(self):
-        arrs = [<void_ptr>self._ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
-        return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
-    @extSemArray.setter
-    def extSemArray(self, val):
-        if len(val) == 0:
-            free(self._extSemArray)
-            self._extSemArray_length = 0
-            self._ptr[0].extSemArray = NULL
-        else:
-            if self._extSemArray_length != <size_t>len(val):
-                free(self._extSemArray)
-                self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
-                if self._extSemArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
-                self._extSemArray_length = <size_t>len(val)
-                self._ptr[0].extSemArray = self._extSemArray
-            for idx in range(len(val)):
-                self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._ptr[0]
-
-    @property
-    def paramsArray(self):
-        arrs = [<void_ptr>self._ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreSignalParams) for x in range(self._paramsArray_length)]
-        return [cudaExternalSemaphoreSignalParams(_ptr=arr) for arr in arrs]
-    @paramsArray.setter
-    def paramsArray(self, val):
-        if len(val) == 0:
-            free(self._paramsArray)
-            self._paramsArray_length = 0
-            self._ptr[0].paramsArray = NULL
-        else:
-            if self._paramsArray_length != <size_t>len(val):
-                free(self._paramsArray)
-                self._paramsArray = <cyruntime.cudaExternalSemaphoreSignalParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
-                if self._paramsArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams)))
-                self._paramsArray_length = <size_t>len(val)
-                self._ptr[0].paramsArray = self._paramsArray
-            for idx in range(len(val)):
-                string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreSignalParams>val[idx])._ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
-
-    @property
-    def numExtSems(self):
-        return self._ptr[0].numExtSems
-    @numExtSems.setter
-    def numExtSems(self, unsigned int numExtSems):
-        self._ptr[0].numExtSems = numExtSems
-{{endif}}
-{{if 'struct cudaExternalSemaphoreSignalNodeParamsV2' in found_types}}
-
-cdef class cudaExternalSemaphoreSignalNodeParamsV2:
-    """
-    External semaphore signal node parameters
-
-    Attributes
-    ----------
-    extSemArray : cudaExternalSemaphore_t
-        Array of external semaphore handles.
-    paramsArray : cudaExternalSemaphoreSignalParams
-        Array of external semaphore signal parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaExternalSemaphoreSignalNodeParamsV2 *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        if self._extSemArray is not NULL:
-            free(self._extSemArray)
-        if self._paramsArray is not NULL:
-            free(self._paramsArray)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['extSemArray : ' + str(self.extSemArray)]
-            except ValueError:
-                str_list += ['extSemArray : <ValueError>']
-            try:
-                str_list += ['paramsArray : ' + str(self.paramsArray)]
-            except ValueError:
-                str_list += ['paramsArray : <ValueError>']
-            try:
-                str_list += ['numExtSems : ' + str(self.numExtSems)]
-            except ValueError:
-                str_list += ['numExtSems : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def extSemArray(self):
-        arrs = [<void_ptr>self._ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
-        return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
-    @extSemArray.setter
-    def extSemArray(self, val):
-        if len(val) == 0:
-            free(self._extSemArray)
-            self._extSemArray_length = 0
-            self._ptr[0].extSemArray = NULL
-        else:
-            if self._extSemArray_length != <size_t>len(val):
-                free(self._extSemArray)
-                self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
-                if self._extSemArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
-                self._extSemArray_length = <size_t>len(val)
-                self._ptr[0].extSemArray = self._extSemArray
-            for idx in range(len(val)):
-                self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._ptr[0]
-
-    @property
-    def paramsArray(self):
-        arrs = [<void_ptr>self._ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreSignalParams) for x in range(self._paramsArray_length)]
-        return [cudaExternalSemaphoreSignalParams(_ptr=arr) for arr in arrs]
-    @paramsArray.setter
-    def paramsArray(self, val):
-        if len(val) == 0:
-            free(self._paramsArray)
-            self._paramsArray_length = 0
-            self._ptr[0].paramsArray = NULL
-        else:
-            if self._paramsArray_length != <size_t>len(val):
-                free(self._paramsArray)
-                self._paramsArray = <cyruntime.cudaExternalSemaphoreSignalParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
-                if self._paramsArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams)))
-                self._paramsArray_length = <size_t>len(val)
-                self._ptr[0].paramsArray = self._paramsArray
-            for idx in range(len(val)):
-                string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreSignalParams>val[idx])._ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
-
-    @property
-    def numExtSems(self):
-        return self._ptr[0].numExtSems
-    @numExtSems.setter
-    def numExtSems(self, unsigned int numExtSems):
-        self._ptr[0].numExtSems = numExtSems
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitNodeParams' in found_types}}
-
-cdef class cudaExternalSemaphoreWaitNodeParams:
-    """
-    External semaphore wait node parameters
-
-    Attributes
-    ----------
-    extSemArray : cudaExternalSemaphore_t
-        Array of external semaphore handles.
-    paramsArray : cudaExternalSemaphoreWaitParams
-        Array of external semaphore wait parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaExternalSemaphoreWaitNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        if self._extSemArray is not NULL:
-            free(self._extSemArray)
-        if self._paramsArray is not NULL:
-            free(self._paramsArray)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['extSemArray : ' + str(self.extSemArray)]
-            except ValueError:
-                str_list += ['extSemArray : <ValueError>']
-            try:
-                str_list += ['paramsArray : ' + str(self.paramsArray)]
-            except ValueError:
-                str_list += ['paramsArray : <ValueError>']
-            try:
-                str_list += ['numExtSems : ' + str(self.numExtSems)]
-            except ValueError:
-                str_list += ['numExtSems : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def extSemArray(self):
-        arrs = [<void_ptr>self._ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
-        return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
-    @extSemArray.setter
-    def extSemArray(self, val):
-        if len(val) == 0:
-            free(self._extSemArray)
-            self._extSemArray_length = 0
-            self._ptr[0].extSemArray = NULL
-        else:
-            if self._extSemArray_length != <size_t>len(val):
-                free(self._extSemArray)
-                self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
-                if self._extSemArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
-                self._extSemArray_length = <size_t>len(val)
-                self._ptr[0].extSemArray = self._extSemArray
-            for idx in range(len(val)):
-                self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._ptr[0]
-
-    @property
-    def paramsArray(self):
-        arrs = [<void_ptr>self._ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreWaitParams) for x in range(self._paramsArray_length)]
-        return [cudaExternalSemaphoreWaitParams(_ptr=arr) for arr in arrs]
-    @paramsArray.setter
-    def paramsArray(self, val):
-        if len(val) == 0:
-            free(self._paramsArray)
-            self._paramsArray_length = 0
-            self._ptr[0].paramsArray = NULL
-        else:
-            if self._paramsArray_length != <size_t>len(val):
-                free(self._paramsArray)
-                self._paramsArray = <cyruntime.cudaExternalSemaphoreWaitParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
-                if self._paramsArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams)))
-                self._paramsArray_length = <size_t>len(val)
-                self._ptr[0].paramsArray = self._paramsArray
-            for idx in range(len(val)):
-                string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreWaitParams>val[idx])._ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
-
-    @property
-    def numExtSems(self):
-        return self._ptr[0].numExtSems
-    @numExtSems.setter
-    def numExtSems(self, unsigned int numExtSems):
-        self._ptr[0].numExtSems = numExtSems
-{{endif}}
-{{if 'struct cudaExternalSemaphoreWaitNodeParamsV2' in found_types}}
-
-cdef class cudaExternalSemaphoreWaitNodeParamsV2:
-    """
-    External semaphore wait node parameters
-
-    Attributes
-    ----------
-    extSemArray : cudaExternalSemaphore_t
-        Array of external semaphore handles.
-    paramsArray : cudaExternalSemaphoreWaitParams
-        Array of external semaphore wait parameters.
-    numExtSems : unsigned int
-        Number of handles and parameters supplied in extSemArray and
-        paramsArray.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaExternalSemaphoreWaitNodeParamsV2 *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        if self._extSemArray is not NULL:
-            free(self._extSemArray)
-        if self._paramsArray is not NULL:
-            free(self._paramsArray)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['extSemArray : ' + str(self.extSemArray)]
-            except ValueError:
-                str_list += ['extSemArray : <ValueError>']
-            try:
-                str_list += ['paramsArray : ' + str(self.paramsArray)]
-            except ValueError:
-                str_list += ['paramsArray : <ValueError>']
-            try:
-                str_list += ['numExtSems : ' + str(self.numExtSems)]
-            except ValueError:
-                str_list += ['numExtSems : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def extSemArray(self):
-        arrs = [<void_ptr>self._ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
-        return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
-    @extSemArray.setter
-    def extSemArray(self, val):
-        if len(val) == 0:
-            free(self._extSemArray)
-            self._extSemArray_length = 0
-            self._ptr[0].extSemArray = NULL
-        else:
-            if self._extSemArray_length != <size_t>len(val):
-                free(self._extSemArray)
-                self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
-                if self._extSemArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
-                self._extSemArray_length = <size_t>len(val)
-                self._ptr[0].extSemArray = self._extSemArray
-            for idx in range(len(val)):
-                self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._ptr[0]
-
-    @property
-    def paramsArray(self):
-        arrs = [<void_ptr>self._ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreWaitParams) for x in range(self._paramsArray_length)]
-        return [cudaExternalSemaphoreWaitParams(_ptr=arr) for arr in arrs]
-    @paramsArray.setter
-    def paramsArray(self, val):
-        if len(val) == 0:
-            free(self._paramsArray)
-            self._paramsArray_length = 0
-            self._ptr[0].paramsArray = NULL
-        else:
-            if self._paramsArray_length != <size_t>len(val):
-                free(self._paramsArray)
-                self._paramsArray = <cyruntime.cudaExternalSemaphoreWaitParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
-                if self._paramsArray is NULL:
-                    raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams)))
-                self._paramsArray_length = <size_t>len(val)
-                self._ptr[0].paramsArray = self._paramsArray
-            for idx in range(len(val)):
-                string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreWaitParams>val[idx])._ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
-
-    @property
-    def numExtSems(self):
-        return self._ptr[0].numExtSems
-    @numExtSems.setter
-    def numExtSems(self, unsigned int numExtSems):
-        self._ptr[0].numExtSems = numExtSems
-{{endif}}
-{{if 'struct cudaConditionalNodeParams' in found_types}}
-
-cdef class cudaConditionalNodeParams:
-    """
-    CUDA conditional node parameters
-
-    Attributes
-    ----------
-    handle : cudaGraphConditionalHandle
-        Conditional node handle. Handles must be created in advance of
-        creating the node using cudaGraphConditionalHandleCreate.
-    type : cudaGraphConditionalNodeType
-        Type of conditional node.
-    size : unsigned int
-        Size of graph output array. Must be 1.
-    phGraph_out : cudaGraph_t
-        CUDA-owned array populated with conditional node child graphs
-        during creation of the node. Valid for the lifetime of the
-        conditional node. The contents of the graph(s) are subject to the
-        following constraints:   - Allowed node types are kernel nodes,
-        empty nodes, child graphs, memsets, memcopies, and conditionals.
-        This applies recursively to child graphs and conditional bodies.
-        - All kernels, including kernels in nested conditionals or child
-        graphs at any level, must belong to the same CUDA context.
-        These graphs may be populated using graph node creation APIs or
-        cudaStreamBeginCaptureToGraph.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaConditionalNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._handle = cudaGraphConditionalHandle(_ptr=<void_ptr>&self._ptr[0].handle)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['handle : ' + str(self.handle)]
-            except ValueError:
-                str_list += ['handle : <ValueError>']
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['size : ' + str(self.size)]
-            except ValueError:
-                str_list += ['size : <ValueError>']
-            try:
-                str_list += ['phGraph_out : ' + str(self.phGraph_out)]
-            except ValueError:
-                str_list += ['phGraph_out : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def handle(self):
-        return self._handle
-    @handle.setter
-    def handle(self, handle):
-        cdef cyruntime.cudaGraphConditionalHandle cyhandle
-        if handle is None:
-            cyhandle = <cyruntime.cudaGraphConditionalHandle><void_ptr>0
-        elif isinstance(handle, (cudaGraphConditionalHandle)):
-            phandle = int(handle)
-            cyhandle = <cyruntime.cudaGraphConditionalHandle><void_ptr>phandle
-        else:
-            phandle = int(cudaGraphConditionalHandle(handle))
-            cyhandle = <cyruntime.cudaGraphConditionalHandle><void_ptr>phandle
-        self._handle._ptr[0] = cyhandle
-
-    @property
-    def type(self):
-        return cudaGraphConditionalNodeType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : cudaGraphConditionalNodeType):
-        self._ptr[0].type = type.value
-    @property
-    def size(self):
-        return self._ptr[0].size
-    @size.setter
-    def size(self, unsigned int size):
-        self._ptr[0].size = size
-    @property
-    def phGraph_out(self):
-        arrs = [<void_ptr>self._ptr[0].phGraph_out + x*sizeof(cyruntime.cudaGraph_t) for x in range(self.size)]
-        return [cudaGraph_t(_ptr=arr) for arr in arrs]
-{{endif}}
-{{if 'struct cudaChildGraphNodeParams' in found_types}}
-
-cdef class cudaChildGraphNodeParams:
-    """
-    Child graph node parameters
-
-    Attributes
-    ----------
-    graph : cudaGraph_t
-        The child graph to clone into the node for node creation, or a
-        handle to the graph owned by the node for node query
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaChildGraphNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._graph = cudaGraph_t(_ptr=<void_ptr>&self._ptr[0].graph)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['graph : ' + str(self.graph)]
-            except ValueError:
-                str_list += ['graph : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def graph(self):
-        return self._graph
-    @graph.setter
-    def graph(self, graph):
-        cdef cyruntime.cudaGraph_t cygraph
-        if graph is None:
-            cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-        elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-            pgraph = int(graph)
-            cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-        else:
-            pgraph = int(cudaGraph_t(graph))
-            cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-        self._graph._ptr[0] = cygraph
-{{endif}}
-{{if 'struct cudaEventRecordNodeParams' in found_types}}
-
-cdef class cudaEventRecordNodeParams:
-    """
-    Event record node parameters
-
-    Attributes
-    ----------
-    event : cudaEvent_t
-        The event to record when the node executes
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaEventRecordNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._event = cudaEvent_t(_ptr=<void_ptr>&self._ptr[0].event)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['event : ' + str(self.event)]
-            except ValueError:
-                str_list += ['event : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def event(self):
-        return self._event
-    @event.setter
-    def event(self, event):
-        cdef cyruntime.cudaEvent_t cyevent
-        if event is None:
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-        elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-            pevent = int(event)
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-        else:
-            pevent = int(cudaEvent_t(event))
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-        self._event._ptr[0] = cyevent
-{{endif}}
-{{if 'struct cudaEventWaitNodeParams' in found_types}}
-
-cdef class cudaEventWaitNodeParams:
-    """
-    Event wait node parameters
-
-    Attributes
-    ----------
-    event : cudaEvent_t
-        The event to wait on from the node
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaEventWaitNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._event = cudaEvent_t(_ptr=<void_ptr>&self._ptr[0].event)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['event : ' + str(self.event)]
-            except ValueError:
-                str_list += ['event : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def event(self):
-        return self._event
-    @event.setter
-    def event(self, event):
-        cdef cyruntime.cudaEvent_t cyevent
-        if event is None:
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-        elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-            pevent = int(event)
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-        else:
-            pevent = int(cudaEvent_t(event))
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-        self._event._ptr[0] = cyevent
-{{endif}}
-{{if 'struct cudaGraphNodeParams' in found_types}}
-
-cdef class cudaGraphNodeParams:
-    """
-    Graph node parameters. See cudaGraphAddNode.
-
-    Attributes
-    ----------
-    type : cudaGraphNodeType
-        Type of the node
-    reserved0 : List[int]
-        Reserved. Must be zero.
-    reserved1 : List[long long]
-        Padding. Unused bytes must be zero.
-    kernel : cudaKernelNodeParamsV2
-        Kernel node parameters.
-    memcpy : cudaMemcpyNodeParams
-        Memcpy node parameters.
-    memset : cudaMemsetParamsV2
-        Memset node parameters.
-    host : cudaHostNodeParamsV2
-        Host node parameters.
-    graph : cudaChildGraphNodeParams
-        Child graph node parameters.
-    eventWait : cudaEventWaitNodeParams
-        Event wait node parameters.
-    eventRecord : cudaEventRecordNodeParams
-        Event record node parameters.
-    extSemSignal : cudaExternalSemaphoreSignalNodeParamsV2
-        External semaphore signal node parameters.
-    extSemWait : cudaExternalSemaphoreWaitNodeParamsV2
-        External semaphore wait node parameters.
-    alloc : cudaMemAllocNodeParamsV2
-        Memory allocation node parameters.
-    free : cudaMemFreeNodeParams
-        Memory free node parameters.
-    conditional : cudaConditionalNodeParams
-        Conditional node parameters.
-    reserved2 : long long
-        Reserved bytes. Must be zero.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cyruntime.cudaGraphNodeParams *>calloc(1, sizeof(cyruntime.cudaGraphNodeParams))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cyruntime.cudaGraphNodeParams *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._kernel = cudaKernelNodeParamsV2(_ptr=<void_ptr>&self._ptr[0].kernel)
-        self._memcpy = cudaMemcpyNodeParams(_ptr=<void_ptr>&self._ptr[0].memcpy)
-        self._memset = cudaMemsetParamsV2(_ptr=<void_ptr>&self._ptr[0].memset)
-        self._host = cudaHostNodeParamsV2(_ptr=<void_ptr>&self._ptr[0].host)
-        self._graph = cudaChildGraphNodeParams(_ptr=<void_ptr>&self._ptr[0].graph)
-        self._eventWait = cudaEventWaitNodeParams(_ptr=<void_ptr>&self._ptr[0].eventWait)
-        self._eventRecord = cudaEventRecordNodeParams(_ptr=<void_ptr>&self._ptr[0].eventRecord)
-        self._extSemSignal = cudaExternalSemaphoreSignalNodeParamsV2(_ptr=<void_ptr>&self._ptr[0].extSemSignal)
-        self._extSemWait = cudaExternalSemaphoreWaitNodeParamsV2(_ptr=<void_ptr>&self._ptr[0].extSemWait)
-        self._alloc = cudaMemAllocNodeParamsV2(_ptr=<void_ptr>&self._ptr[0].alloc)
-        self._free = cudaMemFreeNodeParams(_ptr=<void_ptr>&self._ptr[0].free)
-        self._conditional = cudaConditionalNodeParams(_ptr=<void_ptr>&self._ptr[0].conditional)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['reserved0 : ' + str(self.reserved0)]
-            except ValueError:
-                str_list += ['reserved0 : <ValueError>']
-            try:
-                str_list += ['reserved1 : ' + str(self.reserved1)]
-            except ValueError:
-                str_list += ['reserved1 : <ValueError>']
-            try:
-                str_list += ['kernel :\n' + '\n'.join(['    ' + line for line in str(self.kernel).splitlines()])]
-            except ValueError:
-                str_list += ['kernel : <ValueError>']
-            try:
-                str_list += ['memcpy :\n' + '\n'.join(['    ' + line for line in str(self.memcpy).splitlines()])]
-            except ValueError:
-                str_list += ['memcpy : <ValueError>']
-            try:
-                str_list += ['memset :\n' + '\n'.join(['    ' + line for line in str(self.memset).splitlines()])]
-            except ValueError:
-                str_list += ['memset : <ValueError>']
-            try:
-                str_list += ['host :\n' + '\n'.join(['    ' + line for line in str(self.host).splitlines()])]
-            except ValueError:
-                str_list += ['host : <ValueError>']
-            try:
-                str_list += ['graph :\n' + '\n'.join(['    ' + line for line in str(self.graph).splitlines()])]
-            except ValueError:
-                str_list += ['graph : <ValueError>']
-            try:
-                str_list += ['eventWait :\n' + '\n'.join(['    ' + line for line in str(self.eventWait).splitlines()])]
-            except ValueError:
-                str_list += ['eventWait : <ValueError>']
-            try:
-                str_list += ['eventRecord :\n' + '\n'.join(['    ' + line for line in str(self.eventRecord).splitlines()])]
-            except ValueError:
-                str_list += ['eventRecord : <ValueError>']
-            try:
-                str_list += ['extSemSignal :\n' + '\n'.join(['    ' + line for line in str(self.extSemSignal).splitlines()])]
-            except ValueError:
-                str_list += ['extSemSignal : <ValueError>']
-            try:
-                str_list += ['extSemWait :\n' + '\n'.join(['    ' + line for line in str(self.extSemWait).splitlines()])]
-            except ValueError:
-                str_list += ['extSemWait : <ValueError>']
-            try:
-                str_list += ['alloc :\n' + '\n'.join(['    ' + line for line in str(self.alloc).splitlines()])]
-            except ValueError:
-                str_list += ['alloc : <ValueError>']
-            try:
-                str_list += ['free :\n' + '\n'.join(['    ' + line for line in str(self.free).splitlines()])]
-            except ValueError:
-                str_list += ['free : <ValueError>']
-            try:
-                str_list += ['conditional :\n' + '\n'.join(['    ' + line for line in str(self.conditional).splitlines()])]
-            except ValueError:
-                str_list += ['conditional : <ValueError>']
-            try:
-                str_list += ['reserved2 : ' + str(self.reserved2)]
-            except ValueError:
-                str_list += ['reserved2 : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return cudaGraphNodeType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : cudaGraphNodeType):
-        self._ptr[0].type = type.value
-    @property
-    def reserved0(self):
-        return self._ptr[0].reserved0
-    @reserved0.setter
-    def reserved0(self, reserved0):
-        self._ptr[0].reserved0 = reserved0
-    @property
-    def reserved1(self):
-        return self._ptr[0].reserved1
-    @reserved1.setter
-    def reserved1(self, reserved1):
-        self._ptr[0].reserved1 = reserved1
-    @property
-    def kernel(self):
-        return self._kernel
-    @kernel.setter
-    def kernel(self, kernel not None : cudaKernelNodeParamsV2):
-        string.memcpy(&self._ptr[0].kernel, <cyruntime.cudaKernelNodeParamsV2*><void_ptr>kernel.getPtr(), sizeof(self._ptr[0].kernel))
-    @property
-    def memcpy(self):
-        return self._memcpy
-    @memcpy.setter
-    def memcpy(self, memcpy not None : cudaMemcpyNodeParams):
-        string.memcpy(&self._ptr[0].memcpy, <cyruntime.cudaMemcpyNodeParams*><void_ptr>memcpy.getPtr(), sizeof(self._ptr[0].memcpy))
-    @property
-    def memset(self):
-        return self._memset
-    @memset.setter
-    def memset(self, memset not None : cudaMemsetParamsV2):
-        string.memcpy(&self._ptr[0].memset, <cyruntime.cudaMemsetParamsV2*><void_ptr>memset.getPtr(), sizeof(self._ptr[0].memset))
-    @property
-    def host(self):
-        return self._host
-    @host.setter
-    def host(self, host not None : cudaHostNodeParamsV2):
-        string.memcpy(&self._ptr[0].host, <cyruntime.cudaHostNodeParamsV2*><void_ptr>host.getPtr(), sizeof(self._ptr[0].host))
-    @property
-    def graph(self):
-        return self._graph
-    @graph.setter
-    def graph(self, graph not None : cudaChildGraphNodeParams):
-        string.memcpy(&self._ptr[0].graph, <cyruntime.cudaChildGraphNodeParams*><void_ptr>graph.getPtr(), sizeof(self._ptr[0].graph))
-    @property
-    def eventWait(self):
-        return self._eventWait
-    @eventWait.setter
-    def eventWait(self, eventWait not None : cudaEventWaitNodeParams):
-        string.memcpy(&self._ptr[0].eventWait, <cyruntime.cudaEventWaitNodeParams*><void_ptr>eventWait.getPtr(), sizeof(self._ptr[0].eventWait))
-    @property
-    def eventRecord(self):
-        return self._eventRecord
-    @eventRecord.setter
-    def eventRecord(self, eventRecord not None : cudaEventRecordNodeParams):
-        string.memcpy(&self._ptr[0].eventRecord, <cyruntime.cudaEventRecordNodeParams*><void_ptr>eventRecord.getPtr(), sizeof(self._ptr[0].eventRecord))
-    @property
-    def extSemSignal(self):
-        return self._extSemSignal
-    @extSemSignal.setter
-    def extSemSignal(self, extSemSignal not None : cudaExternalSemaphoreSignalNodeParamsV2):
-        string.memcpy(&self._ptr[0].extSemSignal, <cyruntime.cudaExternalSemaphoreSignalNodeParamsV2*><void_ptr>extSemSignal.getPtr(), sizeof(self._ptr[0].extSemSignal))
-    @property
-    def extSemWait(self):
-        return self._extSemWait
-    @extSemWait.setter
-    def extSemWait(self, extSemWait not None : cudaExternalSemaphoreWaitNodeParamsV2):
-        string.memcpy(&self._ptr[0].extSemWait, <cyruntime.cudaExternalSemaphoreWaitNodeParamsV2*><void_ptr>extSemWait.getPtr(), sizeof(self._ptr[0].extSemWait))
-    @property
-    def alloc(self):
-        return self._alloc
-    @alloc.setter
-    def alloc(self, alloc not None : cudaMemAllocNodeParamsV2):
-        string.memcpy(&self._ptr[0].alloc, <cyruntime.cudaMemAllocNodeParamsV2*><void_ptr>alloc.getPtr(), sizeof(self._ptr[0].alloc))
-    @property
-    def free(self):
-        return self._free
-    @free.setter
-    def free(self, free not None : cudaMemFreeNodeParams):
-        string.memcpy(&self._ptr[0].free, <cyruntime.cudaMemFreeNodeParams*><void_ptr>free.getPtr(), sizeof(self._ptr[0].free))
-    @property
-    def conditional(self):
-        return self._conditional
-    @conditional.setter
-    def conditional(self, conditional not None : cudaConditionalNodeParams):
-        string.memcpy(&self._ptr[0].conditional, <cyruntime.cudaConditionalNodeParams*><void_ptr>conditional.getPtr(), sizeof(self._ptr[0].conditional))
-    @property
-    def reserved2(self):
-        return self._ptr[0].reserved2
-    @reserved2.setter
-    def reserved2(self, long long reserved2):
-        self._ptr[0].reserved2 = reserved2
-{{endif}}
-{{if 'struct cudaGraphEdgeData_st' in found_types}}
-
-cdef class cudaGraphEdgeData_st:
-    """
-    Optional annotation for edges in a CUDA graph. Note, all edges
-    implicitly have annotations and default to a zero-initialized value
-    if not specified. A zero-initialized struct indicates a standard
-    full serialization of two nodes with memory visibility.
-
-    Attributes
-    ----------
-    from_port : bytes
-        This indicates when the dependency is triggered from the upstream
-        node on the edge. The meaning is specfic to the node type. A value
-        of 0 in all cases means full completion of the upstream node, with
-        memory visibility to the downstream node or portion thereof
-        (indicated by `to_port`).   Only kernel nodes define non-zero
-        ports. A kernel node can use the following output port types:
-        cudaGraphKernelNodePortDefault,
-        cudaGraphKernelNodePortProgrammatic, or
-        cudaGraphKernelNodePortLaunchCompletion.
-    to_port : bytes
-        This indicates what portion of the downstream node is dependent on
-        the upstream node or portion thereof (indicated by `from_port`).
-        The meaning is specific to the node type. A value of 0 in all cases
-        means the entirety of the downstream node is dependent on the
-        upstream work.   Currently no node types define non-zero ports.
-        Accordingly, this field must be set to zero.
-    type : bytes
-        This should be populated with a value from
-        ::cudaGraphDependencyType. (It is typed as char due to compiler-
-        specific layout of bitfields.) See ::cudaGraphDependencyType.
-    reserved : bytes
-        These bytes are unused and must be zeroed. This ensures
-        compatibility if additional fields are added in the future.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaGraphEdgeData_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['from_port : ' + str(self.from_port)]
-            except ValueError:
-                str_list += ['from_port : <ValueError>']
-            try:
-                str_list += ['to_port : ' + str(self.to_port)]
-            except ValueError:
-                str_list += ['to_port : <ValueError>']
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def from_port(self):
-        return self._ptr[0].from_port
-    @from_port.setter
-    def from_port(self, unsigned char from_port):
-        self._ptr[0].from_port = from_port
-    @property
-    def to_port(self):
-        return self._ptr[0].to_port
-    @to_port.setter
-    def to_port(self, unsigned char to_port):
-        self._ptr[0].to_port = to_port
-    @property
-    def type(self):
-        return self._ptr[0].type
-    @type.setter
-    def type(self, unsigned char type):
-        self._ptr[0].type = type
-    @property
-    def reserved(self):
-        return PyBytes_FromStringAndSize(<char*>self._ptr[0].reserved, 5)
-    @reserved.setter
-    def reserved(self, reserved):
-        if len(reserved) != 5:
-            raise ValueError("reserved length must be 5, is " + str(len(reserved)))
-        for i, b in enumerate(reserved):
-            self._ptr[0].reserved[i] = b
-{{endif}}
-{{if 'struct cudaGraphInstantiateParams_st' in found_types}}
-
-cdef class cudaGraphInstantiateParams_st:
-    """
-    Graph instantiation parameters
-
-    Attributes
-    ----------
-    flags : unsigned long long
-        Instantiation flags
-    uploadStream : cudaStream_t
-        Upload stream
-    errNode_out : cudaGraphNode_t
-        The node which caused instantiation to fail, if any
-    result_out : cudaGraphInstantiateResult
-        Whether instantiation was successful. If it failed, the reason why
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaGraphInstantiateParams_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._uploadStream = cudaStream_t(_ptr=<void_ptr>&self._ptr[0].uploadStream)
-        self._errNode_out = cudaGraphNode_t(_ptr=<void_ptr>&self._ptr[0].errNode_out)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['uploadStream : ' + str(self.uploadStream)]
-            except ValueError:
-                str_list += ['uploadStream : <ValueError>']
-            try:
-                str_list += ['errNode_out : ' + str(self.errNode_out)]
-            except ValueError:
-                str_list += ['errNode_out : <ValueError>']
-            try:
-                str_list += ['result_out : ' + str(self.result_out)]
-            except ValueError:
-                str_list += ['result_out : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def flags(self):
-        return self._ptr[0].flags
-    @flags.setter
-    def flags(self, unsigned long long flags):
-        self._ptr[0].flags = flags
-    @property
-    def uploadStream(self):
-        return self._uploadStream
-    @uploadStream.setter
-    def uploadStream(self, uploadStream):
-        cdef cyruntime.cudaStream_t cyuploadStream
-        if uploadStream is None:
-            cyuploadStream = <cyruntime.cudaStream_t><void_ptr>0
-        elif isinstance(uploadStream, (cudaStream_t,driver.CUstream)):
-            puploadStream = int(uploadStream)
-            cyuploadStream = <cyruntime.cudaStream_t><void_ptr>puploadStream
-        else:
-            puploadStream = int(cudaStream_t(uploadStream))
-            cyuploadStream = <cyruntime.cudaStream_t><void_ptr>puploadStream
-        self._uploadStream._ptr[0] = cyuploadStream
-    @property
-    def errNode_out(self):
-        return self._errNode_out
-    @errNode_out.setter
-    def errNode_out(self, errNode_out):
-        cdef cyruntime.cudaGraphNode_t cyerrNode_out
-        if errNode_out is None:
-            cyerrNode_out = <cyruntime.cudaGraphNode_t><void_ptr>0
-        elif isinstance(errNode_out, (cudaGraphNode_t,driver.CUgraphNode)):
-            perrNode_out = int(errNode_out)
-            cyerrNode_out = <cyruntime.cudaGraphNode_t><void_ptr>perrNode_out
-        else:
-            perrNode_out = int(cudaGraphNode_t(errNode_out))
-            cyerrNode_out = <cyruntime.cudaGraphNode_t><void_ptr>perrNode_out
-        self._errNode_out._ptr[0] = cyerrNode_out
-    @property
-    def result_out(self):
-        return cudaGraphInstantiateResult(self._ptr[0].result_out)
-    @result_out.setter
-    def result_out(self, result_out not None : cudaGraphInstantiateResult):
-        self._ptr[0].result_out = result_out.value
-{{endif}}
-{{if 'struct cudaGraphExecUpdateResultInfo_st' in found_types}}
-
-cdef class cudaGraphExecUpdateResultInfo_st:
-    """
-    Result information returned by cudaGraphExecUpdate
-
-    Attributes
-    ----------
-    result : cudaGraphExecUpdateResult
-        Gives more specific detail when a cuda graph update fails.
-    errorNode : cudaGraphNode_t
-        The "to node" of the error edge when the topologies do not match.
-        The error node when the error is associated with a specific node.
-        NULL when the error is generic.
-    errorFromNode : cudaGraphNode_t
-        The from node of error edge when the topologies do not match.
-        Otherwise NULL.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaGraphExecUpdateResultInfo_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._errorNode = cudaGraphNode_t(_ptr=<void_ptr>&self._ptr[0].errorNode)
-        self._errorFromNode = cudaGraphNode_t(_ptr=<void_ptr>&self._ptr[0].errorFromNode)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['result : ' + str(self.result)]
-            except ValueError:
-                str_list += ['result : <ValueError>']
-            try:
-                str_list += ['errorNode : ' + str(self.errorNode)]
-            except ValueError:
-                str_list += ['errorNode : <ValueError>']
-            try:
-                str_list += ['errorFromNode : ' + str(self.errorFromNode)]
-            except ValueError:
-                str_list += ['errorFromNode : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def result(self):
-        return cudaGraphExecUpdateResult(self._ptr[0].result)
-    @result.setter
-    def result(self, result not None : cudaGraphExecUpdateResult):
-        self._ptr[0].result = result.value
-    @property
-    def errorNode(self):
-        return self._errorNode
-    @errorNode.setter
-    def errorNode(self, errorNode):
-        cdef cyruntime.cudaGraphNode_t cyerrorNode
-        if errorNode is None:
-            cyerrorNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-        elif isinstance(errorNode, (cudaGraphNode_t,driver.CUgraphNode)):
-            perrorNode = int(errorNode)
-            cyerrorNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorNode
-        else:
-            perrorNode = int(cudaGraphNode_t(errorNode))
-            cyerrorNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorNode
-        self._errorNode._ptr[0] = cyerrorNode
-    @property
-    def errorFromNode(self):
-        return self._errorFromNode
-    @errorFromNode.setter
-    def errorFromNode(self, errorFromNode):
-        cdef cyruntime.cudaGraphNode_t cyerrorFromNode
-        if errorFromNode is None:
-            cyerrorFromNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-        elif isinstance(errorFromNode, (cudaGraphNode_t,driver.CUgraphNode)):
-            perrorFromNode = int(errorFromNode)
-            cyerrorFromNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorFromNode
-        else:
-            perrorFromNode = int(cudaGraphNode_t(errorFromNode))
-            cyerrorFromNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorFromNode
-        self._errorFromNode._ptr[0] = cyerrorFromNode
-{{endif}}
-{{if 'struct cudaGraphKernelNodeUpdate' in found_types}}
-
-cdef class anon_struct19:
-    """
-    Attributes
-    ----------
-    pValue : Any
-
-    offset : size_t
-
-    size : size_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaGraphKernelNodeUpdate *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].updateData.param
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['pValue : ' + hex(self.pValue)]
-            except ValueError:
-                str_list += ['pValue : <ValueError>']
-            try:
-                str_list += ['offset : ' + str(self.offset)]
-            except ValueError:
-                str_list += ['offset : <ValueError>']
-            try:
-                str_list += ['size : ' + str(self.size)]
-            except ValueError:
-                str_list += ['size : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def pValue(self):
-        return <void_ptr>self._ptr[0].updateData.param.pValue
-    @pValue.setter
-    def pValue(self, pValue):
-        _cypValue = utils.HelperInputVoidPtr(pValue)
-        self._ptr[0].updateData.param.pValue = <void*><void_ptr>_cypValue.cptr
-    @property
-    def offset(self):
-        return self._ptr[0].updateData.param.offset
-    @offset.setter
-    def offset(self, size_t offset):
-        self._ptr[0].updateData.param.offset = offset
-    @property
-    def size(self):
-        return self._ptr[0].updateData.param.size
-    @size.setter
-    def size(self, size_t size):
-        self._ptr[0].updateData.param.size = size
-{{endif}}
-{{if 'struct cudaGraphKernelNodeUpdate' in found_types}}
-
-cdef class anon_union8:
-    """
-    Attributes
-    ----------
-    gridDim : dim3
-
-    param : anon_struct19
-
-    isEnabled : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaGraphKernelNodeUpdate *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._gridDim = dim3(_ptr=<void_ptr>&self._ptr[0].updateData.gridDim)
-        self._param = anon_struct19(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].updateData
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['gridDim :\n' + '\n'.join(['    ' + line for line in str(self.gridDim).splitlines()])]
-            except ValueError:
-                str_list += ['gridDim : <ValueError>']
-            try:
-                str_list += ['param :\n' + '\n'.join(['    ' + line for line in str(self.param).splitlines()])]
-            except ValueError:
-                str_list += ['param : <ValueError>']
-            try:
-                str_list += ['isEnabled : ' + str(self.isEnabled)]
-            except ValueError:
-                str_list += ['isEnabled : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def gridDim(self):
-        return self._gridDim
-    @gridDim.setter
-    def gridDim(self, gridDim not None : dim3):
-        string.memcpy(&self._ptr[0].updateData.gridDim, <cyruntime.dim3*><void_ptr>gridDim.getPtr(), sizeof(self._ptr[0].updateData.gridDim))
-    @property
-    def param(self):
-        return self._param
-    @param.setter
-    def param(self, param not None : anon_struct19):
-        string.memcpy(&self._ptr[0].updateData.param, <cyruntime.anon_struct19*><void_ptr>param.getPtr(), sizeof(self._ptr[0].updateData.param))
-    @property
-    def isEnabled(self):
-        return self._ptr[0].updateData.isEnabled
-    @isEnabled.setter
-    def isEnabled(self, unsigned int isEnabled):
-        self._ptr[0].updateData.isEnabled = isEnabled
-{{endif}}
-{{if 'struct cudaGraphKernelNodeUpdate' in found_types}}
-
-cdef class cudaGraphKernelNodeUpdate:
-    """
-    Struct to specify a single node update to pass as part of a larger
-    array to ::cudaGraphKernelNodeUpdatesApply
-
-    Attributes
-    ----------
-    node : cudaGraphDeviceNode_t
-        Node to update
-    field : cudaGraphKernelNodeField
-        Which type of update to apply. Determines how updateData is
-        interpreted
-    updateData : anon_union8
-        Update data to apply. Which field is used depends on field's value
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cyruntime.cudaGraphKernelNodeUpdate *>calloc(1, sizeof(cyruntime.cudaGraphKernelNodeUpdate))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cyruntime.cudaGraphKernelNodeUpdate *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._node = cudaGraphDeviceNode_t(_ptr=<void_ptr>&self._ptr[0].node)
-        self._updateData = anon_union8(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['node : ' + str(self.node)]
-            except ValueError:
-                str_list += ['node : <ValueError>']
-            try:
-                str_list += ['field : ' + str(self.field)]
-            except ValueError:
-                str_list += ['field : <ValueError>']
-            try:
-                str_list += ['updateData :\n' + '\n'.join(['    ' + line for line in str(self.updateData).splitlines()])]
-            except ValueError:
-                str_list += ['updateData : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def node(self):
-        return self._node
-    @node.setter
-    def node(self, node):
-        cdef cyruntime.cudaGraphDeviceNode_t cynode
-        if node is None:
-            cynode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>0
-        elif isinstance(node, (cudaGraphDeviceNode_t,)):
-            pnode = int(node)
-            cynode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pnode
-        else:
-            pnode = int(cudaGraphDeviceNode_t(node))
-            cynode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pnode
-        self._node._ptr[0] = cynode
-    @property
-    def field(self):
-        return cudaGraphKernelNodeField(self._ptr[0].field)
-    @field.setter
-    def field(self, field not None : cudaGraphKernelNodeField):
-        self._ptr[0].field = field.value
-    @property
-    def updateData(self):
-        return self._updateData
-    @updateData.setter
-    def updateData(self, updateData not None : anon_union8):
-        string.memcpy(&self._ptr[0].updateData, <cyruntime.anon_union8*><void_ptr>updateData.getPtr(), sizeof(self._ptr[0].updateData))
-{{endif}}
-{{if 'struct cudaLaunchMemSyncDomainMap_st' in found_types}}
-
-cdef class cudaLaunchMemSyncDomainMap_st:
-    """
-    Memory Synchronization Domain map  See cudaLaunchMemSyncDomain.  By
-    default, kernels are launched in domain 0. Kernel launched with
-    cudaLaunchMemSyncDomainRemote will have a different domain ID. User
-    may also alter the domain ID with ::cudaLaunchMemSyncDomainMap for
-    a specific stream / graph node / kernel launch. See
-    cudaLaunchAttributeMemSyncDomainMap.  Domain ID range is available
-    through cudaDevAttrMemSyncDomainCount.
-
-    Attributes
-    ----------
-    default_ : bytes
-        The default domain ID to use for designated kernels
-    remote : bytes
-        The remote domain ID to use for designated kernels
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaLaunchMemSyncDomainMap_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['default_ : ' + str(self.default_)]
-            except ValueError:
-                str_list += ['default_ : <ValueError>']
-            try:
-                str_list += ['remote : ' + str(self.remote)]
-            except ValueError:
-                str_list += ['remote : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def default_(self):
-        return self._ptr[0].default_
-    @default_.setter
-    def default_(self, unsigned char default_):
-        self._ptr[0].default_ = default_
-    @property
-    def remote(self):
-        return self._ptr[0].remote
-    @remote.setter
-    def remote(self, unsigned char remote):
-        self._ptr[0].remote = remote
-{{endif}}
-{{if 'union cudaLaunchAttributeValue' in found_types}}
-
-cdef class anon_struct20:
-    """
-    Attributes
-    ----------
-    x : unsigned int
-
-    y : unsigned int
-
-    z : unsigned int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].clusterDim
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['x : ' + str(self.x)]
-            except ValueError:
-                str_list += ['x : <ValueError>']
-            try:
-                str_list += ['y : ' + str(self.y)]
-            except ValueError:
-                str_list += ['y : <ValueError>']
-            try:
-                str_list += ['z : ' + str(self.z)]
-            except ValueError:
-                str_list += ['z : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def x(self):
-        return self._ptr[0].clusterDim.x
-    @x.setter
-    def x(self, unsigned int x):
-        self._ptr[0].clusterDim.x = x
-    @property
-    def y(self):
-        return self._ptr[0].clusterDim.y
-    @y.setter
-    def y(self, unsigned int y):
-        self._ptr[0].clusterDim.y = y
-    @property
-    def z(self):
-        return self._ptr[0].clusterDim.z
-    @z.setter
-    def z(self, unsigned int z):
-        self._ptr[0].clusterDim.z = z
-{{endif}}
-{{if 'union cudaLaunchAttributeValue' in found_types}}
-
-cdef class anon_struct21:
-    """
-    Attributes
-    ----------
-    event : cudaEvent_t
-
-    flags : int
-
-    triggerAtBlockStart : int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._event = cudaEvent_t(_ptr=<void_ptr>&self._ptr[0].programmaticEvent.event)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].programmaticEvent
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['event : ' + str(self.event)]
-            except ValueError:
-                str_list += ['event : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            try:
-                str_list += ['triggerAtBlockStart : ' + str(self.triggerAtBlockStart)]
-            except ValueError:
-                str_list += ['triggerAtBlockStart : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def event(self):
-        return self._event
-    @event.setter
-    def event(self, event):
-        cdef cyruntime.cudaEvent_t cyevent
-        if event is None:
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-        elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-            pevent = int(event)
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-        else:
-            pevent = int(cudaEvent_t(event))
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-        self._event._ptr[0] = cyevent
-    @property
-    def flags(self):
-        return self._ptr[0].programmaticEvent.flags
-    @flags.setter
-    def flags(self, int flags):
-        self._ptr[0].programmaticEvent.flags = flags
-    @property
-    def triggerAtBlockStart(self):
-        return self._ptr[0].programmaticEvent.triggerAtBlockStart
-    @triggerAtBlockStart.setter
-    def triggerAtBlockStart(self, int triggerAtBlockStart):
-        self._ptr[0].programmaticEvent.triggerAtBlockStart = triggerAtBlockStart
-{{endif}}
-{{if 'union cudaLaunchAttributeValue' in found_types}}
-
-cdef class anon_struct22:
-    """
-    Attributes
-    ----------
-    event : cudaEvent_t
-
-    flags : int
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._event = cudaEvent_t(_ptr=<void_ptr>&self._ptr[0].launchCompletionEvent.event)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].launchCompletionEvent
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['event : ' + str(self.event)]
-            except ValueError:
-                str_list += ['event : <ValueError>']
-            try:
-                str_list += ['flags : ' + str(self.flags)]
-            except ValueError:
-                str_list += ['flags : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def event(self):
-        return self._event
-    @event.setter
-    def event(self, event):
-        cdef cyruntime.cudaEvent_t cyevent
-        if event is None:
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-        elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-            pevent = int(event)
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-        else:
-            pevent = int(cudaEvent_t(event))
-            cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-        self._event._ptr[0] = cyevent
-    @property
-    def flags(self):
-        return self._ptr[0].launchCompletionEvent.flags
-    @flags.setter
-    def flags(self, int flags):
-        self._ptr[0].launchCompletionEvent.flags = flags
-{{endif}}
-{{if 'union cudaLaunchAttributeValue' in found_types}}
-
-cdef class anon_struct23:
-    """
-    Attributes
-    ----------
-    deviceUpdatable : int
-
-    devNode : cudaGraphDeviceNode_t
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._devNode = cudaGraphDeviceNode_t(_ptr=<void_ptr>&self._ptr[0].deviceUpdatableKernelNode.devNode)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].deviceUpdatableKernelNode
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['deviceUpdatable : ' + str(self.deviceUpdatable)]
-            except ValueError:
-                str_list += ['deviceUpdatable : <ValueError>']
-            try:
-                str_list += ['devNode : ' + str(self.devNode)]
-            except ValueError:
-                str_list += ['devNode : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def deviceUpdatable(self):
-        return self._ptr[0].deviceUpdatableKernelNode.deviceUpdatable
-    @deviceUpdatable.setter
-    def deviceUpdatable(self, int deviceUpdatable):
-        self._ptr[0].deviceUpdatableKernelNode.deviceUpdatable = deviceUpdatable
-    @property
-    def devNode(self):
-        return self._devNode
-    @devNode.setter
-    def devNode(self, devNode):
-        cdef cyruntime.cudaGraphDeviceNode_t cydevNode
-        if devNode is None:
-            cydevNode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>0
-        elif isinstance(devNode, (cudaGraphDeviceNode_t,)):
-            pdevNode = int(devNode)
-            cydevNode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pdevNode
-        else:
-            pdevNode = int(cudaGraphDeviceNode_t(devNode))
-            cydevNode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pdevNode
-        self._devNode._ptr[0] = cydevNode
-{{endif}}
-{{if 'union cudaLaunchAttributeValue' in found_types}}
-
-cdef class cudaLaunchAttributeValue:
-    """
-    Launch attributes union; used as value field of
-    ::cudaLaunchAttribute
-
-    Attributes
-    ----------
-    pad : bytes
-
-    accessPolicyWindow : cudaAccessPolicyWindow
-        Value of launch attribute cudaLaunchAttributeAccessPolicyWindow.
-    cooperative : int
-        Value of launch attribute cudaLaunchAttributeCooperative. Nonzero
-        indicates a cooperative kernel (see cudaLaunchCooperativeKernel).
-    syncPolicy : cudaSynchronizationPolicy
-        Value of launch attribute cudaLaunchAttributeSynchronizationPolicy.
-        ::cudaSynchronizationPolicy for work queued up in this stream.
-    clusterDim : anon_struct20
-        Value of launch attribute cudaLaunchAttributeClusterDimension that
-        represents the desired cluster dimensions for the kernel. Opaque
-        type with the following fields: - `x` - The X dimension of the
-        cluster, in blocks. Must be a divisor of the grid X dimension.    -
-        `y` - The Y dimension of the cluster, in blocks. Must be a divisor
-        of the grid Y dimension.    - `z` - The Z dimension of the cluster,
-        in blocks. Must be a divisor of the grid Z dimension.
-    clusterSchedulingPolicyPreference : cudaClusterSchedulingPolicy
-        Value of launch attribute
-        cudaLaunchAttributeClusterSchedulingPolicyPreference. Cluster
-        scheduling policy preference for the kernel.
-    programmaticStreamSerializationAllowed : int
-        Value of launch attribute
-        cudaLaunchAttributeProgrammaticStreamSerialization.
-    programmaticEvent : anon_struct21
-        Value of launch attribute cudaLaunchAttributeProgrammaticEvent with
-        the following fields: - `cudaEvent_t` event - Event to fire when
-        all blocks trigger it.    - `int` flags; - Event record flags, see
-        cudaEventRecordWithFlags. Does not accept cudaEventRecordExternal.
-        - `int` triggerAtBlockStart - If this is set to non-0, each block
-        launch will automatically trigger the event.
-    priority : int
-        Value of launch attribute cudaLaunchAttributePriority. Execution
-        priority of the kernel.
-    memSyncDomainMap : cudaLaunchMemSyncDomainMap
-        Value of launch attribute cudaLaunchAttributeMemSyncDomainMap. See
-        ::cudaLaunchMemSyncDomainMap.
-    memSyncDomain : cudaLaunchMemSyncDomain
-        Value of launch attribute cudaLaunchAttributeMemSyncDomain. See
-        cudaLaunchMemSyncDomain.
-    launchCompletionEvent : anon_struct22
-        Value of launch attribute cudaLaunchAttributeLaunchCompletionEvent
-        with the following fields: - `cudaEvent_t` event - Event to fire
-        when the last block launches.    - `int` flags - Event record
-        flags, see cudaEventRecordWithFlags. Does not accept
-        cudaEventRecordExternal.
-    deviceUpdatableKernelNode : anon_struct23
-        Value of launch attribute
-        cudaLaunchAttributeDeviceUpdatableKernelNode with the following
-        fields: - `int` deviceUpdatable - Whether or not the resulting
-        kernel node should be device-updatable.    -
-        `cudaGraphDeviceNode_t` devNode - Returns a handle to pass to the
-        various device-side update functions.
-    sharedMemCarveout : unsigned int
-        Value of launch attribute
-        cudaLaunchAttributePreferredSharedMemoryCarveout.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._accessPolicyWindow = cudaAccessPolicyWindow(_ptr=<void_ptr>&self._ptr[0].accessPolicyWindow)
-        self._clusterDim = anon_struct20(_ptr=<void_ptr>self._ptr)
-        self._programmaticEvent = anon_struct21(_ptr=<void_ptr>self._ptr)
-        self._memSyncDomainMap = cudaLaunchMemSyncDomainMap(_ptr=<void_ptr>&self._ptr[0].memSyncDomainMap)
-        self._launchCompletionEvent = anon_struct22(_ptr=<void_ptr>self._ptr)
-        self._deviceUpdatableKernelNode = anon_struct23(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['pad : ' + str(self.pad)]
-            except ValueError:
-                str_list += ['pad : <ValueError>']
-            try:
-                str_list += ['accessPolicyWindow :\n' + '\n'.join(['    ' + line for line in str(self.accessPolicyWindow).splitlines()])]
-            except ValueError:
-                str_list += ['accessPolicyWindow : <ValueError>']
-            try:
-                str_list += ['cooperative : ' + str(self.cooperative)]
-            except ValueError:
-                str_list += ['cooperative : <ValueError>']
-            try:
-                str_list += ['syncPolicy : ' + str(self.syncPolicy)]
-            except ValueError:
-                str_list += ['syncPolicy : <ValueError>']
-            try:
-                str_list += ['clusterDim :\n' + '\n'.join(['    ' + line for line in str(self.clusterDim).splitlines()])]
-            except ValueError:
-                str_list += ['clusterDim : <ValueError>']
-            try:
-                str_list += ['clusterSchedulingPolicyPreference : ' + str(self.clusterSchedulingPolicyPreference)]
-            except ValueError:
-                str_list += ['clusterSchedulingPolicyPreference : <ValueError>']
-            try:
-                str_list += ['programmaticStreamSerializationAllowed : ' + str(self.programmaticStreamSerializationAllowed)]
-            except ValueError:
-                str_list += ['programmaticStreamSerializationAllowed : <ValueError>']
-            try:
-                str_list += ['programmaticEvent :\n' + '\n'.join(['    ' + line for line in str(self.programmaticEvent).splitlines()])]
-            except ValueError:
-                str_list += ['programmaticEvent : <ValueError>']
-            try:
-                str_list += ['priority : ' + str(self.priority)]
-            except ValueError:
-                str_list += ['priority : <ValueError>']
-            try:
-                str_list += ['memSyncDomainMap :\n' + '\n'.join(['    ' + line for line in str(self.memSyncDomainMap).splitlines()])]
-            except ValueError:
-                str_list += ['memSyncDomainMap : <ValueError>']
-            try:
-                str_list += ['memSyncDomain : ' + str(self.memSyncDomain)]
-            except ValueError:
-                str_list += ['memSyncDomain : <ValueError>']
-            try:
-                str_list += ['launchCompletionEvent :\n' + '\n'.join(['    ' + line for line in str(self.launchCompletionEvent).splitlines()])]
-            except ValueError:
-                str_list += ['launchCompletionEvent : <ValueError>']
-            try:
-                str_list += ['deviceUpdatableKernelNode :\n' + '\n'.join(['    ' + line for line in str(self.deviceUpdatableKernelNode).splitlines()])]
-            except ValueError:
-                str_list += ['deviceUpdatableKernelNode : <ValueError>']
-            try:
-                str_list += ['sharedMemCarveout : ' + str(self.sharedMemCarveout)]
-            except ValueError:
-                str_list += ['sharedMemCarveout : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def pad(self):
-        return PyBytes_FromStringAndSize(self._ptr[0].pad, 64)
-    @pad.setter
-    def pad(self, pad):
-        if len(pad) != 64:
-            raise ValueError("pad length must be 64, is " + str(len(pad)))
-        if CHAR_MIN == 0:
-            for i, b in enumerate(pad):
-                if b < 0 and b > -129:
-                    b = b + 256
-                self._ptr[0].pad[i] = b
-        else:
-            for i, b in enumerate(pad):
-                if b > 127 and b < 256:
-                    b = b - 256
-                self._ptr[0].pad[i] = b
-    @property
-    def accessPolicyWindow(self):
-        return self._accessPolicyWindow
-    @accessPolicyWindow.setter
-    def accessPolicyWindow(self, accessPolicyWindow not None : cudaAccessPolicyWindow):
-        string.memcpy(&self._ptr[0].accessPolicyWindow, <cyruntime.cudaAccessPolicyWindow*><void_ptr>accessPolicyWindow.getPtr(), sizeof(self._ptr[0].accessPolicyWindow))
-    @property
-    def cooperative(self):
-        return self._ptr[0].cooperative
-    @cooperative.setter
-    def cooperative(self, int cooperative):
-        self._ptr[0].cooperative = cooperative
-    @property
-    def syncPolicy(self):
-        return cudaSynchronizationPolicy(self._ptr[0].syncPolicy)
-    @syncPolicy.setter
-    def syncPolicy(self, syncPolicy not None : cudaSynchronizationPolicy):
-        self._ptr[0].syncPolicy = syncPolicy.value
-    @property
-    def clusterDim(self):
-        return self._clusterDim
-    @clusterDim.setter
-    def clusterDim(self, clusterDim not None : anon_struct20):
-        string.memcpy(&self._ptr[0].clusterDim, <cyruntime.anon_struct20*><void_ptr>clusterDim.getPtr(), sizeof(self._ptr[0].clusterDim))
-    @property
-    def clusterSchedulingPolicyPreference(self):
-        return cudaClusterSchedulingPolicy(self._ptr[0].clusterSchedulingPolicyPreference)
-    @clusterSchedulingPolicyPreference.setter
-    def clusterSchedulingPolicyPreference(self, clusterSchedulingPolicyPreference not None : cudaClusterSchedulingPolicy):
-        self._ptr[0].clusterSchedulingPolicyPreference = clusterSchedulingPolicyPreference.value
-    @property
-    def programmaticStreamSerializationAllowed(self):
-        return self._ptr[0].programmaticStreamSerializationAllowed
-    @programmaticStreamSerializationAllowed.setter
-    def programmaticStreamSerializationAllowed(self, int programmaticStreamSerializationAllowed):
-        self._ptr[0].programmaticStreamSerializationAllowed = programmaticStreamSerializationAllowed
-    @property
-    def programmaticEvent(self):
-        return self._programmaticEvent
-    @programmaticEvent.setter
-    def programmaticEvent(self, programmaticEvent not None : anon_struct21):
-        string.memcpy(&self._ptr[0].programmaticEvent, <cyruntime.anon_struct21*><void_ptr>programmaticEvent.getPtr(), sizeof(self._ptr[0].programmaticEvent))
-    @property
-    def priority(self):
-        return self._ptr[0].priority
-    @priority.setter
-    def priority(self, int priority):
-        self._ptr[0].priority = priority
-    @property
-    def memSyncDomainMap(self):
-        return self._memSyncDomainMap
-    @memSyncDomainMap.setter
-    def memSyncDomainMap(self, memSyncDomainMap not None : cudaLaunchMemSyncDomainMap):
-        string.memcpy(&self._ptr[0].memSyncDomainMap, <cyruntime.cudaLaunchMemSyncDomainMap*><void_ptr>memSyncDomainMap.getPtr(), sizeof(self._ptr[0].memSyncDomainMap))
-    @property
-    def memSyncDomain(self):
-        return cudaLaunchMemSyncDomain(self._ptr[0].memSyncDomain)
-    @memSyncDomain.setter
-    def memSyncDomain(self, memSyncDomain not None : cudaLaunchMemSyncDomain):
-        self._ptr[0].memSyncDomain = memSyncDomain.value
-    @property
-    def launchCompletionEvent(self):
-        return self._launchCompletionEvent
-    @launchCompletionEvent.setter
-    def launchCompletionEvent(self, launchCompletionEvent not None : anon_struct22):
-        string.memcpy(&self._ptr[0].launchCompletionEvent, <cyruntime.anon_struct22*><void_ptr>launchCompletionEvent.getPtr(), sizeof(self._ptr[0].launchCompletionEvent))
-    @property
-    def deviceUpdatableKernelNode(self):
-        return self._deviceUpdatableKernelNode
-    @deviceUpdatableKernelNode.setter
-    def deviceUpdatableKernelNode(self, deviceUpdatableKernelNode not None : anon_struct23):
-        string.memcpy(&self._ptr[0].deviceUpdatableKernelNode, <cyruntime.anon_struct23*><void_ptr>deviceUpdatableKernelNode.getPtr(), sizeof(self._ptr[0].deviceUpdatableKernelNode))
-    @property
-    def sharedMemCarveout(self):
-        return self._ptr[0].sharedMemCarveout
-    @sharedMemCarveout.setter
-    def sharedMemCarveout(self, unsigned int sharedMemCarveout):
-        self._ptr[0].sharedMemCarveout = sharedMemCarveout
-{{endif}}
-{{if 'struct cudaLaunchAttribute_st' in found_types}}
-
-cdef class cudaLaunchAttribute_st:
-    """
-    Launch attribute
-
-    Attributes
-    ----------
-    id : cudaLaunchAttributeID
-        Attribute to set
-    val : cudaLaunchAttributeValue
-        Value of the attribute
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaLaunchAttribute_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._val = cudaLaunchAttributeValue(_ptr=<void_ptr>&self._ptr[0].val)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['id : ' + str(self.id)]
-            except ValueError:
-                str_list += ['id : <ValueError>']
-            try:
-                str_list += ['val :\n' + '\n'.join(['    ' + line for line in str(self.val).splitlines()])]
-            except ValueError:
-                str_list += ['val : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def id(self):
-        return cudaLaunchAttributeID(self._ptr[0].id)
-    @id.setter
-    def id(self, id not None : cudaLaunchAttributeID):
-        self._ptr[0].id = id.value
-    @property
-    def val(self):
-        return self._val
-    @val.setter
-    def val(self, val not None : cudaLaunchAttributeValue):
-        string.memcpy(&self._ptr[0].val, <cyruntime.cudaLaunchAttributeValue*><void_ptr>val.getPtr(), sizeof(self._ptr[0].val))
-{{endif}}
-{{if 'struct cudaAsyncNotificationInfo' in found_types}}
-
-cdef class anon_struct24:
-    """
-    Attributes
-    ----------
-    bytesOverBudget : unsigned long long
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaAsyncNotificationInfo *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].info.overBudget
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['bytesOverBudget : ' + str(self.bytesOverBudget)]
-            except ValueError:
-                str_list += ['bytesOverBudget : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def bytesOverBudget(self):
-        return self._ptr[0].info.overBudget.bytesOverBudget
-    @bytesOverBudget.setter
-    def bytesOverBudget(self, unsigned long long bytesOverBudget):
-        self._ptr[0].info.overBudget.bytesOverBudget = bytesOverBudget
-{{endif}}
-{{if 'struct cudaAsyncNotificationInfo' in found_types}}
-
-cdef class anon_union9:
-    """
-    Attributes
-    ----------
-    overBudget : anon_struct24
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaAsyncNotificationInfo *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        self._overBudget = anon_struct24(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].info
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['overBudget :\n' + '\n'.join(['    ' + line for line in str(self.overBudget).splitlines()])]
-            except ValueError:
-                str_list += ['overBudget : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def overBudget(self):
-        return self._overBudget
-    @overBudget.setter
-    def overBudget(self, overBudget not None : anon_struct24):
-        string.memcpy(&self._ptr[0].info.overBudget, <cyruntime.anon_struct24*><void_ptr>overBudget.getPtr(), sizeof(self._ptr[0].info.overBudget))
-{{endif}}
-{{if 'struct cudaAsyncNotificationInfo' in found_types}}
-
-cdef class cudaAsyncNotificationInfo:
-    """
-    Information describing an async notification event
-
-    Attributes
-    ----------
-    type : cudaAsyncNotificationType
-
-    info : anon_union9
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cyruntime.cudaAsyncNotificationInfo *>calloc(1, sizeof(cyruntime.cudaAsyncNotificationInfo))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cyruntime.cudaAsyncNotificationInfo *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._info = anon_union9(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['type : ' + str(self.type)]
-            except ValueError:
-                str_list += ['type : <ValueError>']
-            try:
-                str_list += ['info :\n' + '\n'.join(['    ' + line for line in str(self.info).splitlines()])]
-            except ValueError:
-                str_list += ['info : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def type(self):
-        return cudaAsyncNotificationType(self._ptr[0].type)
-    @type.setter
-    def type(self, type not None : cudaAsyncNotificationType):
-        self._ptr[0].type = type.value
-    @property
-    def info(self):
-        return self._info
-    @info.setter
-    def info(self, info not None : anon_union9):
-        string.memcpy(&self._ptr[0].info, <cyruntime.anon_union9*><void_ptr>info.getPtr(), sizeof(self._ptr[0].info))
-{{endif}}
-{{if 'struct cudaTextureDesc' in found_types}}
-
-cdef class cudaTextureDesc:
-    """
-    CUDA texture descriptor
-
-    Attributes
-    ----------
-    addressMode : List[cudaTextureAddressMode]
-        Texture address mode for up to 3 dimensions
-    filterMode : cudaTextureFilterMode
-        Texture filter mode
-    readMode : cudaTextureReadMode
-        Texture read mode
-    sRGB : int
-        Perform sRGB->linear conversion during texture read
-    borderColor : List[float]
-        Texture Border Color
-    normalizedCoords : int
-        Indicates whether texture reads are normalized or not
-    maxAnisotropy : unsigned int
-        Limit to the anisotropy ratio
-    mipmapFilterMode : cudaTextureFilterMode
-        Mipmap filter mode
-    mipmapLevelBias : float
-        Offset applied to the supplied mipmap level
-    minMipmapLevelClamp : float
-        Lower end of the mipmap level range to clamp access to
-    maxMipmapLevelClamp : float
-        Upper end of the mipmap level range to clamp access to
-    disableTrilinearOptimization : int
-        Disable any trilinear filtering optimizations.
-    seamlessCubemap : int
-        Enable seamless cube map filtering.
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaTextureDesc *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['addressMode : ' + str(self.addressMode)]
-            except ValueError:
-                str_list += ['addressMode : <ValueError>']
-            try:
-                str_list += ['filterMode : ' + str(self.filterMode)]
-            except ValueError:
-                str_list += ['filterMode : <ValueError>']
-            try:
-                str_list += ['readMode : ' + str(self.readMode)]
-            except ValueError:
-                str_list += ['readMode : <ValueError>']
-            try:
-                str_list += ['sRGB : ' + str(self.sRGB)]
-            except ValueError:
-                str_list += ['sRGB : <ValueError>']
-            try:
-                str_list += ['borderColor : ' + str(self.borderColor)]
-            except ValueError:
-                str_list += ['borderColor : <ValueError>']
-            try:
-                str_list += ['normalizedCoords : ' + str(self.normalizedCoords)]
-            except ValueError:
-                str_list += ['normalizedCoords : <ValueError>']
-            try:
-                str_list += ['maxAnisotropy : ' + str(self.maxAnisotropy)]
-            except ValueError:
-                str_list += ['maxAnisotropy : <ValueError>']
-            try:
-                str_list += ['mipmapFilterMode : ' + str(self.mipmapFilterMode)]
-            except ValueError:
-                str_list += ['mipmapFilterMode : <ValueError>']
-            try:
-                str_list += ['mipmapLevelBias : ' + str(self.mipmapLevelBias)]
-            except ValueError:
-                str_list += ['mipmapLevelBias : <ValueError>']
-            try:
-                str_list += ['minMipmapLevelClamp : ' + str(self.minMipmapLevelClamp)]
-            except ValueError:
-                str_list += ['minMipmapLevelClamp : <ValueError>']
-            try:
-                str_list += ['maxMipmapLevelClamp : ' + str(self.maxMipmapLevelClamp)]
-            except ValueError:
-                str_list += ['maxMipmapLevelClamp : <ValueError>']
-            try:
-                str_list += ['disableTrilinearOptimization : ' + str(self.disableTrilinearOptimization)]
-            except ValueError:
-                str_list += ['disableTrilinearOptimization : <ValueError>']
-            try:
-                str_list += ['seamlessCubemap : ' + str(self.seamlessCubemap)]
-            except ValueError:
-                str_list += ['seamlessCubemap : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def addressMode(self):
-        return [cudaTextureAddressMode(_x) for _x in list(self._ptr[0].addressMode)]
-    @addressMode.setter
-    def addressMode(self, addressMode):
-        self._ptr[0].addressMode = [_x.value for _x in addressMode]
-    @property
-    def filterMode(self):
-        return cudaTextureFilterMode(self._ptr[0].filterMode)
-    @filterMode.setter
-    def filterMode(self, filterMode not None : cudaTextureFilterMode):
-        self._ptr[0].filterMode = filterMode.value
-    @property
-    def readMode(self):
-        return cudaTextureReadMode(self._ptr[0].readMode)
-    @readMode.setter
-    def readMode(self, readMode not None : cudaTextureReadMode):
-        self._ptr[0].readMode = readMode.value
-    @property
-    def sRGB(self):
-        return self._ptr[0].sRGB
-    @sRGB.setter
-    def sRGB(self, int sRGB):
-        self._ptr[0].sRGB = sRGB
-    @property
-    def borderColor(self):
-        return self._ptr[0].borderColor
-    @borderColor.setter
-    def borderColor(self, borderColor):
-        self._ptr[0].borderColor = borderColor
-    @property
-    def normalizedCoords(self):
-        return self._ptr[0].normalizedCoords
-    @normalizedCoords.setter
-    def normalizedCoords(self, int normalizedCoords):
-        self._ptr[0].normalizedCoords = normalizedCoords
-    @property
-    def maxAnisotropy(self):
-        return self._ptr[0].maxAnisotropy
-    @maxAnisotropy.setter
-    def maxAnisotropy(self, unsigned int maxAnisotropy):
-        self._ptr[0].maxAnisotropy = maxAnisotropy
-    @property
-    def mipmapFilterMode(self):
-        return cudaTextureFilterMode(self._ptr[0].mipmapFilterMode)
-    @mipmapFilterMode.setter
-    def mipmapFilterMode(self, mipmapFilterMode not None : cudaTextureFilterMode):
-        self._ptr[0].mipmapFilterMode = mipmapFilterMode.value
-    @property
-    def mipmapLevelBias(self):
-        return self._ptr[0].mipmapLevelBias
-    @mipmapLevelBias.setter
-    def mipmapLevelBias(self, float mipmapLevelBias):
-        self._ptr[0].mipmapLevelBias = mipmapLevelBias
-    @property
-    def minMipmapLevelClamp(self):
-        return self._ptr[0].minMipmapLevelClamp
-    @minMipmapLevelClamp.setter
-    def minMipmapLevelClamp(self, float minMipmapLevelClamp):
-        self._ptr[0].minMipmapLevelClamp = minMipmapLevelClamp
-    @property
-    def maxMipmapLevelClamp(self):
-        return self._ptr[0].maxMipmapLevelClamp
-    @maxMipmapLevelClamp.setter
-    def maxMipmapLevelClamp(self, float maxMipmapLevelClamp):
-        self._ptr[0].maxMipmapLevelClamp = maxMipmapLevelClamp
-    @property
-    def disableTrilinearOptimization(self):
-        return self._ptr[0].disableTrilinearOptimization
-    @disableTrilinearOptimization.setter
-    def disableTrilinearOptimization(self, int disableTrilinearOptimization):
-        self._ptr[0].disableTrilinearOptimization = disableTrilinearOptimization
-    @property
-    def seamlessCubemap(self):
-        return self._ptr[0].seamlessCubemap
-    @seamlessCubemap.setter
-    def seamlessCubemap(self, int seamlessCubemap):
-        self._ptr[0].seamlessCubemap = seamlessCubemap
-{{endif}}
-{{if True}}
-
-cdef class cudaEglPlaneDesc_st:
-    """
-    CUDA EGL Plane Descriptor - structure defining each plane of a CUDA
-    EGLFrame
-
-    Attributes
-    ----------
-    width : unsigned int
-        Width of plane
-    height : unsigned int
-        Height of plane
-    depth : unsigned int
-        Depth of plane
-    pitch : unsigned int
-        Pitch of plane
-    numChannels : unsigned int
-        Number of channels for the plane
-    channelDesc : cudaChannelFormatDesc
-        Channel Format Descriptor
-    reserved : List[unsigned int]
-        Reserved for future use
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaEglPlaneDesc_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._channelDesc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._ptr[0].channelDesc)
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['width : ' + str(self.width)]
-            except ValueError:
-                str_list += ['width : <ValueError>']
-            try:
-                str_list += ['height : ' + str(self.height)]
-            except ValueError:
-                str_list += ['height : <ValueError>']
-            try:
-                str_list += ['depth : ' + str(self.depth)]
-            except ValueError:
-                str_list += ['depth : <ValueError>']
-            try:
-                str_list += ['pitch : ' + str(self.pitch)]
-            except ValueError:
-                str_list += ['pitch : <ValueError>']
-            try:
-                str_list += ['numChannels : ' + str(self.numChannels)]
-            except ValueError:
-                str_list += ['numChannels : <ValueError>']
-            try:
-                str_list += ['channelDesc :\n' + '\n'.join(['    ' + line for line in str(self.channelDesc).splitlines()])]
-            except ValueError:
-                str_list += ['channelDesc : <ValueError>']
-            try:
-                str_list += ['reserved : ' + str(self.reserved)]
-            except ValueError:
-                str_list += ['reserved : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def width(self):
-        return self._ptr[0].width
-    @width.setter
-    def width(self, unsigned int width):
-        self._ptr[0].width = width
-    @property
-    def height(self):
-        return self._ptr[0].height
-    @height.setter
-    def height(self, unsigned int height):
-        self._ptr[0].height = height
-    @property
-    def depth(self):
-        return self._ptr[0].depth
-    @depth.setter
-    def depth(self, unsigned int depth):
-        self._ptr[0].depth = depth
-    @property
-    def pitch(self):
-        return self._ptr[0].pitch
-    @pitch.setter
-    def pitch(self, unsigned int pitch):
-        self._ptr[0].pitch = pitch
-    @property
-    def numChannels(self):
-        return self._ptr[0].numChannels
-    @numChannels.setter
-    def numChannels(self, unsigned int numChannels):
-        self._ptr[0].numChannels = numChannels
-    @property
-    def channelDesc(self):
-        return self._channelDesc
-    @channelDesc.setter
-    def channelDesc(self, channelDesc not None : cudaChannelFormatDesc):
-        string.memcpy(&self._ptr[0].channelDesc, <cyruntime.cudaChannelFormatDesc*><void_ptr>channelDesc.getPtr(), sizeof(self._ptr[0].channelDesc))
-    @property
-    def reserved(self):
-        return self._ptr[0].reserved
-    @reserved.setter
-    def reserved(self, reserved):
-        self._ptr[0].reserved = reserved
-{{endif}}
-{{if True}}
-
-cdef class anon_union10:
-    """
-    Attributes
-    ----------
-    pArray : List[cudaArray_t]
-
-    pPitch : List[cudaPitchedPtr]
-
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr):
-        self._ptr = <cyruntime.cudaEglFrame_st *>_ptr
-
-    def __init__(self, void_ptr _ptr):
-        pass
-    def __dealloc__(self):
-        pass
-    def getPtr(self):
-        return <void_ptr>&self._ptr[0].frame
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['pArray : ' + str(self.pArray)]
-            except ValueError:
-                str_list += ['pArray : <ValueError>']
-            try:
-                str_list += ['pPitch :\n' + '\n'.join(['    ' + line for line in str(self.pPitch).splitlines()])]
-            except ValueError:
-                str_list += ['pPitch : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def pArray(self):
-        return [cudaArray_t(init_value=<void_ptr>_pArray) for _pArray in self._ptr[0].frame.pArray]
-    @pArray.setter
-    def pArray(self, pArray : List[cudaArray_t]):
-        if len(pArray) != 3:
-            raise IndexError('not enough values found during array assignment, expected 3, got', len(pArray))
-        pArray = [int(_pArray) for _pArray in pArray]
-        for _idx, _pArray in enumerate(pArray):
-            self._ptr[0].frame.pArray[_idx] = <cyruntime.cudaArray_t><void_ptr>_pArray
-
-    @property
-    def pPitch(self):
-        out_pPitch = [cudaPitchedPtr() for _pPitch in self._ptr[0].frame.pPitch]
-        for _idx in range(len(out_pPitch)):
-            string.memcpy(<cyruntime.cudaPitchedPtr*><void_ptr>out_pPitch[_idx].getPtr(), &self._ptr[0].frame.pPitch[_idx], sizeof(cyruntime.cudaPitchedPtr))
-        return out_pPitch
-    @pPitch.setter
-    def pPitch(self, pPitch : List[cudaPitchedPtr]):
-        if len(pPitch) != 3:
-            raise IndexError('not enough values found during array assignment, expected 3, got', len(pPitch))
-        for _idx in range(len(pPitch)):
-            string.memcpy(&self._ptr[0].frame.pPitch[_idx], <cyruntime.cudaPitchedPtr*><void_ptr>pPitch[_idx].getPtr(), sizeof(cyruntime.cudaPitchedPtr))
-
-{{endif}}
-{{if True}}
-
-cdef class cudaEglFrame_st:
-    """
-    CUDA EGLFrame Descriptor - structure defining one frame of EGL.
-    Each frame may contain one or more planes depending on whether the
-    surface is Multiplanar or not. Each plane of EGLFrame is
-    represented by cudaEglPlaneDesc which is defined as:
-    typedefstructcudaEglPlaneDesc_st unsignedintwidth;
-    unsignedintheight; unsignedintdepth; unsignedintpitch;
-    unsignedintnumChannels; structcudaChannelFormatDescchannelDesc;
-    unsignedintreserved[4]; cudaEglPlaneDesc;
-
-    Attributes
-    ----------
-    frame : anon_union10
-
-    planeDesc : List[cudaEglPlaneDesc]
-        CUDA EGL Plane Descriptor cudaEglPlaneDesc
-    planeCount : unsigned int
-        Number of planes
-    frameType : cudaEglFrameType
-        Array or Pitch
-    eglColorFormat : cudaEglColorFormat
-        CUDA EGL Color Format
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._val_ptr = <cyruntime.cudaEglFrame_st *>calloc(1, sizeof(cyruntime.cudaEglFrame_st))
-            self._ptr = self._val_ptr
-        else:
-            self._ptr = <cyruntime.cudaEglFrame_st *>_ptr
-    def __init__(self, void_ptr _ptr = 0):
-        self._frame = anon_union10(_ptr=<void_ptr>self._ptr)
-    def __dealloc__(self):
-        if self._val_ptr is not NULL:
-            free(self._val_ptr)
-    def getPtr(self):
-        return <void_ptr>self._ptr
-    def __repr__(self):
-        if self._ptr is not NULL:
-            str_list = []
-            try:
-                str_list += ['frame :\n' + '\n'.join(['    ' + line for line in str(self.frame).splitlines()])]
-            except ValueError:
-                str_list += ['frame : <ValueError>']
-            try:
-                str_list += ['planeDesc :\n' + '\n'.join(['    ' + line for line in str(self.planeDesc).splitlines()])]
-            except ValueError:
-                str_list += ['planeDesc : <ValueError>']
-            try:
-                str_list += ['planeCount : ' + str(self.planeCount)]
-            except ValueError:
-                str_list += ['planeCount : <ValueError>']
-            try:
-                str_list += ['frameType : ' + str(self.frameType)]
-            except ValueError:
-                str_list += ['frameType : <ValueError>']
-            try:
-                str_list += ['eglColorFormat : ' + str(self.eglColorFormat)]
-            except ValueError:
-                str_list += ['eglColorFormat : <ValueError>']
-            return '\n'.join(str_list)
-        else:
-            return ''
-    @property
-    def frame(self):
-        return self._frame
-    @frame.setter
-    def frame(self, frame not None : anon_union10):
-        string.memcpy(&self._ptr[0].frame, <cyruntime.anon_union10*><void_ptr>frame.getPtr(), sizeof(self._ptr[0].frame))
-    @property
-    def planeDesc(self):
-        out_planeDesc = [cudaEglPlaneDesc() for _planeDesc in self._ptr[0].planeDesc]
-        for _idx in range(len(out_planeDesc)):
-            string.memcpy(<cyruntime.cudaEglPlaneDesc*><void_ptr>out_planeDesc[_idx].getPtr(), &self._ptr[0].planeDesc[_idx], sizeof(cyruntime.cudaEglPlaneDesc))
-        return out_planeDesc
-    @planeDesc.setter
-    def planeDesc(self, planeDesc : List[cudaEglPlaneDesc]):
-        if len(planeDesc) != 3:
-            raise IndexError('not enough values found during array assignment, expected 3, got', len(planeDesc))
-        for _idx in range(len(planeDesc)):
-            string.memcpy(&self._ptr[0].planeDesc[_idx], <cyruntime.cudaEglPlaneDesc*><void_ptr>planeDesc[_idx].getPtr(), sizeof(cyruntime.cudaEglPlaneDesc))
-
-    @property
-    def planeCount(self):
-        return self._ptr[0].planeCount
-    @planeCount.setter
-    def planeCount(self, unsigned int planeCount):
-        self._ptr[0].planeCount = planeCount
-    @property
-    def frameType(self):
-        return cudaEglFrameType(self._ptr[0].frameType)
-    @frameType.setter
-    def frameType(self, frameType not None : cudaEglFrameType):
-        self._ptr[0].frameType = frameType.value
-    @property
-    def eglColorFormat(self):
-        return cudaEglColorFormat(self._ptr[0].eglColorFormat)
-    @eglColorFormat.setter
-    def eglColorFormat(self, eglColorFormat not None : cudaEglColorFormat):
-        self._ptr[0].eglColorFormat = eglColorFormat.value
-{{endif}}
-{{if 'cudaGraphConditionalHandle' in found_types}}
-
-cdef class cudaGraphConditionalHandle:
-    """
-
-    CUDA handle for conditional graph nodes
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaGraphConditionalHandle *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<cudaGraphConditionalHandle ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaSurfaceObject_t' in found_types}}
-
-cdef class cudaSurfaceObject_t:
-    """
-
-    An opaque value that represents a CUDA Surface object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaSurfaceObject_t *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<cudaSurfaceObject_t ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaTextureObject_t' in found_types}}
-
-cdef class cudaTextureObject_t:
-    """
-
-    An opaque value that represents a CUDA texture object
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.cudaTextureObject_t *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<cudaTextureObject_t ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class GLenum:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.GLenum *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<GLenum ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned int>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class GLuint:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.GLuint *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<GLuint ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned int>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class EGLint:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.EGLint *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<EGLint ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned int>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpDevice:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.VdpDevice *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<VdpDevice ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <uint32_t>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpGetProcAddress:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.VdpGetProcAddress *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<VdpGetProcAddress ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <unsigned long long>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpVideoSurface:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.VdpVideoSurface *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<VdpVideoSurface ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <uint32_t>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if True}}
-
-cdef class VdpOutputSurface:
-    """
-
-    Methods
-    -------
-    getPtr()
-        Get memory address of class instance
-
-    """
-    def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
-        if _ptr == 0:
-            self._ptr = &self.__val
-        else:
-            self._ptr = <cyruntime.VdpOutputSurface *>_ptr
-        if init_value:
-            self._ptr[0] = init_value
-    def __dealloc__(self):
-        pass
-    def __repr__(self):
-        return '<VdpOutputSurface ' + str(self.__int__()) + '>'
-    def __int__(self):
-        return <uint32_t>self._ptr[0]
-    def getPtr(self):
-        return <void_ptr>self._ptr
-{{endif}}
-
-{{if 'cudaDeviceReset' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceReset():
-    """ Destroy all allocations and reset all state on the current device in the current process.
-
-    Explicitly destroys and cleans up all resources associated with the
-    current device in the current process. It is the caller's
-    responsibility to ensure that the resources are not accessed or passed
-    in subsequent API calls and doing so will result in undefined behavior.
-    These resources include CUDA types :py:obj:`~.cudaStream_t`,
-    :py:obj:`~.cudaEvent_t`, :py:obj:`~.cudaArray_t`,
-    :py:obj:`~.cudaMipmappedArray_t`, :py:obj:`~.cudaPitchedPtr`,
-    :py:obj:`~.cudaTextureObject_t`, :py:obj:`~.cudaSurfaceObject_t`,
-    :py:obj:`~.textureReference`, :py:obj:`~.surfaceReference`,
-    :py:obj:`~.cudaExternalMemory_t`, :py:obj:`~.cudaExternalSemaphore_t`
-    and :py:obj:`~.cudaGraphicsResource_t`. These resources also include
-    memory allocations by :py:obj:`~.cudaMalloc`,
-    :py:obj:`~.cudaMallocHost`, :py:obj:`~.cudaMallocManaged` and
-    :py:obj:`~.cudaMallocPitch`. Any subsequent API call to this device
-    will reinitialize the device.
-
-    Note that this function will reset the device immediately. It is the
-    caller's responsibility to ensure that the device is not being accessed
-    by any other host threads from the process when this function is
-    called.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceSynchronize`
-
-    Notes
-    -----
-    :py:obj:`~.cudaDeviceReset()` will not destroy memory allocations by :py:obj:`~.cudaMallocAsync()` and :py:obj:`~.cudaMallocFromPoolAsync()`. These memory allocations need to be destroyed explicitly.
-
-    If a non-primary :py:obj:`~.CUcontext` is current to the thread, :py:obj:`~.cudaDeviceReset()` will destroy only the internal CUDA RT state for that :py:obj:`~.CUcontext`.
-    """
-    err = cyruntime.cudaDeviceReset()
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDeviceSynchronize' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceSynchronize():
-    """ Wait for compute device to finish.
-
-    Blocks until the device has completed all preceding requested tasks.
-    :py:obj:`~.cudaDeviceSynchronize()` returns an error if one of the
-    preceding tasks has failed. If the
-    :py:obj:`~.cudaDeviceScheduleBlockingSync` flag was set for this
-    device, the host thread will block until the device has finished its
-    work.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceReset`, :py:obj:`~.cuCtxSynchronize`
-    """
-    with nogil:
-        err = cyruntime.cudaDeviceSynchronize()
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDeviceSetLimit' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceSetLimit(limit not None : cudaLimit, size_t value):
-    """ Set resource limits.
-
-    Setting `limit` to `value` is a request by the application to update
-    the current limit maintained by the device. The driver is free to
-    modify the requested value to meet h/w requirements (this could be
-    clamping to minimum or maximum values, rounding up to nearest element
-    size, etc). The application can use :py:obj:`~.cudaDeviceGetLimit()` to
-    find out exactly what the limit has been set to.
-
-    Setting each :py:obj:`~.cudaLimit` has its own specific restrictions,
-    so each is discussed here.
-
-    - :py:obj:`~.cudaLimitStackSize` controls the stack size in bytes of
-      each GPU thread.
-
-    - :py:obj:`~.cudaLimitPrintfFifoSize` controls the size in bytes of the
-      shared FIFO used by the :py:obj:`~.printf()` device system call.
-      Setting :py:obj:`~.cudaLimitPrintfFifoSize` must not be performed
-      after launching any kernel that uses the :py:obj:`~.printf()` device
-      system call - in such case :py:obj:`~.cudaErrorInvalidValue` will be
-      returned.
-
-    - :py:obj:`~.cudaLimitMallocHeapSize` controls the size in bytes of the
-      heap used by the :py:obj:`~.malloc()` and :py:obj:`~.free()` device
-      system calls. Setting :py:obj:`~.cudaLimitMallocHeapSize` must not be
-      performed after launching any kernel that uses the
-      :py:obj:`~.malloc()` or :py:obj:`~.free()` device system calls - in
-      such case :py:obj:`~.cudaErrorInvalidValue` will be returned.
-
-    - :py:obj:`~.cudaLimitDevRuntimeSyncDepth` controls the maximum nesting
-      depth of a grid at which a thread can safely call
-      :py:obj:`~.cudaDeviceSynchronize()`. Setting this limit must be
-      performed before any launch of a kernel that uses the device runtime
-      and calls :py:obj:`~.cudaDeviceSynchronize()` above the default sync
-      depth, two levels of grids. Calls to
-      :py:obj:`~.cudaDeviceSynchronize()` will fail with error code
-      :py:obj:`~.cudaErrorSyncDepthExceeded` if the limitation is violated.
-      This limit can be set smaller than the default or up the maximum
-      launch depth of 24. When setting this limit, keep in mind that
-      additional levels of sync depth require the runtime to reserve large
-      amounts of device memory which can no longer be used for user
-      allocations. If these reservations of device memory fail,
-      :py:obj:`~.cudaDeviceSetLimit` will return
-      :py:obj:`~.cudaErrorMemoryAllocation`, and the limit can be reset to
-      a lower value. This limit is only applicable to devices of compute
-      capability < 9.0. Attempting to set this limit on devices of other
-      compute capability will results in error
-      :py:obj:`~.cudaErrorUnsupportedLimit` being returned.
-
-    - :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount` controls the
-      maximum number of outstanding device runtime launches that can be
-      made from the current device. A grid is outstanding from the point of
-      launch up until the grid is known to have been completed. Device
-      runtime launches which violate this limitation fail and return
-      :py:obj:`~.cudaErrorLaunchPendingCountExceeded` when
-      :py:obj:`~.cudaGetLastError()` is called after launch. If more
-      pending launches than the default (2048 launches) are needed for a
-      module using the device runtime, this limit can be increased. Keep in
-      mind that being able to sustain additional pending launches will
-      require the runtime to reserve larger amounts of device memory
-      upfront which can no longer be used for allocations. If these
-      reservations fail, :py:obj:`~.cudaDeviceSetLimit` will return
-      :py:obj:`~.cudaErrorMemoryAllocation`, and the limit can be reset to
-      a lower value. This limit is only applicable to devices of compute
-      capability 3.5 and higher. Attempting to set this limit on devices of
-      compute capability less than 3.5 will result in the error
-      :py:obj:`~.cudaErrorUnsupportedLimit` being returned.
-
-    - :py:obj:`~.cudaLimitMaxL2FetchGranularity` controls the L2 cache
-      fetch granularity. Values can range from 0B to 128B. This is purely a
-      performance hint and it can be ignored or clamped depending on the
-      platform.
-
-    - :py:obj:`~.cudaLimitPersistingL2CacheSize` controls size in bytes
-      available for persisting L2 cache. This is purely a performance hint
-      and it can be ignored or clamped depending on the platform.
-
-    Parameters
-    ----------
-    limit : :py:obj:`~.cudaLimit`
-        Limit to set
-    value : size_t
-        Size of limit
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorUnsupportedLimit`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceGetLimit`, :py:obj:`~.cuCtxSetLimit`
-    """
-    cdef cyruntime.cudaLimit cylimit = limit.value
-    err = cyruntime.cudaDeviceSetLimit(cylimit, value)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDeviceGetLimit' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetLimit(limit not None : cudaLimit):
-    """ Return resource limits.
-
-    Returns in `*pValue` the current size of `limit`. The following
-    :py:obj:`~.cudaLimit` values are supported.
-
-    - :py:obj:`~.cudaLimitStackSize` is the stack size in bytes of each GPU
-      thread.
-
-    - :py:obj:`~.cudaLimitPrintfFifoSize` is the size in bytes of the
-      shared FIFO used by the :py:obj:`~.printf()` device system call.
-
-    - :py:obj:`~.cudaLimitMallocHeapSize` is the size in bytes of the heap
-      used by the :py:obj:`~.malloc()` and :py:obj:`~.free()` device system
-      calls.
-
-    - :py:obj:`~.cudaLimitDevRuntimeSyncDepth` is the maximum grid depth at
-      which a thread can isssue the device runtime call
-      :py:obj:`~.cudaDeviceSynchronize()` to wait on child grid launches to
-      complete. This functionality is removed for devices of compute
-      capability >= 9.0, and hence will return error
-      :py:obj:`~.cudaErrorUnsupportedLimit` on such devices.
-
-    - :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount` is the maximum
-      number of outstanding device runtime launches.
-
-    - :py:obj:`~.cudaLimitMaxL2FetchGranularity` is the L2 cache fetch
-      granularity.
-
-    - :py:obj:`~.cudaLimitPersistingL2CacheSize` is the persisting L2 cache
-      size in bytes.
-
-    Parameters
-    ----------
-    limit : :py:obj:`~.cudaLimit`
-        Limit to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorUnsupportedLimit`, :py:obj:`~.cudaErrorInvalidValue`
-    pValue : int
-        Returned size of the limit
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceSetLimit`, :py:obj:`~.cuCtxGetLimit`
-    """
-    cdef size_t pValue = 0
-    cdef cyruntime.cudaLimit cylimit = limit.value
-    err = cyruntime.cudaDeviceGetLimit(&pValue, cylimit)
-    return (cudaError_t(err), pValue)
-{{endif}}
-
-{{if 'cudaDeviceGetTexture1DLinearMaxWidth' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetTexture1DLinearMaxWidth(fmtDesc : Optional[cudaChannelFormatDesc], int device):
-    """ Returns the maximum number of elements allocatable in a 1D linear texture for a given element size.
-
-    Returns in `maxWidthInElements` the maximum number of elements
-    allocatable in a 1D linear texture for given format descriptor
-    `fmtDesc`.
-
-    Parameters
-    ----------
-    fmtDesc : :py:obj:`~.cudaChannelFormatDesc`
-        Texture format description.
-    None : int
-        None
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorUnsupportedLimit`, :py:obj:`~.cudaErrorInvalidValue`
-    maxWidthInElements : int
-        Returns maximum number of texture elements allocatable for given
-        `fmtDesc`.
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetTexture1DLinearMaxWidth`
-    """
-    cdef size_t maxWidthInElements = 0
-    cdef cyruntime.cudaChannelFormatDesc* cyfmtDesc_ptr = fmtDesc._ptr if fmtDesc != None else NULL
-    err = cyruntime.cudaDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, cyfmtDesc_ptr, device)
-    return (cudaError_t(err), maxWidthInElements)
-{{endif}}
-
-{{if 'cudaDeviceGetCacheConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetCacheConfig():
-    """ Returns the preferred cache configuration for the current device.
-
-    On devices where the L1 cache and shared memory use the same hardware
-    resources, this returns through `pCacheConfig` the preferred cache
-    configuration for the current device. This is only a preference. The
-    runtime will use the requested configuration if possible, but it is
-    free to choose a different configuration if required to execute
-    functions.
-
-    This will return a `pCacheConfig` of
-    :py:obj:`~.cudaFuncCachePreferNone` on devices where the size of the L1
-    cache and shared memory are fixed.
-
-    The supported cache configurations are:
-
-    - :py:obj:`~.cudaFuncCachePreferNone`: no preference for shared memory
-      or L1 (default)
-
-    - :py:obj:`~.cudaFuncCachePreferShared`: prefer larger shared memory
-      and smaller L1 cache
-
-    - :py:obj:`~.cudaFuncCachePreferL1`: prefer larger L1 cache and smaller
-      shared memory
-
-    - :py:obj:`~.cudaFuncCachePreferEqual`: prefer equal size L1 cache and
-      shared memory
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`
-    pCacheConfig : :py:obj:`~.cudaFuncCache`
-        Returned cache configuration
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cuCtxGetCacheConfig`
-    """
-    cdef cyruntime.cudaFuncCache pCacheConfig
-    err = cyruntime.cudaDeviceGetCacheConfig(&pCacheConfig)
-    return (cudaError_t(err), cudaFuncCache(pCacheConfig))
-{{endif}}
-
-{{if 'cudaDeviceGetStreamPriorityRange' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetStreamPriorityRange():
-    """ Returns numerical values that correspond to the least and greatest stream priorities.
-
-    Returns in `*leastPriority` and `*greatestPriority` the numerical
-    values that correspond to the least and greatest stream priorities
-    respectively. Stream priorities follow a convention where lower numbers
-    imply greater priorities. The range of meaningful stream priorities is
-    given by [`*greatestPriority`, `*leastPriority`]. If the user attempts
-    to create a stream with a priority value that is outside the the
-    meaningful range as specified by this API, the priority is
-    automatically clamped down or up to either `*leastPriority` or
-    `*greatestPriority` respectively. See
-    :py:obj:`~.cudaStreamCreateWithPriority` for details on creating a
-    priority stream. A NULL may be passed in for `*leastPriority` or
-    `*greatestPriority` if the value is not desired.
-
-    This function will return '0' in both `*leastPriority` and
-    `*greatestPriority` if the current context's device does not support
-    stream priorities (see :py:obj:`~.cudaDeviceGetAttribute`).
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`
-    leastPriority : int
-        Pointer to an int in which the numerical value for least stream
-        priority is returned
-    greatestPriority : int
-        Pointer to an int in which the numerical value for greatest stream
-        priority is returned
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cuCtxGetStreamPriorityRange`
-    """
-    cdef int leastPriority = 0
-    cdef int greatestPriority = 0
-    err = cyruntime.cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority)
-    return (cudaError_t(err), leastPriority, greatestPriority)
-{{endif}}
-
-{{if 'cudaDeviceSetCacheConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceSetCacheConfig(cacheConfig not None : cudaFuncCache):
-    """ Sets the preferred cache configuration for the current device.
-
-    On devices where the L1 cache and shared memory use the same hardware
-    resources, this sets through `cacheConfig` the preferred cache
-    configuration for the current device. This is only a preference. The
-    runtime will use the requested configuration if possible, but it is
-    free to choose a different configuration if required to execute the
-    function. Any function preference set via
-    :py:obj:`~.cudaFuncSetCacheConfig (C API)` or cudaFuncSetCacheConfig
-    (C++ API) will be preferred over this device-wide setting. Setting the
-    device-wide cache configuration to :py:obj:`~.cudaFuncCachePreferNone`
-    will cause subsequent kernel launches to prefer to not change the cache
-    configuration unless required to launch the kernel.
-
-    This setting does nothing on devices where the size of the L1 cache and
-    shared memory are fixed.
-
-    Launching a kernel with a different preference than the most recent
-    preference setting may insert a device-side synchronization point.
-
-    The supported cache configurations are:
-
-    - :py:obj:`~.cudaFuncCachePreferNone`: no preference for shared memory
-      or L1 (default)
-
-    - :py:obj:`~.cudaFuncCachePreferShared`: prefer larger shared memory
-      and smaller L1 cache
-
-    - :py:obj:`~.cudaFuncCachePreferL1`: prefer larger L1 cache and smaller
-      shared memory
-
-    - :py:obj:`~.cudaFuncCachePreferEqual`: prefer equal size L1 cache and
-      shared memory
-
-    Parameters
-    ----------
-    cacheConfig : :py:obj:`~.cudaFuncCache`
-        Requested cache configuration
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cuCtxSetCacheConfig`
-    """
-    cdef cyruntime.cudaFuncCache cycacheConfig = cacheConfig.value
-    err = cyruntime.cudaDeviceSetCacheConfig(cycacheConfig)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDeviceGetByPCIBusId' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetByPCIBusId(char* pciBusId):
-    """ Returns a handle to a compute device.
-
-    Returns in `*device` a device ordinal given a PCI bus ID string.
-
-    where `domain`, `bus`, `device`, and `function` are all hexadecimal
-    values
-
-    Parameters
-    ----------
-    pciBusId : bytes
-        String in one of the following forms:
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
-    device : int
-        Returned device ordinal
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceGetPCIBusId`, :py:obj:`~.cuDeviceGetByPCIBusId`
-    """
-    cdef int device = 0
-    err = cyruntime.cudaDeviceGetByPCIBusId(&device, pciBusId)
-    return (cudaError_t(err), device)
-{{endif}}
-
-{{if 'cudaDeviceGetPCIBusId' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetPCIBusId(int length, int device):
-    """ Returns a PCI Bus Id string for the device.
-
-    Returns an ASCII string identifying the device `dev` in the NULL-
-    terminated string pointed to by `pciBusId`. `length` specifies the
-    maximum length of the string that may be returned.
-
-    where `domain`, `bus`, `device`, and `function` are all hexadecimal
-    values. pciBusId should be large enough to store 13 characters
-    including the NULL-terminator.
-
-    Parameters
-    ----------
-    length : int
-        Maximum length of string to store in `name`
-    device : int
-        Device to get identifier string for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
-    pciBusId : bytes
-        Returned identifier string for the device in the following format
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceGetByPCIBusId`, :py:obj:`~.cuDeviceGetPCIBusId`
-    """
-    pypciBusId = b" " * length
-    cdef char* pciBusId = pypciBusId
-    err = cyruntime.cudaDeviceGetPCIBusId(pciBusId, length, device)
-    return (cudaError_t(err), pypciBusId)
-{{endif}}
-
-{{if 'cudaIpcGetEventHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaIpcGetEventHandle(event):
-    """ Gets an interprocess handle for a previously allocated event.
-
-    Takes as input a previously allocated event. This event must have been
-    created with the :py:obj:`~.cudaEventInterprocess` and
-    :py:obj:`~.cudaEventDisableTiming` flags set. This opaque handle may be
-    copied into other processes and opened with
-    :py:obj:`~.cudaIpcOpenEventHandle` to allow efficient hardware
-    synchronization between GPU work in different processes.
-
-    After the event has been been opened in the importing process,
-    :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventSynchronize`,
-    :py:obj:`~.cudaStreamWaitEvent` and :py:obj:`~.cudaEventQuery` may be
-    used in either process. Performing operations on the imported event
-    after the exported event has been freed with
-    :py:obj:`~.cudaEventDestroy` will result in undefined behavior.
-
-    IPC functionality is restricted to devices with support for unified
-    addressing on Linux and Windows operating systems. IPC functionality on
-    Windows is supported for compatibility purposes but not recommended as
-    it comes with performance cost. Users can test their device for IPC
-    functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
-    :py:obj:`~.cudaDevAttrIpcEventSupport`
-
-    Parameters
-    ----------
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event allocated with :py:obj:`~.cudaEventInterprocess` and
-        :py:obj:`~.cudaEventDisableTiming` flags.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
-    handle : :py:obj:`~.cudaIpcEventHandle_t`
-        Pointer to a user allocated cudaIpcEventHandle in which to return
-        the opaque event handle
-
-    See Also
-    --------
-    :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcGetEventHandle`
-    """
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    cdef cudaIpcEventHandle_t handle = cudaIpcEventHandle_t()
-    err = cyruntime.cudaIpcGetEventHandle(<cyruntime.cudaIpcEventHandle_t*>handle._ptr, cyevent)
-    return (cudaError_t(err), handle)
-{{endif}}
-
-{{if 'cudaIpcOpenEventHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaIpcOpenEventHandle(handle not None : cudaIpcEventHandle_t):
-    """ Opens an interprocess event handle for use in the current process.
-
-    Opens an interprocess event handle exported from another process with
-    :py:obj:`~.cudaIpcGetEventHandle`. This function returns a
-    :py:obj:`~.cudaEvent_t` that behaves like a locally created event with
-    the :py:obj:`~.cudaEventDisableTiming` flag specified. This event must
-    be freed with :py:obj:`~.cudaEventDestroy`.
-
-    Performing operations on the imported event after the exported event
-    has been freed with :py:obj:`~.cudaEventDestroy` will result in
-    undefined behavior.
-
-    IPC functionality is restricted to devices with support for unified
-    addressing on Linux and Windows operating systems. IPC functionality on
-    Windows is supported for compatibility purposes but not recommended as
-    it comes with performance cost. Users can test their device for IPC
-    functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
-    :py:obj:`~.cudaDevAttrIpcEventSupport`
-
-    Parameters
-    ----------
-    handle : :py:obj:`~.cudaIpcEventHandle_t`
-        Interprocess handle to open
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorDeviceUninitialized`
-    event : :py:obj:`~.cudaEvent_t`
-        Returns the imported event
-
-    See Also
-    --------
-    :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcOpenEventHandle`
-    """
-    cdef cudaEvent_t event = cudaEvent_t()
-    err = cyruntime.cudaIpcOpenEventHandle(<cyruntime.cudaEvent_t*>event._ptr, handle._ptr[0])
-    return (cudaError_t(err), event)
-{{endif}}
-
-{{if 'cudaIpcGetMemHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaIpcGetMemHandle(devPtr):
-    """ Gets an interprocess memory handle for an existing device memory allocation.
-
-    Takes a pointer to the base of an existing device memory allocation
-    created with :py:obj:`~.cudaMalloc` and exports it for use in another
-    process. This is a lightweight operation and may be called multiple
-    times on an allocation without adverse effects.
-
-    If a region of memory is freed with :py:obj:`~.cudaFree` and a
-    subsequent call to :py:obj:`~.cudaMalloc` returns memory with the same
-    device address, :py:obj:`~.cudaIpcGetMemHandle` will return a unique
-    handle for the new memory.
-
-    IPC functionality is restricted to devices with support for unified
-    addressing on Linux and Windows operating systems. IPC functionality on
-    Windows is supported for compatibility purposes but not recommended as
-    it comes with performance cost. Users can test their device for IPC
-    functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
-    :py:obj:`~.cudaDevAttrIpcEventSupport`
-
-    Parameters
-    ----------
-    devPtr : Any
-        Base pointer to previously allocated device memory
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
-    handle : :py:obj:`~.cudaIpcMemHandle_t`
-        Pointer to user allocated :py:obj:`~.cudaIpcMemHandle` to return
-        the handle in.
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcGetMemHandle`
-    """
-    cdef cudaIpcMemHandle_t handle = cudaIpcMemHandle_t()
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    err = cyruntime.cudaIpcGetMemHandle(<cyruntime.cudaIpcMemHandle_t*>handle._ptr, cydevPtr_ptr)
-    return (cudaError_t(err), handle)
-{{endif}}
-
-{{if 'cudaIpcOpenMemHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaIpcOpenMemHandle(handle not None : cudaIpcMemHandle_t, unsigned int flags):
-    """ Opens an interprocess memory handle exported from another process and returns a device pointer usable in the local process.
-
-    Maps memory exported from another process with
-    :py:obj:`~.cudaIpcGetMemHandle` into the current device address space.
-    For contexts on different devices :py:obj:`~.cudaIpcOpenMemHandle` can
-    attempt to enable peer access between the devices as if the user called
-    :py:obj:`~.cudaDeviceEnablePeerAccess`. This behavior is controlled by
-    the :py:obj:`~.cudaIpcMemLazyEnablePeerAccess` flag.
-    :py:obj:`~.cudaDeviceCanAccessPeer` can determine if a mapping is
-    possible.
-
-    :py:obj:`~.cudaIpcOpenMemHandle` can open handles to devices that may
-    not be visible in the process calling the API.
-
-    Contexts that may open :py:obj:`~.cudaIpcMemHandles` are restricted in
-    the following way. :py:obj:`~.cudaIpcMemHandles` from each device in a
-    given process may only be opened by one context per device per other
-    process.
-
-    If the memory handle has already been opened by the current context,
-    the reference count on the handle is incremented by 1 and the existing
-    device pointer is returned.
-
-    Memory returned from :py:obj:`~.cudaIpcOpenMemHandle` must be freed
-    with :py:obj:`~.cudaIpcCloseMemHandle`.
-
-    Calling :py:obj:`~.cudaFree` on an exported memory region before
-    calling :py:obj:`~.cudaIpcCloseMemHandle` in the importing context will
-    result in undefined behavior.
-
-    IPC functionality is restricted to devices with support for unified
-    addressing on Linux and Windows operating systems. IPC functionality on
-    Windows is supported for compatibility purposes but not recommended as
-    it comes with performance cost. Users can test their device for IPC
-    functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
-    :py:obj:`~.cudaDevAttrIpcEventSupport`
-
-    Parameters
-    ----------
-    handle : :py:obj:`~.cudaIpcMemHandle_t`
-        :py:obj:`~.cudaIpcMemHandle` to open
-    flags : unsigned int
-        Flags for this operation. Must be specified as
-        :py:obj:`~.cudaIpcMemLazyEnablePeerAccess`
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorDeviceUninitialized`, :py:obj:`~.cudaErrorTooManyPeers`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
-    devPtr : Any
-        Returned device pointer
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cuIpcOpenMemHandle`
-
-    Notes
-    -----
-    No guarantees are made about the address returned in `*devPtr`. 
-     In particular, multiple processes may not receive the same address for the same `handle`.
-    """
-    cdef void_ptr devPtr = 0
-    err = cyruntime.cudaIpcOpenMemHandle(<void**>&devPtr, handle._ptr[0], flags)
-    return (cudaError_t(err), devPtr)
-{{endif}}
-
-{{if 'cudaIpcCloseMemHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaIpcCloseMemHandle(devPtr):
-    """ Attempts to close memory mapped with cudaIpcOpenMemHandle.
-
-    Decrements the reference count of the memory returnd by
-    :py:obj:`~.cudaIpcOpenMemHandle` by 1. When the reference count reaches
-    0, this API unmaps the memory. The original allocation in the exporting
-    process as well as imported mappings in other processes will be
-    unaffected.
-
-    Any resources used to enable peer access will be freed if this is the
-    last mapping using them.
-
-    IPC functionality is restricted to devices with support for unified
-    addressing on Linux and Windows operating systems. IPC functionality on
-    Windows is supported for compatibility purposes but not recommended as
-    it comes with performance cost. Users can test their device for IPC
-    functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
-    :py:obj:`~.cudaDevAttrIpcEventSupport`
-
-    Parameters
-    ----------
-    devPtr : Any
-        Device pointer returned by :py:obj:`~.cudaIpcOpenMemHandle`
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cuIpcCloseMemHandle`
-    """
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    err = cyruntime.cudaIpcCloseMemHandle(cydevPtr_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDeviceFlushGPUDirectRDMAWrites' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceFlushGPUDirectRDMAWrites(target not None : cudaFlushGPUDirectRDMAWritesTarget, scope not None : cudaFlushGPUDirectRDMAWritesScope):
-    """ Blocks until remote writes are visible to the specified scope.
-
-    Blocks until remote writes to the target context via mappings created
-    through GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see
-    https://docs.nvidia.com/cuda/gpudirect-rdma for more information), are
-    visible to the specified scope.
-
-    If the scope equals or lies within the scope indicated by
-    :py:obj:`~.cudaDevAttrGPUDirectRDMAWritesOrdering`, the call will be a
-    no-op and can be safely omitted for performance. This can be determined
-    by comparing the numerical values between the two enums, with smaller
-    scopes having smaller values.
-
-    Users may query support for this API via
-    :py:obj:`~.cudaDevAttrGPUDirectRDMAFlushWritesOptions`.
-
-    Parameters
-    ----------
-    target : :py:obj:`~.cudaFlushGPUDirectRDMAWritesTarget`
-        The target of the operation, see cudaFlushGPUDirectRDMAWritesTarget
-    scope : :py:obj:`~.cudaFlushGPUDirectRDMAWritesScope`
-        The scope of the operation, see cudaFlushGPUDirectRDMAWritesScope
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotSupported`,
-
-    See Also
-    --------
-    :py:obj:`~.cuFlushGPUDirectRDMAWrites`
-    """
-    cdef cyruntime.cudaFlushGPUDirectRDMAWritesTarget cytarget = target.value
-    cdef cyruntime.cudaFlushGPUDirectRDMAWritesScope cyscope = scope.value
-    err = cyruntime.cudaDeviceFlushGPUDirectRDMAWrites(cytarget, cyscope)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDeviceRegisterAsyncNotification' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceRegisterAsyncNotification(int device, callbackFunc, userData):
-    """ Registers a callback function to receive async notifications.
-
-    Registers `callbackFunc` to receive async notifications.
-
-    The `userData` parameter is passed to the callback function at async
-    notification time. Likewise, `callback` is also passed to the callback
-    function to distinguish between multiple registered callbacks.
-
-    The callback function being registered should be designed to return
-    quickly (~10ms). Any long running tasks should be queued for execution
-    on an application thread.
-
-    Callbacks may not call cudaDeviceRegisterAsyncNotification or
-    cudaDeviceUnregisterAsyncNotification. Doing so will result in
-    :py:obj:`~.cudaErrorNotPermitted`. Async notification callbacks execute
-    in an undefined order and may be serialized.
-
-    Returns in `*callback` a handle representing the registered callback
-    instance.
-
-    Parameters
-    ----------
-    device : int
-        The device on which to register the callback
-    callbackFunc : :py:obj:`~.cudaAsyncCallback`
-        The function to register as a callback
-    userData : Any
-        A generic pointer to user data. This is passed into the callback
-        function.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorNotSupported` :py:obj:`~.cudaErrorInvalidDevice` :py:obj:`~.cudaErrorInvalidValue` :py:obj:`~.cudaErrorNotPermitted` :py:obj:`~.cudaErrorUnknown`
-    callback : :py:obj:`~.cudaAsyncCallbackHandle_t`
-        A handle representing the registered callback instance
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceUnregisterAsyncNotification`
-    """
-    cdef cyruntime.cudaAsyncCallback cycallbackFunc
-    if callbackFunc is None:
-        cycallbackFunc = <cyruntime.cudaAsyncCallback><void_ptr>0
-    elif isinstance(callbackFunc, (cudaAsyncCallback,)):
-        pcallbackFunc = int(callbackFunc)
-        cycallbackFunc = <cyruntime.cudaAsyncCallback><void_ptr>pcallbackFunc
-    else:
-        pcallbackFunc = int(cudaAsyncCallback(callbackFunc))
-        cycallbackFunc = <cyruntime.cudaAsyncCallback><void_ptr>pcallbackFunc
-    cyuserData = utils.HelperInputVoidPtr(userData)
-    cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
-    cdef cudaAsyncCallbackHandle_t callback = cudaAsyncCallbackHandle_t()
-    err = cyruntime.cudaDeviceRegisterAsyncNotification(device, cycallbackFunc, cyuserData_ptr, <cyruntime.cudaAsyncCallbackHandle_t*>callback._ptr)
-    return (cudaError_t(err), callback)
-{{endif}}
-
-{{if 'cudaDeviceUnregisterAsyncNotification' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceUnregisterAsyncNotification(int device, callback):
-    """ Unregisters an async notification callback.
-
-    Unregisters `callback` so that the corresponding callback function will
-    stop receiving async notifications.
-
-    Parameters
-    ----------
-    device : int
-        The device from which to remove `callback`.
-    callback : :py:obj:`~.cudaAsyncCallbackHandle_t`
-        The callback instance to unregister from receiving async
-        notifications.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorNotSupported` :py:obj:`~.cudaErrorInvalidDevice` :py:obj:`~.cudaErrorInvalidValue` :py:obj:`~.cudaErrorNotPermitted` :py:obj:`~.cudaErrorUnknown`
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceRegisterAsyncNotification`
-    """
-    cdef cyruntime.cudaAsyncCallbackHandle_t cycallback
-    if callback is None:
-        cycallback = <cyruntime.cudaAsyncCallbackHandle_t><void_ptr>0
-    elif isinstance(callback, (cudaAsyncCallbackHandle_t,)):
-        pcallback = int(callback)
-        cycallback = <cyruntime.cudaAsyncCallbackHandle_t><void_ptr>pcallback
-    else:
-        pcallback = int(cudaAsyncCallbackHandle_t(callback))
-        cycallback = <cyruntime.cudaAsyncCallbackHandle_t><void_ptr>pcallback
-    err = cyruntime.cudaDeviceUnregisterAsyncNotification(device, cycallback)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDeviceGetSharedMemConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetSharedMemConfig():
-    """ Returns the shared memory configuration for the current device.
-
-    [Deprecated]
-
-    This function will return in `pConfig` the current size of shared
-    memory banks on the current device. On devices with configurable shared
-    memory banks, :py:obj:`~.cudaDeviceSetSharedMemConfig` can be used to
-    change this setting, so that all subsequent kernel launches will by
-    default use the new bank size. When
-    :py:obj:`~.cudaDeviceGetSharedMemConfig` is called on devices without
-    configurable shared memory, it will return the fixed bank size of the
-    hardware.
-
-    The returned bank configurations can be either:
-
-    - :py:obj:`~.cudaSharedMemBankSizeFourByte` - shared memory bank width
-      is four bytes.
-
-    - :py:obj:`~.cudaSharedMemBankSizeEightByte` - shared memory bank width
-      is eight bytes.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pConfig : :py:obj:`~.cudaSharedMemConfig`
-        Returned shared memory configuration, or `None` when the call did
-        not return :py:obj:`~.cudaSuccess`
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaDeviceSetSharedMemConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuCtxGetSharedMemConfig`
-    """
-    cdef cyruntime.cudaSharedMemConfig pConfig
-    err = cyruntime.cudaDeviceGetSharedMemConfig(&pConfig)
-    status = cudaError_t(err)
-    # pConfig is an uninitialised C local. Unless the runtime reported
-    # success there is no guarantee it was written, and converting leftover
-    # stack contents to the Python enum may fail or yield a meaningless
-    # value that hides the real CUDA error. Convert only after success.
-    if status != cudaError_t.cudaSuccess:
-        return (status, None)
-    return (status, cudaSharedMemConfig(pConfig))
-{{endif}}
-
-{{if 'cudaDeviceSetSharedMemConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceSetSharedMemConfig(config not None : cudaSharedMemConfig):
-    """ Sets the shared memory configuration for the current device.
-
-    [Deprecated]
-
-    On devices with configurable shared memory banks, this function sets
-    the shared memory bank size used for all subsequent kernel launches.
-    Any per-function setting of shared memory made via
-    :py:obj:`~.cudaFuncSetSharedMemConfig` overrides the device wide
-    setting.
-
-    Changing the shared memory configuration between launches may introduce
-    a device side synchronization point.
-
-    Changing the shared memory bank size will not increase shared memory
-    usage or affect occupancy of kernels, but may have major effects on
-    performance. Larger bank sizes will allow for greater potential
-    bandwidth to shared memory, but will change what kinds of accesses to
-    shared memory will result in bank conflicts.
-
-    This function will do nothing on devices with fixed shared memory bank
-    size.
-
-    The supported bank configurations are:
-
-    - :py:obj:`~.cudaSharedMemBankSizeDefault`: set bank width to the
-      device default (currently, four bytes)
-
-    - :py:obj:`~.cudaSharedMemBankSizeFourByte`: set shared memory bank
-      width to be four bytes natively.
-
-    - :py:obj:`~.cudaSharedMemBankSizeEightByte`: set shared memory bank
-      width to be eight bytes natively.
-
-    Parameters
-    ----------
-    config : :py:obj:`~.cudaSharedMemConfig`
-        Requested shared memory configuration
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaDeviceGetSharedMemConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuCtxSetSharedMemConfig`
-    """
-    # Lower the Python enum member to its C enum value before the call.
-    cdef cyruntime.cudaSharedMemConfig cyconfig = config.value
-    status = cyruntime.cudaDeviceSetSharedMemConfig(cyconfig)
-    return (cudaError_t(status),)
-{{endif}}
-
-{{if 'cudaGetLastError' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetLastError():
-    """ Returns the last error from a runtime call and resets it.
-
-    Returns the last error that has been produced by any of the runtime
-    calls in the same instance of the CUDA Runtime library in the host
-    thread and resets it to :py:obj:`~.cudaSuccess`.
-
-    Note: Multiple instances of the CUDA Runtime library can be present in
-    an application when using a library that statically links the CUDA
-    Runtime.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMissingConfiguration`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorLaunchTimeout`, :py:obj:`~.cudaErrorLaunchOutOfResources`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidConfiguration`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidSymbol`, :py:obj:`~.cudaErrorUnmapBufferObjectFailed`, :py:obj:`~.cudaErrorInvalidDevicePointer`, :py:obj:`~.cudaErrorInvalidTexture`, :py:obj:`~.cudaErrorInvalidTextureBinding`, :py:obj:`~.cudaErrorInvalidChannelDescriptor`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`, :py:obj:`~.cudaErrorInvalidFilterSetting`, :py:obj:`~.cudaErrorInvalidNormSetting`, :py:obj:`~.cudaErrorUnknown`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInsufficientDriver`, :py:obj:`~.cudaErrorNoDevice`, :py:obj:`~.cudaErrorSetOnActiveProcess`, :py:obj:`~.cudaErrorStartupFailure`, :py:obj:`~.cudaErrorInvalidPtx`, :py:obj:`~.cudaErrorUnsupportedPtxVersion`, :py:obj:`~.cudaErrorNoKernelImageForDevice`, :py:obj:`~.cudaErrorJitCompilerNotFound`, :py:obj:`~.cudaErrorJitCompilationDisabled`
-
-    See Also
-    --------
-    :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaError`
-    """
-    # The runtime call's result is the only output; wrap it in a 1-tuple.
-    return (cudaError_t(cyruntime.cudaGetLastError()),)
-{{endif}}
-
-{{if 'cudaPeekAtLastError' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaPeekAtLastError():
-    """ Returns the last error from a runtime call without resetting it.
-
-    Returns the last error that has been produced by any of the runtime
-    calls in the same instance of the CUDA Runtime library in the host
-    thread. Unlike :py:obj:`~.cudaGetLastError()`, this call does not reset
-    the error to :py:obj:`~.cudaSuccess`.
-
-    Note: Multiple instances of the CUDA Runtime library can be present in
-    an application when using a library that statically links the CUDA
-    Runtime.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMissingConfiguration`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorLaunchTimeout`, :py:obj:`~.cudaErrorLaunchOutOfResources`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidConfiguration`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidSymbol`, :py:obj:`~.cudaErrorUnmapBufferObjectFailed`, :py:obj:`~.cudaErrorInvalidDevicePointer`, :py:obj:`~.cudaErrorInvalidTexture`, :py:obj:`~.cudaErrorInvalidTextureBinding`, :py:obj:`~.cudaErrorInvalidChannelDescriptor`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`, :py:obj:`~.cudaErrorInvalidFilterSetting`, :py:obj:`~.cudaErrorInvalidNormSetting`, :py:obj:`~.cudaErrorUnknown`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInsufficientDriver`, :py:obj:`~.cudaErrorNoDevice`, :py:obj:`~.cudaErrorSetOnActiveProcess`, :py:obj:`~.cudaErrorStartupFailure`, :py:obj:`~.cudaErrorInvalidPtx`, :py:obj:`~.cudaErrorUnsupportedPtxVersion`, :py:obj:`~.cudaErrorNoKernelImageForDevice`, :py:obj:`~.cudaErrorJitCompilerNotFound`, :py:obj:`~.cudaErrorJitCompilationDisabled`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaError`
-    """
-    # The runtime call's result is the only output; wrap it in a 1-tuple.
-    return (cudaError_t(cyruntime.cudaPeekAtLastError()),)
-{{endif}}
-
-{{if 'cudaGetErrorName' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetErrorName(error not None : cudaError_t):
-    """ Returns the string representation of an error code enum name.
-
-    Returns a string containing the name of an error code in the enum. If
-    the error code is not recognized, "unrecognized error code" is
-    returned.
-
-    Parameters
-    ----------
-    error : :py:obj:`~.cudaError_t`
-        Error code to convert to string
-
-    Returns
-    -------
-    cudaError_t
-        Always :py:obj:`~.cudaSuccess`; the underlying call reports no
-        status of its own
-    bytes
-        `char*` pointer to a NULL-terminated string
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaError`, :py:obj:`~.cuGetErrorName`
-    """
-    # Lower the Python enum member to its C enum value before the call.
-    cdef cyruntime.cudaError_t cyerror = error.value
-    name = cyruntime.cudaGetErrorName(cyerror)
-    # The status slot is fixed so the result keeps the (status, value) shape.
-    return (cudaError_t.cudaSuccess, name)
-{{endif}}
-
-{{if 'cudaGetErrorString' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetErrorString(error not None : cudaError_t):
-    """ Returns the description string for an error code.
-
-    Returns the description string for an error code. If the error code is
-    not recognized, "unrecognized error code" is returned.
-
-    Parameters
-    ----------
-    error : :py:obj:`~.cudaError_t`
-        Error code to convert to string
-
-    Returns
-    -------
-    cudaError_t
-        Always :py:obj:`~.cudaSuccess`; the underlying call reports no
-        status of its own
-    bytes
-        `char*` pointer to a NULL-terminated string
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaError`, :py:obj:`~.cuGetErrorString`
-    """
-    # Lower the Python enum member to its C enum value before the call.
-    cdef cyruntime.cudaError_t cyerror = error.value
-    description = cyruntime.cudaGetErrorString(cyerror)
-    # The status slot is fixed so the result keeps the (status, value) shape.
-    return (cudaError_t.cudaSuccess, description)
-{{endif}}
-
-{{if 'cudaGetDeviceCount' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetDeviceCount():
-    """ Returns the number of compute-capable devices.
-
-    Returns in `*count` the number of devices with compute capability
-    greater or equal to 2.0 that are available for execution.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`
-    count : int
-        Returns the number of devices with compute capability greater or
-        equal to 2.0
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetCount`
-    """
-    # Starts at zero, so that is what gets returned if the runtime never
-    # writes to it.
-    cdef int count = 0
-    status = cyruntime.cudaGetDeviceCount(&count)
-    return (cudaError_t(status), count)
-{{endif}}
-
-{{if 'cudaGetDeviceProperties_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetDeviceProperties(int device):
-    """ Returns information about the compute-device.
-
-    Returns in `*prop` the properties of device `device`. The
-    :py:obj:`~.cudaDeviceProp` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.name[256]` is an ASCII string identifying the device.
-
-    - :py:obj:`~.uuid` is a 16-byte unique identifier.
-
-    - :py:obj:`~.totalGlobalMem` is the total amount of global memory
-      available on the device in bytes.
-
-    - :py:obj:`~.sharedMemPerBlock` is the maximum amount of shared memory
-      available to a thread block in bytes.
-
-    - :py:obj:`~.regsPerBlock` is the maximum number of 32-bit registers
-      available to a thread block.
-
-    - :py:obj:`~.warpSize` is the warp size in threads.
-
-    - :py:obj:`~.memPitch` is the maximum pitch in bytes allowed by the
-      memory copy functions that involve memory regions allocated through
-      :py:obj:`~.cudaMallocPitch()`.
-
-    - :py:obj:`~.maxThreadsPerBlock` is the maximum number of threads per
-      block.
-
-    - :py:obj:`~.maxThreadsDim[3]` contains the maximum size of each
-      dimension of a block.
-
-    - :py:obj:`~.maxGridSize[3]` contains the maximum size of each
-      dimension of a grid.
-
-    - :py:obj:`~.clockRate` is the clock frequency in kilohertz.
-
-    - :py:obj:`~.totalConstMem` is the total amount of constant memory
-      available on the device in bytes.
-
-    - :py:obj:`~.major`, :py:obj:`~.minor` are the major and minor revision
-      numbers defining the device's compute capability.
-
-    - :py:obj:`~.textureAlignment` is the alignment requirement; texture
-      base addresses that are aligned to :py:obj:`~.textureAlignment` bytes
-      do not need an offset applied to texture fetches.
-
-    - :py:obj:`~.texturePitchAlignment` is the pitch alignment requirement
-      for 2D texture references that are bound to pitched memory.
-
-    - :py:obj:`~.deviceOverlap` is 1 if the device can concurrently copy
-      memory between host and device while executing a kernel, or 0 if not.
-      Deprecated, use instead asyncEngineCount.
-
-    - :py:obj:`~.multiProcessorCount` is the number of multiprocessors on
-      the device.
-
-    - :py:obj:`~.kernelExecTimeoutEnabled` is 1 if there is a run time
-      limit for kernels executed on the device, or 0 if not.
-
-    - :py:obj:`~.integrated` is 1 if the device is an integrated
-      (motherboard) GPU and 0 if it is a discrete (card) component.
-
-    - :py:obj:`~.canMapHostMemory` is 1 if the device can map host memory
-      into the CUDA address space for use with
-      :py:obj:`~.cudaHostAlloc()`/:py:obj:`~.cudaHostGetDevicePointer()`,
-      or 0 if not.
-
-    - :py:obj:`~.computeMode` is the compute mode that the device is
-      currently in. Available modes are as follows:
-
-      - cudaComputeModeDefault: Default mode - Device is not restricted and
-        multiple threads can use :py:obj:`~.cudaSetDevice()` with this
-        device.
-
-      - cudaComputeModeProhibited: Compute-prohibited mode - No threads can
-        use :py:obj:`~.cudaSetDevice()` with this device.
-
-      - cudaComputeModeExclusiveProcess: Compute-exclusive-process mode -
-        Many threads in one process will be able to use
-        :py:obj:`~.cudaSetDevice()` with this device. When an occupied
-        exclusive mode device is chosen with :py:obj:`~.cudaSetDevice`, all
-        subsequent non-device management runtime functions will return
-        :py:obj:`~.cudaErrorDevicesUnavailable`.
-
-    - :py:obj:`~.maxTexture1D` is the maximum 1D texture size.
-
-    - :py:obj:`~.maxTexture1DMipmap` is the maximum 1D mipmapped texture
-      size.
-
-    - :py:obj:`~.maxTexture1DLinear` is the maximum 1D texture size for
-      textures bound to linear memory.
-
-    - :py:obj:`~.maxTexture2D[2]` contains the maximum 2D texture
-      dimensions.
-
-    - :py:obj:`~.maxTexture2DMipmap[2]` contains the maximum 2D mipmapped
-      texture dimensions.
-
-    - :py:obj:`~.maxTexture2DLinear[3]` contains the maximum 2D texture
-      dimensions for 2D textures bound to pitch linear memory.
-
-    - :py:obj:`~.maxTexture2DGather[2]` contains the maximum 2D texture
-      dimensions if texture gather operations have to be performed.
-
-    - :py:obj:`~.maxTexture3D[3]` contains the maximum 3D texture
-      dimensions.
-
-    - :py:obj:`~.maxTexture3DAlt[3]` contains the maximum alternate 3D
-      texture dimensions.
-
-    - :py:obj:`~.maxTextureCubemap` is the maximum cubemap texture width or
-      height.
-
-    - :py:obj:`~.maxTexture1DLayered[2]` contains the maximum 1D layered
-      texture dimensions.
-
-    - :py:obj:`~.maxTexture2DLayered[3]` contains the maximum 2D layered
-      texture dimensions.
-
-    - :py:obj:`~.maxTextureCubemapLayered[2]` contains the maximum cubemap
-      layered texture dimensions.
-
-    - :py:obj:`~.maxSurface1D` is the maximum 1D surface size.
-
-    - :py:obj:`~.maxSurface2D[2]` contains the maximum 2D surface
-      dimensions.
-
-    - :py:obj:`~.maxSurface3D[3]` contains the maximum 3D surface
-      dimensions.
-
-    - :py:obj:`~.maxSurface1DLayered[2]` contains the maximum 1D layered
-      surface dimensions.
-
-    - :py:obj:`~.maxSurface2DLayered[3]` contains the maximum 2D layered
-      surface dimensions.
-
-    - :py:obj:`~.maxSurfaceCubemap` is the maximum cubemap surface width or
-      height.
-
-    - :py:obj:`~.maxSurfaceCubemapLayered[2]` contains the maximum cubemap
-      layered surface dimensions.
-
-    - :py:obj:`~.surfaceAlignment` specifies the alignment requirements for
-      surfaces.
-
-    - :py:obj:`~.concurrentKernels` is 1 if the device supports executing
-      multiple kernels within the same context simultaneously, or 0 if not.
-      It is not guaranteed that multiple kernels will be resident on the
-      device concurrently so this feature should not be relied upon for
-      correctness.
-
-    - :py:obj:`~.ECCEnabled` is 1 if the device has ECC support turned on,
-      or 0 if not.
-
-    - :py:obj:`~.pciBusID` is the PCI bus identifier of the device.
-
-    - :py:obj:`~.pciDeviceID` is the PCI device (sometimes called slot)
-      identifier of the device.
-
-    - :py:obj:`~.pciDomainID` is the PCI domain identifier of the device.
-
-    - :py:obj:`~.tccDriver` is 1 if the device is using a TCC driver or 0
-      if not.
-
-    - :py:obj:`~.asyncEngineCount` is 1 when the device can concurrently
-      copy memory between host and device while executing a kernel. It is 2
-      when the device can concurrently copy memory between host and device
-      in both directions and execute a kernel at the same time. It is 0 if
-      neither of these is supported.
-
-    - :py:obj:`~.unifiedAddressing` is 1 if the device shares a unified
-      address space with the host and 0 otherwise.
-
-    - :py:obj:`~.memoryClockRate` is the peak memory clock frequency in
-      kilohertz.
-
-    - :py:obj:`~.memoryBusWidth` is the memory bus width in bits.
-
-    - :py:obj:`~.l2CacheSize` is L2 cache size in bytes.
-
-    - :py:obj:`~.persistingL2CacheMaxSize` is L2 cache's maximum persisting
-      lines size in bytes.
-
-    - :py:obj:`~.maxThreadsPerMultiProcessor` is the number of maximum
-      resident threads per multiprocessor.
-
-    - :py:obj:`~.streamPrioritiesSupported` is 1 if the device supports
-      stream priorities, or 0 if it is not supported.
-
-    - :py:obj:`~.globalL1CacheSupported` is 1 if the device supports
-      caching of globals in L1 cache, or 0 if it is not supported.
-
-    - :py:obj:`~.localL1CacheSupported` is 1 if the device supports caching
-      of locals in L1 cache, or 0 if it is not supported.
-
-    - :py:obj:`~.sharedMemPerMultiprocessor` is the maximum amount of
-      shared memory available to a multiprocessor in bytes; this amount is
-      shared by all thread blocks simultaneously resident on a
-      multiprocessor.
-
-    - :py:obj:`~.regsPerMultiprocessor` is the maximum number of 32-bit
-      registers available to a multiprocessor; this number is shared by all
-      thread blocks simultaneously resident on a multiprocessor.
-
-    - :py:obj:`~.managedMemory` is 1 if the device supports allocating
-      managed memory on this system, or 0 if it is not supported.
-
-    - :py:obj:`~.isMultiGpuBoard` is 1 if the device is on a multi-GPU
-      board (e.g. Gemini cards), and 0 if not.
-
-    - :py:obj:`~.multiGpuBoardGroupID` is a unique identifier for a group
-      of devices associated with the same board. Devices on the same multi-
-      GPU board will share the same identifier.
-
-    - :py:obj:`~.hostNativeAtomicSupported` is 1 if the link between the
-      device and the host supports native atomic operations, or 0 if it is
-      not supported.
-
-    - :py:obj:`~.singleToDoublePrecisionPerfRatio` is the ratio of single
-      precision performance (in floating-point operations per second) to
-      double precision performance.
-
-    - :py:obj:`~.pageableMemoryAccess` is 1 if the device supports
-      coherently accessing pageable memory without calling cudaHostRegister
-      on it, and 0 otherwise.
-
-    - :py:obj:`~.concurrentManagedAccess` is 1 if the device can coherently
-      access managed memory concurrently with the CPU, and 0 otherwise.
-
-    - :py:obj:`~.computePreemptionSupported` is 1 if the device supports
-      Compute Preemption, and 0 otherwise.
-
-    - :py:obj:`~.canUseHostPointerForRegisteredMem` is 1 if the device can
-      access host registered memory at the same virtual address as the CPU,
-      and 0 otherwise.
-
-    - :py:obj:`~.cooperativeLaunch` is 1 if the device supports launching
-      cooperative kernels via :py:obj:`~.cudaLaunchCooperativeKernel`, and
-      0 otherwise.
-
-    - :py:obj:`~.cooperativeMultiDeviceLaunch` is 1 if the device supports
-      launching cooperative kernels via
-      :py:obj:`~.cudaLaunchCooperativeKernelMultiDevice`, and 0 otherwise.
-
-    - :py:obj:`~.sharedMemPerBlockOptin` is the per device maximum shared
-      memory per block usable by special opt in.
-
-    - :py:obj:`~.pageableMemoryAccessUsesHostPageTables` is 1 if the device
-      accesses pageable memory via the host's page tables, and 0 otherwise.
-
-    - :py:obj:`~.directManagedMemAccessFromHost` is 1 if the host can
-      directly access managed memory on the device without migration, and 0
-      otherwise.
-
-    - :py:obj:`~.maxBlocksPerMultiProcessor` is the maximum number of
-      thread blocks that can reside on a multiprocessor.
-
-    - :py:obj:`~.accessPolicyMaxWindowSize` is the maximum value of
-      :py:obj:`~.cudaAccessPolicyWindow.num_bytes`.
-
-    - :py:obj:`~.reservedSharedMemPerBlock` is the shared memory reserved
-      by CUDA driver per block in bytes
-
-    - :py:obj:`~.hostRegisterSupported` is 1 if the device supports host
-      memory registration via :py:obj:`~.cudaHostRegister`, and 0
-      otherwise.
-
-    - :py:obj:`~.sparseCudaArraySupported` is 1 if the device supports
-      sparse CUDA arrays and sparse CUDA mipmapped arrays, 0 otherwise
-
-    - :py:obj:`~.hostRegisterReadOnlySupported` is 1 if the device supports
-      using the :py:obj:`~.cudaHostRegister` flag cudaHostRegisterReadOnly
-      to register memory that must be mapped as read-only to the GPU
-
-    - :py:obj:`~.timelineSemaphoreInteropSupported` is 1 if external
-      timeline semaphore interop is supported on the device, 0 otherwise
-
-    - :py:obj:`~.memoryPoolsSupported` is 1 if the device supports using
-      the cudaMallocAsync and cudaMemPool family of APIs, 0 otherwise
-
-    - :py:obj:`~.gpuDirectRDMASupported` is 1 if the device supports
-      GPUDirect RDMA APIs, 0 otherwise
-
-    - :py:obj:`~.gpuDirectRDMAFlushWritesOptions` is a bitmask to be
-      interpreted according to the
-      :py:obj:`~.cudaFlushGPUDirectRDMAWritesOptions` enum
-
-    - :py:obj:`~.gpuDirectRDMAWritesOrdering` See the
-      :py:obj:`~.cudaGPUDirectRDMAWritesOrdering` enum for numerical values
-
-    - :py:obj:`~.memoryPoolSupportedHandleTypes` is a bitmask of handle
-      types supported with mempool-based IPC
-
-    - :py:obj:`~.deferredMappingCudaArraySupported` is 1 if the device
-      supports deferred mapping CUDA arrays and CUDA mipmapped arrays
-
-    - :py:obj:`~.ipcEventSupported` is 1 if the device supports IPC Events,
-      and 0 otherwise
-
-    - :py:obj:`~.unifiedFunctionPointers` is 1 if the device supports
-      unified pointers, and 0 otherwise
-
-    Parameters
-    ----------
-    device : int
-        Device number to get properties for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
-    prop : :py:obj:`~.cudaDeviceProp`
-        Properties for the specified device
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetName`
-    """
-    # A fresh wrapper object is created first; the runtime call below is
-    # handed its `_ptr` (cast to the C struct pointer) as the output buffer.
-    cdef cudaDeviceProp prop = cudaDeviceProp()
-    # NOTE(review): the enclosing template guard tests for the
-    # 'cudaGetDeviceProperties_v2' symbol while this call uses the
-    # unversioned name; presumably the cyruntime layer maps one onto the
-    # other. Confirm before changing either side.
-    err = cyruntime.cudaGetDeviceProperties(<cyruntime.cudaDeviceProp*>prop._ptr, device)
-    # `prop` is returned regardless of `err`; callers should check the status
-    # before trusting its contents.
-    return (cudaError_t(err), prop)
-{{endif}}
-
-{{if 'cudaDeviceGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetAttribute(attr not None : cudaDeviceAttr, int device):
-    """ Returns information about the device.
-
-    Returns in `*value` the integer value of the attribute `attr` on device
-    `device`. The supported attributes are:
-
-    - :py:obj:`~.cudaDevAttrMaxThreadsPerBlock`: Maximum number of threads
-      per block
-
-    - :py:obj:`~.cudaDevAttrMaxBlockDimX`: Maximum x-dimension of a block
-
-    - :py:obj:`~.cudaDevAttrMaxBlockDimY`: Maximum y-dimension of a block
-
-    - :py:obj:`~.cudaDevAttrMaxBlockDimZ`: Maximum z-dimension of a block
-
-    - :py:obj:`~.cudaDevAttrMaxGridDimX`: Maximum x-dimension of a grid
-
-    - :py:obj:`~.cudaDevAttrMaxGridDimY`: Maximum y-dimension of a grid
-
-    - :py:obj:`~.cudaDevAttrMaxGridDimZ`: Maximum z-dimension of a grid
-
-    - :py:obj:`~.cudaDevAttrMaxSharedMemoryPerBlock`: Maximum amount of
-      shared memory available to a thread block in bytes
-
-    - :py:obj:`~.cudaDevAttrTotalConstantMemory`: Memory available on
-      device for constant variables in a CUDA C kernel in bytes
-
-    - :py:obj:`~.cudaDevAttrWarpSize`: Warp size in threads
-
-    - :py:obj:`~.cudaDevAttrMaxPitch`: Maximum pitch in bytes allowed by
-      the memory copy functions that involve memory regions allocated
-      through :py:obj:`~.cudaMallocPitch()`
-
-    - :py:obj:`~.cudaDevAttrMaxTexture1DWidth`: Maximum 1D texture width
-
-    - :py:obj:`~.cudaDevAttrMaxTexture1DLinearWidth`: Maximum width for a
-      1D texture bound to linear memory
-
-    - :py:obj:`~.cudaDevAttrMaxTexture1DMipmappedWidth`: Maximum mipmapped
-      1D texture width
-
-    - :py:obj:`~.cudaDevAttrMaxTexture2DWidth`: Maximum 2D texture width
-
-    - :py:obj:`~.cudaDevAttrMaxTexture2DHeight`: Maximum 2D texture height
-
-    - :py:obj:`~.cudaDevAttrMaxTexture2DLinearWidth`: Maximum width for a
-      2D texture bound to linear memory
-
-    - :py:obj:`~.cudaDevAttrMaxTexture2DLinearHeight`: Maximum height for a
-      2D texture bound to linear memory
-
-    - :py:obj:`~.cudaDevAttrMaxTexture2DLinearPitch`: Maximum pitch in
-      bytes for a 2D texture bound to linear memory
-
-    - :py:obj:`~.cudaDevAttrMaxTexture2DMipmappedWidth`: Maximum mipmapped
-      2D texture width
-
-    - :py:obj:`~.cudaDevAttrMaxTexture2DMipmappedHeight`: Maximum mipmapped
-      2D texture height
-
-    - :py:obj:`~.cudaDevAttrMaxTexture3DWidth`: Maximum 3D texture width
-
-    - :py:obj:`~.cudaDevAttrMaxTexture3DHeight`: Maximum 3D texture height
-
-    - :py:obj:`~.cudaDevAttrMaxTexture3DDepth`: Maximum 3D texture depth
-
-    - :py:obj:`~.cudaDevAttrMaxTexture3DWidthAlt`: Alternate maximum 3D
-      texture width, 0 if no alternate maximum 3D texture size is supported
-
-    - :py:obj:`~.cudaDevAttrMaxTexture3DHeightAlt`: Alternate maximum 3D
-      texture height, 0 if no alternate maximum 3D texture size is
-      supported
-
-    - :py:obj:`~.cudaDevAttrMaxTexture3DDepthAlt`: Alternate maximum 3D
-      texture depth, 0 if no alternate maximum 3D texture size is supported
-
-    - :py:obj:`~.cudaDevAttrMaxTextureCubemapWidth`: Maximum cubemap
-      texture width or height
-
-    - :py:obj:`~.cudaDevAttrMaxTexture1DLayeredWidth`: Maximum 1D layered
-      texture width
-
-    - :py:obj:`~.cudaDevAttrMaxTexture1DLayeredLayers`: Maximum layers in a
-      1D layered texture
-
-    - :py:obj:`~.cudaDevAttrMaxTexture2DLayeredWidth`: Maximum 2D layered
-      texture width
-
-    - :py:obj:`~.cudaDevAttrMaxTexture2DLayeredHeight`: Maximum 2D layered
-      texture height
-
-    - :py:obj:`~.cudaDevAttrMaxTexture2DLayeredLayers`: Maximum layers in a
-      2D layered texture
-
-    - :py:obj:`~.cudaDevAttrMaxTextureCubemapLayeredWidth`: Maximum cubemap
-      layered texture width or height
-
-    - :py:obj:`~.cudaDevAttrMaxTextureCubemapLayeredLayers`: Maximum layers
-      in a cubemap layered texture
-
-    - :py:obj:`~.cudaDevAttrMaxSurface1DWidth`: Maximum 1D surface width
-
-    - :py:obj:`~.cudaDevAttrMaxSurface2DWidth`: Maximum 2D surface width
-
-    - :py:obj:`~.cudaDevAttrMaxSurface2DHeight`: Maximum 2D surface height
-
-    - :py:obj:`~.cudaDevAttrMaxSurface3DWidth`: Maximum 3D surface width
-
-    - :py:obj:`~.cudaDevAttrMaxSurface3DHeight`: Maximum 3D surface height
-
-    - :py:obj:`~.cudaDevAttrMaxSurface3DDepth`: Maximum 3D surface depth
-
-    - :py:obj:`~.cudaDevAttrMaxSurface1DLayeredWidth`: Maximum 1D layered
-      surface width
-
-    - :py:obj:`~.cudaDevAttrMaxSurface1DLayeredLayers`: Maximum layers in a
-      1D layered surface
-
-    - :py:obj:`~.cudaDevAttrMaxSurface2DLayeredWidth`: Maximum 2D layered
-      surface width
-
-    - :py:obj:`~.cudaDevAttrMaxSurface2DLayeredHeight`: Maximum 2D layered
-      surface height
-
-    - :py:obj:`~.cudaDevAttrMaxSurface2DLayeredLayers`: Maximum layers in a
-      2D layered surface
-
-    - :py:obj:`~.cudaDevAttrMaxSurfaceCubemapWidth`: Maximum cubemap
-      surface width
-
-    - :py:obj:`~.cudaDevAttrMaxSurfaceCubemapLayeredWidth`: Maximum cubemap
-      layered surface width
-
-    - :py:obj:`~.cudaDevAttrMaxSurfaceCubemapLayeredLayers`: Maximum layers
-      in a cubemap layered surface
-
-    - :py:obj:`~.cudaDevAttrMaxRegistersPerBlock`: Maximum number of 32-bit
-      registers available to a thread block
-
-    - :py:obj:`~.cudaDevAttrClockRate`: Peak clock frequency in kilohertz
-
-    - :py:obj:`~.cudaDevAttrTextureAlignment`: Alignment requirement;
-      texture base addresses aligned to :py:obj:`~.textureAlign` bytes do
-      not need an offset applied to texture fetches
-
-    - :py:obj:`~.cudaDevAttrTexturePitchAlignment`: Pitch alignment
-      requirement for 2D texture references bound to pitched memory
-
-    - :py:obj:`~.cudaDevAttrGpuOverlap`: 1 if the device can concurrently
-      copy memory between host and device while executing a kernel, or 0 if
-      not
-
-    - :py:obj:`~.cudaDevAttrMultiProcessorCount`: Number of multiprocessors
-      on the device
-
-    - :py:obj:`~.cudaDevAttrKernelExecTimeout`: 1 if there is a run time
-      limit for kernels executed on the device, or 0 if not
-
-    - :py:obj:`~.cudaDevAttrIntegrated`: 1 if the device is integrated with
-      the memory subsystem, or 0 if not
-
-    - :py:obj:`~.cudaDevAttrCanMapHostMemory`: 1 if the device can map host
-      memory into the CUDA address space, or 0 if not
-
-    - :py:obj:`~.cudaDevAttrComputeMode`: Compute mode is the compute mode
-      that the device is currently in. Available modes are as follows:
-
-      - :py:obj:`~.cudaComputeModeDefault`: Default mode - Device is not
-        restricted and multiple threads can use :py:obj:`~.cudaSetDevice()`
-        with this device.
-
-      - :py:obj:`~.cudaComputeModeProhibited`: Compute-prohibited mode - No
-        threads can use :py:obj:`~.cudaSetDevice()` with this device.
-
-      - :py:obj:`~.cudaComputeModeExclusiveProcess`: Compute-exclusive-
-        process mode - Many threads in one process will be able to use
-        :py:obj:`~.cudaSetDevice()` with this device.
-
-    - :py:obj:`~.cudaDevAttrConcurrentKernels`: 1 if the device supports
-      executing multiple kernels within the same context simultaneously, or
-      0 if not. It is not guaranteed that multiple kernels will be resident
-      on the device concurrently so this feature should not be relied upon
-      for correctness.
-
-    - :py:obj:`~.cudaDevAttrEccEnabled`: 1 if error correction is enabled
-      on the device, 0 if error correction is disabled or not supported by
-      the device
-
-    - :py:obj:`~.cudaDevAttrPciBusId`: PCI bus identifier of the device
-
-    - :py:obj:`~.cudaDevAttrPciDeviceId`: PCI device (also known as slot)
-      identifier of the device
-
-    - :py:obj:`~.cudaDevAttrTccDriver`: 1 if the device is using a TCC
-      driver. TCC is only available on Tesla hardware running Windows Vista
-      or later.
-
-    - :py:obj:`~.cudaDevAttrMemoryClockRate`: Peak memory clock frequency
-      in kilohertz
-
-    - :py:obj:`~.cudaDevAttrGlobalMemoryBusWidth`: Global memory bus width
-      in bits
-
-    - :py:obj:`~.cudaDevAttrL2CacheSize`: Size of L2 cache in bytes. 0 if
-      the device doesn't have L2 cache.
-
-    - :py:obj:`~.cudaDevAttrMaxThreadsPerMultiProcessor`: Maximum resident
-      threads per multiprocessor
-
-    - :py:obj:`~.cudaDevAttrUnifiedAddressing`: 1 if the device shares a
-      unified address space with the host, or 0 if not
-
-    - :py:obj:`~.cudaDevAttrComputeCapabilityMajor`: Major compute
-      capability version number
-
-    - :py:obj:`~.cudaDevAttrComputeCapabilityMinor`: Minor compute
-      capability version number
-
-    - :py:obj:`~.cudaDevAttrStreamPrioritiesSupported`: 1 if the device
-      supports stream priorities, or 0 if not
-
-    - :py:obj:`~.cudaDevAttrGlobalL1CacheSupported`: 1 if device supports
-      caching globals in L1 cache, 0 if not
-
-    - :py:obj:`~.cudaDevAttrLocalL1CacheSupported`: 1 if device supports
-      caching locals in L1 cache, 0 if not
-
-    - :py:obj:`~.cudaDevAttrMaxSharedMemoryPerMultiprocessor`: Maximum
-      amount of shared memory available to a multiprocessor in bytes; this
-      amount is shared by all thread blocks simultaneously resident on a
-      multiprocessor
-
-    - :py:obj:`~.cudaDevAttrMaxRegistersPerMultiprocessor`: Maximum number
-      of 32-bit registers available to a multiprocessor; this number is
-      shared by all thread blocks simultaneously resident on a
-      multiprocessor
-
-    - :py:obj:`~.cudaDevAttrManagedMemory`: 1 if device supports allocating
-      managed memory, 0 if not
-
-    - :py:obj:`~.cudaDevAttrIsMultiGpuBoard`: 1 if device is on a multi-GPU
-      board, 0 if not
-
-    - :py:obj:`~.cudaDevAttrMultiGpuBoardGroupID`: Unique identifier for a
-      group of devices on the same multi-GPU board
-
-    - :py:obj:`~.cudaDevAttrHostNativeAtomicSupported`: 1 if the link
-      between the device and the host supports native atomic operations
-
-    - :py:obj:`~.cudaDevAttrSingleToDoublePrecisionPerfRatio`: Ratio of
-      single precision performance (in floating-point operations per
-      second) to double precision performance
-
-    - :py:obj:`~.cudaDevAttrPageableMemoryAccess`: 1 if the device supports
-      coherently accessing pageable memory without calling cudaHostRegister
-      on it, and 0 otherwise
-
-    - :py:obj:`~.cudaDevAttrConcurrentManagedAccess`: 1 if the device can
-      coherently access managed memory concurrently with the CPU, and 0
-      otherwise
-
-    - :py:obj:`~.cudaDevAttrComputePreemptionSupported`: 1 if the device
-      supports Compute Preemption, 0 if not
-
-    - :py:obj:`~.cudaDevAttrCanUseHostPointerForRegisteredMem`: 1 if the
-      device can access host registered memory at the same virtual address
-      as the CPU, and 0 otherwise
-
-    - :py:obj:`~.cudaDevAttrCooperativeLaunch`: 1 if the device supports
-      launching cooperative kernels via
-      :py:obj:`~.cudaLaunchCooperativeKernel`, and 0 otherwise
-
-    - :py:obj:`~.cudaDevAttrCooperativeMultiDeviceLaunch`: 1 if the device
-      supports launching cooperative kernels via
-      :py:obj:`~.cudaLaunchCooperativeKernelMultiDevice`, and 0 otherwise
-
-    - :py:obj:`~.cudaDevAttrCanFlushRemoteWrites`: 1 if the device supports
-      flushing of outstanding remote writes, and 0 otherwise
-
-    - :py:obj:`~.cudaDevAttrHostRegisterSupported`: 1 if the device
-      supports host memory registration via :py:obj:`~.cudaHostRegister`,
-      and 0 otherwise
-
-    - :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`: 1 if
-      the device accesses pageable memory via the host's page tables, and 0
-      otherwise
-
-    - :py:obj:`~.cudaDevAttrDirectManagedMemAccessFromHost`: 1 if the host
-      can directly access managed memory on the device without migration,
-      and 0 otherwise
-
-    - :py:obj:`~.cudaDevAttrMaxSharedMemoryPerBlockOptin`: Maximum per
-      block shared memory size on the device. This value can be opted into
-      when using :py:obj:`~.cudaFuncSetAttribute`
-
-    - :py:obj:`~.cudaDevAttrMaxBlocksPerMultiprocessor`: Maximum number of
-      thread blocks that can reside on a multiprocessor
-
-    - :py:obj:`~.cudaDevAttrMaxPersistingL2CacheSize`: Maximum L2
-      persisting lines capacity setting in bytes
-
-    - :py:obj:`~.cudaDevAttrMaxAccessPolicyWindowSize`: Maximum value of
-      :py:obj:`~.cudaAccessPolicyWindow.num_bytes`
-
-    - :py:obj:`~.cudaDevAttrReservedSharedMemoryPerBlock`: Shared memory
-      reserved by CUDA driver per block in bytes
-
-    - :py:obj:`~.cudaDevAttrSparseCudaArraySupported`: 1 if the device
-      supports sparse CUDA arrays and sparse CUDA mipmapped arrays.
-
-    - :py:obj:`~.cudaDevAttrHostRegisterReadOnlySupported`: Device supports
-      using the :py:obj:`~.cudaHostRegister` flag cudaHostRegisterReadOnly
-      to register memory that must be mapped as read-only to the GPU
-
-    - :py:obj:`~.cudaDevAttrMemoryPoolsSupported`: 1 if the device supports
-      using the cudaMallocAsync and cudaMemPool family of APIs, and 0
-      otherwise
-
-    - :py:obj:`~.cudaDevAttrGPUDirectRDMASupported`: 1 if the device
-      supports GPUDirect RDMA APIs, and 0 otherwise
-
-    - :py:obj:`~.cudaDevAttrGPUDirectRDMAFlushWritesOptions`: bitmask to be
-      interpreted according to the
-      :py:obj:`~.cudaFlushGPUDirectRDMAWritesOptions` enum
-
-    - :py:obj:`~.cudaDevAttrGPUDirectRDMAWritesOrdering`: see the
-      :py:obj:`~.cudaGPUDirectRDMAWritesOrdering` enum for numerical values
-
-    - :py:obj:`~.cudaDevAttrMemoryPoolSupportedHandleTypes`: Bitmask of
-      handle types supported with mempool based IPC
-
-    - :py:obj:`~.cudaDevAttrDeferredMappingCudaArraySupported` : 1 if the
-      device supports deferred mapping CUDA arrays and CUDA mipmapped
-      arrays.
-
-    - :py:obj:`~.cudaDevAttrIpcEventSupport`: 1 if the device supports IPC
-      Events.
-
-    - :py:obj:`~.cudaDevAttrNumaConfig`: NUMA configuration of a device:
-      value is of type :py:obj:`~.cudaDeviceNumaConfig` enum
-
-    - :py:obj:`~.cudaDevAttrNumaId`: NUMA node ID of the GPU memory
-
-    Parameters
-    ----------
-    attr : :py:obj:`~.cudaDeviceAttr`
-        Device attribute to query
-    device : int
-        Device number to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
-    value : int
-        Returned device attribute value
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetAttribute`
-    """
-    cdef int value = 0
-    cdef cyruntime.cudaDeviceAttr cyattr = attr.value
-    err = cyruntime.cudaDeviceGetAttribute(&value, cyattr, device)
-    return (cudaError_t(err), value)
-{{endif}}
-
-{{if 'cudaDeviceGetDefaultMemPool' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetDefaultMemPool(int device):
-    """ Returns the default mempool of a device.
-
-    The default mempool of a device contains device memory from that
-    device.
-
-    Parameters
-    ----------
-    device : int
-        None
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue` :py:obj:`~.cudaErrorNotSupported`
-    memPool : :py:obj:`~.cudaMemPool_t`
-        None
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaMemPoolTrimTo`, :py:obj:`~.cudaMemPoolGetAttribute`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaMemPoolSetAttribute`, :py:obj:`~.cudaMemPoolSetAccess`
-    """
-    cdef cudaMemPool_t memPool = cudaMemPool_t()
-    with nogil:
-        err = cyruntime.cudaDeviceGetDefaultMemPool(<cyruntime.cudaMemPool_t*>memPool._ptr, device)
-
-    return (cudaError_t(err), memPool)
-{{endif}}
-
-{{if 'cudaDeviceSetMemPool' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceSetMemPool(int device, memPool):
-    """ Sets the current memory pool of a device.
-
-    The memory pool must be local to the specified device. Unless a mempool
-    is specified in the :py:obj:`~.cudaMallocAsync` call,
-    :py:obj:`~.cudaMallocAsync` allocates from the current mempool of the
-    provided stream's device. By default, a device's current memory pool is
-    its default memory pool.
-
-    Parameters
-    ----------
-    device : int
-        None
-    memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        None
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue` :py:obj:`~.cudaErrorInvalidDevice` :py:obj:`~.cudaErrorNotSupported`
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceSetMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaMemPoolCreate`, :py:obj:`~.cudaMemPoolDestroy`, :py:obj:`~.cudaMallocFromPoolAsync`
-
-    Notes
-    -----
-    Use :py:obj:`~.cudaMallocFromPoolAsync` to specify asynchronous allocations from a device different than the one the stream runs on.
-    """
-    cdef cyruntime.cudaMemPool_t cymemPool
-    if memPool is None:
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>0
-    elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
-        pmemPool = int(memPool)
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    else:
-        pmemPool = int(cudaMemPool_t(memPool))
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    with nogil:
-        err = cyruntime.cudaDeviceSetMemPool(device, cymemPool)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDeviceGetMemPool' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetMemPool(int device):
-    """ Gets the current mempool for a device.
-
-    Returns the last pool provided to :py:obj:`~.cudaDeviceSetMemPool` for
-    this device or the device's default memory pool if
-    :py:obj:`~.cudaDeviceSetMemPool` has never been called. By default the
-    current mempool is the default mempool for a device, otherwise the
-    returned pool must have been set with :py:obj:`~.cuDeviceSetMemPool` or
-    :py:obj:`~.cudaDeviceSetMemPool`.
-
-    Parameters
-    ----------
-    device : int
-        None
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue` :py:obj:`~.cudaErrorNotSupported`
-    memPool : :py:obj:`~.cudaMemPool_t`
-        None
-
-    See Also
-    --------
-    :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceSetMemPool`
-    """
-    cdef cudaMemPool_t memPool = cudaMemPool_t()
-    with nogil:
-        err = cyruntime.cudaDeviceGetMemPool(<cyruntime.cudaMemPool_t*>memPool._ptr, device)
-
-    return (cudaError_t(err), memPool)
-{{endif}}
-
-{{if 'cudaDeviceGetNvSciSyncAttributes' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, int device, int flags):
-    """ Return NvSciSync attributes that this device can support.
-
-    Returns in `nvSciSyncAttrList`, the properties of NvSciSync that this
-    CUDA device, `dev` can support. The returned `nvSciSyncAttrList` can be
-    used to create an NvSciSync that matches this device's capabilities.
-
-    If NvSciSyncAttrKey_RequiredPerm field in `nvSciSyncAttrList` is
-    already set this API will return :py:obj:`~.cudaErrorInvalidValue`.
-
-    The applications should set `nvSciSyncAttrList` to a valid
-    NvSciSyncAttrList failing which this API will return
-    :py:obj:`~.cudaErrorInvalidHandle`.
-
-    The `flags` controls how applications intends to use the NvSciSync
-    created from the `nvSciSyncAttrList`. The valid flags are:
-
-    - :py:obj:`~.cudaNvSciSyncAttrSignal`, specifies that the applications
-      intends to signal an NvSciSync on this CUDA device.
-
-    - :py:obj:`~.cudaNvSciSyncAttrWait`, specifies that the applications
-      intends to wait on an NvSciSync on this CUDA device.
-
-    At least one of these flags must be set, failing which the API returns
-    :py:obj:`~.cudaErrorInvalidValue`. Both the flags are orthogonal to one
-    another: a developer may set both these flags that allows to set both
-    wait and signal specific attributes in the same `nvSciSyncAttrList`.
-
-    Note that this API updates the input `nvSciSyncAttrList` with values
-    equivalent to the following public attribute key-values:
-    NvSciSyncAttrKey_RequiredPerm is set to
-
-    - NvSciSyncAccessPerm_SignalOnly if :py:obj:`~.cudaNvSciSyncAttrSignal`
-      is set in `flags`.
-
-    - NvSciSyncAccessPerm_WaitOnly if :py:obj:`~.cudaNvSciSyncAttrWait` is
-      set in `flags`.
-
-    - NvSciSyncAccessPerm_WaitSignal if both
-      :py:obj:`~.cudaNvSciSyncAttrWait` and
-      :py:obj:`~.cudaNvSciSyncAttrSignal` are set in `flags`.
-      NvSciSyncAttrKey_PrimitiveInfo is set to
-
-    - NvSciSyncAttrValPrimitiveType_SysmemSemaphore on any valid `device`.
-
-    - NvSciSyncAttrValPrimitiveType_Syncpoint if `device` is a Tegra
-      device.
-
-    - NvSciSyncAttrValPrimitiveType_SysmemSemaphorePayload64b if `device`
-      is GA10X+. NvSciSyncAttrKey_GpuId is set to the same UUID that is
-      returned in `None` from :py:obj:`~.cudaDeviceGetProperties` for this
-      `device`.
-
-    :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorDeviceUninitialized`,
-    :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidHandle`,
-    :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorNotSupported`,
-    :py:obj:`~.cudaErrorMemoryAllocation`
-
-    Parameters
-    ----------
-    nvSciSyncAttrList : Any
-        Return NvSciSync attributes supported.
-    device : int
-        Valid Cuda Device to get NvSciSync attributes for.
-    flags : int
-        flags describing NvSciSync usage.
-
-    Returns
-    -------
-    cudaError_t
-
-
-    See Also
-    --------
-    :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
-    """
-    cynvSciSyncAttrList = utils.HelperInputVoidPtr(nvSciSyncAttrList)
-    cdef void* cynvSciSyncAttrList_ptr = <void*><void_ptr>cynvSciSyncAttrList.cptr
-    err = cyruntime.cudaDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList_ptr, device, flags)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDeviceGetP2PAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetP2PAttribute(attr not None : cudaDeviceP2PAttr, int srcDevice, int dstDevice):
-    """ Queries attributes of the link between two devices.
-
-    Returns in `*value` the value of the requested attribute `attrib` of
-    the link between `srcDevice` and `dstDevice`. The supported attributes
-    are:
-
-    - :py:obj:`~.cudaDevP2PAttrPerformanceRank`: A relative value
-      indicating the performance of the link between two devices. Lower
-      value means better performance (0 being the value used for most
-      performant link).
-
-    - :py:obj:`~.cudaDevP2PAttrAccessSupported`: 1 if peer access is
-      enabled.
-
-    - :py:obj:`~.cudaDevP2PAttrNativeAtomicSupported`: 1 if native atomic
-      operations over the link are supported.
-
-    - :py:obj:`~.cudaDevP2PAttrCudaArrayAccessSupported`: 1 if accessing
-      CUDA arrays over the link is supported.
-
-    Returns :py:obj:`~.cudaErrorInvalidDevice` if `srcDevice` or
-    `dstDevice` are not valid or if they represent the same device.
-
-    Returns :py:obj:`~.cudaErrorInvalidValue` if `attrib` is not valid or
-    if `value` is a null pointer.
-
-    Parameters
-    ----------
-    attrib : :py:obj:`~.cudaDeviceP2PAttr`
-        The requested attribute of the link between `srcDevice` and
-        `dstDevice`.
-    srcDevice : int
-        The source device of the target link.
-    dstDevice : int
-        The destination device of the target link.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
-    value : int
-        Returned value of the requested attribute
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cuDeviceGetP2PAttribute`
-    """
-    cdef int value = 0
-    cdef cyruntime.cudaDeviceP2PAttr cyattr = attr.value
-    err = cyruntime.cudaDeviceGetP2PAttribute(&value, cyattr, srcDevice, dstDevice)
-    return (cudaError_t(err), value)
-{{endif}}
-
-{{if 'cudaChooseDevice' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaChooseDevice(prop : Optional[cudaDeviceProp]):
-    """ Select compute-device which best matches criteria.
-
-    Returns in `*device` the device which has properties that best match
-    `*prop`.
-
-    Parameters
-    ----------
-    prop : :py:obj:`~.cudaDeviceProp`
-        Desired device properties
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    device : int
-        Device with best match
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaInitDevice`
-    """
-    cdef int device = 0
-    cdef cyruntime.cudaDeviceProp* cyprop_ptr = prop._ptr if prop != None else NULL
-    err = cyruntime.cudaChooseDevice(&device, cyprop_ptr)
-    return (cudaError_t(err), device)
-{{endif}}
-
-{{if 'cudaInitDevice' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaInitDevice(int device, unsigned int deviceFlags, unsigned int flags):
-    """ Initialize device to be used for GPU executions.
-
-    This function will initialize the CUDA Runtime structures and primary
-    context on `device` when called, but the context will not be made
-    current to `device`.
-
-    When :py:obj:`~.cudaInitDeviceFlagsAreValid` is set in `flags`,
-    deviceFlags are applied to the requested device. The values of
-    deviceFlags match those of the flags parameters in
-    :py:obj:`~.cudaSetDeviceFlags`. The effect may be verified by
-    :py:obj:`~.cudaGetDeviceFlags`.
-
-    This function will return an error if the device is in
-    :py:obj:`~.cudaComputeModeExclusiveProcess` and is occupied by another
-    process or if the device is in :py:obj:`~.cudaComputeModeProhibited`.
-
-    Parameters
-    ----------
-    device : int
-        Device on which the runtime will initialize itself.
-    deviceFlags : unsigned int
-        Parameters for device operation.
-    flags : unsigned int
-        Flags for controlling the device initialization.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaSetDevice` :py:obj:`~.cuCtxSetCurrent`
-    """
-    err = cyruntime.cudaInitDevice(device, deviceFlags, flags)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaSetDevice' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaSetDevice(int device):
-    """ Set device to be used for GPU executions.
-
-    Sets `device` as the current device for the calling host thread. Valid
-    device id's are 0 to (:py:obj:`~.cudaGetDeviceCount()` - 1).
-
-    Any device memory subsequently allocated from this host thread using
-    :py:obj:`~.cudaMalloc()`, :py:obj:`~.cudaMallocPitch()` or
-    :py:obj:`~.cudaMallocArray()` will be physically resident on `device`.
-    Any host memory allocated from this host thread using
-    :py:obj:`~.cudaMallocHost()` or :py:obj:`~.cudaHostAlloc()` or
-    :py:obj:`~.cudaHostRegister()` will have its lifetime associated with
-    `device`. Any streams or events created from this host thread will be
-    associated with `device`. Any kernels launched from this host thread
-    using the <<<>>> operator or :py:obj:`~.cudaLaunchKernel()` will be
-    executed on `device`.
-
-    This call may be made from any host thread, to any device, and at any
-    time. This function will do no synchronization with the previous or new
-    device, and should only take significant time when it initializes the
-    runtime's context state. This call will bind the primary context of the
-    specified device to the calling thread and all the subsequent memory
-    allocations, stream and event creations, and kernel launches will be
-    associated with the primary context. This function will also
-    immediately initialize the runtime state on the primary context, and
-    the context will be current on `device` immediately. This function will
-    return an error if the device is in
-    :py:obj:`~.cudaComputeModeExclusiveProcess` and is occupied by another
-    process or if the device is in :py:obj:`~.cudaComputeModeProhibited`.
-
-    It is not required to call :py:obj:`~.cudaInitDevice` before using this
-    function.
-
-    Parameters
-    ----------
-    device : int
-        Device on which the active host thread should execute the device
-        code.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorDeviceUnavailable`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuCtxSetCurrent`
-    """
-    err = cyruntime.cudaSetDevice(device)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGetDevice' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetDevice():
-    """ Returns which device is currently being used.
-
-    Returns in `*device` the current device for the calling host thread.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorDeviceUnavailable`,
-    device : int
-        Returns the device on which the active host thread executes the
-        device code.
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cuCtxGetCurrent`
-    """
-    cdef int device = 0
-    err = cyruntime.cudaGetDevice(&device)
-    return (cudaError_t(err), device)
-{{endif}}
-
-{{if 'cudaSetDeviceFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaSetDeviceFlags(unsigned int flags):
-    """ Sets flags to be used for device executions.
-
-    Records `flags` as the flags for the current device. If the current
-    device has been set and that device has already been initialized, the
-    previous flags are overwritten. If the current device has not been
-    initialized, it is initialized with the provided flags. If no device
-    has been made current to the calling thread, a default device is
-    selected and initialized with the provided flags.
-
-    The three LSBs of the `flags` parameter can be used to control how the
-    CPU thread interacts with the OS scheduler when waiting for results
-    from the device.
-
-    - :py:obj:`~.cudaDeviceScheduleAuto`: The default value if the `flags`
-      parameter is zero, uses a heuristic based on the number of active
-      CUDA contexts in the process `C` and the number of logical processors
-      in the system `P`. If `C` > `P`, then CUDA will yield to other OS
-      threads when waiting for the device, otherwise CUDA will not yield
-      while waiting for results and actively spin on the processor.
-      Additionally, on Tegra devices, :py:obj:`~.cudaDeviceScheduleAuto`
-      uses a heuristic based on the power profile of the platform and may
-      choose :py:obj:`~.cudaDeviceScheduleBlockingSync` for low-powered
-      devices.
-
-    - :py:obj:`~.cudaDeviceScheduleSpin`: Instruct CUDA to actively spin
-      when waiting for results from the device. This can decrease latency
-      when waiting for the device, but may lower the performance of CPU
-      threads if they are performing work in parallel with the CUDA thread.
-
-    - :py:obj:`~.cudaDeviceScheduleYield`: Instruct CUDA to yield its
-      thread when waiting for results from the device. This can increase
-      latency when waiting for the device, but can increase the performance
-      of CPU threads performing work in parallel with the device.
-
-    - :py:obj:`~.cudaDeviceScheduleBlockingSync`: Instruct CUDA to block
-      the CPU thread on a synchronization primitive when waiting for the
-      device to finish work.
-
-    - :py:obj:`~.cudaDeviceBlockingSync`: Instruct CUDA to block the CPU
-      thread on a synchronization primitive when waiting for the device to
-      finish work.   :py:obj:`~.Deprecated:` This flag was deprecated as of
-      CUDA 4.0 and replaced with
-      :py:obj:`~.cudaDeviceScheduleBlockingSync`.
-
-    - :py:obj:`~.cudaDeviceMapHost`: This flag enables allocating pinned
-      host memory that is accessible to the device. It is implicit for the
-      runtime but may be absent if a context is created using the driver
-      API. If this flag is not set, :py:obj:`~.cudaHostGetDevicePointer()`
-      will always return a failure code.
-
-    - :py:obj:`~.cudaDeviceLmemResizeToMax`: Instruct CUDA to not reduce
-      local memory after resizing local memory for a kernel. This can
-      prevent thrashing by local memory allocations when launching many
-      kernels with high local memory usage at the cost of potentially
-      increased memory usage.   :py:obj:`~.Deprecated:` This flag is
-      deprecated and the behavior enabled by this flag is now the default
-      and cannot be disabled.
-
-    - :py:obj:`~.cudaDeviceSyncMemops`: Ensures that synchronous memory
-      operations initiated on this context will always synchronize. See
-      further documentation in the section titled "API Synchronization
-      behavior" to learn more about cases when synchronous memory
-      operations can exhibit asynchronous behavior.
-
-    Parameters
-    ----------
-    flags : unsigned int
-        Parameters for device operation
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetDeviceFlags`, :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaSetValidDevices`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cuDevicePrimaryCtxSetFlags`
-    """
-    err = cyruntime.cudaSetDeviceFlags(flags)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGetDeviceFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetDeviceFlags():
-    """ Gets the flags for the current device.
-
-    Returns in `flags` the flags for the current device. If there is a
-    current device for the calling thread, the flags for the device are
-    returned. If there is no current device, the flags for the first device
-    are returned, which may be the default flags. Compare to the behavior
-    of :py:obj:`~.cudaSetDeviceFlags`.
-
-    Typically, the flags returned should match the behavior that will be
-    seen if the calling thread uses a device after this call, without any
-    change to the flags or current device inbetween by this or another
-    thread. Note that if the device is not initialized, it is possible for
-    another thread to change the flags for the current device before it is
-    initialized. Additionally, when using exclusive mode, if this thread
-    has not requested a specific device, it may use a device other than the
-    first device, contrary to the assumption made by this function.
-
-    If a context has been created via the driver API and is current to the
-    calling thread, the flags for that context are always returned.
-
-    Flags returned by this function may specifically include
-    :py:obj:`~.cudaDeviceMapHost` even though it is not accepted by
-    :py:obj:`~.cudaSetDeviceFlags` because it is implicit in runtime API
-    flags. The reason for this is that the current context may have been
-    created via the driver API in which case the flag is not implicit and
-    may be unset.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
-    flags : unsigned int
-        Pointer to store the device flags
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaSetDeviceFlags`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuDevicePrimaryCtxGetState`
-    """
-    cdef unsigned int flags = 0
-    err = cyruntime.cudaGetDeviceFlags(&flags)
-    return (cudaError_t(err), flags)
-{{endif}}
-
-{{if 'cudaStreamCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamCreate():
-    """ Create an asynchronous stream.
-
-    Creates a new asynchronous stream on the context that is current to the
-    calling host thread. If no context is current to the calling host
-    thread, then the primary context for a device is selected, made current
-    to the calling thread, and initialized before creating a stream on it.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pStream : :py:obj:`~.cudaStream_t`
-        Pointer to new stream identifier
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreate`
-    """
-    cdef cudaStream_t pStream = cudaStream_t()
-    err = cyruntime.cudaStreamCreate(<cyruntime.cudaStream_t*>pStream._ptr)
-    return (cudaError_t(err), pStream)
-{{endif}}
-
-{{if 'cudaStreamCreateWithFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamCreateWithFlags(unsigned int flags):
-    """ Create an asynchronous stream.
-
-    Creates a new asynchronous stream on the context that is current to the
-    calling host thread. If no context is current to the calling host
-    thread, then the primary context for a device is selected, made current
-    to the calling thread, and initialized before creating a stream on it.
-    The `flags` argument determines the behaviors of the stream. Valid
-    values for `flags` are
-
-    - :py:obj:`~.cudaStreamDefault`: Default stream creation flag.
-
-    - :py:obj:`~.cudaStreamNonBlocking`: Specifies that work running in the
-      created stream may run concurrently with work in stream 0 (the NULL
-      stream), and that the created stream should perform no implicit
-      synchronization with stream 0.
-
-    Parameters
-    ----------
-    flags : unsigned int
-        Parameters for stream creation
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pStream : :py:obj:`~.cudaStream_t`
-        Pointer to new stream identifier
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreate`
-    """
-    cdef cudaStream_t pStream = cudaStream_t()
-    err = cyruntime.cudaStreamCreateWithFlags(<cyruntime.cudaStream_t*>pStream._ptr, flags)
-    return (cudaError_t(err), pStream)
-{{endif}}
-
-{{if 'cudaStreamCreateWithPriority' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamCreateWithPriority(unsigned int flags, int priority):
-    """ Create an asynchronous stream with the specified priority.
-
-    Creates a stream with the specified priority and returns a handle in
-    `pStream`. The stream is created on the context that is current to the
-    calling host thread. If no context is current to the calling host
-    thread, then the primary context for a device is selected, made current
-    to the calling thread, and initialized before creating a stream on it.
-    This affects the scheduling priority of work in the stream. Priorities
-    provide a hint to preferentially run work with higher priority when
-    possible, but do not preempt already-running work or provide any other
-    functional guarantee on execution order.
-
-    `priority` follows a convention where lower numbers represent higher
-    priorities. '0' represents default priority. The range of meaningful
-    numerical priorities can be queried using
-    :py:obj:`~.cudaDeviceGetStreamPriorityRange`. If the specified priority
-    is outside the numerical range returned by
-    :py:obj:`~.cudaDeviceGetStreamPriorityRange`, it will automatically be
-    clamped to the lowest or the highest number in the range.
-
-    Parameters
-    ----------
-    flags : unsigned int
-        Flags for stream creation. See
-        :py:obj:`~.cudaStreamCreateWithFlags` for a list of valid flags
-        that can be passed
-    priority : int
-        Priority of the stream. Lower numbers represent higher priorities.
-        See :py:obj:`~.cudaDeviceGetStreamPriorityRange` for more
-        information about the meaningful stream priorities that can be
-        passed.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pStream : :py:obj:`~.cudaStream_t`
-        Pointer to new stream identifier
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaDeviceGetStreamPriorityRange`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`
-
-    Notes
-    -----
-    Stream priorities are supported only on GPUs with compute capability 3.5 or higher.
-
-    In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations.
-    """
-    cdef cudaStream_t pStream = cudaStream_t()
-    err = cyruntime.cudaStreamCreateWithPriority(<cyruntime.cudaStream_t*>pStream._ptr, flags, priority)
-    return (cudaError_t(err), pStream)
-{{endif}}
-
-{{if 'cudaStreamGetPriority' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamGetPriority(hStream):
-    """ Query the priority of a stream.
-
-    Query the priority of a stream. The priority is returned in in
-    `priority`. Note that if the stream was created with a priority outside
-    the meaningful numerical range returned by
-    :py:obj:`~.cudaDeviceGetStreamPriorityRange`, this function returns the
-    clamped priority. See :py:obj:`~.cudaStreamCreateWithPriority` for
-    details about priority clamping.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Handle to the stream to be queried
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-    priority : int
-        Pointer to a signed integer in which the stream's priority is
-        returned
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaDeviceGetStreamPriorityRange`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cuStreamGetPriority`
-    """
-    cdef cyruntime.cudaStream_t cyhStream
-    if hStream is None:
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
-        phStream = int(hStream)
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    else:
-        phStream = int(cudaStream_t(hStream))
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    cdef int priority = 0
-    err = cyruntime.cudaStreamGetPriority(cyhStream, &priority)
-    return (cudaError_t(err), priority)
-{{endif}}
-
-{{if 'cudaStreamGetFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamGetFlags(hStream):
-    """ Query the flags of a stream.
-
-    Query the flags of a stream. The flags are returned in `flags`. See
-    :py:obj:`~.cudaStreamCreateWithFlags` for a list of valid flags.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Handle to the stream to be queried
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-    flags : unsigned int
-        Pointer to an unsigned integer in which the stream's flags are
-        returned
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`
-    """
-    cdef cyruntime.cudaStream_t cyhStream
-    if hStream is None:
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
-        phStream = int(hStream)
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    else:
-        phStream = int(cudaStream_t(hStream))
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    cdef unsigned int flags = 0
-    err = cyruntime.cudaStreamGetFlags(cyhStream, &flags)
-    return (cudaError_t(err), flags)
-{{endif}}
-
-{{if 'cudaStreamGetId' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamGetId(hStream):
-    """ Query the Id of a stream.
-
-    Query the Id of a stream. The Id is returned in `streamId`. The Id is
-    unique for the life of the program.
-
-    The stream handle `hStream` can refer to any of the following:
-
-    - a stream created via any of the CUDA runtime APIs such as
-      :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`
-      and :py:obj:`~.cudaStreamCreateWithPriority`, or their driver API
-      equivalents such as :py:obj:`~.cuStreamCreate` or
-      :py:obj:`~.cuStreamCreateWithPriority`. Passing an invalid handle
-      will result in undefined behavior.
-
-    - any of the special streams such as the NULL stream,
-      :py:obj:`~.cudaStreamLegacy` and :py:obj:`~.cudaStreamPerThread`
-      respectively. The driver API equivalents of these are also accepted
-      which are NULL, :py:obj:`~.CU_STREAM_LEGACY` and
-      :py:obj:`~.CU_STREAM_PER_THREAD`.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Handle to the stream to be queried
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-    streamId : unsigned long long
-        Pointer to an unsigned long long in which the stream Id is returned
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cuStreamGetId`
-    """
-    cdef cyruntime.cudaStream_t cyhStream
-    if hStream is None:
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
-        phStream = int(hStream)
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    else:
-        phStream = int(cudaStream_t(hStream))
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    cdef unsigned long long streamId = 0
-    err = cyruntime.cudaStreamGetId(cyhStream, &streamId)
-    return (cudaError_t(err), streamId)
-{{endif}}
-
-{{if 'cudaCtxResetPersistingL2Cache' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaCtxResetPersistingL2Cache():
-    """ Resets all persisting lines in cache to normal status.
-
-    Resets all persisting lines in cache to normal status. Takes effect on
-    function return.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaAccessPolicyWindow`
-    """
-    err = cyruntime.cudaCtxResetPersistingL2Cache()
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaStreamCopyAttributes' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamCopyAttributes(dst, src):
-    """ Copies attributes from source stream to destination stream.
-
-    Copies attributes from source stream `src` to destination stream `dst`.
-    Both streams must have the same context.
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Destination stream
-    src : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Source stream For attributes see :py:obj:`~.cudaStreamAttrID`
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotSupported`
-
-    See Also
-    --------
-    :py:obj:`~.cudaAccessPolicyWindow`
-    """
-    cdef cyruntime.cudaStream_t cysrc
-    if src is None:
-        cysrc = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(src, (cudaStream_t,driver.CUstream)):
-        psrc = int(src)
-        cysrc = <cyruntime.cudaStream_t><void_ptr>psrc
-    else:
-        psrc = int(cudaStream_t(src))
-        cysrc = <cyruntime.cudaStream_t><void_ptr>psrc
-    cdef cyruntime.cudaStream_t cydst
-    if dst is None:
-        cydst = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(dst, (cudaStream_t,driver.CUstream)):
-        pdst = int(dst)
-        cydst = <cyruntime.cudaStream_t><void_ptr>pdst
-    else:
-        pdst = int(cudaStream_t(dst))
-        cydst = <cyruntime.cudaStream_t><void_ptr>pdst
-    err = cyruntime.cudaStreamCopyAttributes(cydst, cysrc)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaStreamGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamGetAttribute(hStream, attr not None : cudaStreamAttrID):
-    """ Queries stream attribute.
-
-    Queries attribute `attr` from `hStream` and stores it in corresponding
-    member of `value_out`.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-
-    attr : :py:obj:`~.cudaStreamAttrID`
-
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-    value_out : :py:obj:`~.cudaStreamAttrValue`
-
-
-    See Also
-    --------
-    :py:obj:`~.cudaAccessPolicyWindow`
-    """
-    cdef cyruntime.cudaStream_t cyhStream
-    if hStream is None:
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
-        phStream = int(hStream)
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    else:
-        phStream = int(cudaStream_t(hStream))
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    cdef cyruntime.cudaStreamAttrID cyattr = attr.value
-    cdef cudaStreamAttrValue value_out = cudaStreamAttrValue()
-    err = cyruntime.cudaStreamGetAttribute(cyhStream, cyattr, <cyruntime.cudaStreamAttrValue*>value_out._ptr)
-    return (cudaError_t(err), value_out)
-{{endif}}
-
-{{if 'cudaStreamSetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamSetAttribute(hStream, attr not None : cudaStreamAttrID, value : Optional[cudaStreamAttrValue]):
-    """ Sets stream attribute.
-
-    Sets attribute `attr` on `hStream` from corresponding attribute of
-    `value`. The updated attribute will be applied to subsequent work
-    submitted to the stream. It will not affect previously submitted work.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-
-    attr : :py:obj:`~.cudaStreamAttrID`
-
-    value : :py:obj:`~.cudaStreamAttrValue`
-
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-
-    See Also
-    --------
-    :py:obj:`~.cudaAccessPolicyWindow`
-    """
-    cdef cyruntime.cudaStream_t cyhStream
-    if hStream is None:
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
-        phStream = int(hStream)
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    else:
-        phStream = int(cudaStream_t(hStream))
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    cdef cyruntime.cudaStreamAttrID cyattr = attr.value
-    cdef cyruntime.cudaStreamAttrValue* cyvalue_ptr = value._ptr if value != None else NULL
-    err = cyruntime.cudaStreamSetAttribute(cyhStream, cyattr, cyvalue_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaStreamDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamDestroy(stream):
-    """ Destroys and cleans up an asynchronous stream.
-
-    Destroys and cleans up the asynchronous stream specified by `stream`.
-
-    In case the device is still doing work in the stream `stream` when
-    :py:obj:`~.cudaStreamDestroy()` is called, the function will return
-    immediately and the resources associated with `stream` will be released
-    automatically once the device has completed all work in `stream`.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cuStreamDestroy`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    err = cyruntime.cudaStreamDestroy(cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaStreamWaitEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamWaitEvent(stream, event, unsigned int flags):
-    """ Make a compute stream wait on an event.
-
-    Makes all future work submitted to `stream` wait for all work captured
-    in `event`. See :py:obj:`~.cudaEventRecord()` for details on what is
-    captured by an event. The synchronization will be performed efficiently
-    on the device when applicable. `event` may be from a different device
-    than `stream`.
-
-    flags include:
-
-    - :py:obj:`~.cudaEventWaitDefault`: Default event creation flag.
-
-    - :py:obj:`~.cudaEventWaitExternal`: Event is captured in the graph as
-      an external event node when performing stream capture.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to wait
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to wait on
-    flags : unsigned int
-        Parameters for the operation(See above)
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamWaitEvent`
-    """
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    with nogil:
-        err = cyruntime.cudaStreamWaitEvent(cystream, cyevent, flags)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaStreamAddCallback' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamAddCallback(stream, callback, userData, unsigned int flags):
-    """ Add a callback to a compute stream.
-
-    Adds a callback to be called on the host after all currently enqueued
-    items in the stream have completed. For each cudaStreamAddCallback
-    call, a callback will be executed exactly once. The callback will block
-    later work in the stream until it is finished.
-
-    The callback may be passed :py:obj:`~.cudaSuccess` or an error code. In
-    the event of a device error, all subsequently executed callbacks will
-    receive an appropriate :py:obj:`~.cudaError_t`.
-
-    Callbacks must not make any CUDA API calls. Attempting to use CUDA APIs
-    may result in :py:obj:`~.cudaErrorNotPermitted`. Callbacks must not
-    perform any synchronization that may depend on outstanding device work
-    or other callbacks that are not mandated to run earlier. Callbacks
-    without a mandated order (in independent streams) execute in undefined
-    order and may be serialized.
-
-    For the purposes of Unified Memory, callback execution makes a number
-    of guarantees:
-
-    - The callback stream is considered idle for the duration of the
-      callback. Thus, for example, a callback may always use memory
-      attached to the callback stream.
-
-    - The start of execution of a callback has the same effect as
-      synchronizing an event recorded in the same stream immediately prior
-      to the callback. It thus synchronizes streams which have been
-      "joined" prior to the callback.
-
-    - Adding device work to any stream does not have the effect of making
-      the stream active until all preceding callbacks have executed. Thus,
-      for example, a callback might use global attached memory even if work
-      has been added to another stream, if it has been properly ordered
-      with an event.
-
-    - Completion of a callback does not cause a stream to become active
-      except as described above. The callback stream will remain idle if no
-      device work follows the callback, and will remain idle across
-      consecutive callbacks without device work in between. Thus, for
-      example, stream synchronization can be done by signaling from a
-      callback at the end of the stream.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to add callback to
-    callback : :py:obj:`~.cudaStreamCallback_t`
-        The function to call once preceding stream operations are complete
-    userData : Any
-        User specified data to be passed to the callback function
-    flags : unsigned int
-        Reserved for future use, must be 0
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cudaStreamAttachMemAsync`, :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cuStreamAddCallback`
-
-    Notes
-    -----
-    This function is slated for eventual deprecation and removal. If you do not require the callback to execute in case of a device error, consider using :py:obj:`~.cudaLaunchHostFunc`. Additionally, this function is not supported with :py:obj:`~.cudaStreamBeginCapture` and :py:obj:`~.cudaStreamEndCapture`, unlike :py:obj:`~.cudaLaunchHostFunc`.
-    """
-    cdef cyruntime.cudaStreamCallback_t cycallback
-    if callback is None:
-        cycallback = <cyruntime.cudaStreamCallback_t><void_ptr>0
-    elif isinstance(callback, (cudaStreamCallback_t,)):
-        pcallback = int(callback)
-        cycallback = <cyruntime.cudaStreamCallback_t><void_ptr>pcallback
-    else:
-        pcallback = int(cudaStreamCallback_t(callback))
-        cycallback = <cyruntime.cudaStreamCallback_t><void_ptr>pcallback
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cyuserData = utils.HelperInputVoidPtr(userData)
-    cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
-    with nogil:
-        err = cyruntime.cudaStreamAddCallback(cystream, cycallback, cyuserData_ptr, flags)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaStreamSynchronize' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamSynchronize(stream):
-    """ Waits for stream tasks to complete.
-
-    Blocks until `stream` has completed all operations. If the
-    :py:obj:`~.cudaDeviceScheduleBlockingSync` flag was set for this
-    device, the host thread will block until the stream is finished with
-    all of its tasks.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamSynchronize`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    with nogil:
-        err = cyruntime.cudaStreamSynchronize(cystream)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaStreamQuery' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamQuery(stream):
-    """ Queries an asynchronous stream for completion status.
-
-    Returns :py:obj:`~.cudaSuccess` if all operations in `stream` have
-    completed, or :py:obj:`~.cudaErrorNotReady` if not.
-
-    For the purposes of Unified Memory, a return value of
-    :py:obj:`~.cudaSuccess` is equivalent to having called
-    :py:obj:`~.cudaStreamSynchronize()`.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamQuery`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    err = cyruntime.cudaStreamQuery(cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaStreamAttachMemAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamAttachMemAsync(stream, devPtr, size_t length, unsigned int flags):
-    """ Attach memory to a stream asynchronously.
-
-    Enqueues an operation in `stream` to specify stream association of
-    `length` bytes of memory starting from `devPtr`. This function is a
-    stream-ordered operation, meaning that it is dependent on, and will
-    only take effect when, previous work in stream has completed. Any
-    previous association is automatically replaced.
-
-    `devPtr` must point to an one of the following types of memories:
-
-    - managed memory declared using the managed keyword or allocated with
-      :py:obj:`~.cudaMallocManaged`.
-
-    - a valid host-accessible region of system-allocated pageable memory.
-      This type of memory may only be specified if the device associated
-      with the stream reports a non-zero value for the device attribute
-      :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
-
-    For managed allocations, `length` must be either zero or the entire
-    allocation's size. Both indicate that the entire allocation's stream
-    association is being changed. Currently, it is not possible to change
-    stream association for a portion of a managed allocation.
-
-    For pageable allocations, `length` must be non-zero.
-
-    The stream association is specified using `flags` which must be one of
-    :py:obj:`~.cudaMemAttachGlobal`, :py:obj:`~.cudaMemAttachHost` or
-    :py:obj:`~.cudaMemAttachSingle`. The default value for `flags` is
-    :py:obj:`~.cudaMemAttachSingle` If the :py:obj:`~.cudaMemAttachGlobal`
-    flag is specified, the memory can be accessed by any stream on any
-    device. If the :py:obj:`~.cudaMemAttachHost` flag is specified, the
-    program makes a guarantee that it won't access the memory on the device
-    from any stream on a device that has a zero value for the device
-    attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. If the
-    :py:obj:`~.cudaMemAttachSingle` flag is specified and `stream` is
-    associated with a device that has a zero value for the device attribute
-    :py:obj:`~.cudaDevAttrConcurrentManagedAccess`, the program makes a
-    guarantee that it will only access the memory on the device from
-    `stream`. It is illegal to attach singly to the NULL stream, because
-    the NULL stream is a virtual global stream and not a specific stream.
-    An error will be returned in this case.
-
-    When memory is associated with a single stream, the Unified Memory
-    system will allow CPU access to this memory region so long as all
-    operations in `stream` have completed, regardless of whether other
-    streams are active. In effect, this constrains exclusive ownership of
-    the managed memory region by an active GPU to per-stream activity
-    instead of whole-GPU activity.
-
-    Accessing memory on the device from streams that are not associated
-    with it will produce undefined results. No error checking is performed
-    by the Unified Memory system to ensure that kernels launched into other
-    streams do not access this region.
-
-    It is a program's responsibility to order calls to
-    :py:obj:`~.cudaStreamAttachMemAsync` via events, synchronization or
-    other means to ensure legal access to memory at all times. Data
-    visibility and coherency will be changed appropriately for all kernels
-    which follow a stream-association change.
-
-    If `stream` is destroyed while data is associated with it, the
-    association is removed and the association reverts to the default
-    visibility of the allocation as specified at
-    :py:obj:`~.cudaMallocManaged`. For managed variables, the default
-    association is always :py:obj:`~.cudaMemAttachGlobal`. Note that
-    destroying a stream is an asynchronous operation, and as a result, the
-    change to default association won't happen until all work in the stream
-    has completed.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to enqueue the attach operation
-    devPtr : Any
-        Pointer to memory (must be a pointer to managed memory or to a
-        valid host-accessible region of system-allocated memory)
-    length : size_t
-        Length of memory (defaults to zero)
-    flags : unsigned int
-        Must be one of :py:obj:`~.cudaMemAttachGlobal`,
-        :py:obj:`~.cudaMemAttachHost` or :py:obj:`~.cudaMemAttachSingle`
-        (defaults to :py:obj:`~.cudaMemAttachSingle`)
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cuStreamAttachMemAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    err = cyruntime.cudaStreamAttachMemAsync(cystream, cydevPtr_ptr, length, flags)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaStreamBeginCapture' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamBeginCapture(stream, mode not None : cudaStreamCaptureMode):
-    """ Begins graph capture on a stream.
-
-    Begin graph capture on `stream`. When a stream is in capture mode, all
-    operations pushed into the stream will not be executed, but will
-    instead be captured into a graph, which will be returned via
-    :py:obj:`~.cudaStreamEndCapture`. Capture may not be initiated if
-    `stream` is :py:obj:`~.cudaStreamLegacy`. Capture must be ended on the
-    same stream in which it was initiated, and it may only be initiated if
-    the stream is not already in capture mode. The capture mode may be
-    queried via :py:obj:`~.cudaStreamIsCapturing`. A unique id representing
-    the capture sequence may be queried via
-    :py:obj:`~.cudaStreamGetCaptureInfo`.
-
-    If `mode` is not :py:obj:`~.cudaStreamCaptureModeRelaxed`,
-    :py:obj:`~.cudaStreamEndCapture` must be called on this stream from the
-    same thread.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to initiate capture
-    mode : :py:obj:`~.cudaStreamCaptureMode`
-        Controls the interaction of this capture sequence with other API
-        calls that are potentially unsafe. For more details see
-        :py:obj:`~.cudaThreadExchangeStreamCaptureMode`.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaStreamEndCapture`, :py:obj:`~.cudaThreadExchangeStreamCaptureMode`
-
-    Notes
-    -----
-    Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaStreamCaptureMode cymode = mode.value
-    err = cyruntime.cudaStreamBeginCapture(cystream, cymode)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaStreamBeginCaptureToGraph' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamBeginCaptureToGraph(stream, graph, dependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], dependencyData : Optional[Tuple[cudaGraphEdgeData] | List[cudaGraphEdgeData]], size_t numDependencies, mode not None : cudaStreamCaptureMode):
-    """ Begins graph capture on a stream to an existing graph.
-
-    Begin graph capture on `stream`. When a stream is in capture mode, all
-    operations pushed into the stream will not be executed, but will
-    instead be captured into `graph`, which will be returned via
-    :py:obj:`~.cudaStreamEndCapture`.
-
-    Capture may not be initiated if `stream` is
-    :py:obj:`~.cudaStreamLegacy`. Capture must be ended on the same stream
-    in which it was initiated, and it may only be initiated if the stream
-    is not already in capture mode. The capture mode may be queried via
-    :py:obj:`~.cudaStreamIsCapturing`. A unique id representing the capture
-    sequence may be queried via :py:obj:`~.cudaStreamGetCaptureInfo`.
-
-    If `mode` is not :py:obj:`~.cudaStreamCaptureModeRelaxed`,
-    :py:obj:`~.cudaStreamEndCapture` must be called on this stream from the
-    same thread.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to initiate capture.
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to capture into.
-    dependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the first node captured in the stream. Can be NULL
-        if numDependencies is 0.
-    dependencyData : List[:py:obj:`~.cudaGraphEdgeData`]
-        Optional array of data associated with each dependency.
-    numDependencies : size_t
-        Number of dependencies.
-    mode : :py:obj:`~.cudaStreamCaptureMode`
-        Controls the interaction of this capture sequence with other API
-        calls that are potentially unsafe. For more details see
-        :py:obj:`~.cudaThreadExchangeStreamCaptureMode`.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaStreamEndCapture`, :py:obj:`~.cudaThreadExchangeStreamCaptureMode`
-
-    Notes
-    -----
-    Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
-    """
-    dependencyData = [] if dependencyData is None else dependencyData
-    if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData):
-        raise TypeError("Argument 'dependencyData' is not instance of type (expected Tuple[cyruntime.cudaGraphEdgeData,] or List[cyruntime.cudaGraphEdgeData,]")
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaGraphNode_t* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cyruntime.cudaGraphNode_t*> calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>dependencies[idx])._ptr[0]
-    cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL
-    if len(dependencyData) > 0:
-        cydependencyData = <cyruntime.cudaGraphEdgeData*> calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData))
-        if cydependencyData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
-        for idx in range(len(dependencyData)):
-            string.memcpy(&cydependencyData[idx], (<cudaGraphEdgeData>dependencyData[idx])._ptr, sizeof(cyruntime.cudaGraphEdgeData))
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    cdef cyruntime.cudaStreamCaptureMode cymode = mode.value
-    err = cyruntime.cudaStreamBeginCaptureToGraph(cystream, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, (<cudaGraphEdgeData>dependencyData[0])._ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, cymode)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    if cydependencyData is not NULL:
-        free(cydependencyData)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaThreadExchangeStreamCaptureMode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaThreadExchangeStreamCaptureMode(mode not None : cudaStreamCaptureMode):
-    """ Swaps the stream capture interaction mode for a thread.
-
-    Sets the calling thread's stream capture interaction mode to the value
-    contained in `*mode`, and overwrites `*mode` with the previous mode for
-    the thread. To facilitate deterministic behavior across function or
-    module boundaries, callers are encouraged to use this API in a push-pop
-    fashion:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    During stream capture (see :py:obj:`~.cudaStreamBeginCapture`), some
-    actions, such as a call to :py:obj:`~.cudaMalloc`, may be unsafe. In
-    the case of :py:obj:`~.cudaMalloc`, the operation is not enqueued
-    asynchronously to a stream, and is not observed by stream capture.
-    Therefore, if the sequence of operations captured via
-    :py:obj:`~.cudaStreamBeginCapture` depended on the allocation being
-    replayed whenever the graph is launched, the captured graph would be
-    invalid.
-
-    Therefore, stream capture places restrictions on API calls that can be
-    made within or concurrently to a
-    :py:obj:`~.cudaStreamBeginCapture`-:py:obj:`~.cudaStreamEndCapture`
-    sequence. This behavior can be controlled via this API and flags to
-    :py:obj:`~.cudaStreamBeginCapture`.
-
-    A thread's mode is one of the following:
-
-    - `cudaStreamCaptureModeGlobal:` This is the default mode. If the local
-      thread has an ongoing capture sequence that was not initiated with
-      `cudaStreamCaptureModeRelaxed` at `cuStreamBeginCapture`, or if any
-      other thread has a concurrent capture sequence initiated with
-      `cudaStreamCaptureModeGlobal`, this thread is prohibited from
-      potentially unsafe API calls.
-
-    - `cudaStreamCaptureModeThreadLocal:` If the local thread has an
-      ongoing capture sequence not initiated with
-      `cudaStreamCaptureModeRelaxed`, it is prohibited from potentially
-      unsafe API calls. Concurrent capture sequences in other threads are
-      ignored.
-
-    - `cudaStreamCaptureModeRelaxed:` The local thread is not prohibited
-      from potentially unsafe API calls. Note that the thread is still
-      prohibited from API calls which necessarily conflict with stream
-      capture, for example, attempting :py:obj:`~.cudaEventQuery` on an
-      event that was last recorded inside a capture sequence.
-
-    Parameters
-    ----------
-    mode : :py:obj:`~.cudaStreamCaptureMode`
-        Pointer to mode value to swap with the current mode
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    mode : :py:obj:`~.cudaStreamCaptureMode`
-        Pointer to mode value to swap with the current mode
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamBeginCapture`
-    """
-    cdef cyruntime.cudaStreamCaptureMode cymode = mode.value
-    err = cyruntime.cudaThreadExchangeStreamCaptureMode(&cymode)
-    return (cudaError_t(err), cudaStreamCaptureMode(cymode))
-{{endif}}
-
-{{if 'cudaStreamEndCapture' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamEndCapture(stream):
-    """ Ends capture on a stream, returning the captured graph.
-
-    End capture on `stream`, returning the captured graph via `pGraph`.
-    Capture must have been initiated on `stream` via a call to
-    :py:obj:`~.cudaStreamBeginCapture`. If capture was invalidated, due to
-    a violation of the rules of stream capture, then a NULL graph will be
-    returned.
-
-    If the `mode` argument to :py:obj:`~.cudaStreamBeginCapture` was not
-    :py:obj:`~.cudaStreamCaptureModeRelaxed`, this call must be from the
-    same thread as :py:obj:`~.cudaStreamBeginCapture`.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorStreamCaptureWrongThread`
-    pGraph : :py:obj:`~.cudaGraph_t`
-        The captured graph
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaGraphDestroy`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cudaGraph_t pGraph = cudaGraph_t()
-    err = cyruntime.cudaStreamEndCapture(cystream, <cyruntime.cudaGraph_t*>pGraph._ptr)
-    return (cudaError_t(err), pGraph)
-{{endif}}
-
-{{if 'cudaStreamIsCapturing' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamIsCapturing(stream):
-    """ Returns a stream's capture status.
-
-    Return the capture status of `stream` via `pCaptureStatus`. After a
-    successful call, `*pCaptureStatus` will contain one of the following:
-
-    - :py:obj:`~.cudaStreamCaptureStatusNone`: The stream is not capturing.
-
-    - :py:obj:`~.cudaStreamCaptureStatusActive`: The stream is capturing.
-
-    - :py:obj:`~.cudaStreamCaptureStatusInvalidated`: The stream was
-      capturing but an error has invalidated the capture sequence. The
-      capture sequence must be terminated with
-      :py:obj:`~.cudaStreamEndCapture` on the stream where it was initiated
-      in order to continue using `stream`.
-
-    Note that, if this is called on :py:obj:`~.cudaStreamLegacy` (the "null
-    stream") while a blocking stream on the same device is capturing, it
-    will return :py:obj:`~.cudaErrorStreamCaptureImplicit` and
-    `*pCaptureStatus` is unspecified after the call. The blocking stream
-    capture is not invalidated.
-
-    When a blocking stream is capturing, the legacy stream is in an
-    unusable state until the blocking stream capture is terminated. The
-    legacy stream is not supported for stream capture, but attempted use
-    would have an implicit dependency on the capturing stream(s).
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorStreamCaptureImplicit`
-    pCaptureStatus : :py:obj:`~.cudaStreamCaptureStatus`
-        Returns the stream's capture status
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamEndCapture`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaStreamCaptureStatus pCaptureStatus
-    err = cyruntime.cudaStreamIsCapturing(cystream, &pCaptureStatus)
-    return (cudaError_t(err), cudaStreamCaptureStatus(pCaptureStatus))
-{{endif}}
-
-{{if 'cudaStreamGetCaptureInfo_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamGetCaptureInfo(stream):
-    """ Query a stream's capture state.
-
-    Query stream state related to stream capture.
-
-    If called on :py:obj:`~.cudaStreamLegacy` (the "null stream") while a
-    stream not created with :py:obj:`~.cudaStreamNonBlocking` is capturing,
-    returns :py:obj:`~.cudaErrorStreamCaptureImplicit`.
-
-    Valid data (other than capture status) is returned only if both of the
-    following are true:
-
-    - the call returns cudaSuccess
-
-    - the returned capture status is
-      :py:obj:`~.cudaStreamCaptureStatusActive`
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorStreamCaptureImplicit`
-    captureStatus_out : :py:obj:`~.cudaStreamCaptureStatus`
-        Location to return the capture status of the stream; required
-    id_out : unsigned long long
-        Optional location to return an id for the capture sequence, which
-        is unique over the lifetime of the process
-    graph_out : :py:obj:`~.cudaGraph_t`
-        Optional location to return the graph being captured into. All
-        operations other than destroy and node removal are permitted on the
-        graph while the capture sequence is in progress. This API does not
-        transfer ownership of the graph, which is transferred or destroyed
-        at :py:obj:`~.cudaStreamEndCapture`. Note that the graph handle may
-        be invalidated before end of capture for certain errors. Nodes that
-        are or become unreachable from the original stream at
-        :py:obj:`~.cudaStreamEndCapture` due to direct actions on the graph
-        do not trigger :py:obj:`~.cudaErrorStreamCaptureUnjoined`.
-    dependencies_out : List[:py:obj:`~.cudaGraphNode_t`]
-        Optional location to store a pointer to an array of nodes. The next
-        node to be captured in the stream will depend on this set of nodes,
-        absent operations such as event wait which modify this set. The
-        array pointer is valid until the next API call which operates on
-        the stream or until the capture is terminated. The node handles may
-        be copied out and are valid until they or the graph is destroyed.
-        The driver-owned array may also be passed directly to APIs that
-        operate on the graph (not the stream) without copying.
-    numDependencies_out : int
-        Optional location to store the size of the array returned in
-        dependencies_out.
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamGetCaptureInfo_v3`, :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaStreamUpdateCaptureDependencies`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaStreamCaptureStatus captureStatus_out
-    cdef unsigned long long id_out = 0
-    cdef cudaGraph_t graph_out = cudaGraph_t()
-    cdef const cyruntime.cudaGraphNode_t* cydependencies_out = NULL
-    pydependencies_out = []
-    cdef size_t numDependencies_out = 0
-    err = cyruntime.cudaStreamGetCaptureInfo(cystream, &captureStatus_out, &id_out, <cyruntime.cudaGraph_t*>graph_out._ptr, &cydependencies_out, &numDependencies_out)
-    if cudaError_t(err) == cudaError_t(0):
-        pydependencies_out = [cudaGraphNode_t(init_value=<void_ptr>cydependencies_out[idx]) for idx in range(numDependencies_out)]
-    return (cudaError_t(err), cudaStreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, numDependencies_out)
-{{endif}}
-
-{{if 'cudaStreamGetCaptureInfo_v3' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamGetCaptureInfo_v3(stream):
-    """ Query a stream's capture state (12.3+)
-
-    Query stream state related to stream capture.
-
-    If called on :py:obj:`~.cudaStreamLegacy` (the "null stream") while a
-    stream not created with :py:obj:`~.cudaStreamNonBlocking` is capturing,
-    returns :py:obj:`~.cudaErrorStreamCaptureImplicit`.
-
-    Valid data (other than capture status) is returned only if both of the
-    following are true:
-
-    - the call returns cudaSuccess
-
-    - the returned capture status is
-      :py:obj:`~.cudaStreamCaptureStatusActive`
-
-    If `edgeData_out` is non-NULL then `dependencies_out` must be as well.
-    If `dependencies_out` is non-NULL and `edgeData_out` is NULL, but there
-    is non-zero edge data for one or more of the current stream
-    dependencies, the call will return :py:obj:`~.cudaErrorLossyQuery`.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorStreamCaptureImplicit`, :py:obj:`~.cudaErrorLossyQuery`
-    captureStatus_out : :py:obj:`~.cudaStreamCaptureStatus`
-        Location to return the capture status of the stream; required
-    id_out : unsigned long long
-        Optional location to return an id for the capture sequence, which
-        is unique over the lifetime of the process
-    graph_out : :py:obj:`~.cudaGraph_t`
-        Optional location to return the graph being captured into. All
-        operations other than destroy and node removal are permitted on the
-        graph while the capture sequence is in progress. This API does not
-        transfer ownership of the graph, which is transferred or destroyed
-        at :py:obj:`~.cudaStreamEndCapture`. Note that the graph handle may
-        be invalidated before end of capture for certain errors. Nodes that
-        are or become unreachable from the original stream at
-        :py:obj:`~.cudaStreamEndCapture` due to direct actions on the graph
-        do not trigger :py:obj:`~.cudaErrorStreamCaptureUnjoined`.
-    dependencies_out : List[:py:obj:`~.cudaGraphNode_t`]
-        Optional location to store a pointer to an array of nodes. The next
-        node to be captured in the stream will depend on this set of nodes,
-        absent operations such as event wait which modify this set. The
-        array pointer is valid until the next API call which operates on
-        the stream or until the capture is terminated. The node handles may
-        be copied out and are valid until they or the graph is destroyed.
-        The driver-owned array may also be passed directly to APIs that
-        operate on the graph (not the stream) without copying.
-    edgeData_out : List[:py:obj:`~.cudaGraphEdgeData`]
-        Optional location to store a pointer to an array of graph edge
-        data. This array parallels `dependencies_out`; the next node to be
-        added has an edge to `dependencies_out`[i] with annotation
-        `edgeData_out`[i] for each `i`. The array pointer is valid until
-        the next API call which operates on the stream or until the capture
-        is terminated.
-    numDependencies_out : int
-        Optional location to store the size of the array returned in
-        dependencies_out.
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaStreamUpdateCaptureDependencies`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaStreamCaptureStatus captureStatus_out
-    cdef unsigned long long id_out = 0
-    cdef cudaGraph_t graph_out = cudaGraph_t()
-    cdef const cyruntime.cudaGraphNode_t* cydependencies_out = NULL
-    pydependencies_out = []
-    cdef const cyruntime.cudaGraphEdgeData* cyedgeData_out = NULL
-    pyedgeData_out = []
-    cdef size_t numDependencies_out = 0
-    err = cyruntime.cudaStreamGetCaptureInfo_v3(cystream, &captureStatus_out, &id_out, <cyruntime.cudaGraph_t*>graph_out._ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out)
-    if cudaError_t(err) == cudaError_t(0):
-        pydependencies_out = [cudaGraphNode_t(init_value=<void_ptr>cydependencies_out[idx]) for idx in range(numDependencies_out)]
-    if cudaError_t(err) == cudaError_t(0):
-        pyedgeData_out = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData_out[idx]) for idx in range(numDependencies_out)]
-    return (cudaError_t(err), cudaStreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, pyedgeData_out, numDependencies_out)
-{{endif}}
-
-{{if 'cudaStreamUpdateCaptureDependencies' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamUpdateCaptureDependencies(stream, dependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, unsigned int flags):
-    """ Update the set of dependencies in a capturing stream (11.3+)
-
-    Modifies the dependency set of a capturing stream. The dependency set
-    is the set of nodes that the next captured node in the stream will
-    depend on.
-
-    Valid flags are :py:obj:`~.cudaStreamAddCaptureDependencies` and
-    :py:obj:`~.cudaStreamSetCaptureDependencies`. These control whether the
-    set passed to the API is added to the existing set or replaces it. A
-    flags value of 0 defaults to
-    :py:obj:`~.cudaStreamAddCaptureDependencies`.
-
-    Nodes that are removed from the dependency set via this API do not
-    result in :py:obj:`~.cudaErrorStreamCaptureUnjoined` if they are
-    unreachable from the stream at :py:obj:`~.cudaStreamEndCapture`.
-
-    Returns :py:obj:`~.cudaErrorIllegalState` if the stream is not
-    capturing.
-
-    This API is new in CUDA 11.3. Developers requiring compatibility across
-    minor versions of the CUDA driver to 11.0 should not use this API or
-    provide a fallback.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to update
-    dependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        The set of dependencies to add
-    numDependencies : size_t
-        The size of the dependencies array
-    flags : unsigned int
-        See above
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorIllegalState`
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamGetCaptureInfo`,
-    """
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaGraphNode_t* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cyruntime.cudaGraphNode_t*> calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>dependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
-    err = cyruntime.cudaStreamUpdateCaptureDependencies(cystream, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, numDependencies, flags)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaStreamUpdateCaptureDependencies_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaStreamUpdateCaptureDependencies_v2(stream, dependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], dependencyData : Optional[Tuple[cudaGraphEdgeData] | List[cudaGraphEdgeData]], size_t numDependencies, unsigned int flags):
-    """ Update the set of dependencies in a capturing stream (12.3+)
-
-    Modifies the dependency set of a capturing stream. The dependency set
-    is the set of nodes that the next captured node in the stream will
-    depend on.
-
-    Valid flags are :py:obj:`~.cudaStreamAddCaptureDependencies` and
-    :py:obj:`~.cudaStreamSetCaptureDependencies`. These control whether the
-    set passed to the API is added to the existing set or replaces it. A
-    flags value of 0 defaults to
-    :py:obj:`~.cudaStreamAddCaptureDependencies`.
-
-    Nodes that are removed from the dependency set via this API do not
-    result in :py:obj:`~.cudaErrorStreamCaptureUnjoined` if they are
-    unreachable from the stream at :py:obj:`~.cudaStreamEndCapture`.
-
-    Returns :py:obj:`~.cudaErrorIllegalState` if the stream is not
-    capturing.
-
-    Parameters
-    ----------
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream to update
-    dependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        The set of dependencies to add
-    dependencyData : List[:py:obj:`~.cudaGraphEdgeData`]
-        Optional array of data associated with each dependency.
-    numDependencies : size_t
-        The size of the dependencies array
-    flags : unsigned int
-        See above
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorIllegalState`
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamGetCaptureInfo`,
-    """
-    dependencyData = [] if dependencyData is None else dependencyData
-    if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData):
-        raise TypeError("Argument 'dependencyData' is not instance of type (expected Tuple[cyruntime.cudaGraphEdgeData,] or List[cyruntime.cudaGraphEdgeData,]")
-    dependencies = [] if dependencies is None else dependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies):
-        raise TypeError("Argument 'dependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaGraphNode_t* cydependencies = NULL
-    if len(dependencies) > 0:
-        cydependencies = <cyruntime.cudaGraphNode_t*> calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cydependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(dependencies)):
-                cydependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>dependencies[idx])._ptr[0]
-    cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL
-    if len(dependencyData) > 0:
-        cydependencyData = <cyruntime.cudaGraphEdgeData*> calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData))
-        if cydependencyData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
-        for idx in range(len(dependencyData)):
-            string.memcpy(&cydependencyData[idx], (<cudaGraphEdgeData>dependencyData[idx])._ptr, sizeof(cyruntime.cudaGraphEdgeData))
-    err = cyruntime.cudaStreamUpdateCaptureDependencies_v2(cystream, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>dependencies[0])._ptr if len(dependencies) == 1 else cydependencies, (<cudaGraphEdgeData>dependencyData[0])._ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, flags)
-    if cydependencies is not NULL:
-        free(cydependencies)
-    if cydependencyData is not NULL:
-        free(cydependencyData)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaEventCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaEventCreate():
-    """ Creates an event object.
-
-    Creates an event object for the current device using
-    :py:obj:`~.cudaEventDefault`.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorMemoryAllocation`
-    event : :py:obj:`~.cudaEvent_t`
-        Newly created event
-
-    See Also
-    --------
-    cudaEventCreate (C++ API), :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuEventCreate`
-    """
-    cdef cudaEvent_t event = cudaEvent_t()
-    err = cyruntime.cudaEventCreate(<cyruntime.cudaEvent_t*>event._ptr)
-    return (cudaError_t(err), event)
-{{endif}}
-
-{{if 'cudaEventCreateWithFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaEventCreateWithFlags(unsigned int flags):
-    """ Creates an event object with the specified flags.
-
-    Creates an event object for the current device with the specified
-    flags. Valid flags include:
-
-    - :py:obj:`~.cudaEventDefault`: Default event creation flag.
-
-    - :py:obj:`~.cudaEventBlockingSync`: Specifies that event should use
-      blocking synchronization. A host thread that uses
-      :py:obj:`~.cudaEventSynchronize()` to wait on an event created with
-      this flag will block until the event actually completes.
-
-    - :py:obj:`~.cudaEventDisableTiming`: Specifies that the created event
-      does not need to record timing data. Events created with this flag
-      specified and the :py:obj:`~.cudaEventBlockingSync` flag not
-      specified will provide the best performance when used with
-      :py:obj:`~.cudaStreamWaitEvent()` and :py:obj:`~.cudaEventQuery()`.
-
-    - :py:obj:`~.cudaEventInterprocess`: Specifies that the created event
-      may be used as an interprocess event by
-      :py:obj:`~.cudaIpcGetEventHandle()`.
-      :py:obj:`~.cudaEventInterprocess` must be specified along with
-      :py:obj:`~.cudaEventDisableTiming`.
-
-    Parameters
-    ----------
-    flags : unsigned int
-        Flags for new event
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorMemoryAllocation`
-    event : :py:obj:`~.cudaEvent_t`
-        Newly created event
-
-    See Also
-    --------
-    :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuEventCreate`
-    """
-    cdef cudaEvent_t event = cudaEvent_t()
-    err = cyruntime.cudaEventCreateWithFlags(<cyruntime.cudaEvent_t*>event._ptr, flags)
-    return (cudaError_t(err), event)
-{{endif}}
-
-{{if 'cudaEventRecord' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaEventRecord(event, stream):
-    """ Records an event.
-
-    Captures in `event` the contents of `stream` at the time of this call.
-    `event` and `stream` must be on the same CUDA context. Calls such as
-    :py:obj:`~.cudaEventQuery()` or :py:obj:`~.cudaStreamWaitEvent()` will
-    then examine or wait for completion of the work that was captured. Uses
-    of `stream` after this call do not modify `event`. See note on default
-    stream behavior for what is captured in the default case.
-
-    :py:obj:`~.cudaEventRecord()` can be called multiple times on the same
-    event and will overwrite the previously captured state. Other APIs such
-    as :py:obj:`~.cudaStreamWaitEvent()` use the most recently captured
-    state at the time of the API call, and are not affected by later calls
-    to :py:obj:`~.cudaEventRecord()`. Before the first call to
-    :py:obj:`~.cudaEventRecord()`, an event represents an empty set of
-    work, so for example :py:obj:`~.cudaEventQuery()` would return
-    :py:obj:`~.cudaSuccess`.
-
-    Parameters
-    ----------
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to record
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to record event
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
-
-    See Also
-    --------
-    :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cuEventRecord`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    err = cyruntime.cudaEventRecord(cyevent, cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaEventRecordWithFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaEventRecordWithFlags(event, stream, unsigned int flags):
-    """ Records an event.
-
-    Captures in `event` the contents of `stream` at the time of this call.
-    `event` and `stream` must be on the same CUDA context. Calls such as
-    :py:obj:`~.cudaEventQuery()` or :py:obj:`~.cudaStreamWaitEvent()` will
-    then examine or wait for completion of the work that was captured. Uses
-    of `stream` after this call do not modify `event`. See note on default
-    stream behavior for what is captured in the default case.
-
-    :py:obj:`~.cudaEventRecordWithFlags()` can be called multiple times on
-    the same event and will overwrite the previously captured state. Other
-    APIs such as :py:obj:`~.cudaStreamWaitEvent()` use the most recently
-    captured state at the time of the API call, and are not affected by
-    later calls to :py:obj:`~.cudaEventRecordWithFlags()`. Before the first
-    call to :py:obj:`~.cudaEventRecordWithFlags()`, an event represents an
-    empty set of work, so for example :py:obj:`~.cudaEventQuery()` would
-    return :py:obj:`~.cudaSuccess`.
-
-    flags include:
-
-    - :py:obj:`~.cudaEventRecordDefault`: Default event creation flag.
-
-    - :py:obj:`~.cudaEventRecordExternal`: Event is captured in the graph
-      as an external event node when performing stream capture.
-
-    Parameters
-    ----------
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to record
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to record event
-    flags : unsigned int
-        Parameters for the operation(See above)
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
-
-    See Also
-    --------
-    :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cuEventRecord`,
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    err = cyruntime.cudaEventRecordWithFlags(cyevent, cystream, flags)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaEventQuery' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaEventQuery(event):
-    """ Queries an event's status.
-
-    Queries the status of all work currently captured by `event`. See
-    :py:obj:`~.cudaEventRecord()` for details on what is captured by an
-    event.
-
-    Returns :py:obj:`~.cudaSuccess` if all captured work has been
-    completed, or :py:obj:`~.cudaErrorNotReady` if any captured work is
-    incomplete.
-
-    For the purposes of Unified Memory, a return value of
-    :py:obj:`~.cudaSuccess` is equivalent to having called
-    :py:obj:`~.cudaEventSynchronize()`.
-
-    Parameters
-    ----------
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
-
-    See Also
-    --------
-    :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cuEventQuery`
-    """
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    err = cyruntime.cudaEventQuery(cyevent)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaEventSynchronize' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaEventSynchronize(event):
-    """ Waits for an event to complete.
-
-    Waits until the completion of all work currently captured in `event`.
-    See :py:obj:`~.cudaEventRecord()` for details on what is captured by an
-    event.
-
-    Waiting for an event that was created with the
-    :py:obj:`~.cudaEventBlockingSync` flag will cause the calling CPU
-    thread to block until the event has been completed by the device. If
-    the :py:obj:`~.cudaEventBlockingSync` flag has not been set, then the
-    CPU thread will busy-wait until the event has been completed by the
-    device.
-
-    Parameters
-    ----------
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to wait for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
-
-    See Also
-    --------
-    :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cuEventSynchronize`
-    """
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    err = cyruntime.cudaEventSynchronize(cyevent)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaEventDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaEventDestroy(event):
-    """ Destroys an event object.
-
-    Destroys the event specified by `event`.
-
-    An event may be destroyed before it is complete (i.e., while
-    :py:obj:`~.cudaEventQuery()` would return
-    :py:obj:`~.cudaErrorNotReady`). In this case, the call does not block
-    on completion of the event, and any associated resources will
-    automatically be released asynchronously at completion.
-
-    Parameters
-    ----------
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to destroy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
-
-    See Also
-    --------
-    :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cuEventDestroy`
-    """
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    err = cyruntime.cudaEventDestroy(cyevent)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaEventElapsedTime' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaEventElapsedTime(start, end):
-    """ Computes the elapsed time between events.
-
-    Computes the elapsed time between two events (in milliseconds with a
-    resolution of around 0.5 microseconds).
-
-    If either event was last recorded in a non-NULL stream, the resulting
-    time may be greater than expected (even if both used the same stream
-    handle). This happens because the :py:obj:`~.cudaEventRecord()`
-    operation takes place asynchronously and there is no guarantee that the
-    measured latency is actually just between the two events. Any number of
-    other different stream operations could execute in between the two
-    measured events, thus altering the timing in a significant way.
-
-    If :py:obj:`~.cudaEventRecord()` has not been called on either event,
-    then :py:obj:`~.cudaErrorInvalidResourceHandle` is returned. If
-    :py:obj:`~.cudaEventRecord()` has been called on both events but one or
-    both of them has not yet been completed (that is,
-    :py:obj:`~.cudaEventQuery()` would return :py:obj:`~.cudaErrorNotReady`
-    on at least one of the events), :py:obj:`~.cudaErrorNotReady` is
-    returned. If either event was created with the
-    :py:obj:`~.cudaEventDisableTiming` flag, then this function will return
-    :py:obj:`~.cudaErrorInvalidResourceHandle`.
-
-    Parameters
-    ----------
-    start : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Starting event
-    end : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Ending event
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorUnknown`
-    ms : float
-        Time between `start` and `end` in ms
-
-    See Also
-    --------
-    :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cuEventElapsedTime`
-    """
-    cdef cyruntime.cudaEvent_t cyend
-    if end is None:
-        cyend = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(end, (cudaEvent_t,driver.CUevent)):
-        pend = int(end)
-        cyend = <cyruntime.cudaEvent_t><void_ptr>pend
-    else:
-        pend = int(cudaEvent_t(end))
-        cyend = <cyruntime.cudaEvent_t><void_ptr>pend
-    cdef cyruntime.cudaEvent_t cystart
-    if start is None:
-        cystart = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(start, (cudaEvent_t,driver.CUevent)):
-        pstart = int(start)
-        cystart = <cyruntime.cudaEvent_t><void_ptr>pstart
-    else:
-        pstart = int(cudaEvent_t(start))
-        cystart = <cyruntime.cudaEvent_t><void_ptr>pstart
-    cdef float ms = 0
-    err = cyruntime.cudaEventElapsedTime(&ms, cystart, cyend)
-    return (cudaError_t(err), ms)
-{{endif}}
-
-{{if 'cudaImportExternalMemory' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaImportExternalMemory(memHandleDesc : Optional[cudaExternalMemoryHandleDesc]):
-    """ Imports an external memory object.
-
-    Imports an externally allocated memory object and returns a handle to
-    that in `extMem_out`.
-
-    The properties of the handle being imported must be described in
-    `memHandleDesc`. The :py:obj:`~.cudaExternalMemoryHandleDesc` structure
-    is defined as follows:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.cudaExternalMemoryHandleDesc.type` specifies the type
-    of handle being imported. :py:obj:`~.cudaExternalMemoryHandleType` is
-    defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
-    :py:obj:`~.cudaExternalMemoryHandleTypeOpaqueFd`, then
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::fd must be a valid
-    file descriptor referencing a memory object. Ownership of the file
-    descriptor is transferred to the CUDA driver when the handle is
-    imported successfully. Performing any operations on the file descriptor
-    after it is imported results in undefined behavior.
-
-    If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
-    :py:obj:`~.cudaExternalMemoryHandleTypeOpaqueWin32`, then exactly one
-    of :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
-    be NULL. If
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is not
-    NULL, then it must represent a valid shared NT handle that references a
-    memory object. Ownership of this handle is not transferred to CUDA
-    after the import operation, so the application must release the handle
-    using the appropriate system call. If
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
-    NULL, then it must point to a NULL-terminated array of UTF-16
-    characters that refers to a memory object.
-
-    If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
-    :py:obj:`~.cudaExternalMemoryHandleTypeOpaqueWin32Kmt`, then
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle must be
-    non-NULL and
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must be
-    NULL. The handle specified must be a globally shared KMT handle. This
-    handle does not hold a reference to the underlying object, and thus
-    will be invalid when all references to the memory object are destroyed.
-
-    If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
-    :py:obj:`~.cudaExternalMemoryHandleTypeD3D12Heap`, then exactly one of
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
-    be NULL. If
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is not
-    NULL, then it must represent a valid shared NT handle that is returned
-    by ID3D12Device::CreateSharedHandle when referring to a ID3D12Heap
-    object. This handle holds a reference to the underlying object. If
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
-    NULL, then it must point to a NULL-terminated array of UTF-16
-    characters that refers to a ID3D12Heap object.
-
-    If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
-    :py:obj:`~.cudaExternalMemoryHandleTypeD3D12Resource`, then exactly one
-    of :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
-    be NULL. If
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is not
-    NULL, then it must represent a valid shared NT handle that is returned
-    by ID3D12Device::CreateSharedHandle when referring to a ID3D12Resource
-    object. This handle holds a reference to the underlying object. If
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
-    NULL, then it must point to a NULL-terminated array of UTF-16
-    characters that refers to a ID3D12Resource object.
-
-    If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
-    :py:obj:`~.cudaExternalMemoryHandleTypeD3D11Resource`,then exactly one
-    of :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
-    be NULL. If
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is
-    not NULL, then it must represent a valid shared NT handle that is
-    returned by IDXGIResource1::CreateSharedHandle when referring to a
-    ID3D11Resource object. If
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
-    NULL, then it must point to a NULL-terminated array of UTF-16
-    characters that refers to a ID3D11Resource object.
-
-    If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
-    :py:obj:`~.cudaExternalMemoryHandleTypeD3D11ResourceKmt`, then
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle must be
-    non-NULL and
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must be
-    NULL. The handle specified must be a valid shared KMT handle that is
-    returned by IDXGIResource::GetSharedHandle when referring to a
-    ID3D11Resource object.
-
-    If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
-    :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, then
-    :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::nvSciBufObject must
-    be NON-NULL and reference a valid NvSciBuf object. If the NvSciBuf
-    object imported into CUDA is also mapped by other drivers, then the
-    application must use :py:obj:`~.cudaWaitExternalSemaphoresAsync` or
-    :py:obj:`~.cudaSignalExternalSemaphoresAsync` as approprriate barriers
-    to maintain coherence between CUDA and the other drivers. See
-    :py:obj:`~.cudaExternalSemaphoreWaitSkipNvSciBufMemSync` and
-    :py:obj:`~.cudaExternalSemaphoreSignalSkipNvSciBufMemSync` for memory
-    synchronization.
-
-    The size of the memory object must be specified in
-    :py:obj:`~.cudaExternalMemoryHandleDesc.size`.
-
-    Specifying the flag :py:obj:`~.cudaExternalMemoryDedicated` in
-    :py:obj:`~.cudaExternalMemoryHandleDesc.flags` indicates that the
-    resource is a dedicated resource. The definition of what a dedicated
-    resource is outside the scope of this extension. This flag must be set
-    if :py:obj:`~.cudaExternalMemoryHandleDesc.type` is one of the
-    following: :py:obj:`~.cudaExternalMemoryHandleTypeD3D12Resource`
-    :py:obj:`~.cudaExternalMemoryHandleTypeD3D11Resource`
-    :py:obj:`~.cudaExternalMemoryHandleTypeD3D11ResourceKmt`
-
-    Parameters
-    ----------
-    memHandleDesc : :py:obj:`~.cudaExternalMemoryHandleDesc`
-        Memory import handle descriptor
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`
-    extMem_out : :py:obj:`~.cudaExternalMemory_t`
-        Returned handle to an external memory object
-
-    See Also
-    --------
-    :py:obj:`~.cudaDestroyExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedBuffer`, :py:obj:`~.cudaExternalMemoryGetMappedMipmappedArray`
-
-    Notes
-    -----
-    If the Vulkan memory imported into CUDA is mapped on the CPU then the application must use vkInvalidateMappedMemoryRanges/vkFlushMappedMemoryRanges as well as appropriate Vulkan pipeline barriers to maintain coherence between CPU and GPU. For more information on these APIs, please refer to "Synchronization
-    and Cache Control" chapter from Vulkan specification.
-    """
-    cdef cudaExternalMemory_t extMem_out = cudaExternalMemory_t()
-    cdef cyruntime.cudaExternalMemoryHandleDesc* cymemHandleDesc_ptr = memHandleDesc._ptr if memHandleDesc != None else NULL
-    err = cyruntime.cudaImportExternalMemory(<cyruntime.cudaExternalMemory_t*>extMem_out._ptr, cymemHandleDesc_ptr)
-    return (cudaError_t(err), extMem_out)
-{{endif}}
-
-{{if 'cudaExternalMemoryGetMappedBuffer' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaExternalMemoryGetMappedBuffer(extMem, bufferDesc : Optional[cudaExternalMemoryBufferDesc]):
-    """ Maps a buffer onto an imported memory object.
-
-    Maps a buffer onto an imported memory object and returns a device
-    pointer in `devPtr`.
-
-    The properties of the buffer being mapped must be described in
-    `bufferDesc`. The :py:obj:`~.cudaExternalMemoryBufferDesc` structure is
-    defined as follows:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.cudaExternalMemoryBufferDesc.offset` is the offset in
-    the memory object where the buffer's base address is.
-    :py:obj:`~.cudaExternalMemoryBufferDesc.size` is the size of the
-    buffer. :py:obj:`~.cudaExternalMemoryBufferDesc.flags` must be zero.
-
-    The offset and size have to be suitably aligned to match the
-    requirements of the external API. Mapping two buffers whose ranges
-    overlap may or may not result in the same virtual address being
-    returned for the overlapped portion. In such cases, the application
-    must ensure that all accesses to that region from the GPU are volatile.
-    Otherwise writes made via one address are not guaranteed to be visible
-    via the other address, even if they're issued by the same thread. It is
-    recommended that applications map the combined range instead of mapping
-    separate buffers and then apply the appropriate offsets to the returned
-    pointer to derive the individual buffers.
-
-    The returned pointer `devPtr` must be freed using :py:obj:`~.cudaFree`.
-
-    Parameters
-    ----------
-    extMem : :py:obj:`~.cudaExternalMemory_t`
-        Handle to external memory object
-    bufferDesc : :py:obj:`~.cudaExternalMemoryBufferDesc`
-        Buffer descriptor
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-    devPtr : Any
-        Returned device pointer to buffer
-
-    See Also
-    --------
-    :py:obj:`~.cudaImportExternalMemory`, :py:obj:`~.cudaDestroyExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedMipmappedArray`
-    """
-    cdef cyruntime.cudaExternalMemory_t cyextMem
-    if extMem is None:
-        cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>0
-    elif isinstance(extMem, (cudaExternalMemory_t,)):
-        pextMem = int(extMem)
-        cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
-    else:
-        pextMem = int(cudaExternalMemory_t(extMem))
-        cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
-    cdef void_ptr devPtr = 0
-    cdef cyruntime.cudaExternalMemoryBufferDesc* cybufferDesc_ptr = bufferDesc._ptr if bufferDesc != None else NULL
-    err = cyruntime.cudaExternalMemoryGetMappedBuffer(<void**>&devPtr, cyextMem, cybufferDesc_ptr)
-    return (cudaError_t(err), devPtr)
-{{endif}}
-
-{{if 'cudaExternalMemoryGetMappedMipmappedArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaExternalMemoryGetMappedMipmappedArray(extMem, mipmapDesc : Optional[cudaExternalMemoryMipmappedArrayDesc]):
-    """ Maps a CUDA mipmapped array onto an external memory object.
-
-    Maps a CUDA mipmapped array onto an external object and returns a
-    handle to it in `mipmap`.
-
-    The properties of the CUDA mipmapped array being mapped must be
-    described in `mipmapDesc`. The structure
-    :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc` is defined as follows:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.offset` is the
-    offset in the memory object where the base level of the mipmap chain
-    is. :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.formatDesc`
-    describes the format of the data.
-    :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.extent` specifies the
-    dimensions of the base level of the mipmap chain.
-    :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.flags` are flags
-    associated with CUDA mipmapped arrays. For further details, please
-    refer to the documentation for :py:obj:`~.cudaMalloc3DArray`. Note that
-    if the mipmapped array is bound as a color target in the graphics API,
-    then the flag :py:obj:`~.cudaArrayColorAttachment` must be specified in
-    :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.flags`.
-    :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.numLevels` specifies
-    the total number of levels in the mipmap chain.
-
-    The returned CUDA mipmapped array must be freed using
-    :py:obj:`~.cudaFreeMipmappedArray`.
-
-    Parameters
-    ----------
-    extMem : :py:obj:`~.cudaExternalMemory_t`
-        Handle to external memory object
-    mipmapDesc : :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc`
-        CUDA array descriptor
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-    mipmap : :py:obj:`~.cudaMipmappedArray_t`
-        Returned CUDA mipmapped array
-
-    See Also
-    --------
-    :py:obj:`~.cudaImportExternalMemory`, :py:obj:`~.cudaDestroyExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedBuffer`
-
-    Notes
-    -----
-    If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, then :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.numLevels` must not be greater than 1.
-    """
-    cdef cyruntime.cudaExternalMemory_t cyextMem
-    if extMem is None:
-        cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>0
-    elif isinstance(extMem, (cudaExternalMemory_t,)):
-        pextMem = int(extMem)
-        cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
-    else:
-        pextMem = int(cudaExternalMemory_t(extMem))
-        cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
-    cdef cudaMipmappedArray_t mipmap = cudaMipmappedArray_t()
-    cdef cyruntime.cudaExternalMemoryMipmappedArrayDesc* cymipmapDesc_ptr = mipmapDesc._ptr if mipmapDesc != None else NULL
-    err = cyruntime.cudaExternalMemoryGetMappedMipmappedArray(<cyruntime.cudaMipmappedArray_t*>mipmap._ptr, cyextMem, cymipmapDesc_ptr)
-    return (cudaError_t(err), mipmap)
-{{endif}}
-
-{{if 'cudaDestroyExternalMemory' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDestroyExternalMemory(extMem):
-    """ Destroys an external memory object.
-
-    Destroys the specified external memory object. Any existing buffers and
-    CUDA mipmapped arrays mapped onto this object must no longer be used
-    and must be explicitly freed using :py:obj:`~.cudaFree` and
-    :py:obj:`~.cudaFreeMipmappedArray` respectively.
-
-    Parameters
-    ----------
-    extMem : :py:obj:`~.cudaExternalMemory_t`
-        External memory object to be destroyed
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-
-    See Also
-    --------
-    :py:obj:`~.cudaImportExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedBuffer`, :py:obj:`~.cudaExternalMemoryGetMappedMipmappedArray`
-    """
-    cdef cyruntime.cudaExternalMemory_t cyextMem
-    if extMem is None:
-        cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>0
-    elif isinstance(extMem, (cudaExternalMemory_t,)):
-        pextMem = int(extMem)
-        cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
-    else:
-        pextMem = int(cudaExternalMemory_t(extMem))
-        cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
-    err = cyruntime.cudaDestroyExternalMemory(cyextMem)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaImportExternalSemaphore' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaImportExternalSemaphore(semHandleDesc : Optional[cudaExternalSemaphoreHandleDesc]):
-    """ Imports an external semaphore.
-
-    Imports an externally allocated synchronization object and returns a
-    handle to that in `extSem_out`.
-
-    The properties of the handle being imported must be described in
-    `semHandleDesc`. The :py:obj:`~.cudaExternalSemaphoreHandleDesc` is
-    defined as follows:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` specifies the
-    type of handle being imported.
-    :py:obj:`~.cudaExternalSemaphoreHandleType` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueFd`, then
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::fd must be a valid
-    file descriptor referencing a synchronization object. Ownership of the
-    file descriptor is transferred to the CUDA driver when the handle is
-    imported successfully. Performing any operations on the file descriptor
-    after it is imported results in undefined behavior.
-
-    If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32`, then exactly
-    one of
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle and
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
-    not be NULL. If
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
-    not NULL, then it must represent a valid shared NT handle that
-    references a synchronization object. Ownership of this handle is not
-    transferred to CUDA after the import operation, so the application must
-    release the handle using the appropriate system call. If
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
-    NULL, then it must name a valid synchronization object.
-
-    If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt`, then
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle must
-    be non-NULL and
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
-    be NULL. The handle specified must be a globally shared KMT handle.
-    This handle does not hold a reference to the underlying object, and
-    thus will be invalid when all references to the synchronization object
-    are destroyed.
-
-    If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D12Fence`, then exactly one
-    of :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle
-    and :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name
-    must not be NULL. If
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
-    not NULL, then it must represent a valid shared NT handle that is
-    returned by ID3D12Device::CreateSharedHandle when referring to a
-    ID3D12Fence object. This handle holds a reference to the underlying
-    object. If
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
-    NULL, then it must name a valid synchronization object that refers to a
-    valid ID3D12Fence object.
-
-    If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D11Fence`, then exactly one
-    of :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle
-    and :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name
-    must not be NULL. If
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
-    not NULL, then it must represent a valid shared NT handle that is
-    returned by ID3D11Fence::CreateSharedHandle. If
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
-    NULL, then it must name a valid synchronization object that refers to a
-    valid ID3D11Fence object.
-
-    If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync`, then
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::nvSciSyncObj
-    represents a valid NvSciSyncObj.
-
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutex`, then exactly one
-    of :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle
-    and :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name
-    must not be NULL. If
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
-    not NULL, then it represent a valid shared NT handle that is returned
-    by IDXGIResource1::CreateSharedHandle when referring to a
-    IDXGIKeyedMutex object.
-
-    If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutexKmt`, then
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle must
-    be non-NULL and
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
-    be NULL. The handle specified must represent a valid KMT handle that is
-    returned by IDXGIResource::GetSharedHandle when referring to a
-    IDXGIKeyedMutex object.
-
-    If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`, then
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::fd must be a valid
-    file descriptor referencing a synchronization object. Ownership of the
-    file descriptor is transferred to the CUDA driver when the handle is
-    imported successfully. Performing any operations on the file descriptor
-    after it is imported results in undefined behavior.
-
-    If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32`, then
-    exactly one of
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle and
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
-    not be NULL. If
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
-    not NULL, then it must represent a valid shared NT handle that
-    references a synchronization object. Ownership of this handle is not
-    transferred to CUDA after the import operation, so the application must
-    release the handle using the appropriate system call. If
-    :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
-    NULL, then it must name a valid synchronization object.
-
-    Parameters
-    ----------
-    semHandleDesc : :py:obj:`~.cudaExternalSemaphoreHandleDesc`
-        Semaphore import handle descriptor
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`
-    extSem_out : :py:obj:`~.cudaExternalSemaphore_t`
-        Returned handle to an external semaphore
-
-    See Also
-    --------
-    :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
-    """
-    cdef cudaExternalSemaphore_t extSem_out = cudaExternalSemaphore_t()
-    cdef cyruntime.cudaExternalSemaphoreHandleDesc* cysemHandleDesc_ptr = semHandleDesc._ptr if semHandleDesc != None else NULL
-    err = cyruntime.cudaImportExternalSemaphore(<cyruntime.cudaExternalSemaphore_t*>extSem_out._ptr, cysemHandleDesc_ptr)
-    return (cudaError_t(err), extSem_out)
-{{endif}}
-
-{{if 'cudaSignalExternalSemaphoresAsync_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaSignalExternalSemaphoresAsync(extSemArray : Optional[Tuple[cudaExternalSemaphore_t] | List[cudaExternalSemaphore_t]], paramsArray : Optional[Tuple[cudaExternalSemaphoreSignalParams] | List[cudaExternalSemaphoreSignalParams]], unsigned int numExtSems, stream):
-    """ Signals a set of external semaphore objects.
-
-    Enqueues a signal operation on a set of externally allocated semaphore
-    object in the specified stream. The operations will be executed when
-    all prior operations in the stream complete.
-
-    The exact semantics of signaling a semaphore depends on the type of the
-    object.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueFd`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt` then
-    signaling the semaphore will set it to the signaled state.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D12Fence`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D11Fence`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32` then
-    the semaphore will be set to the value specified in
-    :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::fence::value.
-
-    If the semaphore object is of the type
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync` this API sets
-    :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence
-    to a value that can be used by subsequent waiters of the same NvSciSync
-    object to order operations with those currently submitted in `stream`.
-    Such an update will overwrite previous contents of
-    :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence.
-    By default, signaling such an external semaphore object causes
-    appropriate memory synchronization operations to be performed over all
-    the external memory objects that are imported as
-    :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`. This ensures that any
-    subsequent accesses made by other importers of the same set of NvSciBuf
-    memory object(s) are coherent. These operations can be skipped by
-    specifying the flag
-    :py:obj:`~.cudaExternalSemaphoreSignalSkipNvSciBufMemSync`, which can
-    be used as a performance optimization when data coherency is not
-    required. But specifying this flag in scenarios where data coherency is
-    required results in undefined behavior. Also, for semaphore object of
-    the type :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync`, if the
-    NvSciSyncAttrList used to create the NvSciSyncObj had not set the flags
-    in :py:obj:`~.cudaDeviceGetNvSciSyncAttributes` to
-    cudaNvSciSyncAttrSignal, this API will return cudaErrorNotSupported.
-
-    :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence
-    associated with semaphore object of the type
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync` can be
-    deterministic. For this the NvSciSyncAttrList used to create the
-    semaphore object must have value of
-    NvSciSyncAttrKey_RequireDeterministicFences key set to true.
-    Deterministic fences allow users to enqueue a wait over the semaphore
-    object even before corresponding signal is enqueued. For such a
-    semaphore object, CUDA guarantees that each signal operation will
-    increment the fence value by '1'. Users are expected to track count of
-    signals enqueued on the semaphore object and insert waits accordingly.
-    When such a semaphore object is signaled from multiple streams, due to
-    concurrent stream execution, it is possible that the order in which the
-    semaphore gets signaled is indeterministic. This could lead to waiters
-    of the semaphore getting unblocked incorrectly. Users are expected to
-    handle such situations, either by not using the same semaphore object
-    with deterministic fence support enabled in different streams or by
-    adding explicit dependency amongst such streams so that the semaphore
-    is signaled in order.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutex`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutexKmt`, then the
-    keyed mutex will be released with the key specified in
-    :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::keyedmutex::key.
-
-    Parameters
-    ----------
-    extSemArray : List[:py:obj:`~.cudaExternalSemaphore_t`]
-        Set of external semaphores to be signaled
-    paramsArray : List[:py:obj:`~.cudaExternalSemaphoreSignalParams`]
-        Array of semaphore parameters
-    numExtSems : unsigned int
-        Number of semaphores to signal
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to enqueue the signal operations in
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-
-    See Also
-    --------
-    :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    paramsArray = [] if paramsArray is None else paramsArray
-    if not all(isinstance(_x, (cudaExternalSemaphoreSignalParams,)) for _x in paramsArray):
-        raise TypeError("Argument 'paramsArray' is not instance of type (expected Tuple[cyruntime.cudaExternalSemaphoreSignalParams,] or List[cyruntime.cudaExternalSemaphoreSignalParams,]")
-    extSemArray = [] if extSemArray is None else extSemArray
-    if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray):
-        raise TypeError("Argument 'extSemArray' is not instance of type (expected Tuple[cyruntime.cudaExternalSemaphore_t,] or List[cyruntime.cudaExternalSemaphore_t,]")
-    cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL
-    if len(extSemArray) > 0:
-        cyextSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t))
-        if cyextSemArray is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
-        else:
-            for idx in range(len(extSemArray)):
-                cyextSemArray[idx] = <cyruntime.cudaExternalSemaphore_t>(<cudaExternalSemaphore_t>extSemArray[idx])._ptr[0]
-    cdef cyruntime.cudaExternalSemaphoreSignalParams* cyparamsArray = NULL
-    if len(paramsArray) > 0:
-        cyparamsArray = <cyruntime.cudaExternalSemaphoreSignalParams*> calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
-        if cyparamsArray is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams)))
-        for idx in range(len(paramsArray)):
-            string.memcpy(&cyparamsArray[idx], (<cudaExternalSemaphoreSignalParams>paramsArray[idx])._ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
-    if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems))
-    if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems))
-    err = cyruntime.cudaSignalExternalSemaphoresAsync(<cyruntime.cudaExternalSemaphore_t*>(<cudaExternalSemaphore_t>extSemArray[0])._ptr if len(extSemArray) == 1 else cyextSemArray, (<cudaExternalSemaphoreSignalParams>paramsArray[0])._ptr if len(paramsArray) == 1 else cyparamsArray, numExtSems, cystream)
-    if cyextSemArray is not NULL:
-        free(cyextSemArray)
-    if cyparamsArray is not NULL:
-        free(cyparamsArray)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaWaitExternalSemaphoresAsync_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaWaitExternalSemaphoresAsync(extSemArray : Optional[Tuple[cudaExternalSemaphore_t] | List[cudaExternalSemaphore_t]], paramsArray : Optional[Tuple[cudaExternalSemaphoreWaitParams] | List[cudaExternalSemaphoreWaitParams]], unsigned int numExtSems, stream):
-    """ Waits on a set of external semaphore objects.
-
-    Enqueues a wait operation on a set of externally allocated semaphore
-    object in the specified stream. The operations will be executed when
-    all prior operations in the stream complete.
-
-    The exact semantics of waiting on a semaphore depends on the type of
-    the object.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueFd`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt` then waiting
-    on the semaphore will wait until the semaphore reaches the signaled
-    state. The semaphore will then be reset to the unsignaled state.
-    Therefore for every signal operation, there can only be one wait
-    operation.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D12Fence`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D11Fence`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32` then
-    waiting on the semaphore will wait until the value of the semaphore is
-    greater than or equal to
-    :py:obj:`~.cudaExternalSemaphoreWaitParams`::params::fence::value.
-
-    If the semaphore object is of the type
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync` then, waiting on
-    the semaphore will wait until the
-    :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence
-    is signaled by the signaler of the NvSciSyncObj that was associated
-    with this semaphore object. By default, waiting on such an external
-    semaphore object causes appropriate memory synchronization operations
-    to be performed over all external memory objects that are imported as
-    :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`. This ensures that any
-    subsequent accesses made by other importers of the same set of NvSciBuf
-    memory object(s) are coherent. These operations can be skipped by
-    specifying the flag
-    :py:obj:`~.cudaExternalSemaphoreWaitSkipNvSciBufMemSync`, which can be
-    used as a performance optimization when data coherency is not required.
-    But specifying this flag in scenarios where data coherency is required
-    results in undefined behavior. Also, for semaphore object of the type
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync`, if the
-    NvSciSyncAttrList used to create the NvSciSyncObj had not set the flags
-    in :py:obj:`~.cudaDeviceGetNvSciSyncAttributes` to
-    cudaNvSciSyncAttrWait, this API will return cudaErrorNotSupported.
-
-    If the semaphore object is any one of the following types:
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutex`,
-    :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutexKmt`, then the
-    keyed mutex will be acquired when it is released with the key specified
-    in
-    :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::keyedmutex::key
-    or until the timeout specified by
-    :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::keyedmutex::timeoutMs
-    has lapsed. The timeout interval can either be a finite value specified
-    in milliseconds or an infinite value. In case an infinite value is
-    specified the timeout never elapses. The windows INFINITE macro must be
-    used to specify infinite timeout
-
-    Parameters
-    ----------
-    extSemArray : List[:py:obj:`~.cudaExternalSemaphore_t`]
-        External semaphores to be waited on
-    paramsArray : List[:py:obj:`~.cudaExternalSemaphoreWaitParams`]
-        Array of semaphore parameters
-    numExtSems : unsigned int
-        Number of semaphores to wait on
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to enqueue the wait operations in
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle` :py:obj:`~.cudaErrorTimeout`
-
-    See Also
-    --------
-    :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    paramsArray = [] if paramsArray is None else paramsArray
-    if not all(isinstance(_x, (cudaExternalSemaphoreWaitParams,)) for _x in paramsArray):
-        raise TypeError("Argument 'paramsArray' is not instance of type (expected Tuple[cyruntime.cudaExternalSemaphoreWaitParams,] or List[cyruntime.cudaExternalSemaphoreWaitParams,]")
-    extSemArray = [] if extSemArray is None else extSemArray
-    if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray):
-        raise TypeError("Argument 'extSemArray' is not instance of type (expected Tuple[cyruntime.cudaExternalSemaphore_t,] or List[cyruntime.cudaExternalSemaphore_t,]")
-    cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL
-    if len(extSemArray) > 0:
-        cyextSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t))
-        if cyextSemArray is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
-        else:
-            for idx in range(len(extSemArray)):
-                cyextSemArray[idx] = <cyruntime.cudaExternalSemaphore_t>(<cudaExternalSemaphore_t>extSemArray[idx])._ptr[0]
-    cdef cyruntime.cudaExternalSemaphoreWaitParams* cyparamsArray = NULL
-    if len(paramsArray) > 0:
-        cyparamsArray = <cyruntime.cudaExternalSemaphoreWaitParams*> calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
-        if cyparamsArray is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams)))
-        for idx in range(len(paramsArray)):
-            string.memcpy(&cyparamsArray[idx], (<cudaExternalSemaphoreWaitParams>paramsArray[idx])._ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
-    if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems))
-    if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems))
-    err = cyruntime.cudaWaitExternalSemaphoresAsync(<cyruntime.cudaExternalSemaphore_t*>(<cudaExternalSemaphore_t>extSemArray[0])._ptr if len(extSemArray) == 1 else cyextSemArray, (<cudaExternalSemaphoreWaitParams>paramsArray[0])._ptr if len(paramsArray) == 1 else cyparamsArray, numExtSems, cystream)
-    if cyextSemArray is not NULL:
-        free(cyextSemArray)
-    if cyparamsArray is not NULL:
-        free(cyparamsArray)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDestroyExternalSemaphore' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDestroyExternalSemaphore(extSem):
-    """ Destroys an external semaphore.
-
-    Destroys an external semaphore object and releases any references to
-    the underlying resource. Any outstanding signals or waits must have
-    completed before the semaphore is destroyed.
-
-    Parameters
-    ----------
-    extSem : :py:obj:`~.cudaExternalSemaphore_t`
-        External semaphore to be destroyed
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-
-    See Also
-    --------
-    :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
-    """
-    cdef cyruntime.cudaExternalSemaphore_t cyextSem
-    if extSem is None:
-        cyextSem = <cyruntime.cudaExternalSemaphore_t><void_ptr>0
-    elif isinstance(extSem, (cudaExternalSemaphore_t,)):
-        pextSem = int(extSem)
-        cyextSem = <cyruntime.cudaExternalSemaphore_t><void_ptr>pextSem
-    else:
-        pextSem = int(cudaExternalSemaphore_t(extSem))
-        cyextSem = <cyruntime.cudaExternalSemaphore_t><void_ptr>pextSem
-    err = cyruntime.cudaDestroyExternalSemaphore(cyextSem)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaFuncSetCacheConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaFuncSetCacheConfig(func, cacheConfig not None : cudaFuncCache):
-    """ Sets the preferred cache configuration for a device function.
-
-    On devices where the L1 cache and shared memory use the same hardware
-    resources, this sets through `cacheConfig` the preferred cache
-    configuration for the function specified via `func`. This is only a
-    preference. The runtime will use the requested configuration if
-    possible, but it is free to choose a different configuration if
-    required to execute `func`.
-
-    `func` is a device function symbol and must be declared as a `None`
-    function. If the specified function does not exist, then
-    :py:obj:`~.cudaErrorInvalidDeviceFunction` is returned. For templated
-    functions, pass the function symbol as follows:
-    func_name<template_arg_0,...,template_arg_N>
-
-    This setting does nothing on devices where the size of the L1 cache and
-    shared memory are fixed.
-
-    Launching a kernel with a different preference than the most recent
-    preference setting may insert a device-side synchronization point.
-
-    The supported cache configurations are:
-
-    - :py:obj:`~.cudaFuncCachePreferNone`: no preference for shared memory
-      or L1 (default)
-
-    - :py:obj:`~.cudaFuncCachePreferShared`: prefer larger shared memory
-      and smaller L1 cache
-
-    - :py:obj:`~.cudaFuncCachePreferL1`: prefer larger L1 cache and smaller
-      shared memory
-
-    - :py:obj:`~.cudaFuncCachePreferEqual`: prefer equal size L1 cache and
-      shared memory
-
-    Parameters
-    ----------
-    func : Any
-        Device function symbol
-    cacheConfig : :py:obj:`~.cudaFuncCache`
-        Requested cache configuration
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`2
-
-    See Also
-    --------
-    cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cudaFuncGetAttributes (C API)`, :py:obj:`~.cudaLaunchKernel (C API)`, :py:obj:`~.cuFuncSetCacheConfig`
-    """
-    cyfunc = utils.HelperInputVoidPtr(func)
-    cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
-    cdef cyruntime.cudaFuncCache cycacheConfig = cacheConfig.value
-    err = cyruntime.cudaFuncSetCacheConfig(cyfunc_ptr, cycacheConfig)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaFuncGetAttributes' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaFuncGetAttributes(func):
-    """ Find out attributes for a given function.
-
-    This function obtains the attributes of a function specified via
-    `func`. `func` is a device function symbol and must be declared as a
-    `None` function. The fetched attributes are placed in `attr`. If the
-    specified function does not exist, then
-    :py:obj:`~.cudaErrorInvalidDeviceFunction` is returned. For templated
-    functions, pass the function symbol as follows:
-    func_name<template_arg_0,...,template_arg_N>
-
-    Note that some function attributes such as
-    :py:obj:`~.maxThreadsPerBlock` may vary based on the device that is
-    currently being used.
-
-    Parameters
-    ----------
-    func : Any
-        Device function symbol
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`2
-    attr : :py:obj:`~.cudaFuncAttributes`
-        Return pointer to function's attributes
-
-    See Also
-    --------
-    :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncGetAttributes (C++ API), :py:obj:`~.cudaLaunchKernel (C API)`, :py:obj:`~.cuFuncGetAttribute`
-    """
-    cdef cudaFuncAttributes attr = cudaFuncAttributes()
-    cyfunc = utils.HelperInputVoidPtr(func)
-    cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
-    err = cyruntime.cudaFuncGetAttributes(<cyruntime.cudaFuncAttributes*>attr._ptr, cyfunc_ptr)
-    return (cudaError_t(err), attr)
-{{endif}}
-
-{{if 'cudaFuncSetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaFuncSetAttribute(func, attr not None : cudaFuncAttribute, int value):
-    """ Set attributes for a given function.
-
-    This function sets the attributes of a function specified via `func`.
-    The parameter `func` must be a pointer to a function that executes on
-    the device. The parameter specified by `func` must be declared as a
-    `None` function. The enumeration defined by `attr` is set to the value
-    defined by `value`. If the specified function does not exist, then
-    :py:obj:`~.cudaErrorInvalidDeviceFunction` is returned. If the
-    specified attribute cannot be written, or if the value is incorrect,
-    then :py:obj:`~.cudaErrorInvalidValue` is returned.
-
-    Valid values for `attr` are:
-
-    - :py:obj:`~.cudaFuncAttributeMaxDynamicSharedMemorySize` - The
-      requested maximum size in bytes of dynamically-allocated shared
-      memory. The sum of this value and the function attribute
-      :py:obj:`~.sharedSizeBytes` cannot exceed the device attribute
-      :py:obj:`~.cudaDevAttrMaxSharedMemoryPerBlockOptin`. The maximal size
-      of requestable dynamic shared memory may differ by GPU architecture.
-
-    - :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout` - On
-      devices where the L1 cache and shared memory use the same hardware
-      resources, this sets the shared memory carveout preference, in
-      percent of the total shared memory. See
-      :py:obj:`~.cudaDevAttrMaxSharedMemoryPerMultiprocessor`. This is only
-      a hint, and the driver can choose a different ratio if required to
-      execute the function.
-
-    - :py:obj:`~.cudaFuncAttributeRequiredClusterWidth`: The required
-      cluster width in blocks. The width, height, and depth values must
-      either all be 0 or all be positive. The validity of the cluster
-      dimensions is checked at launch time. If the value is set during
-      compile time, it cannot be set at runtime. Setting it at runtime will
-      return cudaErrorNotPermitted.
-
-    - :py:obj:`~.cudaFuncAttributeRequiredClusterHeight`: The required
-      cluster height in blocks. The width, height, and depth values must
-      either all be 0 or all be positive. The validity of the cluster
-      dimensions is checked at launch time. If the value is set during
-      compile time, it cannot be set at runtime. Setting it at runtime will
-      return cudaErrorNotPermitted.
-
-    - :py:obj:`~.cudaFuncAttributeRequiredClusterDepth`: The required
-      cluster depth in blocks. The width, height, and depth values must
-      either all be 0 or all be positive. The validity of the cluster
-      dimensions is checked at launch time. If the value is set during
-      compile time, it cannot be set at runtime. Setting it at runtime will
-      return cudaErrorNotPermitted.
-
-    - :py:obj:`~.cudaFuncAttributeNonPortableClusterSizeAllowed`: Indicates
-      whether the function can be launched with non-portable cluster size.
-      1 is allowed, 0 is disallowed.
-
-    - :py:obj:`~.cudaFuncAttributeClusterSchedulingPolicyPreference`: The
-      block scheduling policy of a function. The value type is
-      cudaClusterSchedulingPolicy.
-
-    cudaLaunchKernel (C++ API), cudaFuncSetCacheConfig (C++ API),
-    :py:obj:`~.cudaFuncGetAttributes (C API)`,
-
-    Parameters
-    ----------
-    func : Any
-        Function to get attributes of
-    attr : :py:obj:`~.cudaFuncAttribute`
-        Attribute to set
-    value : int
-        Value to set
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`
-    """
-    cyfunc = utils.HelperInputVoidPtr(func)
-    cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
-    cdef cyruntime.cudaFuncAttribute cyattr = attr.value
-    err = cyruntime.cudaFuncSetAttribute(cyfunc_ptr, cyattr, value)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaLaunchHostFunc' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaLaunchHostFunc(stream, fn, userData):
-    """ Enqueues a host function call in a stream.
-
-    Enqueues a host function to run in a stream. The function will be
-    called after currently enqueued work and will block work added after
-    it.
-
-    The host function must not make any CUDA API calls. Attempting to use a
-    CUDA API may result in :py:obj:`~.cudaErrorNotPermitted`, but this is
-    not required. The host function must not perform any synchronization
-    that may depend on outstanding CUDA work not mandated to run earlier.
-    Host functions without a mandated order (such as in independent
-    streams) execute in undefined order and may be serialized.
-
-    For the purposes of Unified Memory, execution makes a number of
-    guarantees:
-
-    - The stream is considered idle for the duration of the function's
-      execution. Thus, for example, the function may always use memory
-      attached to the stream it was enqueued in.
-
-    - The start of execution of the function has the same effect as
-      synchronizing an event recorded in the same stream immediately prior
-      to the function. It thus synchronizes streams which have been
-      "joined" prior to the function.
-
-    - Adding device work to any stream does not have the effect of making
-      the stream active until all preceding host functions and stream
-      callbacks have executed. Thus, for example, a function might use
-      global attached memory even if work has been added to another stream,
-      if the work has been ordered behind the function call with an event.
-
-    - Completion of the function does not cause a stream to become active
-      except as described above. The stream will remain idle if no device
-      work follows the function, and will remain idle across consecutive
-      host functions or stream callbacks without device work in between.
-      Thus, for example, stream synchronization can be done by signaling
-      from a host function at the end of the stream.
-
-    Note that, in constrast to :py:obj:`~.cuStreamAddCallback`, the
-    function will not be called in the event of an error in the CUDA
-    context.
-
-    Parameters
-    ----------
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to enqueue function call in
-    fn : :py:obj:`~.cudaHostFn_t`
-        The function to call once preceding stream operations are complete
-    userData : Any
-        User-specified data to be passed to the function
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
-
-    See Also
-    --------
-    :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cudaStreamAttachMemAsync`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cuLaunchHostFunc`
-    """
-    cdef cyruntime.cudaHostFn_t cyfn
-    if fn is None:
-        cyfn = <cyruntime.cudaHostFn_t><void_ptr>0
-    elif isinstance(fn, (cudaHostFn_t,)):
-        pfn = int(fn)
-        cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
-    else:
-        pfn = int(cudaHostFn_t(fn))
-        cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cyuserData = utils.HelperInputVoidPtr(userData)
-    cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
-    with nogil:
-        err = cyruntime.cudaLaunchHostFunc(cystream, cyfn, cyuserData_ptr)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaFuncSetSharedMemConfig' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaFuncSetSharedMemConfig(func, config not None : cudaSharedMemConfig):
-    """ Sets the shared memory configuration for a device function.
-
-    [Deprecated]
-
-    On devices with configurable shared memory banks, this function will
-    force all subsequent launches of the specified device function to have
-    the given shared memory bank size configuration. On any given launch of
-    the function, the shared memory configuration of the device will be
-    temporarily changed if needed to suit the function's preferred
-    configuration. Changes in shared memory configuration between
-    subsequent launches of functions, may introduce a device side
-    synchronization point.
-
-    Any per-function setting of shared memory bank size set via
-    :py:obj:`~.cudaFuncSetSharedMemConfig` will override the device wide
-    setting set by :py:obj:`~.cudaDeviceSetSharedMemConfig`.
-
-    Changing the shared memory bank size will not increase shared memory
-    usage or affect occupancy of kernels, but may have major effects on
-    performance. Larger bank sizes will allow for greater potential
-    bandwidth to shared memory, but will change what kinds of accesses to
-    shared memory will result in bank conflicts.
-
-    This function will do nothing on devices with fixed shared memory bank
-    size.
-
-    For templated functions, pass the function symbol as follows:
-    func_name<template_arg_0,...,template_arg_N>
-
-    The supported bank configurations are:
-
-    - :py:obj:`~.cudaSharedMemBankSizeDefault`: use the device's shared
-      memory configuration when launching this function.
-
-    - :py:obj:`~.cudaSharedMemBankSizeFourByte`: set shared memory bank
-      width to be four bytes natively when launching this function.
-
-    - :py:obj:`~.cudaSharedMemBankSizeEightByte`: set shared memory bank
-      width to be eight bytes natively when launching this function.
-
-    Parameters
-    ----------
-    func : Any
-        Device function symbol
-    config : :py:obj:`~.cudaSharedMemConfig`
-        Requested shared memory configuration
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`,2
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceSetSharedMemConfig`, :py:obj:`~.cudaDeviceGetSharedMemConfig`, :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuFuncSetSharedMemConfig`
-    """
-    cyfunc = utils.HelperInputVoidPtr(func)
-    cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
-    cdef cyruntime.cudaSharedMemConfig cyconfig = config.value
-    err = cyruntime.cudaFuncSetSharedMemConfig(cyfunc_ptr, cyconfig)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaOccupancyMaxActiveBlocksPerMultiprocessor' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaOccupancyMaxActiveBlocksPerMultiprocessor(func, int blockSize, size_t dynamicSMemSize):
-    """ Returns occupancy for a device function.
-
-    Returns in `*numBlocks` the maximum number of active blocks per
-    streaming multiprocessor for the device function.
-
-    Parameters
-    ----------
-    func : Any
-        Kernel function for which occupancy is calculated
-    blockSize : int
-        Block size the kernel is intended to be launched with
-    dynamicSMemSize : size_t
-        Per-block dynamic shared memory usage intended, in bytes
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`,
-    numBlocks : int
-        Returned occupancy
-
-    See Also
-    --------
-    :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), cudaOccupancyAvailableDynamicSMemPerBlock (C++ API), :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessor`
-    """
-    cdef int numBlocks = 0
-    cyfunc = utils.HelperInputVoidPtr(func)
-    cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
-    err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, cyfunc_ptr, blockSize, dynamicSMemSize)
-    return (cudaError_t(err), numBlocks)
-{{endif}}
-
-{{if 'cudaOccupancyAvailableDynamicSMemPerBlock' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaOccupancyAvailableDynamicSMemPerBlock(func, int numBlocks, int blockSize):
-    """ Returns dynamic shared memory available per block when launching `numBlocks` blocks on SM.
-
-    Returns in `*dynamicSmemSize` the maximum size of dynamic shared memory
-    to allow `numBlocks` blocks per SM.
-
-    Parameters
-    ----------
-    func : Any
-        Kernel function for which occupancy is calculated
-    numBlocks : int
-        Number of blocks to fit on SM
-    blockSize : int
-        Size of the block
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`,
-    dynamicSmemSize : int
-        Returned maximum dynamic shared memory
-
-    See Also
-    --------
-    :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), :py:obj:`~.cudaOccupancyAvailableDynamicSMemPerBlock`
-    """
-    cdef size_t dynamicSmemSize = 0
-    cyfunc = utils.HelperInputVoidPtr(func)
-    cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
-    err = cyruntime.cudaOccupancyAvailableDynamicSMemPerBlock(&dynamicSmemSize, cyfunc_ptr, numBlocks, blockSize)
-    return (cudaError_t(err), dynamicSmemSize)
-{{endif}}
-
-{{if 'cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(func, int blockSize, size_t dynamicSMemSize, unsigned int flags):
-    """ Returns occupancy for a device function with the specified flags.
-
-    Returns in `*numBlocks` the maximum number of active blocks per
-    streaming multiprocessor for the device function.
-
-    The `flags` parameter controls how special cases are handled. Valid
-    flags include:
-
-    - :py:obj:`~.cudaOccupancyDefault`: keeps the default behavior as
-      :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor`
-
-    - :py:obj:`~.cudaOccupancyDisableCachingOverride`: This flag suppresses
-      the default behavior on platform where global caching affects
-      occupancy. On such platforms, if caching is enabled, but per-block SM
-      resource usage would result in zero occupancy, the occupancy
-      calculator will calculate the occupancy as if caching is disabled.
-      Setting this flag makes the occupancy calculator to return 0 in such
-      cases. More information can be found about this feature in the
-      "Unified L1/Texture Cache" section of the Maxwell tuning guide.
-
-    Parameters
-    ----------
-    func : Any
-        Kernel function for which occupancy is calculated
-    blockSize : int
-        Block size the kernel is intended to be launched with
-    dynamicSMemSize : size_t
-        Per-block dynamic shared memory usage intended, in bytes
-    flags : unsigned int
-        Requested behavior for the occupancy calculator
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`,
-    numBlocks : int
-        Returned occupancy
-
-    See Also
-    --------
-    :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), cudaOccupancyAvailableDynamicSMemPerBlock (C++ API), :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`
-    """
-    cdef int numBlocks = 0
-    cyfunc = utils.HelperInputVoidPtr(func)
-    cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
-    err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, cyfunc_ptr, blockSize, dynamicSMemSize, flags)
-    return (cudaError_t(err), numBlocks)
-{{endif}}
-
-{{if 'cudaMallocManaged' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMallocManaged(size_t size, unsigned int flags):
-    """ Allocates memory that will be automatically managed by the Unified Memory system.
-
-    Allocates `size` bytes of managed memory on the device and returns in
-    `*devPtr` a pointer to the allocated memory. If the device doesn't
-    support allocating managed memory, :py:obj:`~.cudaErrorNotSupported` is
-    returned. Support for managed memory can be queried using the device
-    attribute :py:obj:`~.cudaDevAttrManagedMemory`. The allocated memory is
-    suitably aligned for any kind of variable. The memory is not cleared.
-    If `size` is 0, :py:obj:`~.cudaMallocManaged` returns
-    :py:obj:`~.cudaErrorInvalidValue`. The pointer is valid on the CPU and
-    on all GPUs in the system that support managed memory. All accesses to
-    this pointer must obey the Unified Memory programming model.
-
-    `flags` specifies the default stream association for this allocation.
-    `flags` must be one of :py:obj:`~.cudaMemAttachGlobal` or
-    :py:obj:`~.cudaMemAttachHost`. The default value for `flags` is
-    :py:obj:`~.cudaMemAttachGlobal`. If :py:obj:`~.cudaMemAttachGlobal` is
-    specified, then this memory is accessible from any stream on any
-    device. If :py:obj:`~.cudaMemAttachHost` is specified, then the
-    allocation should not be accessed from devices that have a zero value
-    for the device attribute
-    :py:obj:`~.cudaDevAttrConcurrentManagedAccess`; an explicit call to
-    :py:obj:`~.cudaStreamAttachMemAsync` will be required to enable access
-    on such devices.
-
-    If the association is later changed via
-    :py:obj:`~.cudaStreamAttachMemAsync` to a single stream, the default
-    association, as specifed during :py:obj:`~.cudaMallocManaged`, is
-    restored when that stream is destroyed. For managed variables, the
-    default association is always :py:obj:`~.cudaMemAttachGlobal`. Note
-    that destroying a stream is an asynchronous operation, and as a result,
-    the change to default association won't happen until all work in the
-    stream has completed.
-
-    Memory allocated with :py:obj:`~.cudaMallocManaged` should be released
-    with :py:obj:`~.cudaFree`.
-
-    Device memory oversubscription is possible for GPUs that have a non-
-    zero value for the device attribute
-    :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. Managed memory on such
-    GPUs may be evicted from device memory to host memory at any time by
-    the Unified Memory driver in order to make room for other allocations.
-
-    In a system where all GPUs have a non-zero value for the device
-    attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`, managed
-    memory may not be populated when this API returns and instead may be
-    populated on access. In such systems, managed memory can migrate to any
-    processor's memory at any time. The Unified Memory driver will employ
-    heuristics to maintain data locality and prevent excessive page faults
-    to the extent possible. The application can also guide the driver about
-    memory usage patterns via :py:obj:`~.cudaMemAdvise`. The application
-    can also explicitly migrate memory to a desired processor's memory via
-    :py:obj:`~.cudaMemPrefetchAsync`.
-
-    In a multi-GPU system where all of the GPUs have a zero value for the
-    device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess` and all
-    the GPUs have peer-to-peer support with each other, the physical
-    storage for managed memory is created on the GPU which is active at the
-    time :py:obj:`~.cudaMallocManaged` is called. All other GPUs will
-    reference the data at reduced bandwidth via peer mappings over the PCIe
-    bus. The Unified Memory driver does not migrate memory among such GPUs.
-
-    In a multi-GPU system where not all GPUs have peer-to-peer support with
-    each other and where the value of the device attribute
-    :py:obj:`~.cudaDevAttrConcurrentManagedAccess` is zero for at least one
-    of those GPUs, the location chosen for physical storage of managed
-    memory is system-dependent.
-
-    - On Linux, the location chosen will be device memory as long as the
-      current set of active contexts are on devices that either have peer-
-      to-peer support with each other or have a non-zero value for the
-      device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. If
-      there is an active context on a GPU that does not have a non-zero
-      value for that device attribute and it does not have peer-to-peer
-      support with the other devices that have active contexts on them,
-      then the location for physical storage will be 'zero-copy' or host
-      memory. Note that this means that managed memory that is located in
-      device memory is migrated to host memory if a new context is created
-      on a GPU that doesn't have a non-zero value for the device attribute
-      and does not support peer-to-peer with at least one of the other
-      devices that has an active context. This in turn implies that context
-      creation may fail if there is insufficient host memory to migrate all
-      managed allocations.
-
-    - On Windows, the physical storage is always created in 'zero-copy' or
-      host memory. All GPUs will reference the data at reduced bandwidth
-      over the PCIe bus. In these circumstances, use of the environment
-      variable CUDA_VISIBLE_DEVICES is recommended to restrict CUDA to only
-      use those GPUs that have peer-to-peer support. Alternatively, users
-      can also set CUDA_MANAGED_FORCE_DEVICE_ALLOC to a non-zero value to
-      force the driver to always use device memory for physical storage.
-      When this environment variable is set to a non-zero value, all
-      devices used in that process that support managed memory have to be
-      peer-to-peer compatible with each other. The error
-      :py:obj:`~.cudaErrorInvalidDevice` will be returned if a device that
-      supports managed memory is used and it is not peer-to-peer compatible
-      with any of the other managed memory supporting devices that were
-      previously used in that process, even if :py:obj:`~.cudaDeviceReset`
-      has been called on those devices. These environment variables are
-      described in the CUDA programming guide under the "CUDA environment
-      variables" section.
-
-    Parameters
-    ----------
-    size : size_t
-        Requested allocation size in bytes
-    flags : unsigned int
-        Must be either :py:obj:`~.cudaMemAttachGlobal` or
-        :py:obj:`~.cudaMemAttachHost` (defaults to
-        :py:obj:`~.cudaMemAttachGlobal`)
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
-    devPtr : Any
-        Pointer to allocated device memory
-
-    See Also
-    --------
-    :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaStreamAttachMemAsync`, :py:obj:`~.cuMemAllocManaged`
-    """
-    cdef void_ptr devPtr = 0
-    with nogil:
-        err = cyruntime.cudaMallocManaged(<void**>&devPtr, size, flags)
-
-    return (cudaError_t(err), devPtr)
-{{endif}}
-
-{{if 'cudaMalloc' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMalloc(size_t size):
-    """ Allocate memory on the device.
-
-    Allocates `size` bytes of linear memory on the device and returns in
-    `*devPtr` a pointer to the allocated memory. The allocated memory is
-    suitably aligned for any kind of variable. The memory is not cleared.
-    :py:obj:`~.cudaMalloc()` returns :py:obj:`~.cudaErrorMemoryAllocation`
-    in case of failure.
-
-    The device version of :py:obj:`~.cudaFree` cannot be used with a
-    `*devPtr` allocated using the host API, and vice versa.
-
-    Parameters
-    ----------
-    size : size_t
-        Requested allocation size in bytes
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-    devPtr : Any
-        Pointer to allocated device memory
-
-    See Also
-    --------
-    :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemAlloc`
-    """
-    cdef void_ptr devPtr = 0
-    with nogil:
-        err = cyruntime.cudaMalloc(<void**>&devPtr, size)
-
-    return (cudaError_t(err), devPtr)
-{{endif}}
-
-{{if 'cudaMallocHost' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMallocHost(size_t size):
-    """ Allocates page-locked memory on the host.
-
-    Allocates `size` bytes of host memory that is page-locked and
-    accessible to the device. The driver tracks the virtual memory ranges
-    allocated with this function and automatically accelerates calls to
-    functions such as :py:obj:`~.cudaMemcpy`*(). Since the memory can be
-    accessed directly by the device, it can be read or written with much
-    higher bandwidth than pageable memory obtained with functions such as
-    :py:obj:`~.malloc()`.
-
-    On systems where :py:obj:`~.pageableMemoryAccessUsesHostPageTables` is
-    true, :py:obj:`~.cudaMallocHost` may not page-lock the allocated
-    memory.
-
-    Page-locking excessive amounts of memory with
-    :py:obj:`~.cudaMallocHost()` may degrade system performance, since it
-    reduces the amount of memory available to the system for paging. As a
-    result, this function is best used sparingly to allocate staging areas
-    for data exchange between host and device.
-
-    Parameters
-    ----------
-    size : size_t
-        Requested allocation size in bytes
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-    ptr : Any
-        Pointer to allocated host memory
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, cudaMallocHost (C++ API), :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemAllocHost`
-    """
-    cdef void_ptr ptr = 0
-    err = cyruntime.cudaMallocHost(<void**>&ptr, size)
-    return (cudaError_t(err), ptr)
-{{endif}}
-
-{{if 'cudaMallocPitch' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMallocPitch(size_t width, size_t height):
-    """ Allocates pitched memory on the device.
-
-    Allocates at least `width` (in bytes) * `height` bytes of linear memory
-    on the device and returns in `*devPtr` a pointer to the allocated
-    memory. The function may pad the allocation to ensure that
-    corresponding pointers in any given row will continue to meet the
-    alignment requirements for coalescing as the address is updated from
-    row to row. The pitch returned in `*pitch` by
-    :py:obj:`~.cudaMallocPitch()` is the width in bytes of the allocation.
-    The intended usage of `pitch` is as a separate parameter of the
-    allocation, used to compute addresses within the 2D array. Given the
-    row and column of an array element of type `T`, the address is computed
-    as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For allocations of 2D arrays, it is recommended that programmers
-    consider performing pitch allocations using
-    :py:obj:`~.cudaMallocPitch()`. Due to pitch alignment restrictions in
-    the hardware, this is especially true if the application will be
-    performing 2D memory copies between different regions of device memory
-    (whether linear memory or CUDA arrays).
-
-    Parameters
-    ----------
-    width : size_t
-        Requested pitched allocation width (in bytes)
-    height : size_t
-        Requested pitched allocation height
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-    devPtr : Any
-        Pointer to allocated pitched device memory
-    pitch : int
-        Pitch for allocation
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemAllocPitch`
-    """
-    cdef void_ptr devPtr = 0
-    cdef size_t pitch = 0
-    err = cyruntime.cudaMallocPitch(<void**>&devPtr, &pitch, width, height)
-    return (cudaError_t(err), devPtr, pitch)
-{{endif}}
-
-{{if 'cudaMallocArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMallocArray(desc : Optional[cudaChannelFormatDesc], size_t width, size_t height, unsigned int flags):
-    """ Allocate an array on the device.
-
-    Allocates a CUDA array according to the
-    :py:obj:`~.cudaChannelFormatDesc` structure `desc` and returns a handle
-    to the new CUDA array in `*array`.
-
-    The :py:obj:`~.cudaChannelFormatDesc` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.cudaChannelFormatKind` is one of
-    :py:obj:`~.cudaChannelFormatKindSigned`,
-    :py:obj:`~.cudaChannelFormatKindUnsigned`, or
-    :py:obj:`~.cudaChannelFormatKindFloat`.
-
-    The `flags` parameter enables different options to be specified that
-    affect the allocation, as follows.
-
-    - :py:obj:`~.cudaArrayDefault`: This flag's value is defined to be 0
-      and provides default array allocation
-
-    - :py:obj:`~.cudaArraySurfaceLoadStore`: Allocates an array that can be
-      read from or written to using a surface reference
-
-    - :py:obj:`~.cudaArrayTextureGather`: This flag indicates that texture
-      gather operations will be performed on the array.
-
-    - :py:obj:`~.cudaArraySparse`: Allocates a CUDA array without physical
-      backing memory. The subregions within this sparse array can later be
-      mapped onto a physical memory allocation by calling
-      :py:obj:`~.cuMemMapArrayAsync`. The physical backing memory must be
-      allocated via :py:obj:`~.cuMemCreate`.
-
-    - :py:obj:`~.cudaArrayDeferredMapping`: Allocates a CUDA array without
-      physical backing memory. The entire array can later be mapped onto a
-      physical memory allocation by calling :py:obj:`~.cuMemMapArrayAsync`.
-      The physical backing memory must be allocated via
-      :py:obj:`~.cuMemCreate`.
-
-    `width` and `height` must meet certain size requirements. See
-    :py:obj:`~.cudaMalloc3DArray()` for more details.
-
-    Parameters
-    ----------
-    desc : :py:obj:`~.cudaChannelFormatDesc`
-        Requested channel format
-    width : size_t
-        Requested array allocation width
-    height : size_t
-        Requested array allocation height
-    flags : unsigned int
-        Requested properties of allocated array
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-    array : :py:obj:`~.cudaArray_t`
-        Pointer to allocated array in device memory
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuArrayCreate`
-    """
-    cdef cudaArray_t array = cudaArray_t()
-    cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._ptr if desc != None else NULL
-    with nogil:
-        err = cyruntime.cudaMallocArray(<cyruntime.cudaArray_t*>array._ptr, cydesc_ptr, width, height, flags)
-
-    return (cudaError_t(err), array)
-{{endif}}
-
-{{if 'cudaFree' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaFree(devPtr):
-    """ Frees memory on the device.
-
-    Frees the memory space pointed to by `devPtr`, which must have been
-    returned by a previous call to one of the following memory allocation
-    APIs - :py:obj:`~.cudaMalloc()`, :py:obj:`~.cudaMallocPitch()`,
-    :py:obj:`~.cudaMallocManaged()`, :py:obj:`~.cudaMallocAsync()`,
-    :py:obj:`~.cudaMallocFromPoolAsync()`.
-
-    Note - This API will not perform any implicit synchronization when the
-    pointer was allocated with :py:obj:`~.cudaMallocAsync` or
-    :py:obj:`~.cudaMallocFromPoolAsync`. Callers must ensure that all
-    accesses to these pointer have completed before invoking
-    :py:obj:`~.cudaFree`. For best performance and memory reuse, users
-    should use :py:obj:`~.cudaFreeAsync` to free memory allocated via the
-    stream ordered memory allocator. For all other pointers, this API may
-    perform implicit synchronization.
-
-    If :py:obj:`~.cudaFree`(`devPtr`) has already been called before, an
-    error is returned. If `devPtr` is 0, no operation is performed.
-    :py:obj:`~.cudaFree()` returns :py:obj:`~.cudaErrorValue` in case of
-    failure.
-
-    The device version of :py:obj:`~.cudaFree` cannot be used with a
-    `*devPtr` allocated using the host API, and vice versa.
-
-    Parameters
-    ----------
-    devPtr : Any
-        Device pointer to memory to free
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaMallocFromPoolAsync` :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaFreeAsync` :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemFree`
-    """
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    with nogil:
-        err = cyruntime.cudaFree(cydevPtr_ptr)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaFreeHost' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaFreeHost(ptr):
-    """ Frees page-locked memory.
-
-    Frees the memory space pointed to by `hostPtr`, which must have been
-    returned by a previous call to :py:obj:`~.cudaMallocHost()` or
-    :py:obj:`~.cudaHostAlloc()`.
-
-    Parameters
-    ----------
-    ptr : Any
-        Pointer to memory to free
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemFreeHost`
-    """
-    cyptr = utils.HelperInputVoidPtr(ptr)
-    cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
-    with nogil:
-        err = cyruntime.cudaFreeHost(cyptr_ptr)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaFreeArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaFreeArray(array):
-    """ Frees an array on the device.
-
-    Frees the CUDA array `array`, which must have been returned by a
-    previous call to :py:obj:`~.cudaMallocArray()`. If `devPtr` is 0, no
-    operation is performed.
-
-    Parameters
-    ----------
-    array : :py:obj:`~.cudaArray_t`
-        Pointer to array to free
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuArrayDestroy`
-    """
-    cdef cyruntime.cudaArray_t cyarray
-    if array is None:
-        cyarray = <cyruntime.cudaArray_t><void_ptr>0
-    elif isinstance(array, (cudaArray_t,)):
-        parray = int(array)
-        cyarray = <cyruntime.cudaArray_t><void_ptr>parray
-    else:
-        parray = int(cudaArray_t(array))
-        cyarray = <cyruntime.cudaArray_t><void_ptr>parray
-    with nogil:
-        err = cyruntime.cudaFreeArray(cyarray)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaFreeMipmappedArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaFreeMipmappedArray(mipmappedArray):
-    """ Frees a mipmapped array on the device.
-
-    Frees the CUDA mipmapped array `mipmappedArray`, which must have been
-    returned by a previous call to :py:obj:`~.cudaMallocMipmappedArray()`.
-    If `devPtr` is 0, no operation is performed.
-
-    Parameters
-    ----------
-    mipmappedArray : :py:obj:`~.cudaMipmappedArray_t`
-        Pointer to mipmapped array to free
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMipmappedArrayDestroy`
-    """
-    cdef cyruntime.cudaMipmappedArray_t cymipmappedArray
-    if mipmappedArray is None:
-        cymipmappedArray = <cyruntime.cudaMipmappedArray_t><void_ptr>0
-    elif isinstance(mipmappedArray, (cudaMipmappedArray_t,)):
-        pmipmappedArray = int(mipmappedArray)
-        cymipmappedArray = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmappedArray
-    else:
-        pmipmappedArray = int(cudaMipmappedArray_t(mipmappedArray))
-        cymipmappedArray = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmappedArray
-    err = cyruntime.cudaFreeMipmappedArray(cymipmappedArray)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaHostAlloc' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaHostAlloc(size_t size, unsigned int flags):
-    """ Allocates page-locked memory on the host.
-
-    Allocates `size` bytes of host memory that is page-locked and
-    accessible to the device. The driver tracks the virtual memory ranges
-    allocated with this function and automatically accelerates calls to
-    functions such as :py:obj:`~.cudaMemcpy()`. Since the memory can be
-    accessed directly by the device, it can be read or written with much
-    higher bandwidth than pageable memory obtained with functions such as
-    :py:obj:`~.malloc()`. Allocating excessive amounts of pinned memory may
-    degrade system performance, since it reduces the amount of memory
-    available to the system for paging. As a result, this function is best
-    used sparingly to allocate staging areas for data exchange between host
-    and device.
-
-    The `flags` parameter enables different options to be specified that
-    affect the allocation, as follows.
-
-    - :py:obj:`~.cudaHostAllocDefault`: This flag's value is defined to be
-      0 and causes :py:obj:`~.cudaHostAlloc()` to emulate
-      :py:obj:`~.cudaMallocHost()`.
-
-    - :py:obj:`~.cudaHostAllocPortable`: The memory returned by this call
-      will be considered as pinned memory by all CUDA contexts, not just
-      the one that performed the allocation.
-
-    - :py:obj:`~.cudaHostAllocMapped`: Maps the allocation into the CUDA
-      address space. The device pointer to the memory may be obtained by
-      calling :py:obj:`~.cudaHostGetDevicePointer()`.
-
-    - :py:obj:`~.cudaHostAllocWriteCombined`: Allocates the memory as
-      write-combined (WC). WC memory can be transferred across the PCI
-      Express bus more quickly on some system configurations, but cannot be
-      read efficiently by most CPUs. WC memory is a good option for buffers
-      that will be written by the CPU and read by the device via mapped
-      pinned memory or host->device transfers.
-
-    All of these flags are orthogonal to one another: a developer may
-    allocate memory that is portable, mapped and/or write-combined with no
-    restrictions.
-
-    In order for the :py:obj:`~.cudaHostAllocMapped` flag to have any
-    effect, the CUDA context must support the :py:obj:`~.cudaDeviceMapHost`
-    flag, which can be checked via :py:obj:`~.cudaGetDeviceFlags()`. The
-    :py:obj:`~.cudaDeviceMapHost` flag is implicitly set for contexts
-    created via the runtime API.
-
-    The :py:obj:`~.cudaHostAllocMapped` flag may be specified on CUDA
-    contexts for devices that do not support mapped pinned memory. The
-    failure is deferred to :py:obj:`~.cudaHostGetDevicePointer()` because
-    the memory may be mapped into other CUDA contexts via the
-    :py:obj:`~.cudaHostAllocPortable` flag.
-
-    Memory allocated by this function must be freed with
-    :py:obj:`~.cudaFreeHost()`.
-
-    Parameters
-    ----------
-    size : size_t
-        Requested allocation size in bytes
-    flags : unsigned int
-        Requested properties of allocated memory
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-    pHost : Any
-        Device pointer to allocated memory
-
-    See Also
-    --------
-    :py:obj:`~.cudaSetDeviceFlags`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaGetDeviceFlags`, :py:obj:`~.cuMemHostAlloc`
-    """
-    cdef void_ptr pHost = 0
-    with nogil:
-        err = cyruntime.cudaHostAlloc(<void**>&pHost, size, flags)
-
-    return (cudaError_t(err), pHost)
-{{endif}}
-
-{{if 'cudaHostRegister' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaHostRegister(ptr, size_t size, unsigned int flags):
-    """ Registers an existing host memory range for use by CUDA.
-
-    Page-locks the memory range specified by `ptr` and `size` and maps it
-    for the device(s) as specified by `flags`. This memory range also is
-    added to the same tracking mechanism as :py:obj:`~.cudaHostAlloc()` to
-    automatically accelerate calls to functions such as
-    :py:obj:`~.cudaMemcpy()`. Since the memory can be accessed directly by
-    the device, it can be read or written with much higher bandwidth than
-    pageable memory that has not been registered. Page-locking excessive
-    amounts of memory may degrade system performance, since it reduces the
-    amount of memory available to the system for paging. As a result, this
-    function is best used sparingly to register staging areas for data
-    exchange between host and device.
-
-    On systems where :py:obj:`~.pageableMemoryAccessUsesHostPageTables` is
-    true, :py:obj:`~.cudaHostRegister` will not page-lock the memory range
-    specified by `ptr` but only populate unpopulated pages.
-
-    :py:obj:`~.cudaHostRegister` is supported only on I/O coherent devices
-    that have a non-zero value for the device attribute
-    :py:obj:`~.cudaDevAttrHostRegisterSupported`.
-
-    The `flags` parameter enables different options to be specified that
-    affect the allocation, as follows.
-
-    - :py:obj:`~.cudaHostRegisterDefault`: On a system with unified virtual
-      addressing, the memory will be both mapped and portable. On a system
-      with no unified virtual addressing, the memory will be neither mapped
-      nor portable.
-
-    - :py:obj:`~.cudaHostRegisterPortable`: The memory returned by this
-      call will be considered as pinned memory by all CUDA contexts, not
-      just the one that performed the allocation.
-
-    - :py:obj:`~.cudaHostRegisterMapped`: Maps the allocation into the CUDA
-      address space. The device pointer to the memory may be obtained by
-      calling :py:obj:`~.cudaHostGetDevicePointer()`.
-
-    - :py:obj:`~.cudaHostRegisterIoMemory`: The passed memory pointer is
-      treated as pointing to some memory-mapped I/O space, e.g. belonging
-      to a third-party PCIe device, and it will marked as non cache-
-      coherent and contiguous.
-
-    - :py:obj:`~.cudaHostRegisterReadOnly`: The passed memory pointer is
-      treated as pointing to memory that is considered read-only by the
-      device. On platforms without
-      :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, this
-      flag is required in order to register memory mapped to the CPU as
-      read-only. Support for the use of this flag can be queried from the
-      device attribute cudaDeviceAttrReadOnlyHostRegisterSupported. Using
-      this flag with a current context associated with a device that does
-      not have this attribute set will cause :py:obj:`~.cudaHostRegister`
-      to error with cudaErrorNotSupported.
-
-    All of these flags are orthogonal to one another: a developer may page-
-    lock memory that is portable or mapped with no restrictions.
-
-    The CUDA context must have been created with the
-    :py:obj:`~.cudaMapHost` flag in order for the
-    :py:obj:`~.cudaHostRegisterMapped` flag to have any effect.
-
-    The :py:obj:`~.cudaHostRegisterMapped` flag may be specified on CUDA
-    contexts for devices that do not support mapped pinned memory. The
-    failure is deferred to :py:obj:`~.cudaHostGetDevicePointer()` because
-    the memory may be mapped into other CUDA contexts via the
-    :py:obj:`~.cudaHostRegisterPortable` flag.
-
-    For devices that have a non-zero value for the device attribute
-    :py:obj:`~.cudaDevAttrCanUseHostPointerForRegisteredMem`, the memory
-    can also be accessed from the device using the host pointer `ptr`. The
-    device pointer returned by :py:obj:`~.cudaHostGetDevicePointer()` may
-    or may not match the original host pointer `ptr` and depends on the
-    devices visible to the application. If all devices visible to the
-    application have a non-zero value for the device attribute, the device
-    pointer returned by :py:obj:`~.cudaHostGetDevicePointer()` will match
-    the original pointer `ptr`. If any device visible to the application
-    has a zero value for the device attribute, the device pointer returned
-    by :py:obj:`~.cudaHostGetDevicePointer()` will not match the original
-    host pointer `ptr`, but it will be suitable for use on all devices
-    provided Unified Virtual Addressing is enabled. In such systems, it is
-    valid to access the memory using either pointer on devices that have a
-    non-zero value for the device attribute. Note however that such devices
-    should access the memory using only of the two pointers and not both.
-
-    The memory page-locked by this function must be unregistered with
-    :py:obj:`~.cudaHostUnregister()`.
-
-    Parameters
-    ----------
-    ptr : Any
-        Host pointer to memory to page-lock
-    size : size_t
-        Size in bytes of the address range to page-lock in bytes
-    flags : unsigned int
-        Flags for allocation request
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorHostMemoryAlreadyRegistered`, :py:obj:`~.cudaErrorNotSupported`
-
-    See Also
-    --------
-    :py:obj:`~.cudaHostUnregister`, :py:obj:`~.cudaHostGetFlags`, :py:obj:`~.cudaHostGetDevicePointer`, :py:obj:`~.cuMemHostRegister`
-    """
-    cyptr = utils.HelperInputVoidPtr(ptr)
-    cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
-    with nogil:
-        err = cyruntime.cudaHostRegister(cyptr_ptr, size, flags)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaHostUnregister' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaHostUnregister(ptr):
-    """ Unregisters a memory range that was registered with cudaHostRegister.
-
-    Unmaps the memory range whose base address is specified by `ptr`, and
-    makes it pageable again.
-
-    The base address must be the same one specified to
-    :py:obj:`~.cudaHostRegister()`.
-
-    Parameters
-    ----------
-    ptr : Any
-        Host pointer to memory to unregister
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorHostMemoryNotRegistered`
-
-    See Also
-    --------
-    :py:obj:`~.cudaHostUnregister`, :py:obj:`~.cuMemHostUnregister`
-    """
-    cyptr = utils.HelperInputVoidPtr(ptr)
-    cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
-    with nogil:
-        err = cyruntime.cudaHostUnregister(cyptr_ptr)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaHostGetDevicePointer' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaHostGetDevicePointer(pHost, unsigned int flags):
-    """ Passes back device pointer of mapped host memory allocated by cudaHostAlloc or registered by cudaHostRegister.
-
-    Passes back the device pointer corresponding to the mapped, pinned host
-    buffer allocated by :py:obj:`~.cudaHostAlloc()` or registered by
-    :py:obj:`~.cudaHostRegister()`.
-
-    :py:obj:`~.cudaHostGetDevicePointer()` will fail if the
-    :py:obj:`~.cudaDeviceMapHost` flag was not specified before deferred
-    context creation occurred, or if called on a device that does not
-    support mapped, pinned memory.
-
-    For devices that have a non-zero value for the device attribute
-    :py:obj:`~.cudaDevAttrCanUseHostPointerForRegisteredMem`, the memory
-    can also be accessed from the device using the host pointer `pHost`.
-    The device pointer returned by :py:obj:`~.cudaHostGetDevicePointer()`
-    may or may not match the original host pointer `pHost` and depends on
-    the devices visible to the application. If all devices visible to the
-    application have a non-zero value for the device attribute, the device
-    pointer returned by :py:obj:`~.cudaHostGetDevicePointer()` will match
-    the original pointer `pHost`. If any device visible to the application
-    has a zero value for the device attribute, the device pointer returned
-    by :py:obj:`~.cudaHostGetDevicePointer()` will not match the original
-    host pointer `pHost`, but it will be suitable for use on all devices
-    provided Unified Virtual Addressing is enabled. In such systems, it is
-    valid to access the memory using either pointer on devices that have a
-    non-zero value for the device attribute. Note however that such devices
-    should access the memory using only of the two pointers and not both.
-
-    `flags` provides for future releases. For now, it must be set to 0.
-
-    Parameters
-    ----------
-    pHost : Any
-        Requested host pointer mapping
-    flags : unsigned int
-        Flags for extensions (must be 0 for now)
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-    pDevice : Any
-        Returned device pointer for mapped memory
-
-    See Also
-    --------
-    :py:obj:`~.cudaSetDeviceFlags`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`
-    """
-    cdef void_ptr pDevice = 0
-    cypHost = utils.HelperInputVoidPtr(pHost)
-    cdef void* cypHost_ptr = <void*><void_ptr>cypHost.cptr
-    err = cyruntime.cudaHostGetDevicePointer(<void**>&pDevice, cypHost_ptr, flags)
-    return (cudaError_t(err), pDevice)
-{{endif}}
-
-{{if 'cudaHostGetFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaHostGetFlags(pHost):
-    """ Passes back flags used to allocate pinned host memory allocated by cudaHostAlloc.
-
-    :py:obj:`~.cudaHostGetFlags()` will fail if the input pointer does not
-    reside in an address range allocated by :py:obj:`~.cudaHostAlloc()`.
-
-    Parameters
-    ----------
-    pHost : Any
-        Host pointer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pFlags : unsigned int
-        Returned flags word
-
-    See Also
-    --------
-    :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemHostGetFlags`
-    """
-    cdef unsigned int pFlags = 0
-    cypHost = utils.HelperInputVoidPtr(pHost)
-    cdef void* cypHost_ptr = <void*><void_ptr>cypHost.cptr
-    err = cyruntime.cudaHostGetFlags(&pFlags, cypHost_ptr)
-    return (cudaError_t(err), pFlags)
-{{endif}}
-
-{{if 'cudaMalloc3D' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMalloc3D(extent not None : cudaExtent):
-    """ Allocates logical 1D, 2D, or 3D memory objects on the device.
-
-    Allocates at least `width` * `height` * `depth` bytes of linear memory
-    on the device and returns a :py:obj:`~.cudaPitchedPtr` in which `ptr`
-    is a pointer to the allocated memory. The function may pad the
-    allocation to ensure hardware alignment requirements are met. The pitch
-    returned in the `pitch` field of `pitchedDevPtr` is the width in bytes
-    of the allocation.
-
-    The returned :py:obj:`~.cudaPitchedPtr` contains additional fields
-    `xsize` and `ysize`, the logical width and height of the allocation,
-    which are equivalent to the `width` and `height` `extent` parameters
-    provided by the programmer during allocation.
-
-    For allocations of 2D and 3D objects, it is highly recommended that
-    programmers perform allocations using :py:obj:`~.cudaMalloc3D()` or
-    :py:obj:`~.cudaMallocPitch()`. Due to alignment restrictions in the
-    hardware, this is especially true if the application will be performing
-    memory copies involving 2D or 3D objects (whether linear memory or CUDA
-    arrays).
-
-    Parameters
-    ----------
-    extent : :py:obj:`~.cudaExtent`
-        Requested allocation size (`width` field in bytes)
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-    pitchedDevPtr : :py:obj:`~.cudaPitchedPtr`
-        Pointer to allocated pitched device memory
-
-    See Also
-    --------
-    :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMemAllocPitch`
-    """
-    cdef cudaPitchedPtr pitchedDevPtr = cudaPitchedPtr()
-    err = cyruntime.cudaMalloc3D(<cyruntime.cudaPitchedPtr*>pitchedDevPtr._ptr, extent._ptr[0])
-    return (cudaError_t(err), pitchedDevPtr)
-{{endif}}
-
-{{if 'cudaMalloc3DArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMalloc3DArray(desc : Optional[cudaChannelFormatDesc], extent not None : cudaExtent, unsigned int flags):
-    """ Allocate an array on the device.
-
-    Allocates a CUDA array according to the
-    :py:obj:`~.cudaChannelFormatDesc` structure `desc` and returns a handle
-    to the new CUDA array in `*array`.
-
-    The :py:obj:`~.cudaChannelFormatDesc` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.cudaChannelFormatKind` is one of
-    :py:obj:`~.cudaChannelFormatKindSigned`,
-    :py:obj:`~.cudaChannelFormatKindUnsigned`, or
-    :py:obj:`~.cudaChannelFormatKindFloat`.
-
-    :py:obj:`~.cudaMalloc3DArray()` can allocate the following:
-
-    - A 1D array is allocated if the height and depth extents are both
-      zero.
-
-    - A 2D array is allocated if only the depth extent is zero.
-
-    - A 3D array is allocated if all three extents are non-zero.
-
-    - A 1D layered CUDA array is allocated if only the height extent is
-      zero and the cudaArrayLayered flag is set. Each layer is a 1D array.
-      The number of layers is determined by the depth extent.
-
-    - A 2D layered CUDA array is allocated if all three extents are non-
-      zero and the cudaArrayLayered flag is set. Each layer is a 2D array.
-      The number of layers is determined by the depth extent.
-
-    - A cubemap CUDA array is allocated if all three extents are non-zero
-      and the cudaArrayCubemap flag is set. Width must be equal to height,
-      and depth must be six. A cubemap is a special type of 2D layered CUDA
-      array, where the six layers represent the six faces of a cube. The
-      order of the six layers in memory is the same as that listed in
-      :py:obj:`~.cudaGraphicsCubeFace`.
-
-    - A cubemap layered CUDA array is allocated if all three extents are
-      non-zero, and both, cudaArrayCubemap and cudaArrayLayered flags are
-      set. Width must be equal to height, and depth must be a multiple of
-      six. A cubemap layered CUDA array is a special type of 2D layered
-      CUDA array that consists of a collection of cubemaps. The first six
-      layers represent the first cubemap, the next six layers form the
-      second cubemap, and so on.
-
-    The `flags` parameter enables different options to be specified that
-    affect the allocation, as follows.
-
-    - :py:obj:`~.cudaArrayDefault`: This flag's value is defined to be 0
-      and provides default array allocation
-
-    - :py:obj:`~.cudaArrayLayered`: Allocates a layered CUDA array, with
-      the depth extent indicating the number of layers
-
-    - :py:obj:`~.cudaArrayCubemap`: Allocates a cubemap CUDA array. Width
-      must be equal to height, and depth must be six. If the
-      cudaArrayLayered flag is also set, depth must be a multiple of six.
-
-    - :py:obj:`~.cudaArraySurfaceLoadStore`: Allocates a CUDA array that
-      could be read from or written to using a surface reference.
-
-    - :py:obj:`~.cudaArrayTextureGather`: This flag indicates that texture
-      gather operations will be performed on the CUDA array. Texture gather
-      can only be performed on 2D CUDA arrays.
-
-    - :py:obj:`~.cudaArraySparse`: Allocates a CUDA array without physical
-      backing memory. The subregions within this sparse array can later be
-      mapped onto a physical memory allocation by calling
-      :py:obj:`~.cuMemMapArrayAsync`. This flag can only be used for
-      creating 2D, 3D or 2D layered sparse CUDA arrays. The physical
-      backing memory must be allocated via :py:obj:`~.cuMemCreate`.
-
-    - :py:obj:`~.cudaArrayDeferredMapping`: Allocates a CUDA array without
-      physical backing memory. The entire array can later be mapped onto a
-      physical memory allocation by calling :py:obj:`~.cuMemMapArrayAsync`.
-      The physical backing memory must be allocated via
-      :py:obj:`~.cuMemCreate`.
-
-    The width, height and depth extents must meet certain size requirements
-    as listed in the following table. All values are specified in elements.
-
-    Note that 2D CUDA arrays have different size requirements if the
-    :py:obj:`~.cudaArrayTextureGather` flag is set. In that case, the valid
-    range for (width, height, depth) is ((1,maxTexture2DGather[0]),
-    (1,maxTexture2DGather[1]), 0).
-
-    **View CUDA Toolkit Documentation for a table example**
-
-    Parameters
-    ----------
-    desc : :py:obj:`~.cudaChannelFormatDesc`
-        Requested channel format
-    extent : :py:obj:`~.cudaExtent`
-        Requested allocation size (`width` field in elements)
-    flags : unsigned int
-        Flags for extensions
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-    array : :py:obj:`~.cudaArray_t`
-        Pointer to allocated array in device memory
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuArray3DCreate`
-    """
-    cdef cudaArray_t array = cudaArray_t()
-    cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._ptr if desc != None else NULL
-    with nogil:
-        err = cyruntime.cudaMalloc3DArray(<cyruntime.cudaArray_t*>array._ptr, cydesc_ptr, extent._ptr[0], flags)
-
-    return (cudaError_t(err), array)
-{{endif}}
-
-{{if 'cudaMallocMipmappedArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMallocMipmappedArray(desc : Optional[cudaChannelFormatDesc], extent not None : cudaExtent, unsigned int numLevels, unsigned int flags):
-    """ Allocate a mipmapped array on the device.
-
-    Allocates a CUDA mipmapped array according to the
-    :py:obj:`~.cudaChannelFormatDesc` structure `desc` and returns a handle
-    to the new CUDA mipmapped array in `*mipmappedArray`. `numLevels`
-    specifies the number of mipmap levels to be allocated. This value is
-    clamped to the range [1, 1 + floor(log2(max(width, height, depth)))].
-
-    The :py:obj:`~.cudaChannelFormatDesc` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.cudaChannelFormatKind` is one of
-    :py:obj:`~.cudaChannelFormatKindSigned`,
-    :py:obj:`~.cudaChannelFormatKindUnsigned`, or
-    :py:obj:`~.cudaChannelFormatKindFloat`.
-
-    :py:obj:`~.cudaMallocMipmappedArray()` can allocate the following:
-
-    - A 1D mipmapped array is allocated if the height and depth extents are
-      both zero.
-
-    - A 2D mipmapped array is allocated if only the depth extent is zero.
-
-    - A 3D mipmapped array is allocated if all three extents are non-zero.
-
-    - A 1D layered CUDA mipmapped array is allocated if only the height
-      extent is zero and the cudaArrayLayered flag is set. Each layer is a
-      1D mipmapped array. The number of layers is determined by the depth
-      extent.
-
-    - A 2D layered CUDA mipmapped array is allocated if all three extents
-      are non-zero and the cudaArrayLayered flag is set. Each layer is a 2D
-      mipmapped array. The number of layers is determined by the depth
-      extent.
-
-    - A cubemap CUDA mipmapped array is allocated if all three extents are
-      non-zero and the cudaArrayCubemap flag is set. Width must be equal to
-      height, and depth must be six. The order of the six layers in memory
-      is the same as that listed in :py:obj:`~.cudaGraphicsCubeFace`.
-
-    - A cubemap layered CUDA mipmapped array is allocated if all three
-      extents are non-zero, and both, cudaArrayCubemap and cudaArrayLayered
-      flags are set. Width must be equal to height, and depth must be a
-      multiple of six. A cubemap layered CUDA mipmapped array is a special
-      type of 2D layered CUDA mipmapped array that consists of a collection
-      of cubemap mipmapped arrays. The first six layers represent the first
-      cubemap mipmapped array, the next six layers form the second cubemap
-      mipmapped array, and so on.
-
-    The `flags` parameter enables different options to be specified that
-    affect the allocation, as follows.
-
-    - :py:obj:`~.cudaArrayDefault`: This flag's value is defined to be 0
-      and provides default mipmapped array allocation
-
-    - :py:obj:`~.cudaArrayLayered`: Allocates a layered CUDA mipmapped
-      array, with the depth extent indicating the number of layers
-
-    - :py:obj:`~.cudaArrayCubemap`: Allocates a cubemap CUDA mipmapped
-      array. Width must be equal to height, and depth must be six. If the
-      cudaArrayLayered flag is also set, depth must be a multiple of six.
-
-    - :py:obj:`~.cudaArraySurfaceLoadStore`: This flag indicates that
-      individual mipmap levels of the CUDA mipmapped array will be read
-      from or written to using a surface reference.
-
-    - :py:obj:`~.cudaArrayTextureGather`: This flag indicates that texture
-      gather operations will be performed on the CUDA array. Texture gather
-      can only be performed on 2D CUDA mipmapped arrays, and the gather
-      operations are performed only on the most detailed mipmap level.
-
-    - :py:obj:`~.cudaArraySparse`: Allocates a CUDA mipmapped array without
-      physical backing memory. The subregions within this sparse array can
-      later be mapped onto a physical memory allocation by calling
-      :py:obj:`~.cuMemMapArrayAsync`. This flag can only be used for
-      creating 2D, 3D or 2D layered sparse CUDA mipmapped arrays. The
-      physical backing memory must be allocated via
-      :py:obj:`~.cuMemCreate`.
-
-    - :py:obj:`~.cudaArrayDeferredMapping`: Allocates a CUDA mipmapped
-      array without physical backing memory. The entire array can later be
-      mapped onto a physical memory allocation by calling
-      :py:obj:`~.cuMemMapArrayAsync`. The physical backing memory must be
-      allocated via :py:obj:`~.cuMemCreate`.
-
-    The width, height and depth extents must meet certain size requirements
-    as listed in the following table. All values are specified in elements.
-
-    **View CUDA Toolkit Documentation for a table example**
-
-    Parameters
-    ----------
-    desc : :py:obj:`~.cudaChannelFormatDesc`
-        Requested channel format
-    extent : :py:obj:`~.cudaExtent`
-        Requested allocation size (`width` field in elements)
-    numLevels : unsigned int
-        Number of mipmap levels to allocate
-    flags : unsigned int
-        Flags for extensions
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-    mipmappedArray : :py:obj:`~.cudaMipmappedArray_t`
-        Pointer to allocated mipmapped array in device memory
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMipmappedArrayCreate`
-    """
-    cdef cudaMipmappedArray_t mipmappedArray = cudaMipmappedArray_t()
-    cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._ptr if desc != None else NULL
-    err = cyruntime.cudaMallocMipmappedArray(<cyruntime.cudaMipmappedArray_t*>mipmappedArray._ptr, cydesc_ptr, extent._ptr[0], numLevels, flags)
-    return (cudaError_t(err), mipmappedArray)
-{{endif}}
-
-{{if 'cudaGetMipmappedArrayLevel' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetMipmappedArrayLevel(mipmappedArray, unsigned int level):
-    """ Gets a mipmap level of a CUDA mipmapped array.
-
-    Returns in `*levelArray` a CUDA array that represents a single mipmap
-    level of the CUDA mipmapped array `mipmappedArray`.
-
-    If `level` is greater than the maximum number of levels in this
-    mipmapped array, :py:obj:`~.cudaErrorInvalidValue` is returned.
-
-    If `mipmappedArray` is NULL, :py:obj:`~.cudaErrorInvalidResourceHandle`
-    is returned.
-
-    Parameters
-    ----------
-    mipmappedArray : :py:obj:`~.cudaMipmappedArray_const_t`
-        CUDA mipmapped array
-    level : unsigned int
-        Mipmap level
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue` :py:obj:`~.cudaErrorInvalidResourceHandle`
-    levelArray : :py:obj:`~.cudaArray_t`
-        Returned mipmap level CUDA array
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMipmappedArrayGetLevel`
-    """
-    cdef cyruntime.cudaMipmappedArray_const_t cymipmappedArray
-    if mipmappedArray is None:
-        cymipmappedArray = <cyruntime.cudaMipmappedArray_const_t><void_ptr>0
-    elif isinstance(mipmappedArray, (cudaMipmappedArray_const_t,)):
-        pmipmappedArray = int(mipmappedArray)
-        cymipmappedArray = <cyruntime.cudaMipmappedArray_const_t><void_ptr>pmipmappedArray
-    else:
-        pmipmappedArray = int(cudaMipmappedArray_const_t(mipmappedArray))
-        cymipmappedArray = <cyruntime.cudaMipmappedArray_const_t><void_ptr>pmipmappedArray
-    cdef cudaArray_t levelArray = cudaArray_t()
-    err = cyruntime.cudaGetMipmappedArrayLevel(<cyruntime.cudaArray_t*>levelArray._ptr, cymipmappedArray, level)
-    return (cudaError_t(err), levelArray)
-{{endif}}
-
-{{if 'cudaMemcpy3D' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy3D(p : Optional[cudaMemcpy3DParms]):
-    """ Copies data between 3D objects.
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    :py:obj:`~.cudaMemcpy3D()` copies data betwen two 3D objects. The
-    source and destination objects may be in either host memory, device
-    memory, or a CUDA array. The source, destination, extent, and kind of
-    copy performed is specified by the :py:obj:`~.cudaMemcpy3DParms` struct
-    which should be initialized to zero before use:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    The struct passed to :py:obj:`~.cudaMemcpy3D()` must specify one of
-    `srcArray` or `srcPtr` and one of `dstArray` or `dstPtr`. Passing more
-    than one non-zero source or destination will cause
-    :py:obj:`~.cudaMemcpy3D()` to return an error.
-
-    The `srcPos` and `dstPos` fields are optional offsets into the source
-    and destination objects and are defined in units of each object's
-    elements. The element for a host or device pointer is assumed to be
-    unsigned char.
-
-    The `extent` field defines the dimensions of the transferred area in
-    elements. If a CUDA array is participating in the copy, the extent is
-    defined in terms of that array's elements. If no CUDA array is
-    participating in the copy then the extents are defined in elements of
-    unsigned char.
-
-    The `kind` field defines the direction of the copy. It must be one of
-    :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
-    :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. For :py:obj:`~.cudaMemcpyHostToHost` or
-    :py:obj:`~.cudaMemcpyHostToDevice` or
-    :py:obj:`~.cudaMemcpyDeviceToHost` passed as kind and cudaArray type
-    passed as source or destination, if the kind implies cudaArray type to
-    be present on the host, :py:obj:`~.cudaMemcpy3D()` will disregard that
-    implication and silently correct the kind based on the fact that
-    cudaArray type can only be present on the device.
-
-    If the source and destination are both arrays,
-    :py:obj:`~.cudaMemcpy3D()` will return an error if they do not have the
-    same element size.
-
-    The source and destination object may not overlap. If overlapping
-    source and destination objects are specified, undefined behavior will
-    result.
-
-    The source object must entirely contain the region defined by `srcPos`
-    and `extent`. The destination object must entirely contain the region
-    defined by `dstPos` and `extent`.
-
-    :py:obj:`~.cudaMemcpy3D()` returns an error if the pitch of `srcPtr` or
-    `dstPtr` exceeds the maximum allowed. The pitch of a
-    :py:obj:`~.cudaPitchedPtr` allocated with :py:obj:`~.cudaMalloc3D()`
-    will always be valid.
-
-    Parameters
-    ----------
-    p : :py:obj:`~.cudaMemcpy3DParms`
-        3D memory copy parameters
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemcpy3DAsync`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.make_cudaPos`, :py:obj:`~.cuMemcpy3D`
-    """
-    cdef cyruntime.cudaMemcpy3DParms* cyp_ptr = p._ptr if p != None else NULL
-    with nogil:
-        err = cyruntime.cudaMemcpy3D(cyp_ptr)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpy3DPeer' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy3DPeer(p : Optional[cudaMemcpy3DPeerParms]):
-    """ Copies memory between devices.
-
-    Perform a 3D memory copy according to the parameters specified in `p`.
-    See the definition of the :py:obj:`~.cudaMemcpy3DPeerParms` structure
-    for documentation of its parameters.
-
-    Note that this function is synchronous with respect to the host only if
-    the source or destination of the transfer is host memory. Note also
-    that this copy is serialized with respect to all pending and future
-    asynchronous work in to the current device, the copy's source device,
-    and the copy's destination device (use
-    :py:obj:`~.cudaMemcpy3DPeerAsync` to avoid this synchronization).
-
-    Parameters
-    ----------
-    p : :py:obj:`~.cudaMemcpy3DPeerParms`
-        Parameters for the memory copy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidPitchValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpy3DPeer`
-    """
-    cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr = p._ptr if p != None else NULL
-    err = cyruntime.cudaMemcpy3DPeer(cyp_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpy3DAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy3DAsync(p : Optional[cudaMemcpy3DParms], stream):
-    """ Copies data between 3D objects.
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    :py:obj:`~.cudaMemcpy3DAsync()` copies data betwen two 3D objects. The
-    source and destination objects may be in either host memory, device
-    memory, or a CUDA array. The source, destination, extent, and kind of
-    copy performed is specified by the :py:obj:`~.cudaMemcpy3DParms` struct
-    which should be initialized to zero before use:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    The struct passed to :py:obj:`~.cudaMemcpy3DAsync()` must specify one
-    of `srcArray` or `srcPtr` and one of `dstArray` or `dstPtr`. Passing
-    more than one non-zero source or destination will cause
-    :py:obj:`~.cudaMemcpy3DAsync()` to return an error.
-
-    The `srcPos` and `dstPos` fields are optional offsets into the source
-    and destination objects and are defined in units of each object's
-    elements. The element for a host or device pointer is assumed to be
-    unsigned char. For CUDA arrays, positions must be in the range [0,
-    2048) for any dimension.
-
-    The `extent` field defines the dimensions of the transferred area in
-    elements. If a CUDA array is participating in the copy, the extent is
-    defined in terms of that array's elements. If no CUDA array is
-    participating in the copy then the extents are defined in elements of
-    unsigned char.
-
-    The `kind` field defines the direction of the copy. It must be one of
-    :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
-    :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. For :py:obj:`~.cudaMemcpyHostToHost` or
-    :py:obj:`~.cudaMemcpyHostToDevice` or
-    :py:obj:`~.cudaMemcpyDeviceToHost` passed as kind and cudaArray type
-    passed as source or destination, if the kind implies cudaArray type to
-    be present on the host, :py:obj:`~.cudaMemcpy3DAsync()` will disregard
-    that implication and silently correct the kind based on the fact that
-    cudaArray type can only be present on the device.
-
-    If the source and destination are both arrays,
-    :py:obj:`~.cudaMemcpy3DAsync()` will return an error if they do not
-    have the same element size.
-
-    The source and destination object may not overlap. If overlapping
-    source and destination objects are specified, undefined behavior will
-    result.
-
-    The source object must lie entirely within the region defined by
-    `srcPos` and `extent`. The destination object must lie entirely within
-    the region defined by `dstPos` and `extent`.
-
-    :py:obj:`~.cudaMemcpy3DAsync()` returns an error if the pitch of
-    `srcPtr` or `dstPtr` exceeds the maximum allowed. The pitch of a
-    :py:obj:`~.cudaPitchedPtr` allocated with :py:obj:`~.cudaMalloc3D()`
-    will always be valid.
-
-    :py:obj:`~.cudaMemcpy3DAsync()` is asynchronous with respect to the
-    host, so the call may return before the copy is complete. The copy can
-    optionally be associated to a stream by passing a non-zero `stream`
-    argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
-    :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
-    may overlap with operations in other streams.
-
-    The device version of this function only handles device to device
-    copies and cannot be given local or shared pointers.
-
-    Parameters
-    ----------
-    p : :py:obj:`~.cudaMemcpy3DParms`
-        3D memory copy parameters
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, ::::py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.make_cudaPos`, :py:obj:`~.cuMemcpy3DAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaMemcpy3DParms* cyp_ptr = p._ptr if p != None else NULL
-    with nogil:
-        err = cyruntime.cudaMemcpy3DAsync(cyp_ptr, cystream)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpy3DPeerAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy3DPeerAsync(p : Optional[cudaMemcpy3DPeerParms], stream):
-    """ Copies memory between devices asynchronously.
-
-    Perform a 3D memory copy according to the parameters specified in `p`.
-    See the definition of the :py:obj:`~.cudaMemcpy3DPeerParms` structure
-    for documentation of its parameters.
-
-    Parameters
-    ----------
-    p : :py:obj:`~.cudaMemcpy3DPeerParms`
-        Parameters for the memory copy
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidPitchValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr = p._ptr if p != None else NULL
-    err = cyruntime.cudaMemcpy3DPeerAsync(cyp_ptr, cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemGetInfo' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemGetInfo():
-    """ Gets free and total device memory.
-
-    Returns in `*total` the total amount of memory available to the the
-    current context. Returns in `*free` the amount of memory on the device
-    that is free according to the OS. CUDA is not guaranteed to be able to
-    allocate all of the memory that the OS reports as free. In a multi-
-    tenet situation, free estimate returned is prone to race condition
-    where a new allocation/free done by a different process or a different
-    thread in the same process between the time when free memory was
-    estimated and reported, will result in deviation in free value reported
-    and actual free memory.
-
-    The integrated GPU on Tegra shares memory with CPU and other component
-    of the SoC. The free and total values returned by the API excludes the
-    SWAP memory space maintained by the OS on some platforms. The OS may
-    move some of the memory pages into swap area as the GPU or CPU allocate
-    or access memory. See Tegra app note on how to calculate total and free
-    memory on Tegra.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`
-    free : int
-        Returned free memory in bytes
-    total : int
-        Returned total memory in bytes
-
-    See Also
-    --------
-    :py:obj:`~.cuMemGetInfo`
-    """
-    cdef size_t free = 0
-    cdef size_t total = 0
-    err = cyruntime.cudaMemGetInfo(&free, &total)
-    return (cudaError_t(err), free, total)
-{{endif}}
-
-{{if 'cudaArrayGetInfo' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaArrayGetInfo(array):
-    """ Gets info about the specified cudaArray.
-
-    Returns in `*desc`, `*extent` and `*flags` respectively, the type,
-    shape and flags of `array`.
-
-    Any of `*desc`, `*extent` and `*flags` may be specified as NULL.
-
-    Parameters
-    ----------
-    array : :py:obj:`~.cudaArray_t`
-        The :py:obj:`~.cudaArray` to get info for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    desc : :py:obj:`~.cudaChannelFormatDesc`
-        Returned array type
-    extent : :py:obj:`~.cudaExtent`
-        Returned array shape. 2D arrays will have depth of zero
-    flags : unsigned int
-        Returned array flags
-
-    See Also
-    --------
-    :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuArray3DGetDescriptor`
-    """
-    cdef cyruntime.cudaArray_t cyarray
-    if array is None:
-        cyarray = <cyruntime.cudaArray_t><void_ptr>0
-    elif isinstance(array, (cudaArray_t,)):
-        parray = int(array)
-        cyarray = <cyruntime.cudaArray_t><void_ptr>parray
-    else:
-        parray = int(cudaArray_t(array))
-        cyarray = <cyruntime.cudaArray_t><void_ptr>parray
-    cdef cudaChannelFormatDesc desc = cudaChannelFormatDesc()
-    cdef cudaExtent extent = cudaExtent()
-    cdef unsigned int flags = 0
-    err = cyruntime.cudaArrayGetInfo(<cyruntime.cudaChannelFormatDesc*>desc._ptr, <cyruntime.cudaExtent*>extent._ptr, &flags, cyarray)
-    return (cudaError_t(err), desc, extent, flags)
-{{endif}}
-
-{{if 'cudaArrayGetPlane' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaArrayGetPlane(hArray, unsigned int planeIdx):
-    """ Gets a CUDA array plane from a CUDA array.
-
-    Returns in `pPlaneArray` a CUDA array that represents a single format
-    plane of the CUDA array `hArray`.
-
-    If `planeIdx` is greater than the maximum number of planes in this
-    array or if the array does not have a multi-planar format e.g:
-    :py:obj:`~.cudaChannelFormatKindNV12`, then
-    :py:obj:`~.cudaErrorInvalidValue` is returned.
-
-    Note that if the `hArray` has format
-    :py:obj:`~.cudaChannelFormatKindNV12`, then passing in 0 for `planeIdx`
-    returns a CUDA array of the same size as `hArray` but with one 8-bit
-    channel and :py:obj:`~.cudaChannelFormatKindUnsigned` as its format
-    kind. If 1 is passed for `planeIdx`, then the returned CUDA array has
-    half the height and width of `hArray` with two 8-bit channels and
-    :py:obj:`~.cudaChannelFormatKindUnsigned` as its format kind.
-
-    Parameters
-    ----------
-    hArray : :py:obj:`~.cudaArray_t`
-        CUDA array
-    planeIdx : unsigned int
-        Plane index
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue` :py:obj:`~.cudaErrorInvalidResourceHandle`
-    pPlaneArray : :py:obj:`~.cudaArray_t`
-        Returned CUDA array referenced by the `planeIdx`
-
-    See Also
-    --------
-    :py:obj:`~.cuArrayGetPlane`
-    """
-    cdef cyruntime.cudaArray_t cyhArray
-    if hArray is None:
-        cyhArray = <cyruntime.cudaArray_t><void_ptr>0
-    elif isinstance(hArray, (cudaArray_t,)):
-        phArray = int(hArray)
-        cyhArray = <cyruntime.cudaArray_t><void_ptr>phArray
-    else:
-        phArray = int(cudaArray_t(hArray))
-        cyhArray = <cyruntime.cudaArray_t><void_ptr>phArray
-    cdef cudaArray_t pPlaneArray = cudaArray_t()
-    err = cyruntime.cudaArrayGetPlane(<cyruntime.cudaArray_t*>pPlaneArray._ptr, cyhArray, planeIdx)
-    return (cudaError_t(err), pPlaneArray)
-{{endif}}
-
-{{if 'cudaArrayGetMemoryRequirements' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaArrayGetMemoryRequirements(array, int device):
-    """ Returns the memory requirements of a CUDA array.
-
-    Returns the memory requirements of a CUDA array in `memoryRequirements`
-    If the CUDA array is not allocated with flag
-    :py:obj:`~.cudaArrayDeferredMapping` :py:obj:`~.cudaErrorInvalidValue`
-    will be returned.
-
-    The returned value in :py:obj:`~.cudaArrayMemoryRequirements.size`
-    represents the total size of the CUDA array. The returned value in
-    :py:obj:`~.cudaArrayMemoryRequirements.alignment` represents the
-    alignment necessary for mapping the CUDA array.
-
-    Parameters
-    ----------
-    array : :py:obj:`~.cudaArray_t`
-        CUDA array to get the memory requirements of
-    device : int
-        Device to get the memory requirements for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorInvalidValue`
-    memoryRequirements : :py:obj:`~.cudaArrayMemoryRequirements`
-        Pointer to :py:obj:`~.cudaArrayMemoryRequirements`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMipmappedArrayGetMemoryRequirements`
-    """
-    cdef cyruntime.cudaArray_t cyarray
-    if array is None:
-        cyarray = <cyruntime.cudaArray_t><void_ptr>0
-    elif isinstance(array, (cudaArray_t,)):
-        parray = int(array)
-        cyarray = <cyruntime.cudaArray_t><void_ptr>parray
-    else:
-        parray = int(cudaArray_t(array))
-        cyarray = <cyruntime.cudaArray_t><void_ptr>parray
-    cdef cudaArrayMemoryRequirements memoryRequirements = cudaArrayMemoryRequirements()
-    err = cyruntime.cudaArrayGetMemoryRequirements(<cyruntime.cudaArrayMemoryRequirements*>memoryRequirements._ptr, cyarray, device)
-    return (cudaError_t(err), memoryRequirements)
-{{endif}}
-
-{{if 'cudaMipmappedArrayGetMemoryRequirements' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMipmappedArrayGetMemoryRequirements(mipmap, int device):
-    """ Returns the memory requirements of a CUDA mipmapped array.
-
-    Returns the memory requirements of a CUDA mipmapped array in
-    `memoryRequirements` If the CUDA mipmapped array is not allocated with
-    flag :py:obj:`~.cudaArrayDeferredMapping`
-    :py:obj:`~.cudaErrorInvalidValue` will be returned.
-
-    The returned value in :py:obj:`~.cudaArrayMemoryRequirements.size`
-    represents the total size of the CUDA mipmapped array. The returned
-    value in :py:obj:`~.cudaArrayMemoryRequirements.alignment` represents
-    the alignment necessary for mapping the CUDA mipmapped array.
-
-    Parameters
-    ----------
-    mipmap : :py:obj:`~.cudaMipmappedArray_t`
-        CUDA mipmapped array to get the memory requirements of
-    device : int
-        Device to get the memory requirements for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorInvalidValue`
-    memoryRequirements : :py:obj:`~.cudaArrayMemoryRequirements`
-        Pointer to :py:obj:`~.cudaArrayMemoryRequirements`
-
-    See Also
-    --------
-    :py:obj:`~.cudaArrayGetMemoryRequirements`
-    """
-    cdef cyruntime.cudaMipmappedArray_t cymipmap
-    if mipmap is None:
-        cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>0
-    elif isinstance(mipmap, (cudaMipmappedArray_t,)):
-        pmipmap = int(mipmap)
-        cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
-    else:
-        pmipmap = int(cudaMipmappedArray_t(mipmap))
-        cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
-    cdef cudaArrayMemoryRequirements memoryRequirements = cudaArrayMemoryRequirements()
-    err = cyruntime.cudaMipmappedArrayGetMemoryRequirements(<cyruntime.cudaArrayMemoryRequirements*>memoryRequirements._ptr, cymipmap, device)
-    return (cudaError_t(err), memoryRequirements)
-{{endif}}
-
-{{if 'cudaArrayGetSparseProperties' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaArrayGetSparseProperties(array):
-    """ Returns the layout properties of a sparse CUDA array.
-
-    Returns the layout properties of a sparse CUDA array in
-    `sparseProperties`. If the CUDA array is not allocated with flag
-    :py:obj:`~.cudaArraySparse` :py:obj:`~.cudaErrorInvalidValue` will be
-    returned.
-
-    If the returned value in :py:obj:`~.cudaArraySparseProperties.flags`
-    contains :py:obj:`~.cudaArraySparsePropertiesSingleMipTail`, then
-    :py:obj:`~.cudaArraySparseProperties.miptailSize` represents the total
-    size of the array. Otherwise, it will be zero. Also, the returned value
-    in :py:obj:`~.cudaArraySparseProperties.miptailFirstLevel` is always
-    zero. Note that the `array` must have been allocated using
-    :py:obj:`~.cudaMallocArray` or :py:obj:`~.cudaMalloc3DArray`. For CUDA
-    arrays obtained using :py:obj:`~.cudaMipmappedArrayGetLevel`,
-    :py:obj:`~.cudaErrorInvalidValue` will be returned. Instead,
-    :py:obj:`~.cudaMipmappedArrayGetSparseProperties` must be used to
-    obtain the sparse properties of the entire CUDA mipmapped array to
-    which `array` belongs to.
-
-    Parameters
-    ----------
-    array : :py:obj:`~.cudaArray_t`
-        The CUDA array to get the sparse properties of
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorInvalidValue`
-    sparseProperties : :py:obj:`~.cudaArraySparseProperties`
-        Pointer to return the :py:obj:`~.cudaArraySparseProperties`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMipmappedArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync`
-    """
-    cdef cyruntime.cudaArray_t cyarray
-    if array is None:
-        cyarray = <cyruntime.cudaArray_t><void_ptr>0
-    elif isinstance(array, (cudaArray_t,)):
-        parray = int(array)
-        cyarray = <cyruntime.cudaArray_t><void_ptr>parray
-    else:
-        parray = int(cudaArray_t(array))
-        cyarray = <cyruntime.cudaArray_t><void_ptr>parray
-    cdef cudaArraySparseProperties sparseProperties = cudaArraySparseProperties()
-    err = cyruntime.cudaArrayGetSparseProperties(<cyruntime.cudaArraySparseProperties*>sparseProperties._ptr, cyarray)
-    return (cudaError_t(err), sparseProperties)
-{{endif}}
-
-{{if 'cudaMipmappedArrayGetSparseProperties' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMipmappedArrayGetSparseProperties(mipmap):
-    """ Returns the layout properties of a sparse CUDA mipmapped array.
-
-    Returns the sparse array layout properties in `sparseProperties`. If
-    the CUDA mipmapped array is not allocated with flag
-    :py:obj:`~.cudaArraySparse` :py:obj:`~.cudaErrorInvalidValue` will be
-    returned.
-
-    For non-layered CUDA mipmapped arrays,
-    :py:obj:`~.cudaArraySparseProperties.miptailSize` returns the size of
-    the mip tail region. The mip tail region includes all mip levels whose
-    width, height or depth is less than that of the tile. For layered CUDA
-    mipmapped arrays, if :py:obj:`~.cudaArraySparseProperties.flags`
-    contains :py:obj:`~.cudaArraySparsePropertiesSingleMipTail`, then
-    :py:obj:`~.cudaArraySparseProperties.miptailSize` specifies the size of
-    the mip tail of all layers combined. Otherwise,
-    :py:obj:`~.cudaArraySparseProperties.miptailSize` specifies mip tail
-    size per layer. The returned value of
-    :py:obj:`~.cudaArraySparseProperties.miptailFirstLevel` is valid only
-    if :py:obj:`~.cudaArraySparseProperties.miptailSize` is non-zero.
-
-    Parameters
-    ----------
-    mipmap : :py:obj:`~.cudaMipmappedArray_t`
-        The CUDA mipmapped array to get the sparse properties of
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorInvalidValue`
-    sparseProperties : :py:obj:`~.cudaArraySparseProperties`
-        Pointer to return :py:obj:`~.cudaArraySparseProperties`
-
-    See Also
-    --------
-    :py:obj:`~.cudaArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync`
-    """
-    cdef cyruntime.cudaMipmappedArray_t cymipmap
-    if mipmap is None:
-        cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>0
-    elif isinstance(mipmap, (cudaMipmappedArray_t,)):
-        pmipmap = int(mipmap)
-        cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
-    else:
-        pmipmap = int(cudaMipmappedArray_t(mipmap))
-        cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
-    cdef cudaArraySparseProperties sparseProperties = cudaArraySparseProperties()
-    err = cyruntime.cudaMipmappedArrayGetSparseProperties(<cyruntime.cudaArraySparseProperties*>sparseProperties._ptr, cymipmap)
-    return (cudaError_t(err), sparseProperties)
-{{endif}}
-
-{{if 'cudaMemcpy' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy(dst, src, size_t count, kind not None : cudaMemcpyKind):
-    """ Copies data between host and device.
-
-    Copies `count` bytes from the memory area pointed to by `src` to the
-    memory area pointed to by `dst`, where `kind` specifies the direction
-    of the copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
-    :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. Calling :py:obj:`~.cudaMemcpy()` with dst
-    and src pointers that do not match the direction of the copy results in
-    an undefined behavior.
-
-    \note_sync
-
-    Parameters
-    ----------
-    dst : Any
-        Destination memory address
-    src : Any
-        Source memory address
-    count : size_t
-        Size in bytes to copy
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpy`
-    """
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    with nogil:
-        err = cyruntime.cudaMemcpy(cydst_ptr, cysrc_ptr, count, cykind)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpyPeer' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpyPeer(dst, int dstDevice, src, int srcDevice, size_t count):
-    """ Copies memory between two devices.
-
-    Copies memory from one device to memory on another device. `dst` is the
-    base device pointer of the destination memory and `dstDevice` is the
-    destination device. `src` is the base device pointer of the source
-    memory and `srcDevice` is the source device. `count` specifies the
-    number of bytes to copy.
-
-    Note that this function is asynchronous with respect to the host, but
-    serialized with respect all pending and future asynchronous work in to
-    the current device, `srcDevice`, and `dstDevice` (use
-    :py:obj:`~.cudaMemcpyPeerAsync` to avoid this synchronization).
-
-    Parameters
-    ----------
-    dst : Any
-        Destination device pointer
-    dstDevice : int
-        Destination device
-    src : Any
-        Source device pointer
-    srcDevice : int
-        Source device
-    count : size_t
-        Size of memory copy in bytes
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpyPeer`
-    """
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    with nogil:
-        err = cyruntime.cudaMemcpyPeer(cydst_ptr, dstDevice, cysrc_ptr, srcDevice, count)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpy2D' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy2D(dst, size_t dpitch, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind):
-    """ Copies data between host and device.
-
-    Copies a matrix (`height` rows of `width` bytes each) from the memory
-    area pointed to by `src` to the memory area pointed to by `dst`, where
-    `kind` specifies the direction of the copy, and must be one of
-    :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
-    :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. `dpitch` and `spitch` are the widths in
-    memory in bytes of the 2D arrays pointed to by `dst` and `src`,
-    including any padding added to the end of each row. The memory areas
-    may not overlap. `width` must not exceed either `dpitch` or `spitch`.
-    Calling :py:obj:`~.cudaMemcpy2D()` with `dst` and `src` pointers that
-    do not match the direction of the copy results in an undefined
-    behavior. :py:obj:`~.cudaMemcpy2D()` returns an error if `dpitch` or
-    `spitch` exceeds the maximum allowed.
-
-    Parameters
-    ----------
-    dst : Any
-        Destination memory address
-    dpitch : size_t
-        Pitch of destination memory
-    src : Any
-        Source memory address
-    spitch : size_t
-        Pitch of source memory
-    width : size_t
-        Width of matrix transfer (columns in bytes)
-    height : size_t
-        Height of matrix transfer (rows)
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
-    """
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    with nogil:
-        err = cyruntime.cudaMemcpy2D(cydst_ptr, dpitch, cysrc_ptr, spitch, width, height, cykind)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpy2DToArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy2DToArray(dst, size_t wOffset, size_t hOffset, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind):
-    """ Copies data between host and device.
-
-    Copies a matrix (`height` rows of `width` bytes each) from the memory
-    area pointed to by `src` to the CUDA array `dst` starting at `hOffset`
-    rows and `wOffset` bytes from the upper left corner, where `kind`
-    specifies the direction of the copy, and must be one of
-    :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
-    :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. `spitch` is the width in memory in bytes of
-    the 2D array pointed to by `src`, including any padding added to the
-    end of each row. `wOffset` + `width` must not exceed the width of the
-    CUDA array `dst`. `width` must not exceed `spitch`.
-    :py:obj:`~.cudaMemcpy2DToArray()` returns an error if `spitch` exceeds
-    the maximum allowed.
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.cudaArray_t`
-        Destination memory address
-    wOffset : size_t
-        Destination starting X offset (columns in bytes)
-    hOffset : size_t
-        Destination starting Y offset (rows)
-    src : Any
-        Source memory address
-    spitch : size_t
-        Pitch of source memory
-    width : size_t
-        Width of matrix transfer (columns in bytes)
-    height : size_t
-        Height of matrix transfer (rows)
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
-    """
-    cdef cyruntime.cudaArray_t cydst
-    if dst is None:
-        cydst = <cyruntime.cudaArray_t><void_ptr>0
-    elif isinstance(dst, (cudaArray_t,)):
-        pdst = int(dst)
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    else:
-        pdst = int(cudaArray_t(dst))
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    with nogil:
-        err = cyruntime.cudaMemcpy2DToArray(cydst, wOffset, hOffset, cysrc_ptr, spitch, width, height, cykind)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpy2DFromArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy2DFromArray(dst, size_t dpitch, src, size_t wOffset, size_t hOffset, size_t width, size_t height, kind not None : cudaMemcpyKind):
-    """ Copies data between host and device.
-
-    Copies a matrix (`height` rows of `width` bytes each) from the CUDA
-    array `src` starting at `hOffset` rows and `wOffset` bytes from the
-    upper left corner to the memory area pointed to by `dst`, where `kind`
-    specifies the direction of the copy, and must be one of
-    :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
-    :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. `dpitch` is the width in memory in bytes of
-    the 2D array pointed to by `dst`, including any padding added to the
-    end of each row. `wOffset` + `width` must not exceed the width of the
-    CUDA array `src`. `width` must not exceed `dpitch`.
-    :py:obj:`~.cudaMemcpy2DFromArray()` returns an error if `dpitch`
-    exceeds the maximum allowed.
-
-    Parameters
-    ----------
-    dst : Any
-        Destination memory address
-    dpitch : size_t
-        Pitch of destination memory
-    src : :py:obj:`~.cudaArray_const_t`
-        Source memory address
-    wOffset : size_t
-        Source starting X offset (columns in bytes)
-    hOffset : size_t
-        Source starting Y offset (rows)
-    width : size_t
-        Width of matrix transfer (columns in bytes)
-    height : size_t
-        Height of matrix transfer (rows)
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
-    """
-    cdef cyruntime.cudaArray_const_t cysrc
-    if src is None:
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>0
-    elif isinstance(src, (cudaArray_const_t,)):
-        psrc = int(src)
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    else:
-        psrc = int(cudaArray_const_t(src))
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    with nogil:
-        err = cyruntime.cudaMemcpy2DFromArray(cydst_ptr, dpitch, cysrc, wOffset, hOffset, width, height, cykind)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpy2DArrayToArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy2DArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, kind not None : cudaMemcpyKind):
-    """ Copies data between host and device.
-
-    Copies a matrix (`height` rows of `width` bytes each) from the CUDA
-    array `src` starting at `hOffsetSrc` rows and `wOffsetSrc` bytes from
-    the upper left corner to the CUDA array `dst` starting at `hOffsetDst`
-    rows and `wOffsetDst` bytes from the upper left corner, where `kind`
-    specifies the direction of the copy, and must be one of
-    :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
-    :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. `wOffsetDst` + `width` must not exceed the
-    width of the CUDA array `dst`. `wOffsetSrc` + `width` must not exceed
-    the width of the CUDA array `src`.
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.cudaArray_t`
-        Destination memory address
-    wOffsetDst : size_t
-        Destination starting X offset (columns in bytes)
-    hOffsetDst : size_t
-        Destination starting Y offset (rows)
-    src : :py:obj:`~.cudaArray_const_t`
-        Source memory address
-    wOffsetSrc : size_t
-        Source starting X offset (columns in bytes)
-    hOffsetSrc : size_t
-        Source starting Y offset (rows)
-    width : size_t
-        Width of matrix transfer (columns in bytes)
-    height : size_t
-        Height of matrix transfer (rows)
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
-    """
-    cdef cyruntime.cudaArray_const_t cysrc
-    if src is None:
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>0
-    elif isinstance(src, (cudaArray_const_t,)):
-        psrc = int(src)
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    else:
-        psrc = int(cudaArray_const_t(src))
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    cdef cyruntime.cudaArray_t cydst
-    if dst is None:
-        cydst = <cyruntime.cudaArray_t><void_ptr>0
-    elif isinstance(dst, (cudaArray_t,)):
-        pdst = int(dst)
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    else:
-        pdst = int(cudaArray_t(dst))
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    err = cyruntime.cudaMemcpy2DArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, width, height, cykind)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpyAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpyAsync(dst, src, size_t count, kind not None : cudaMemcpyKind, stream):
-    """ Copies data between host and device.
-
-    Copies `count` bytes from the memory area pointed to by `src` to the
-    memory area pointed to by `dst`, where `kind` specifies the direction
-    of the copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
-    :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing.
-
-    The memory areas may not overlap. Calling :py:obj:`~.cudaMemcpyAsync()`
-    with `dst` and `src` pointers that do not match the direction of the
-    copy results in an undefined behavior.
-
-    :py:obj:`~.cudaMemcpyAsync()` is asynchronous with respect to the host,
-    so the call may return before the copy is complete. The copy can
-    optionally be associated to a stream by passing a non-zero `stream`
-    argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
-    :py:obj:`~.cudaMemcpyDeviceToHost` and the `stream` is non-zero, the
-    copy may overlap with operations in other streams.
-
-    The device version of this function only handles device to device
-    copies and cannot be given local or shared pointers.
-
-    Parameters
-    ----------
-    dst : Any
-        Destination memory address
-    src : Any
-        Source memory address
-    count : size_t
-        Size in bytes to copy
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAsync`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemcpyDtoDAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    with nogil:
-        err = cyruntime.cudaMemcpyAsync(cydst_ptr, cysrc_ptr, count, cykind, cystream)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpyPeerAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpyPeerAsync(dst, int dstDevice, src, int srcDevice, size_t count, stream):
-    """ Copies memory between two devices asynchronously.
-
-    Copies memory from one device to memory on another device. `dst` is the
-    base device pointer of the destination memory and `dstDevice` is the
-    destination device. `src` is the base device pointer of the source
-    memory and `srcDevice` is the source device. `count` specifies the
-    number of bytes to copy.
-
-    Note that this function is asynchronous with respect to the host and
-    all work on other devices.
-
-    Parameters
-    ----------
-    dst : Any
-        Destination device pointer
-    dstDevice : int
-        Destination device
-    src : Any
-        Source device pointer
-    srcDevice : int
-        Source device
-    count : size_t
-        Size of memory copy in bytes
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpyPeerAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    with nogil:
-        err = cyruntime.cudaMemcpyPeerAsync(cydst_ptr, dstDevice, cysrc_ptr, srcDevice, count, cystream)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpy2DAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy2DAsync(dst, size_t dpitch, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind, stream):
-    """ Copies data between host and device.
-
-    Copies a matrix (`height` rows of `width` bytes each) from the memory
-    area pointed to by `src` to the memory area pointed to by `dst`, where
-    `kind` specifies the direction of the copy, and must be one of
-    :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
-    :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. `dpitch` and `spitch` are the widths in
-    memory in bytes of the 2D arrays pointed to by `dst` and `src`,
-    including any padding added to the end of each row. The memory areas
-    may not overlap. `width` must not exceed either `dpitch` or `spitch`.
-
-    Calling :py:obj:`~.cudaMemcpy2DAsync()` with `dst` and `src` pointers
-    that do not match the direction of the copy results in an undefined
-    behavior. :py:obj:`~.cudaMemcpy2DAsync()` returns an error if `dpitch`
-    or `spitch` is greater than the maximum allowed.
-
-    :py:obj:`~.cudaMemcpy2DAsync()` is asynchronous with respect to the
-    host, so the call may return before the copy is complete. The copy can
-    optionally be associated to a stream by passing a non-zero `stream`
-    argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
-    :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
-    may overlap with operations in other streams.
-
-    The device version of this function only handles device to device
-    copies and cannot be given local or shared pointers.
-
-    Parameters
-    ----------
-    dst : Any
-        Destination memory address
-    dpitch : size_t
-        Pitch of destination memory
-    src : Any
-        Source memory address
-    spitch : size_t
-        Pitch of source memory
-    width : size_t
-        Width of matrix transfer (columns in bytes)
-    height : size_t
-        Height of matrix transfer (rows)
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2DAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    with nogil:
-        err = cyruntime.cudaMemcpy2DAsync(cydst_ptr, dpitch, cysrc_ptr, spitch, width, height, cykind, cystream)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpy2DToArrayAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy2DToArrayAsync(dst, size_t wOffset, size_t hOffset, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind, stream):
-    """ Copies data between host and device.
-
-    Copies a matrix (`height` rows of `width` bytes each) from the memory
-    area pointed to by `src` to the CUDA array `dst` starting at `hOffset`
-    rows and `wOffset` bytes from the upper left corner, where `kind`
-    specifies the direction of the copy, and must be one of
-    :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
-    :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. `spitch` is the width in memory in bytes of
-    the 2D array pointed to by `src`, including any padding added to the
-    end of each row. `wOffset` + `width` must not exceed the width of the
-    CUDA array `dst`. `width` must not exceed `spitch`.
-    :py:obj:`~.cudaMemcpy2DToArrayAsync()` returns an error if `spitch`
-    exceeds the maximum allowed.
-
-    :py:obj:`~.cudaMemcpy2DToArrayAsync()` is asynchronous with respect to
-    the host, so the call may return before the copy is complete. The copy
-    can optionally be associated to a stream by passing a non-zero `stream`
-    argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
-    :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
-    may overlap with operations in other streams.
-
-    :py:obj:`~.cudaMemcpy2DFromArrayAsync`,
-    :py:obj:`~.cudaMemcpyToSymbolAsync`,
-    :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2DAsync`
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.cudaArray_t`
-        Destination memory address
-    wOffset : size_t
-        Destination starting X offset (columns in bytes)
-    hOffset : size_t
-        Destination starting Y offset (rows)
-    src : Any
-        Source memory address
-    spitch : size_t
-        Pitch of source memory
-    width : size_t
-        Width of matrix transfer (columns in bytes)
-    height : size_t
-        Height of matrix transfer (rows)
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`,
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaArray_t cydst
-    if dst is None:
-        cydst = <cyruntime.cudaArray_t><void_ptr>0
-    elif isinstance(dst, (cudaArray_t,)):
-        pdst = int(dst)
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    else:
-        pdst = int(cudaArray_t(dst))
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    with nogil:
-        err = cyruntime.cudaMemcpy2DToArrayAsync(cydst, wOffset, hOffset, cysrc_ptr, spitch, width, height, cykind, cystream)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpy2DFromArrayAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpy2DFromArrayAsync(dst, size_t dpitch, src, size_t wOffset, size_t hOffset, size_t width, size_t height, kind not None : cudaMemcpyKind, stream):
-    """ Copies data between host and device.
-
-    Copies a matrix (`height` rows of `width` bytes each) from the CUDA
-    array `src` starting at `hOffset` rows and `wOffset` bytes from the
-    upper left corner to the memory area pointed to by `dst`, where `kind`
-    specifies the direction of the copy, and must be one of
-    :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
-    :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. `dpitch` is the width in memory in bytes of
-    the 2D array pointed to by `dst`, including any padding added to the
-    end of each row. `wOffset` + `width` must not exceed the width of the
-    CUDA array `src`. `width` must not exceed `dpitch`.
-    :py:obj:`~.cudaMemcpy2DFromArrayAsync()` returns an error if `dpitch`
-    exceeds the maximum allowed.
-
-    :py:obj:`~.cudaMemcpy2DFromArrayAsync()` is asynchronous with respect
-    to the host, so the call may return before the copy is complete. The
-    copy can optionally be associated to a stream by passing a non-zero
-    `stream` argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
-    :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
-    may overlap with operations in other streams.
-
-    :py:obj:`~.cudaMemcpyToSymbolAsync`,
-    :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2DAsync`
-
-    Parameters
-    ----------
-    dst : Any
-        Destination memory address
-    dpitch : size_t
-        Pitch of destination memory
-    src : :py:obj:`~.cudaArray_const_t`
-        Source memory address
-    wOffset : size_t
-        Source starting X offset (columns in bytes)
-    hOffset : size_t
-        Source starting Y offset (rows)
-    width : size_t
-        Width of matrix transfer (columns in bytes)
-    height : size_t
-        Height of matrix transfer (rows)
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`,
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaArray_const_t cysrc
-    if src is None:
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>0
-    elif isinstance(src, (cudaArray_const_t,)):
-        psrc = int(src)
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    else:
-        psrc = int(cudaArray_const_t(src))
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    with nogil:
-        err = cyruntime.cudaMemcpy2DFromArrayAsync(cydst_ptr, dpitch, cysrc, wOffset, hOffset, width, height, cykind, cystream)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemset' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemset(devPtr, int value, size_t count):
-    """ Initializes or sets device memory to a value.
-
-    Fills the first `count` bytes of the memory area pointed to by `devPtr`
-    with the constant byte value `value`.
-
-    Note that this function is asynchronous with respect to the host unless
-    `devPtr` refers to pinned host memory.
-
-    Parameters
-    ----------
-    devPtr : Any
-        Pointer to device memory
-    value : int
-        Value to set for each byte of specified memory
-    count : size_t
-        Size in bytes to set
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`
-    """
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    with nogil:
-        err = cyruntime.cudaMemset(cydevPtr_ptr, value, count)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemset2D' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemset2D(devPtr, size_t pitch, int value, size_t width, size_t height):
-    """ Initializes or sets device memory to a value.
-
-    Sets to the specified value `value` a matrix (`height` rows of `width`
-    bytes each) pointed to by `dstPtr`. `pitch` is the width in bytes of
-    the 2D array pointed to by `dstPtr`, including any padding added to the
-    end of each row. This function performs fastest when the pitch is one
-    that has been passed back by :py:obj:`~.cudaMallocPitch()`.
-
-    Note that this function is asynchronous with respect to the host unless
-    `devPtr` refers to pinned host memory.
-
-    Parameters
-    ----------
-    devPtr : Any
-        Pointer to 2D device memory
-    pitch : size_t
-        Pitch in bytes of 2D device memory(Unused if `height` is 1)
-    value : int
-        Value to set for each byte of specified memory
-    width : size_t
-        Width of matrix set (columns in bytes)
-    height : size_t
-        Height of matrix set (rows)
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`
-    """
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    err = cyruntime.cudaMemset2D(cydevPtr_ptr, pitch, value, width, height)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemset3D' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemset3D(pitchedDevPtr not None : cudaPitchedPtr, int value, extent not None : cudaExtent):
-    """ Initializes or sets device memory to a value.
-
-    Initializes each element of a 3D array to the specified value `value`.
-    The object to initialize is defined by `pitchedDevPtr`. The `pitch`
-    field of `pitchedDevPtr` is the width in memory in bytes of the 3D
-    array pointed to by `pitchedDevPtr`, including any padding added to the
-    end of each row. The `xsize` field specifies the logical width of each
-    row in bytes, while the `ysize` field specifies the height of each 2D
-    slice in rows. The `pitch` field of `pitchedDevPtr` is ignored when
-    `height` and `depth` are both equal to 1.
-
-    The extents of the initialized region are specified as a `width` in
-    bytes, a `height` in rows, and a `depth` in slices.
-
-    Extents with `width` greater than or equal to the `xsize` of
-    `pitchedDevPtr` may perform significantly faster than extents narrower
-    than the `xsize`. Secondarily, extents with `height` equal to the
-    `ysize` of `pitchedDevPtr` will perform faster than when the `height`
-    is shorter than the `ysize`.
-
-    This function performs fastest when the `pitchedDevPtr` has been
-    allocated by :py:obj:`~.cudaMalloc3D()`.
-
-    Note that this function is asynchronous with respect to the host unless
-    `pitchedDevPtr` refers to pinned host memory.
-
-    Parameters
-    ----------
-    pitchedDevPtr : :py:obj:`~.cudaPitchedPtr`
-        Pointer to pitched device memory
-    value : int
-        Value to set for each byte of specified memory
-    extent : :py:obj:`~.cudaExtent`
-        Size parameters for where to set device memory (`width` field in
-        bytes)
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent`
-    """
-    err = cyruntime.cudaMemset3D(pitchedDevPtr._ptr[0], value, extent._ptr[0])
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemsetAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemsetAsync(devPtr, int value, size_t count, stream):
-    """ Initializes or sets device memory to a value.
-
-    Fills the first `count` bytes of the memory area pointed to by `devPtr`
-    with the constant byte value `value`.
-
-    :py:obj:`~.cudaMemsetAsync()` is asynchronous with respect to the host,
-    so the call may return before the memset is complete. The operation can
-    optionally be associated to a stream by passing a non-zero `stream`
-    argument. If `stream` is non-zero, the operation may overlap with
-    operations in other streams.
-
-    The device version of this function only handles device to device
-    copies and cannot be given local or shared pointers.
-
-    Parameters
-    ----------
-    devPtr : Any
-        Pointer to device memory
-    value : int
-        Value to set for each byte of specified memory
-    count : size_t
-        Size in bytes to set
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32Async`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    with nogil:
-        err = cyruntime.cudaMemsetAsync(cydevPtr_ptr, value, count, cystream)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemset2DAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemset2DAsync(devPtr, size_t pitch, int value, size_t width, size_t height, stream):
-    """ Initializes or sets device memory to a value.
-
-    Sets to the specified value `value` a matrix (`height` rows of `width`
-    bytes each) pointed to by `dstPtr`. `pitch` is the width in bytes of
-    the 2D array pointed to by `dstPtr`, including any padding added to the
-    end of each row. This function performs fastest when the pitch is one
-    that has been passed back by :py:obj:`~.cudaMallocPitch()`.
-
-    :py:obj:`~.cudaMemset2DAsync()` is asynchronous with respect to the
-    host, so the call may return before the memset is complete. The
-    operation can optionally be associated to a stream by passing a non-
-    zero `stream` argument. If `stream` is non-zero, the operation may
-    overlap with operations in other streams.
-
-    The device version of this function only handles device to device
-    copies and cannot be given local or shared pointers.
-
-    Parameters
-    ----------
-    devPtr : Any
-        Pointer to 2D device memory
-    pitch : size_t
-        Pitch in bytes of 2D device memory(Unused if `height` is 1)
-    value : int
-        Value to set for each byte of specified memory
-    width : size_t
-        Width of matrix set (columns in bytes)
-    height : size_t
-        Height of matrix set (rows)
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32Async`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    err = cyruntime.cudaMemset2DAsync(cydevPtr_ptr, pitch, value, width, height, cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemset3DAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemset3DAsync(pitchedDevPtr not None : cudaPitchedPtr, int value, extent not None : cudaExtent, stream):
-    """ Initializes or sets device memory to a value.
-
-    Initializes each element of a 3D array to the specified value `value`.
-    The object to initialize is defined by `pitchedDevPtr`. The `pitch`
-    field of `pitchedDevPtr` is the width in memory in bytes of the 3D
-    array pointed to by `pitchedDevPtr`, including any padding added to the
-    end of each row. The `xsize` field specifies the logical width of each
-    row in bytes, while the `ysize` field specifies the height of each 2D
-    slice in rows. The `pitch` field of `pitchedDevPtr` is ignored when
-    `height` and `depth` are both equal to 1.
-
-    The extents of the initialized region are specified as a `width` in
-    bytes, a `height` in rows, and a `depth` in slices.
-
-    Extents with `width` greater than or equal to the `xsize` of
-    `pitchedDevPtr` may perform significantly faster than extents narrower
-    than the `xsize`. Secondarily, extents with `height` equal to the
-    `ysize` of `pitchedDevPtr` will perform faster than when the `height`
-    is shorter than the `ysize`.
-
-    This function performs fastest when the `pitchedDevPtr` has been
-    allocated by :py:obj:`~.cudaMalloc3D()`.
-
-    :py:obj:`~.cudaMemset3DAsync()` is asynchronous with respect to the
-    host, so the call may return before the memset is complete. The
-    operation can optionally be associated to a stream by passing a non-
-    zero `stream` argument. If `stream` is non-zero, the operation may
-    overlap with operations in other streams.
-
-    The device version of this function only handles device to device
-    copies and cannot be given local or shared pointers.
-
-    Parameters
-    ----------
-    pitchedDevPtr : :py:obj:`~.cudaPitchedPtr`
-        Pointer to pitched device memory
-    value : int
-        Value to set for each byte of specified memory
-    extent : :py:obj:`~.cudaExtent`
-        Size parameters for where to set device memory (`width` field in
-        bytes)
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    err = cyruntime.cudaMemset3DAsync(pitchedDevPtr._ptr[0], value, extent._ptr[0], cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemPrefetchAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPrefetchAsync(devPtr, size_t count, int dstDevice, stream):
-    """ Prefetches memory to the specified destination device.
-
-    Prefetches memory to the specified destination device. `devPtr` is the
-    base device pointer of the memory to be prefetched and `dstDevice` is
-    the destination device. `count` specifies the number of bytes to copy.
-    `stream` is the stream in which the operation is enqueued. The memory
-    range must refer to managed memory allocated via
-    :py:obj:`~.cudaMallocManaged` or declared via managed variables, or it
-    may also refer to system-allocated memory on systems with non-zero
-    cudaDevAttrPageableMemoryAccess.
-
-    Passing in cudaCpuDeviceId for `dstDevice` will prefetch the data to
-    host memory. If `dstDevice` is a GPU, then the device attribute
-    :py:obj:`~.cudaDevAttrConcurrentManagedAccess` must be non-zero.
-    Additionally, `stream` must be associated with a device that has a non-
-    zero value for the device attribute
-    :py:obj:`~.cudaDevAttrConcurrentManagedAccess`.
-
-    The start address and end address of the memory range will be rounded
-    down and rounded up respectively to be aligned to CPU page size before
-    the prefetch operation is enqueued in the stream.
-
-    If no physical memory has been allocated for this region, then this
-    memory region will be populated and mapped on the destination device.
-    If there's insufficient memory to prefetch the desired region, the
-    Unified Memory driver may evict pages from other
-    :py:obj:`~.cudaMallocManaged` allocations to host memory in order to
-    make room. Device memory allocated using :py:obj:`~.cudaMalloc` or
-    :py:obj:`~.cudaMallocArray` will not be evicted.
-
-    By default, any mappings to the previous location of the migrated pages
-    are removed and mappings for the new location are only setup on
-    `dstDevice`. The exact behavior however also depends on the settings
-    applied to this memory range via :py:obj:`~.cudaMemAdvise` as described
-    below:
-
-    If :py:obj:`~.cudaMemAdviseSetReadMostly` was set on any subset of this
-    memory range, then that subset will create a read-only copy of the
-    pages on `dstDevice`.
-
-    If :py:obj:`~.cudaMemAdviseSetPreferredLocation` was called on any
-    subset of this memory range, then the pages will be migrated to
-    `dstDevice` even if `dstDevice` is not the preferred location of any
-    pages in the memory range.
-
-    If :py:obj:`~.cudaMemAdviseSetAccessedBy` was called on any subset of
-    this memory range, then mappings to those pages from all the
-    appropriate processors are updated to refer to the new location if
-    establishing such a mapping is possible. Otherwise, those mappings are
-    cleared.
-
-    Note that this API is not required for functionality and only serves to
-    improve performance by allowing the application to migrate data to a
-    suitable location before it is accessed. Memory accesses to this range
-    are always coherent and are allowed even when the data is actively
-    being migrated.
-
-    Note that this function is asynchronous with respect to the host and
-    all work on other devices.
-
-    Parameters
-    ----------
-    devPtr : Any
-        Pointer to be prefetched
-    count : size_t
-        Size in bytes
-    dstDevice : int
-        Destination device to prefetch to
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to enqueue prefetch operation
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cudaMemAdvise_v2` :py:obj:`~.cuMemPrefetchAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    with nogil:
-        err = cyruntime.cudaMemPrefetchAsync(cydevPtr_ptr, count, dstDevice, cystream)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemPrefetchAsync_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPrefetchAsync_v2(devPtr, size_t count, location not None : cudaMemLocation, unsigned int flags, stream):
-    """ Prefetches memory to the specified destination location.
-
-    Prefetches memory to the specified destination location. `devPtr` is
-    the base device pointer of the memory to be prefetched and `location`
-    specifies the destination location. `count` specifies the number of
-    bytes to copy. `stream` is the stream in which the operation is
-    enqueued. The memory range must refer to managed memory allocated via
-    :py:obj:`~.cudaMallocManaged` or declared via managed variables, or it
-    may also refer to system-allocated memory on systems with non-zero
-    cudaDevAttrPageableMemoryAccess.
-
-    Specifying :py:obj:`~.cudaMemLocationTypeDevice` for
-    :py:obj:`~.cudaMemLocation.type` will prefetch memory to GPU specified
-    by device ordinal :py:obj:`~.cudaMemLocation.id` which must have non-
-    zero value for the device attribute
-    :py:obj:`~.concurrentManagedAccess`. Additionally, `stream` must be
-    associated with a device that has a non-zero value for the device
-    attribute :py:obj:`~.concurrentManagedAccess`. Specifying
-    :py:obj:`~.cudaMemLocationTypeHost` as :py:obj:`~.cudaMemLocation.type`
-    will prefetch data to host memory. Applications can request prefetching
-    memory to a specific host NUMA node by specifying
-    :py:obj:`~.cudaMemLocationTypeHostNuma` for
-    :py:obj:`~.cudaMemLocation.type` and a valid host NUMA node id in
-    :py:obj:`~.cudaMemLocation.id` Users can also request prefetching
-    memory to the host NUMA node closest to the current thread's CPU by
-    specifying :py:obj:`~.cudaMemLocationTypeHostNumaCurrent` for
-    :py:obj:`~.cudaMemLocation.type`. Note when
-    :py:obj:`~.cudaMemLocation.type` is etiher
-    :py:obj:`~.cudaMemLocationTypeHost` OR
-    :py:obj:`~.cudaMemLocationTypeHostNumaCurrent`,
-    :py:obj:`~.cudaMemLocation.id` will be ignored.
-
-    The start address and end address of the memory range will be rounded
-    down and rounded up respectively to be aligned to CPU page size before
-    the prefetch operation is enqueued in the stream.
-
-    If no physical memory has been allocated for this region, then this
-    memory region will be populated and mapped on the destination device.
-    If there's insufficient memory to prefetch the desired region, the
-    Unified Memory driver may evict pages from other
-    :py:obj:`~.cudaMallocManaged` allocations to host memory in order to
-    make room. Device memory allocated using :py:obj:`~.cudaMalloc` or
-    :py:obj:`~.cudaMallocArray` will not be evicted.
-
-    By default, any mappings to the previous location of the migrated pages
-    are removed and mappings for the new location are only setup on the
-    destination location. The exact behavior however also depends on the
-    settings applied to this memory range via :py:obj:`~.cuMemAdvise` as
-    described below:
-
-    If :py:obj:`~.cudaMemAdviseSetReadMostly` was set on any subset of this
-    memory range, then that subset will create a read-only copy of the
-    pages on destination location. If however the destination location is a
-    host NUMA node, then any pages of that subset that are already in
-    another host NUMA node will be transferred to the destination.
-
-    If :py:obj:`~.cudaMemAdviseSetPreferredLocation` was called on any
-    subset of this memory range, then the pages will be migrated to
-    `location` even if `location` is not the preferred location of any
-    pages in the memory range.
-
-    If :py:obj:`~.cudaMemAdviseSetAccessedBy` was called on any subset of
-    this memory range, then mappings to those pages from all the
-    appropriate processors are updated to refer to the new location if
-    establishing such a mapping is possible. Otherwise, those mappings are
-    cleared.
-
-    Note that this API is not required for functionality and only serves to
-    improve performance by allowing the application to migrate data to a
-    suitable location before it is accessed. Memory accesses to this range
-    are always coherent and are allowed even when the data is actively
-    being migrated.
-
-    Note that this function is asynchronous with respect to the host and
-    all work on other devices.
-
-    Parameters
-    ----------
-    devPtr : Any
-        Pointer to be prefetched
-    count : size_t
-        Size in bytes
-    location : :py:obj:`~.cudaMemLocation`
-        location to prefetch to
-    flags : unsigned int
-        flags for future use, must be zero now.
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream to enqueue prefetch operation
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cudaMemAdvise_v2` :py:obj:`~.cuMemPrefetchAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    with nogil:
-        err = cyruntime.cudaMemPrefetchAsync_v2(cydevPtr_ptr, count, location._ptr[0], flags, cystream)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemAdvise' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemAdvise(devPtr, size_t count, advice not None : cudaMemoryAdvise, int device):
-    """ Advise about the usage of a given memory range.
-
-    Advise the Unified Memory subsystem about the usage pattern for the
-    memory range starting at `devPtr` with a size of `count` bytes. The
-    start address and end address of the memory range will be rounded down
-    and rounded up respectively to be aligned to CPU page size before the
-    advice is applied. The memory range must refer to managed memory
-    allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
-    variables. The memory range could also refer to system-allocated
-    pageable memory provided it represents a valid, host-accessible region
-    of memory and all additional constraints imposed by `advice` as
-    outlined below are also satisfied. Specifying an invalid system-
-    allocated pageable memory range results in an error being returned.
-
-    The `advice` parameter can take the following values:
-
-    - :py:obj:`~.cudaMemAdviseSetReadMostly`: This implies that the data is
-      mostly going to be read from and only occasionally written to. Any
-      read accesses from any processor to this region will create a read-
-      only copy of at least the accessed pages in that processor's memory.
-      Additionally, if :py:obj:`~.cudaMemPrefetchAsync` is called on this
-      region, it will create a read-only copy of the data on the
-      destination processor. If any processor writes to this region, all
-      copies of the corresponding page will be invalidated except for the
-      one where the write occurred. The `device` argument is ignored for
-      this advice. Note that for a page to be read-duplicated, the
-      accessing processor must either be the CPU or a GPU that has a non-
-      zero value for the device attribute
-      :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. Also, if a context is
-      created on a device that does not have the device attribute
-      :py:obj:`~.cudaDevAttrConcurrentManagedAccess` set, then read-
-      duplication will not occur until all such contexts are destroyed. If
-      the memory region refers to valid system-allocated pageable memory,
-      then the accessing device must have a non-zero value for the device
-      attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess` for a read-only
-      copy to be created on that device. Note however that if the accessing
-      device also has a non-zero value for the device attribute
-      :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
-      setting this advice will not create a read-only copy when that device
-      accesses this memory region.
-
-    - :py:obj:`~.cudaMemAdviceUnsetReadMostly`: Undoes the effect of
-      :py:obj:`~.cudaMemAdviceReadMostly` and also prevents the Unified
-      Memory driver from attempting heuristic read-duplication on the
-      memory range. Any read-duplicated copies of the data will be
-      collapsed into a single copy. The location for the collapsed copy
-      will be the preferred location if the page has a preferred location
-      and one of the read-duplicated copies was resident at that location.
-      Otherwise, the location chosen is arbitrary.
-
-    - :py:obj:`~.cudaMemAdviseSetPreferredLocation`: This advice sets the
-      preferred location for the data to be the memory belonging to
-      `device`. Passing in cudaCpuDeviceId for `device` sets the preferred
-      location as host memory. If `device` is a GPU, then it must have a
-      non-zero value for the device attribute
-      :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. Setting the preferred
-      location does not cause data to migrate to that location immediately.
-      Instead, it guides the migration policy when a fault occurs on that
-      memory region. If the data is already in its preferred location and
-      the faulting processor can establish a mapping without requiring the
-      data to be migrated, then data migration will be avoided. On the
-      other hand, if the data is not in its preferred location or if a
-      direct mapping cannot be established, then it will be migrated to the
-      processor accessing it. It is important to note that setting the
-      preferred location does not prevent data prefetching done using
-      :py:obj:`~.cudaMemPrefetchAsync`. Having a preferred location can
-      override the page thrash detection and resolution logic in the
-      Unified Memory driver. Normally, if a page is detected to be
-      constantly thrashing between for example host and device memory, the
-      page may eventually be pinned to host memory by the Unified Memory
-      driver. But if the preferred location is set as device memory, then
-      the page will continue to thrash indefinitely. If
-      :py:obj:`~.cudaMemAdviseSetReadMostly` is also set on this memory
-      region or any subset of it, then the policies associated with that
-      advice will override the policies of this advice, unless read
-      accesses from `device` will not result in a read-only copy being
-      created on that device as outlined in description for the advice
-      :py:obj:`~.cudaMemAdviseSetReadMostly`. If the memory region refers
-      to valid system-allocated pageable memory, then `device` must have a
-      non-zero value for the device attribute
-      :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
-
-    - :py:obj:`~.cudaMemAdviseUnsetPreferredLocation`: Undoes the effect of
-      :py:obj:`~.cudaMemAdviseSetPreferredLocation` and changes the
-      preferred location to none.
-
-    - :py:obj:`~.cudaMemAdviseSetAccessedBy`: This advice implies that the
-      data will be accessed by `device`. Passing in
-      :py:obj:`~.cudaCpuDeviceId` for `device` will set the advice for the
-      CPU. If `device` is a GPU, then the device attribute
-      :py:obj:`~.cudaDevAttrConcurrentManagedAccess` must be non-zero. This
-      advice does not cause data migration and has no impact on the
-      location of the data per se. Instead, it causes the data to always be
-      mapped in the specified processor's page tables, as long as the
-      location of the data permits a mapping to be established. If the data
-      gets migrated for any reason, the mappings are updated accordingly.
-      This advice is recommended in scenarios where data locality is not
-      important, but avoiding faults is. Consider for example a system
-      containing multiple GPUs with peer-to-peer access enabled, where the
-      data located on one GPU is occasionally accessed by peer GPUs. In
-      such scenarios, migrating data over to the other GPUs is not as
-      important because the accesses are infrequent and the overhead of
-      migration may be too high. But preventing faults can still help
-      improve performance, and so having a mapping set up in advance is
-      useful. Note that on CPU access of this data, the data may be
-      migrated to host memory because the CPU typically cannot access
-      device memory directly. Any GPU that had the
-      :py:obj:`~.cudaMemAdviceSetAccessedBy` flag set for this data will
-      now have its mapping updated to point to the page in host memory. If
-      :py:obj:`~.cudaMemAdviseSetReadMostly` is also set on this memory
-      region or any subset of it, then the policies associated with that
-      advice will override the policies of this advice. Additionally, if
-      the preferred location of this memory region or any subset of it is
-      also `device`, then the policies associated with
-      :py:obj:`~.cudaMemAdviseSetPreferredLocation` will override the
-      policies of this advice. If the memory region refers to valid system-
-      allocated pageable memory, then `device` must have a non-zero value
-      for the device attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
-      Additionally, if `device` has a non-zero value for the device
-      attribute
-      :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
-      this call has no effect.
-
-    - :py:obj:`~.cudaMemAdviseUnsetAccessedBy`: Undoes the effect of
-      :py:obj:`~.cudaMemAdviseSetAccessedBy`. Any mappings to the data from
-      `device` may be removed at any time causing accesses to result in
-      non-fatal page faults. If the memory region refers to valid system-
-      allocated pageable memory, then `device` must have a non-zero value
-      for the device attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
-      Additionally, if `device` has a non-zero value for the device
-      attribute
-      :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
-      this call has no effect.
-
-    Parameters
-    ----------
-    devPtr : Any
-        Pointer to memory to set the advice for
-    count : size_t
-        Size in bytes of the memory range
-    advice : :py:obj:`~.cudaMemoryAdvise`
-        Advice to be applied for the specified memory range
-    device : int
-        Device to apply the advice for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cuMemAdvise`
-    """
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    cdef cyruntime.cudaMemoryAdvise cyadvice = advice.value
-    with nogil:
-        err = cyruntime.cudaMemAdvise(cydevPtr_ptr, count, cyadvice, device)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemAdvise_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemAdvise_v2(devPtr, size_t count, advice not None : cudaMemoryAdvise, location not None : cudaMemLocation):
-    """ Advise about the usage of a given memory range.
-
-    Advise the Unified Memory subsystem about the usage pattern for the
-    memory range starting at `devPtr` with a size of `count` bytes. The
-    start address and end address of the memory range will be rounded down
-    and rounded up respectively to be aligned to CPU page size before the
-    advice is applied. The memory range must refer to managed memory
-    allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
-    variables. The memory range could also refer to system-allocated
-    pageable memory provided it represents a valid, host-accessible region
-    of memory and all additional constraints imposed by `advice` as
-    outlined below are also satisfied. Specifying an invalid system-
-    allocated pageable memory range results in an error being returned.
-
-    The `advice` parameter can take the following values:
-
-    - :py:obj:`~.cudaMemAdviseSetReadMostly`: This implies that the data is
-      mostly going to be read from and only occasionally written to. Any
-      read accesses from any processor to this region will create a read-
-      only copy of at least the accessed pages in that processor's memory.
-      Additionally, if :py:obj:`~.cudaMemPrefetchAsync` or
-      :py:obj:`~.cudaMemPrefetchAsync_v2` is called on this region, it will
-      create a read-only copy of the data on the destination processor. If
-      the target location for :py:obj:`~.cudaMemPrefetchAsync_v2` is a host
-      NUMA node and a read-only copy already exists on another host NUMA
-      node, that copy will be migrated to the targeted host NUMA node. If
-      any processor writes to this region, all copies of the corresponding
-      page will be invalidated except for the one where the write occurred.
-      If the writing processor is the CPU and the preferred location of the
-      page is a host NUMA node, then the page will also be migrated to that
-      host NUMA node. The `location` argument is ignored for this advice.
-      Note that for a page to be read-duplicated, the accessing processor
-      must either be the CPU or a GPU that has a non-zero value for the
-      device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`.
-      Also, if a context is created on a device that does not have the
-      device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess` set,
-      then read-duplication will not occur until all such contexts are
-      destroyed. If the memory region refers to valid system-allocated
-      pageable memory, then the accessing device must have a non-zero value
-      for the device attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess`
-      for a read-only copy to be created on that device. Note however that
-      if the accessing device also has a non-zero value for the device
-      attribute
-      :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
-      setting this advice will not create a read-only copy when that device
-      accesses this memory region.
-
-    - :py:obj:`~.cudaMemAdviceUnsetReadMostly`: Undoes the effect of
-      :py:obj:`~.cudaMemAdviseSetReadMostly` and also prevents the Unified
-      Memory driver from attempting heuristic read-duplication on the
-      memory range. Any read-duplicated copies of the data will be
-      collapsed into a single copy. The location for the collapsed copy
-      will be the preferred location if the page has a preferred location
-      and one of the read-duplicated copies was resident at that location.
-      Otherwise, the location chosen is arbitrary. Note: The `location`
-      argument is ignored for this advice.
-
-    - :py:obj:`~.cudaMemAdviseSetPreferredLocation`: This advice sets the
-      preferred location for the data to be the memory belonging to
-      `location`. When :py:obj:`~.cudaMemLocation.type` is
-      :py:obj:`~.cudaMemLocationTypeHost`, :py:obj:`~.cudaMemLocation.id`
-      is ignored and the preferred location is set to be host memory. To
-      set the preferred location to a specific host NUMA node, applications
-      must set :py:obj:`~.cudaMemLocation.type` to
-      :py:obj:`~.cudaMemLocationTypeHostNuma` and
-      :py:obj:`~.cudaMemLocation.id` must specify the NUMA ID of the host
-      NUMA node. If :py:obj:`~.cudaMemLocation.type` is set to
-      :py:obj:`~.cudaMemLocationTypeHostNumaCurrent`,
-      :py:obj:`~.cudaMemLocation.id` will be ignored and the host NUMA node
-      closest to the calling thread's CPU will be used as the preferred
-      location. If :py:obj:`~.cudaMemLocation.type` is a
-      :py:obj:`~.cudaMemLocationTypeDevice`, then
-      :py:obj:`~.cudaMemLocation.id` must be a valid device ordinal and the
-      device must have a non-zero value for the device attribute
-      :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. Setting the preferred
-      location does not cause data to migrate to that location immediately.
-      Instead, it guides the migration policy when a fault occurs on that
-      memory region. If the data is already in its preferred location and
-      the faulting processor can establish a mapping without requiring the
-      data to be migrated, then data migration will be avoided. On the
-      other hand, if the data is not in its preferred location or if a
-      direct mapping cannot be established, then it will be migrated to the
-      processor accessing it. It is important to note that setting the
-      preferred location does not prevent data prefetching done using
-      :py:obj:`~.cudaMemPrefetchAsync`. Having a preferred location can
-      override the page thrash detection and resolution logic in the
-      Unified Memory driver. Normally, if a page is detected to be
-      constantly thrashing between for example host and device memory, the
-      page may eventually be pinned to host memory by the Unified Memory
-      driver. But if the preferred location is set as device memory, then
-      the page will continue to thrash indefinitely. If
-      :py:obj:`~.cudaMemAdviseSetReadMostly` is also set on this memory
-      region or any subset of it, then the policies associated with that
-      advice will override the policies of this advice, unless read
-      accesses from `location` will not result in a read-only copy being
-      created on that procesor as outlined in description for the advice
-      :py:obj:`~.cudaMemAdviseSetReadMostly`. If the memory region refers
-      to valid system-allocated pageable memory, and
-      :py:obj:`~.cudaMemLocation.type` is
-      :py:obj:`~.cudaMemLocationTypeDevice` then
-      :py:obj:`~.cudaMemLocation.id` must be a valid device that has a non-
-      zero alue for the device attribute
-      :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
-
-    - :py:obj:`~.cudaMemAdviseUnsetPreferredLocation`: Undoes the effect of
-      :py:obj:`~.cudaMemAdviseSetPreferredLocation` and changes the
-      preferred location to none. The `location` argument is ignored for
-      this advice.
-
-    - :py:obj:`~.cudaMemAdviseSetAccessedBy`: This advice implies that the
-      data will be accessed by processor `location`. The
-      :py:obj:`~.cudaMemLocation.type` must be either
-      :py:obj:`~.cudaMemLocationTypeDevice` with
-      :py:obj:`~.cudaMemLocation.id` representing a valid device ordinal or
-      :py:obj:`~.cudaMemLocationTypeHost` and
-      :py:obj:`~.cudaMemLocation.id` will be ignored. All other location
-      types are invalid. If :py:obj:`~.cudaMemLocation.id` is a GPU, then
-      the device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`
-      must be non-zero. This advice does not cause data migration and has
-      no impact on the location of the data per se. Instead, it causes the
-      data to always be mapped in the specified processor's page tables, as
-      long as the location of the data permits a mapping to be established.
-      If the data gets migrated for any reason, the mappings are updated
-      accordingly. This advice is recommended in scenarios where data
-      locality is not important, but avoiding faults is. Consider for
-      example a system containing multiple GPUs with peer-to-peer access
-      enabled, where the data located on one GPU is occasionally accessed
-      by peer GPUs. In such scenarios, migrating data over to the other
-      GPUs is not as important because the accesses are infrequent and the
-      overhead of migration may be too high. But preventing faults can
-      still help improve performance, and so having a mapping set up in
-      advance is useful. Note that on CPU access of this data, the data may
-      be migrated to host memory because the CPU typically cannot access
-      device memory directly. Any GPU that had the
-      :py:obj:`~.cudaMemAdviseSetAccessedBy` flag set for this data will
-      now have its mapping updated to point to the page in host memory. If
-      :py:obj:`~.cudaMemAdviseSetReadMostly` is also set on this memory
-      region or any subset of it, then the policies associated with that
-      advice will override the policies of this advice. Additionally, if
-      the preferred location of this memory region or any subset of it is
-      also `location`, then the policies associated with
-      :py:obj:`~.CU_MEM_ADVISE_SET_PREFERRED_LOCATION` will override the
-      policies of this advice. If the memory region refers to valid system-
-      allocated pageable memory, and :py:obj:`~.cudaMemLocation.type` is
-      :py:obj:`~.cudaMemLocationTypeDevice` then device in
-      :py:obj:`~.cudaMemLocation.id` must have a non-zero value for the
-      device attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
-      Additionally, if :py:obj:`~.cudaMemLocation.id` has a non-zero value
-      for the device attribute
-      :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
-      this call has no effect.
-
-    - :py:obj:`~.CU_MEM_ADVISE_UNSET_ACCESSED_BY`: Undoes the effect of
-      :py:obj:`~.cudaMemAdviseSetAccessedBy`. Any mappings to the data from
-      `location` may be removed at any time causing accesses to result in
-      non-fatal page faults. If the memory region refers to valid system-
-      allocated pageable memory, and :py:obj:`~.cudaMemLocation.type` is
-      :py:obj:`~.cudaMemLocationTypeDevice` then device in
-      :py:obj:`~.cudaMemLocation.id` must have a non-zero value for the
-      device attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
-      Additionally, if :py:obj:`~.cudaMemLocation.id` has a non-zero value
-      for the device attribute
-      :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
-      this call has no effect.
-
-    Parameters
-    ----------
-    devPtr : Any
-        Pointer to memory to set the advice for
-    count : size_t
-        Size in bytes of the memory range
-    advice : :py:obj:`~.cudaMemoryAdvise`
-        Advice to be applied for the specified memory range
-    location : :py:obj:`~.cudaMemLocation`
-        location to apply the advice for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cuMemAdvise`, :py:obj:`~.cuMemAdvise_v2`
-    """
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    cdef cyruntime.cudaMemoryAdvise cyadvice = advice.value
-    with nogil:
-        err = cyruntime.cudaMemAdvise_v2(cydevPtr_ptr, count, cyadvice, location._ptr[0])
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemRangeGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemRangeGetAttribute(size_t dataSize, attribute not None : cudaMemRangeAttribute, devPtr, size_t count):
-    """ Query an attribute of a given memory range.
-
-    Query an attribute about the memory range starting at `devPtr` with a
-    size of `count` bytes. The memory range must refer to managed memory
-    allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
-    variables.
-
-    The `attribute` parameter can take the following values:
-
-    - :py:obj:`~.cudaMemRangeAttributeReadMostly`: If this attribute is
-      specified, `data` will be interpreted as a 32-bit integer, and
-      `dataSize` must be 4. The result returned will be 1 if all pages in
-      the given memory range have read-duplication enabled, or 0 otherwise.
-
-    - :py:obj:`~.cudaMemRangeAttributePreferredLocation`: If this attribute
-      is specified, `data` will be interpreted as a 32-bit integer, and
-      `dataSize` must be 4. The result returned will be a GPU device id if
-      all pages in the memory range have that GPU as their preferred
-      location, or it will be cudaCpuDeviceId if all pages in the memory
-      range have the CPU as their preferred location, or it will be
-      cudaInvalidDeviceId if either all the pages don't have the same
-      preferred location or some of the pages don't have a preferred
-      location at all. Note that the actual location of the pages in the
-      memory range at the time of the query may be different from the
-      preferred location.
-
-    - :py:obj:`~.cudaMemRangeAttributeAccessedBy`: If this attribute is
-      specified, `data` will be interpreted as an array of 32-bit integers,
-      and `dataSize` must be a non-zero multiple of 4. The result returned
-      will be a list of device ids that had
-      :py:obj:`~.cudaMemAdviceSetAccessedBy` set for that entire memory
-      range. If any device does not have that advice set for the entire
-      memory range, that device will not be included. If `data` is larger
-      than the number of devices that have that advice set for that memory
-      range, cudaInvalidDeviceId will be returned in all the extra space
-      provided. For ex., if `dataSize` is 12 (i.e. `data` has 3 elements)
-      and only device 0 has the advice set, then the result returned will
-      be { 0, cudaInvalidDeviceId, cudaInvalidDeviceId }. If `data` is
-      smaller than the number of devices that have that advice set, then
-      only as many devices will be returned as can fit in the array. There
-      is no guarantee on which specific devices will be returned, however.
-
-    - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocation`: If this
-      attribute is specified, `data` will be interpreted as a 32-bit
-      integer, and `dataSize` must be 4. The result returned will be the
-      last location to which all pages in the memory range were prefetched
-      explicitly via :py:obj:`~.cudaMemPrefetchAsync`. This will either be
-      a GPU id or cudaCpuDeviceId depending on whether the last location
-      for prefetch was a GPU or the CPU respectively. If any page in the
-      memory range was never explicitly prefetched or if all pages were not
-      prefetched to the same location, cudaInvalidDeviceId will be
-      returned. Note that this simply returns the last location that the
-      applicaton requested to prefetch the memory range to. It gives no
-      indication as to whether the prefetch operation to that location has
-      completed or even begun.
-
-      - :py:obj:`~.cudaMemRangeAttributePreferredLocationType`: If this
-        attribute is specified, `data` will be interpreted as a
-        :py:obj:`~.cudaMemLocationType`, and `dataSize` must be
-        sizeof(cudaMemLocationType). The :py:obj:`~.cudaMemLocationType`
-        returned will be :py:obj:`~.cudaMemLocationTypeDevice` if all pages
-        in the memory range have the same GPU as their preferred location,
-        or :py:obj:`~.cudaMemLocationType` will be
-        :py:obj:`~.cudaMemLocationTypeHost` if all pages in the memory
-        range have the CPU as their preferred location, or or it will be
-        :py:obj:`~.cudaMemLocationTypeHostNuma` if all the pages in the
-        memory range have the same host NUMA node ID as their preferred
-        location or it will be :py:obj:`~.cudaMemLocationTypeInvalid` if
-        either all the pages don't have the same preferred location or some
-        of the pages don't have a preferred location at all. Note that the
-        actual location type of the pages in the memory range at the time
-        of the query may be different from the preferred location type.
-
-    - :py:obj:`~.cudaMemRangeAttributePreferredLocationId`: If this
-    attribute is specified, `data` will be interpreted as a 32-bit integer,
-    and `dataSize` must be 4. If the
-    :py:obj:`~.cudaMemRangeAttributePreferredLocationType` query for the
-    same address range returns :py:obj:`~.cudaMemLocationTypeDevice`, it
-    will be a valid device ordinal or if it returns
-    :py:obj:`~.cudaMemLocationTypeHostNuma`, it will be a valid host NUMA
-    node ID or if it returns any other location type, the id should be
-    ignored.
-
-      - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationType`: If this
-        attribute is specified, `data` will be interpreted as a
-        :py:obj:`~.cudaMemLocationType`, and `dataSize` must be
-        sizeof(cudaMemLocationType). The result returned will be the last
-        location type to which all pages in the memory range were
-        prefetched explicitly via :py:obj:`~.cuMemPrefetchAsync`. The
-        :py:obj:`~.cudaMemLocationType` returned will be
-        :py:obj:`~.cudaMemLocationTypeDevice` if the last prefetch location
-        was the GPU or :py:obj:`~.cudaMemLocationTypeHost` if it was the
-        CPU or :py:obj:`~.cudaMemLocationTypeHostNuma` if the last prefetch
-        location was a specific host NUMA node. If any page in the memory
-        range was never explicitly prefetched or if all pages were not
-        prefetched to the same location, :py:obj:`~.CUmemLocationType` will
-        be :py:obj:`~.cudaMemLocationTypeInvalid`. Note that this simply
-        returns the last location type that the application requested to
-        prefetch the memory range to. It gives no indication as to whether
-        the prefetch operation to that location has completed or even
-        begun.
-
-    - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationId`: If this
-    attribute is specified, `data` will be interpreted as a 32-bit integer,
-    and `dataSize` must be 4. If the
-    :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationType` query for the
-    same address range returns :py:obj:`~.cudaMemLocationTypeDevice`, it
-    will be a valid device ordinal or if it returns
-    :py:obj:`~.cudaMemLocationTypeHostNuma`, it will be a valid host NUMA
-    node ID or if it returns any other location type, the id should be
-    ignored.
-
-    Parameters
-    ----------
-    dataSize : size_t
-        Array containing the size of data
-    attribute : :py:obj:`~.cudaMemRangeAttribute`
-        The attribute to query
-    devPtr : Any
-        Start of the range to query
-    count : size_t
-        Size of the range to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    data : Any
-        A pointers to a memory location where the result of each attribute
-        query will be written to.
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemRangeGetAttributes`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cuMemRangeGetAttribute`
-    """
-    cdef utils.HelperCUmem_range_attribute cydata = utils.HelperCUmem_range_attribute(attribute, dataSize)
-    cdef void* cydata_ptr = <void*><void_ptr>cydata.cptr
-    cdef cyruntime.cudaMemRangeAttribute cyattribute = attribute.value
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    err = cyruntime.cudaMemRangeGetAttribute(cydata_ptr, dataSize, cyattribute, cydevPtr_ptr, count)
-    return (cudaError_t(err), cydata.pyObj())
-{{endif}}
-
-{{if 'cudaMemRangeGetAttributes' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemRangeGetAttributes(dataSizes : Tuple[int] | List[int], attributes : Optional[Tuple[cudaMemRangeAttribute] | List[cudaMemRangeAttribute]], size_t numAttributes, devPtr, size_t count):
-    """ Query attributes of a given memory range.
-
-    Query attributes of the memory range starting at `devPtr` with a size
-    of `count` bytes. The memory range must refer to managed memory
-    allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
-    variables. The `attributes` array will be interpreted to have
-    `numAttributes` entries. The `dataSizes` array will also be interpreted
-    to have `numAttributes` entries. The results of the query will be
-    stored in `data`.
-
-    The list of supported attributes are given below. Please refer to
-    :py:obj:`~.cudaMemRangeGetAttribute` for attribute descriptions and
-    restrictions.
-
-    - :py:obj:`~.cudaMemRangeAttributeReadMostly`
-
-    - :py:obj:`~.cudaMemRangeAttributePreferredLocation`
-
-    - :py:obj:`~.cudaMemRangeAttributeAccessedBy`
-
-    - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocation`
-
-    - :: cudaMemRangeAttributePreferredLocationType
-
-    - :: cudaMemRangeAttributePreferredLocationId
-
-    - :: cudaMemRangeAttributeLastPrefetchLocationType
-
-    - :: cudaMemRangeAttributeLastPrefetchLocationId
-
-    Parameters
-    ----------
-    dataSizes : List[int]
-        Array containing the sizes of each result
-    attributes : List[:py:obj:`~.cudaMemRangeAttribute`]
-        An array of attributes to query (numAttributes and the number of
-        attributes in this array should match)
-    numAttributes : size_t
-        Number of attributes to query
-    devPtr : Any
-        Start of the range to query
-    count : size_t
-        Size of the range to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    data : List[Any]
-        A two-dimensional array containing pointers to memory locations
-        where the result of each attribute query will be written to.
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemRangeGetAttribute`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cuMemRangeGetAttributes`
-    """
-    attributes = [] if attributes is None else attributes
-    if not all(isinstance(_x, (cudaMemRangeAttribute)) for _x in attributes):
-        raise TypeError("Argument 'attributes' is not instance of type (expected Tuple[cyruntime.cudaMemRangeAttribute] or List[cyruntime.cudaMemRangeAttribute]")
-    if not all(isinstance(_x, (int)) for _x in dataSizes):
-        raise TypeError("Argument 'dataSizes' is not instance of type (expected Tuple[int] or List[int]")
-    pylist = [utils.HelperCUmem_range_attribute(pyattributes, pydataSizes) for (pyattributes, pydataSizes) in zip(attributes, dataSizes)]
-    cdef utils.InputVoidPtrPtrHelper voidStarHelperdata = utils.InputVoidPtrPtrHelper(pylist)
-    cdef void** cyvoidStarHelper_ptr = <void**><void_ptr>voidStarHelperdata.cptr
-    cdef vector[size_t] cydataSizes = dataSizes
-    cdef vector[cyruntime.cudaMemRangeAttribute] cyattributes = [pyattributes.value for pyattributes in (attributes)]
-    if numAttributes > <size_t>len(dataSizes): raise RuntimeError("List is too small: " + str(len(dataSizes)) + " < " + str(numAttributes))
-    if numAttributes > <size_t>len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes))
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    err = cyruntime.cudaMemRangeGetAttributes(cyvoidStarHelper_ptr, cydataSizes.data(), cyattributes.data(), numAttributes, cydevPtr_ptr, count)
-    return (cudaError_t(err), [obj.pyObj() for obj in pylist])
-{{endif}}
-
-{{if 'cudaMemcpyToArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpyToArray(dst, size_t wOffset, size_t hOffset, src, size_t count, kind not None : cudaMemcpyKind):
-    """ Copies data between host and device.
-
-    [Deprecated]
-
-    Copies `count` bytes from the memory area pointed to by `src` to the
-    CUDA array `dst` starting at `hOffset` rows and `wOffset` bytes from
-    the upper left corner, where `kind` specifies the direction of the
-    copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
-    :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing.
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.cudaArray_t`
-        Destination memory address
-    wOffset : size_t
-        Destination starting X offset (columns in bytes)
-    hOffset : size_t
-        Destination starting Y offset (rows)
-    src : Any
-        Source memory address
-    count : size_t
-        Size in bytes to copy
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyDtoA`
-    """
-    cdef cyruntime.cudaArray_t cydst
-    if dst is None:
-        cydst = <cyruntime.cudaArray_t><void_ptr>0
-    elif isinstance(dst, (cudaArray_t,)):
-        pdst = int(dst)
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    else:
-        pdst = int(cudaArray_t(dst))
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    err = cyruntime.cudaMemcpyToArray(cydst, wOffset, hOffset, cysrc_ptr, count, cykind)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpyFromArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpyFromArray(dst, src, size_t wOffset, size_t hOffset, size_t count, kind not None : cudaMemcpyKind):
-    """ Copies data between host and device.
-
-    [Deprecated]
-
-    Copies `count` bytes from the CUDA array `src` starting at `hOffset`
-    rows and `wOffset` bytes from the upper left corner to the memory area
-    pointed to by `dst`, where `kind` specifies the direction of the copy,
-    and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
-    :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing.
-
-    Parameters
-    ----------
-    dst : Any
-        Destination memory address
-    src : :py:obj:`~.cudaArray_const_t`
-        Source memory address
-    wOffset : size_t
-        Source starting X offset (columns in bytes)
-    hOffset : size_t
-        Source starting Y offset (rows)
-    count : size_t
-        Size in bytes to copy
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoD`
-    """
-    cdef cyruntime.cudaArray_const_t cysrc
-    if src is None:
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>0
-    elif isinstance(src, (cudaArray_const_t,)):
-        psrc = int(src)
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    else:
-        psrc = int(cudaArray_const_t(src))
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    err = cyruntime.cudaMemcpyFromArray(cydst_ptr, cysrc, wOffset, hOffset, count, cykind)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpyArrayToArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpyArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, kind not None : cudaMemcpyKind):
-    """ Copies data between host and device.
-
-    [Deprecated]
-
-    Copies `count` bytes from the CUDA array `src` starting at `hOffsetSrc`
-    rows and `wOffsetSrc` bytes from the upper left corner to the CUDA
-    array `dst` starting at `hOffsetDst` rows and `wOffsetDst` bytes from
-    the upper left corner, where `kind` specifies the direction of the
-    copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
-    :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing.
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.cudaArray_t`
-        Destination memory address
-    wOffsetDst : size_t
-        Destination starting X offset (columns in bytes)
-    hOffsetDst : size_t
-        Destination starting Y offset (rows)
-    src : :py:obj:`~.cudaArray_const_t`
-        Source memory address
-    wOffsetSrc : size_t
-        Source starting X offset (columns in bytes)
-    hOffsetSrc : size_t
-        Source starting Y offset (rows)
-    count : size_t
-        Size in bytes to copy
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoA`
-    """
-    cdef cyruntime.cudaArray_const_t cysrc
-    if src is None:
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>0
-    elif isinstance(src, (cudaArray_const_t,)):
-        psrc = int(src)
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    else:
-        psrc = int(cudaArray_const_t(src))
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    cdef cyruntime.cudaArray_t cydst
-    if dst is None:
-        cydst = <cyruntime.cudaArray_t><void_ptr>0
-    elif isinstance(dst, (cudaArray_t,)):
-        pdst = int(dst)
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    else:
-        pdst = int(cudaArray_t(dst))
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    err = cyruntime.cudaMemcpyArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, count, cykind)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpyToArrayAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpyToArrayAsync(dst, size_t wOffset, size_t hOffset, src, size_t count, kind not None : cudaMemcpyKind, stream):
-    """ Copies data between host and device.
-
-    [Deprecated]
-
-    Copies `count` bytes from the memory area pointed to by `src` to the
-    CUDA array `dst` starting at `hOffset` rows and `wOffset` bytes from
-    the upper left corner, where `kind` specifies the direction of the
-    copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
-    :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing.
-
-    :py:obj:`~.cudaMemcpyToArrayAsync()` is asynchronous with respect to
-    the host, so the call may return before the copy is complete. The copy
-    can optionally be associated to a stream by passing a non-zero `stream`
-    argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
-    :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
-    may overlap with operations in other streams.
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.cudaArray_t`
-        Destination memory address
-    wOffset : size_t
-        Destination starting X offset (columns in bytes)
-    hOffset : size_t
-        Destination starting Y offset (rows)
-    src : Any
-        Source memory address
-    count : size_t
-        Size in bytes to copy
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpy2DAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaArray_t cydst
-    if dst is None:
-        cydst = <cyruntime.cudaArray_t><void_ptr>0
-    elif isinstance(dst, (cudaArray_t,)):
-        pdst = int(dst)
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    else:
-        pdst = int(cudaArray_t(dst))
-        cydst = <cyruntime.cudaArray_t><void_ptr>pdst
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    err = cyruntime.cudaMemcpyToArrayAsync(cydst, wOffset, hOffset, cysrc_ptr, count, cykind, cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemcpyFromArrayAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemcpyFromArrayAsync(dst, src, size_t wOffset, size_t hOffset, size_t count, kind not None : cudaMemcpyKind, stream):
-    """ Copies data between host and device.
-
-    [Deprecated]
-
-    Copies `count` bytes from the CUDA array `src` starting at `hOffset`
-    rows and `wOffset` bytes from the upper left corner to the memory area
-    pointed to by `dst`, where `kind` specifies the direction of the copy,
-    and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
-    :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing.
-
-    :py:obj:`~.cudaMemcpyFromArrayAsync()` is asynchronous with respect to
-    the host, so the call may return before the copy is complete. The copy
-    can optionally be associated to a stream by passing a non-zero `stream`
-    argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
-    :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
-    may overlap with operations in other streams.
-
-    Parameters
-    ----------
-    dst : Any
-        Destination memory address
-    src : :py:obj:`~.cudaArray_const_t`
-        Source memory address
-    wOffset : size_t
-        Source starting X offset (columns in bytes)
-    hOffset : size_t
-        Source starting Y offset (rows)
-    count : size_t
-        Size in bytes to copy
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream identifier
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpy2DAsync`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaArray_const_t cysrc
-    if src is None:
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>0
-    elif isinstance(src, (cudaArray_const_t,)):
-        psrc = int(src)
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    else:
-        psrc = int(cudaArray_const_t(src))
-        cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    err = cyruntime.cudaMemcpyFromArrayAsync(cydst_ptr, cysrc, wOffset, hOffset, count, cykind, cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMallocAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMallocAsync(size_t size, hStream):
-    """ Allocates memory with stream ordered semantics.
-
-    Inserts an allocation operation into `hStream`. A pointer to the
-    allocated memory is returned immediately in *dptr. The allocation must
-    not be accessed until the the allocation operation completes. The
-    allocation comes from the memory pool associated with the stream's
-    device.
-
-    Parameters
-    ----------
-    size : size_t
-        Number of bytes to allocate
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream establishing the stream ordering contract and the memory
-        pool to allocate from
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorOutOfMemory`,
-    devPtr : Any
-        Returned device pointer
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAllocAsync`, cudaMallocAsync (C++ API), :py:obj:`~.cudaMallocFromPoolAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolSetAccess`, :py:obj:`~.cudaMemPoolSetAttribute`, :py:obj:`~.cudaMemPoolGetAttribute`
-
-    Notes
-    -----
-    The default memory pool of a device contains device memory from that device.
-
-    Basic stream ordering allows future work submitted into the same stream to use the allocation. Stream query, stream synchronize, and CUDA events can be used to guarantee that the allocation operation completes before work submitted in a separate stream runs.
-
-    During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.
-    """
-    cdef cyruntime.cudaStream_t cyhStream
-    if hStream is None:
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
-        phStream = int(hStream)
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    else:
-        phStream = int(cudaStream_t(hStream))
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    cdef void_ptr devPtr = 0
-    with nogil:
-        err = cyruntime.cudaMallocAsync(<void**>&devPtr, size, cyhStream)
-
-    return (cudaError_t(err), devPtr)
-{{endif}}
-
-{{if 'cudaFreeAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaFreeAsync(devPtr, hStream):
-    """ Frees memory with stream ordered semantics.
-
-    Inserts a free operation into `hStream`. The allocation must not be
-    accessed after stream execution reaches the free. After this API
-    returns, accessing the memory from any subsequent work launched on the
-    GPU or querying its pointer attributes results in undefined behavior.
-
-    Parameters
-    ----------
-    dptr : Any
-        memory to free
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream establishing the stream ordering promise
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cudaMallocAsync`
-
-    Notes
-    -----
-    During stream capture, this function results in the creation of a free node and must therefore be passed the address of a graph allocation.
-    """
-    cdef cyruntime.cudaStream_t cyhStream
-    if hStream is None:
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
-        phStream = int(hStream)
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    else:
-        phStream = int(cudaStream_t(hStream))
-        cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
-    cydevPtr = utils.HelperInputVoidPtr(devPtr)
-    cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
-    with nogil:
-        err = cyruntime.cudaFreeAsync(cydevPtr_ptr, cyhStream)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemPoolTrimTo' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPoolTrimTo(memPool, size_t minBytesToKeep):
-    """ Tries to release memory back to the OS.
-
-    Releases memory back to the OS until the pool contains fewer than
-    minBytesToKeep reserved bytes, or there is no more memory that the
-    allocator can safely release. The allocator cannot release OS
-    allocations that back outstanding asynchronous allocations. The OS
-    allocations may happen at different granularity from the user
-    allocations.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        The memory pool to trim
-    minBytesToKeep : size_t
-        If the pool has less than minBytesToKeep reserved, the TrimTo
-        operation is a no-op. Otherwise the pool will be guaranteed to have
-        at least minBytesToKeep bytes reserved after the operation.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolTrimTo`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
-
-    Notes
-    -----
-    : Allocations that have not been freed count as outstanding.
-
-    : Allocations that have been asynchronously freed but whose completion has not been observed on the host (eg. by a synchronize) can count as outstanding.
-    """
-    cdef cyruntime.cudaMemPool_t cymemPool
-    if memPool is None:
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>0
-    elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
-        pmemPool = int(memPool)
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    else:
-        pmemPool = int(cudaMemPool_t(memPool))
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    with nogil:
-        err = cyruntime.cudaMemPoolTrimTo(cymemPool, minBytesToKeep)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemPoolSetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPoolSetAttribute(memPool, attr not None : cudaMemPoolAttr, value):
-    """ Sets attributes of a memory pool.
-
-    Supported attributes are:
-
-    - :py:obj:`~.cudaMemPoolAttrReleaseThreshold`: (value type =
-      cuuint64_t) Amount of reserved memory in bytes to hold onto before
-      trying to release memory back to the OS. When more than the release
-      threshold bytes of memory are held by the memory pool, the allocator
-      will try to release memory back to the OS on the next call to stream,
-      event or context synchronize. (default 0)
-
-    - :py:obj:`~.cudaMemPoolReuseFollowEventDependencies`: (value type =
-      int) Allow :py:obj:`~.cudaMallocAsync` to use memory asynchronously
-      freed in another stream as long as a stream ordering dependency of
-      the allocating stream on the free action exists. Cuda events and null
-      stream interactions can create the required stream ordered
-      dependencies. (default enabled)
-
-    - :py:obj:`~.cudaMemPoolReuseAllowOpportunistic`: (value type = int)
-      Allow reuse of already completed frees when there is no dependency
-      between the free and allocation. (default enabled)
-
-    - :py:obj:`~.cudaMemPoolReuseAllowInternalDependencies`: (value type =
-      int) Allow :py:obj:`~.cudaMallocAsync` to insert new stream
-      dependencies in order to establish the stream ordering required to
-      reuse a piece of memory released by :py:obj:`~.cudaFreeAsync`
-      (default enabled).
-
-    - :py:obj:`~.cudaMemPoolAttrReservedMemHigh`: (value type = cuuint64_t)
-      Reset the high watermark that tracks the amount of backing memory
-      that was allocated for the memory pool. It is illegal to set this
-      attribute to a non-zero value.
-
-    - :py:obj:`~.cudaMemPoolAttrUsedMemHigh`: (value type = cuuint64_t)
-      Reset the high watermark that tracks the amount of used memory that
-      was allocated for the memory pool. It is illegal to set this
-      attribute to a non-zero value.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        The memory pool to modify
-    attr : :py:obj:`~.cudaMemPoolAttr`
-        The attribute to modify
-    value : Any
-        Pointer to the value to assign
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolSetAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
-    """
-    cdef cyruntime.cudaMemPool_t cymemPool
-    if memPool is None:
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>0
-    elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
-        pmemPool = int(memPool)
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    else:
-        pmemPool = int(cudaMemPool_t(memPool))
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    cdef cyruntime.cudaMemPoolAttr cyattr = attr.value
-    cdef utils.HelperCUmemPool_attribute cyvalue = utils.HelperCUmemPool_attribute(attr, value, is_getter=False)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    with nogil:
-        err = cyruntime.cudaMemPoolSetAttribute(cymemPool, cyattr, cyvalue_ptr)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemPoolGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPoolGetAttribute(memPool, attr not None : cudaMemPoolAttr):
-    """ Gets attributes of a memory pool.
-
-    Supported attributes are:
-
-    - :py:obj:`~.cudaMemPoolAttrReleaseThreshold`: (value type =
-      cuuint64_t) Amount of reserved memory in bytes to hold onto before
-      trying to release memory back to the OS. When more than the release
-      threshold bytes of memory are held by the memory pool, the allocator
-      will try to release memory back to the OS on the next call to stream,
-      event or context synchronize. (default 0)
-
-    - :py:obj:`~.cudaMemPoolReuseFollowEventDependencies`: (value type =
-      int) Allow :py:obj:`~.cudaMallocAsync` to use memory asynchronously
-      freed in another stream as long as a stream ordering dependency of
-      the allocating stream on the free action exists. Cuda events and null
-      stream interactions can create the required stream ordered
-      dependencies. (default enabled)
-
-    - :py:obj:`~.cudaMemPoolReuseAllowOpportunistic`: (value type = int)
-      Allow reuse of already completed frees when there is no dependency
-      between the free and allocation. (default enabled)
-
-    - :py:obj:`~.cudaMemPoolReuseAllowInternalDependencies`: (value type =
-      int) Allow :py:obj:`~.cudaMallocAsync` to insert new stream
-      dependencies in order to establish the stream ordering required to
-      reuse a piece of memory released by :py:obj:`~.cudaFreeAsync`
-      (default enabled).
-
-    - :py:obj:`~.cudaMemPoolAttrReservedMemCurrent`: (value type =
-      cuuint64_t) Amount of backing memory currently allocated for the
-      mempool.
-
-    - :py:obj:`~.cudaMemPoolAttrReservedMemHigh`: (value type = cuuint64_t)
-      High watermark of backing memory allocated for the mempool since the
-      last time it was reset.
-
-    - :py:obj:`~.cudaMemPoolAttrUsedMemCurrent`: (value type = cuuint64_t)
-      Amount of memory from the pool that is currently in use by the
-      application.
-
-    - :py:obj:`~.cudaMemPoolAttrUsedMemHigh`: (value type = cuuint64_t)
-      High watermark of the amount of memory from the pool that was in use
-      by the application since the last time it was reset.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        The memory pool to get attributes of
-    attr : :py:obj:`~.cudaMemPoolAttr`
-        The attribute to get
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    value : Any
-        Retrieved value
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolGetAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
-    """
-    cdef cyruntime.cudaMemPool_t cymemPool
-    if memPool is None:
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>0
-    elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
-        pmemPool = int(memPool)
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    else:
-        pmemPool = int(cudaMemPool_t(memPool))
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    cdef cyruntime.cudaMemPoolAttr cyattr = attr.value
-    cdef utils.HelperCUmemPool_attribute cyvalue = utils.HelperCUmemPool_attribute(attr, 0, is_getter=True)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    with nogil:
-        err = cyruntime.cudaMemPoolGetAttribute(cymemPool, cyattr, cyvalue_ptr)
-
-    return (cudaError_t(err), cyvalue.pyObj())
-{{endif}}
-
-{{if 'cudaMemPoolSetAccess' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPoolSetAccess(memPool, descList : Optional[Tuple[cudaMemAccessDesc] | List[cudaMemAccessDesc]], size_t count):
-    """ Controls visibility of pools between devices.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        The pool being modified
-    map : List[:py:obj:`~.cudaMemAccessDesc`]
-        Array of access descriptors. Each descriptor instructs the access
-        to enable for a single gpu
-    count : size_t
-        Number of descriptors in the map array.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolSetAccess`, :py:obj:`~.cudaMemPoolGetAccess`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
-    """
-    descList = [] if descList is None else descList
-    if not all(isinstance(_x, (cudaMemAccessDesc,)) for _x in descList):
-        raise TypeError("Argument 'descList' is not instance of type (expected Tuple[cyruntime.cudaMemAccessDesc,] or List[cyruntime.cudaMemAccessDesc,]")
-    cdef cyruntime.cudaMemPool_t cymemPool
-    if memPool is None:
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>0
-    elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
-        pmemPool = int(memPool)
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    else:
-        pmemPool = int(cudaMemPool_t(memPool))
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    cdef cyruntime.cudaMemAccessDesc* cydescList = NULL
-    if len(descList) > 0:
-        cydescList = <cyruntime.cudaMemAccessDesc*> calloc(len(descList), sizeof(cyruntime.cudaMemAccessDesc))
-        if cydescList is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(descList)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc)))
-        for idx in range(len(descList)):
-            string.memcpy(&cydescList[idx], (<cudaMemAccessDesc>descList[idx])._ptr, sizeof(cyruntime.cudaMemAccessDesc))
-    if count > <size_t>len(descList): raise RuntimeError("List is too small: " + str(len(descList)) + " < " + str(count))
-    err = cyruntime.cudaMemPoolSetAccess(cymemPool, (<cudaMemAccessDesc>descList[0])._ptr if len(descList) == 1 else cydescList, count)
-    if cydescList is not NULL:
-        free(cydescList)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMemPoolGetAccess' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPoolGetAccess(memPool, location : Optional[cudaMemLocation]):
-    """ Returns the accessibility of a pool from a device.
-
-    Returns the accessibility of the pool's memory from the specified
-    location.
-
-    Parameters
-    ----------
-    memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        the pool being queried
-    location : :py:obj:`~.cudaMemLocation`
-        the location accessing the pool
-
-    Returns
-    -------
-    cudaError_t
-
-    flags : :py:obj:`~.cudaMemAccessFlags`
-        the accessibility of the pool from the specified location
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolGetAccess`, :py:obj:`~.cudaMemPoolSetAccess`
-    """
-    cdef cyruntime.cudaMemPool_t cymemPool
-    if memPool is None:
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>0
-    elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
-        pmemPool = int(memPool)
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    else:
-        pmemPool = int(cudaMemPool_t(memPool))
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    cdef cyruntime.cudaMemAccessFlags flags
-    cdef cyruntime.cudaMemLocation* cylocation_ptr = location._ptr if location != None else NULL
-    err = cyruntime.cudaMemPoolGetAccess(&flags, cymemPool, cylocation_ptr)
-    return (cudaError_t(err), cudaMemAccessFlags(flags))
-{{endif}}
-
-{{if 'cudaMemPoolCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPoolCreate(poolProps : Optional[cudaMemPoolProps]):
-    """ Creates a memory pool.
-
-    Creates a CUDA memory pool and returns the handle in `pool`. The
-    `poolProps` determines the properties of the pool such as the backing
-    device and IPC capabilities.
-
-    To create a memory pool targeting a specific host NUMA node,
-    applications must set
-    :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::type to
-    :py:obj:`~.cudaMemLocationTypeHostNuma` and
-    :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::id must specify the NUMA
-    ID of the host memory node. Specifying
-    :py:obj:`~.cudaMemLocationTypeHostNumaCurrent` or
-    :py:obj:`~.cudaMemLocationTypeHost` as the
-    :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::type will result in
-    :py:obj:`~.cudaErrorInvalidValue`. By default, the pool's memory will
-    be accessible from the device it is allocated on. In the case of pools
-    created with :py:obj:`~.cudaMemLocationTypeHostNuma`, their default
-    accessibility will be from the host CPU. Applications can control the
-    maximum size of the pool by specifying a non-zero value for
-    :py:obj:`~.cudaMemPoolProps.maxSize`. If set to 0, the maximum size of
-    the pool will default to a system dependent value.
-
-    Applications can set :py:obj:`~.cudaMemPoolProps.handleTypes` to
-    :py:obj:`~.cudaMemHandleTypeFabric` in order to create
-    :py:obj:`~.cudaMemPool_t` suitable for sharing within an IMEX domain.
-    An IMEX domain is either an OS instance or a group of securely
-    connected OS instances using the NVIDIA IMEX daemon. An IMEX channel is
-    a global resource within the IMEX domain that represents a logical
-    entity that aims to provide fine grained accessibility control for the
-    participating processes. When exporter and importer CUDA processes have
-    been granted access to the same IMEX channel, they can securely share
-    memory. If the allocating process does not have access setup for an
-    IMEX channel, attempting to export a :py:obj:`~.CUmemoryPool` with
-    :py:obj:`~.cudaMemHandleTypeFabric` will result in
-    :py:obj:`~.cudaErrorNotPermitted`. The nvidia-modprobe CLI provides
-    more information regarding setting up of IMEX channels.
-
-    Parameters
-    ----------
-    poolProps : :py:obj:`~.cudaMemPoolProps`
-        None
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
-    memPool : :py:obj:`~.cudaMemPool_t`
-        None
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaMallocFromPoolAsync`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`
-
-    Notes
-    -----
-    Specifying cudaMemHandleTypeNone creates a memory pool that will not support IPC.
-    """
-    cdef cudaMemPool_t memPool = cudaMemPool_t()
-    cdef cyruntime.cudaMemPoolProps* cypoolProps_ptr = poolProps._ptr if poolProps != None else NULL
-    err = cyruntime.cudaMemPoolCreate(<cyruntime.cudaMemPool_t*>memPool._ptr, cypoolProps_ptr)
-    return (cudaError_t(err), memPool)
-{{endif}}
-
-{{if 'cudaMemPoolDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPoolDestroy(memPool):
-    """ Destroys the specified memory pool.
-
-    If any pointers obtained from this pool haven't been freed or the pool
-    has free operations that haven't completed when
-    :py:obj:`~.cudaMemPoolDestroy` is invoked, the function will return
-    immediately and the resources associated with the pool will be released
-    automatically once there are no more outstanding allocations.
-
-    Destroying the current mempool of a device sets the default mempool of
-    that device as the current mempool for that device.
-
-    Parameters
-    ----------
-    memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        None
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    cuMemPoolDestroy, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
-
-    Notes
-    -----
-    A device's default memory pool cannot be destroyed.
-    """
-    cdef cyruntime.cudaMemPool_t cymemPool
-    if memPool is None:
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>0
-    elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
-        pmemPool = int(memPool)
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    else:
-        pmemPool = int(cudaMemPool_t(memPool))
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    err = cyruntime.cudaMemPoolDestroy(cymemPool)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaMallocFromPoolAsync' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMallocFromPoolAsync(size_t size, memPool, stream):
-    """ Allocates memory from a specified pool with stream ordered semantics.
-
-    Inserts an allocation operation into `hStream`. A pointer to the
-    allocated memory is returned immediately in *dptr. The allocation must
-    not be accessed until the the allocation operation completes. The
-    allocation comes from the specified memory pool.
-
-    Parameters
-    ----------
-    bytesize : size_t
-        Number of bytes to allocate
-    memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        The pool to allocate from
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        The stream establishing the stream ordering semantic
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorOutOfMemory`
-    ptr : Any
-        Returned device pointer
-
-    See Also
-    --------
-    :py:obj:`~.cuMemAllocFromPoolAsync`, cudaMallocAsync (C++ API), :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaMemPoolCreate`, :py:obj:`~.cudaMemPoolSetAccess`, :py:obj:`~.cudaMemPoolSetAttribute`
-
-    Notes
-    -----
-    During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaMemPool_t cymemPool
-    if memPool is None:
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>0
-    elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
-        pmemPool = int(memPool)
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    else:
-        pmemPool = int(cudaMemPool_t(memPool))
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    cdef void_ptr ptr = 0
-    err = cyruntime.cudaMallocFromPoolAsync(<void**>&ptr, size, cymemPool, cystream)
-    return (cudaError_t(err), ptr)
-{{endif}}
-
-{{if 'cudaMemPoolExportToShareableHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPoolExportToShareableHandle(memPool, handleType not None : cudaMemAllocationHandleType, unsigned int flags):
-    """ Exports a memory pool to the requested handle type.
-
-    Given an IPC capable mempool, create an OS handle to share the pool
-    with another process. A recipient process can convert the shareable
-    handle into a mempool with
-    :py:obj:`~.cudaMemPoolImportFromShareableHandle`. Individual pointers
-    can then be shared with the :py:obj:`~.cudaMemPoolExportPointer` and
-    :py:obj:`~.cudaMemPoolImportPointer` APIs. The implementation of what
-    the shareable handle is and how it can be transferred is defined by the
-    requested handle type.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        pool to export
-    handleType : :py:obj:`~.cudaMemAllocationHandleType`
-        the type of handle to create
-    flags : unsigned int
-        must be 0
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
-    handle_out : Any
-        pointer to the location in which to store the requested handle
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolExportPointer`, :py:obj:`~.cudaMemPoolImportPointer`
-
-    Notes
-    -----
-    : To create an IPC capable mempool, create a mempool with a CUmemAllocationHandleType other than cudaMemHandleTypeNone.
-    """
-    cdef cyruntime.cudaMemPool_t cymemPool
-    if memPool is None:
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>0
-    elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
-        pmemPool = int(memPool)
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    else:
-        pmemPool = int(cudaMemPool_t(memPool))
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    cdef utils.HelperCUmemAllocationHandleType cyshareableHandle = utils.HelperCUmemAllocationHandleType(handleType)
-    cdef void* cyshareableHandle_ptr = <void*><void_ptr>cyshareableHandle.cptr
-    cdef cyruntime.cudaMemAllocationHandleType cyhandleType = handleType.value
-    err = cyruntime.cudaMemPoolExportToShareableHandle(cyshareableHandle_ptr, cymemPool, cyhandleType, flags)
-    return (cudaError_t(err), cyshareableHandle.pyObj())
-{{endif}}
-
-{{if 'cudaMemPoolImportFromShareableHandle' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPoolImportFromShareableHandle(shareableHandle, handleType not None : cudaMemAllocationHandleType, unsigned int flags):
-    """ imports a memory pool from a shared handle.
-
-    Specific allocations can be imported from the imported pool with
-    :py:obj:`~.cudaMemPoolImportPointer`.
-
-    Parameters
-    ----------
-    handle : Any
-        OS handle of the pool to open
-    handleType : :py:obj:`~.cudaMemAllocationHandleType`
-        The type of handle being imported
-    flags : unsigned int
-        must be 0
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
-    pool_out : :py:obj:`~.cudaMemPool_t`
-        Returned memory pool
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolExportPointer`, :py:obj:`~.cudaMemPoolImportPointer`
-
-    Notes
-    -----
-    Imported memory pools do not support creating new allocations. As such imported memory pools may not be used in :py:obj:`~.cudaDeviceSetMemPool` or :py:obj:`~.cudaMallocFromPoolAsync` calls.
-    """
-    cdef cudaMemPool_t memPool = cudaMemPool_t()
-    cyshareableHandle = utils.HelperInputVoidPtr(shareableHandle)
-    cdef void* cyshareableHandle_ptr = <void*><void_ptr>cyshareableHandle.cptr
-    cdef cyruntime.cudaMemAllocationHandleType cyhandleType = handleType.value
-    err = cyruntime.cudaMemPoolImportFromShareableHandle(<cyruntime.cudaMemPool_t*>memPool._ptr, cyshareableHandle_ptr, cyhandleType, flags)
-    return (cudaError_t(err), memPool)
-{{endif}}
-
-{{if 'cudaMemPoolExportPointer' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPoolExportPointer(ptr):
-    """ Export data to share a memory pool allocation between processes.
-
-    Constructs `shareData_out` for sharing a specific allocation from an
-    already shared memory pool. The recipient process can import the
-    allocation with the :py:obj:`~.cudaMemPoolImportPointer` api. The data
-    is not a handle and may be shared through any IPC mechanism.
-
-    Parameters
-    ----------
-    ptr : Any
-        pointer to memory being exported
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
-    shareData_out : :py:obj:`~.cudaMemPoolPtrExportData`
-        Returned export data
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolExportPointer`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolImportPointer`
-    """
-    cdef cudaMemPoolPtrExportData exportData = cudaMemPoolPtrExportData()
-    cyptr = utils.HelperInputVoidPtr(ptr)
-    cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
-    err = cyruntime.cudaMemPoolExportPointer(<cyruntime.cudaMemPoolPtrExportData*>exportData._ptr, cyptr_ptr)
-    return (cudaError_t(err), exportData)
-{{endif}}
-
-{{if 'cudaMemPoolImportPointer' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaMemPoolImportPointer(memPool, exportData : Optional[cudaMemPoolPtrExportData]):
-    """ Import a memory pool allocation from another process.
-
-    Returns in `ptr_out` a pointer to the imported memory. The imported
-    memory must not be accessed before the allocation operation completes
-    in the exporting process. The imported memory must be freed from all
-    importing processes before being freed in the exporting process. The
-    pointer may be freed with cudaFree or cudaFreeAsync. If
-    :py:obj:`~.cudaFreeAsync` is used, the free must be completed on the
-    importing process before the free operation on the exporting process.
-
-    Parameters
-    ----------
-    pool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
-        pool from which to import
-    shareData : :py:obj:`~.cudaMemPoolPtrExportData`
-        data specifying the memory to import
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_OUT_OF_MEMORY`
-    ptr_out : Any
-        pointer to imported memory
-
-    See Also
-    --------
-    :py:obj:`~.cuMemPoolImportPointer`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolExportPointer`
-
-    Notes
-    -----
-    The :py:obj:`~.cudaFreeAsync` api may be used in the exporting process before the :py:obj:`~.cudaFreeAsync` operation completes in its stream as long as the :py:obj:`~.cudaFreeAsync` in the exporting process specifies a stream with a stream dependency on the importing process's :py:obj:`~.cudaFreeAsync`.
-    """
-    cdef cyruntime.cudaMemPool_t cymemPool
-    if memPool is None:
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>0
-    elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
-        pmemPool = int(memPool)
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    else:
-        pmemPool = int(cudaMemPool_t(memPool))
-        cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
-    cdef void_ptr ptr = 0
-    cdef cyruntime.cudaMemPoolPtrExportData* cyexportData_ptr = exportData._ptr if exportData != None else NULL
-    err = cyruntime.cudaMemPoolImportPointer(<void**>&ptr, cymemPool, cyexportData_ptr)
-    return (cudaError_t(err), ptr)
-{{endif}}
-
-{{if 'cudaPointerGetAttributes' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaPointerGetAttributes(ptr):
-    """ Returns attributes about a specified pointer.
-
-    Returns in `*attributes` the attributes of the pointer `ptr`. If
-    pointer was not allocated in, mapped by or registered with context
-    supporting unified addressing :py:obj:`~.cudaErrorInvalidValue` is
-    returned.
-
-    The :py:obj:`~.cudaPointerAttributes` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    In this structure, the individual fields mean
-
-    - :py:obj:`~.cudaPointerAttributes.type` identifies type of memory. It
-      can be :py:obj:`~.cudaMemoryTypeUnregistered` for unregistered host
-      memory, :py:obj:`~.cudaMemoryTypeHost` for registered host memory,
-      :py:obj:`~.cudaMemoryTypeDevice` for device memory or
-      :py:obj:`~.cudaMemoryTypeManaged` for managed memory.
-
-    - :py:obj:`~.device` is the device against which `ptr` was allocated.
-      If `ptr` has memory type :py:obj:`~.cudaMemoryTypeDevice` then this
-      identifies the device on which the memory referred to by `ptr`
-      physically resides. If `ptr` has memory type
-      :py:obj:`~.cudaMemoryTypeHost` then this identifies the device which
-      was current when the allocation was made (and if that device is
-      deinitialized then this allocation will vanish with that device's
-      state).
-
-    - :py:obj:`~.devicePointer` is the device pointer alias through which
-      the memory referred to by `ptr` may be accessed on the current
-      device. If the memory referred to by `ptr` cannot be accessed
-      directly by the current device then this is NULL.
-
-    - :py:obj:`~.hostPointer` is the host pointer alias through which the
-      memory referred to by `ptr` may be accessed on the host. If the
-      memory referred to by `ptr` cannot be accessed directly by the host
-      then this is NULL.
-
-    Parameters
-    ----------
-    ptr : Any
-        Pointer to get attributes for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
-    attributes : :py:obj:`~.cudaPointerAttributes`
-        Attributes for the specified pointer
-
-    See Also
-    --------
-    :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuPointerGetAttributes`
-
-    Notes
-    -----
-    In CUDA 11.0 forward passing host pointer will return :py:obj:`~.cudaMemoryTypeUnregistered` in :py:obj:`~.cudaPointerAttributes.type` and call will return :py:obj:`~.cudaSuccess`.
-    """
-    cdef cudaPointerAttributes attributes = cudaPointerAttributes()
-    cyptr = utils.HelperInputVoidPtr(ptr)
-    cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
-    err = cyruntime.cudaPointerGetAttributes(<cyruntime.cudaPointerAttributes*>attributes._ptr, cyptr_ptr)
-    return (cudaError_t(err), attributes)
-{{endif}}
-
-{{if 'cudaDeviceCanAccessPeer' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceCanAccessPeer(int device, int peerDevice):
-    """ Queries if a device may directly access a peer device's memory.
-
-    Returns in `*canAccessPeer` a value of 1 if device `device` is capable
-    of directly accessing memory from `peerDevice` and 0 otherwise. If
-    direct access of `peerDevice` from `device` is possible, then access
-    may be enabled by calling :py:obj:`~.cudaDeviceEnablePeerAccess()`.
-
-    Parameters
-    ----------
-    device : int
-        Device from which allocations on `peerDevice` are to be directly
-        accessed.
-    peerDevice : int
-        Device on which the allocations to be directly accessed by `device`
-        reside.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
-    canAccessPeer : int
-        Returned access capability
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cuDeviceCanAccessPeer`
-    """
-    cdef int canAccessPeer = 0
-    err = cyruntime.cudaDeviceCanAccessPeer(&canAccessPeer, device, peerDevice)
-    return (cudaError_t(err), canAccessPeer)
-{{endif}}
-
-{{if 'cudaDeviceEnablePeerAccess' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags):
-    """ Enables direct access to memory allocations on a peer device.
-
-    On success, all allocations from `peerDevice` will immediately be
-    accessible by the current device. They will remain accessible until
-    access is explicitly disabled using
-    :py:obj:`~.cudaDeviceDisablePeerAccess()` or either device is reset
-    using :py:obj:`~.cudaDeviceReset()`.
-
-    Note that access granted by this call is unidirectional and that in
-    order to access memory on the current device from `peerDevice`, a
-    separate symmetric call to :py:obj:`~.cudaDeviceEnablePeerAccess()` is
-    required.
-
-    Note that there are both device-wide and system-wide limitations per
-    system configuration, as noted in the CUDA Programming Guide under the
-    section "Peer-to-Peer Memory Access".
-
-    Returns :py:obj:`~.cudaErrorInvalidDevice` if
-    :py:obj:`~.cudaDeviceCanAccessPeer()` indicates that the current device
-    cannot directly access memory from `peerDevice`.
-
-    Returns :py:obj:`~.cudaErrorPeerAccessAlreadyEnabled` if direct access
-    of `peerDevice` from the current device has already been enabled.
-
-    Returns :py:obj:`~.cudaErrorInvalidValue` if `flags` is not 0.
-
-    Parameters
-    ----------
-    peerDevice : int
-        Peer device to enable direct access to from the current device
-    flags : unsigned int
-        Reserved for future use and must be set to 0
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorPeerAccessAlreadyEnabled`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cuCtxEnablePeerAccess`
-    """
-    err = cyruntime.cudaDeviceEnablePeerAccess(peerDevice, flags)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDeviceDisablePeerAccess' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceDisablePeerAccess(int peerDevice):
-    """ Disables direct access to memory allocations on a peer device.
-
-    Returns :py:obj:`~.cudaErrorPeerAccessNotEnabled` if direct access to
-    memory on `peerDevice` has not yet been enabled from the current
-    device.
-
-    Parameters
-    ----------
-    peerDevice : int
-        Peer device to disable direct access to
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorPeerAccessNotEnabled`, :py:obj:`~.cudaErrorInvalidDevice`
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cuCtxDisablePeerAccess`
-    """
-    err = cyruntime.cudaDeviceDisablePeerAccess(peerDevice)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphicsUnregisterResource' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphicsUnregisterResource(resource):
-    """ Unregisters a graphics resource for access by CUDA.
-
-    Unregisters the graphics resource `resource` so it is not accessible by
-    CUDA unless registered again.
-
-    If `resource` is invalid then
-    :py:obj:`~.cudaErrorInvalidResourceHandle` is returned.
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.cudaGraphicsResource_t`
-        Resource to unregister
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphicsD3D9RegisterResource`, :py:obj:`~.cudaGraphicsD3D10RegisterResource`, :py:obj:`~.cudaGraphicsD3D11RegisterResource`, :py:obj:`~.cudaGraphicsGLRegisterBuffer`, :py:obj:`~.cudaGraphicsGLRegisterImage`, :py:obj:`~.cuGraphicsUnregisterResource`
-    """
-    cdef cyruntime.cudaGraphicsResource_t cyresource
-    if resource is None:
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>0
-    elif isinstance(resource, (cudaGraphicsResource_t,)):
-        presource = int(resource)
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    else:
-        presource = int(cudaGraphicsResource_t(resource))
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    err = cyruntime.cudaGraphicsUnregisterResource(cyresource)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphicsResourceSetMapFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphicsResourceSetMapFlags(resource, unsigned int flags):
-    """ Set usage flags for mapping a graphics resource.
-
-    Set `flags` for mapping the graphics resource `resource`.
-
-    Changes to `flags` will take effect the next time `resource` is mapped.
-    The `flags` argument may be any of the following:
-
-    - :py:obj:`~.cudaGraphicsMapFlagsNone`: Specifies no hints about how
-      `resource` will be used. It is therefore assumed that CUDA may read
-      from or write to `resource`.
-
-    - :py:obj:`~.cudaGraphicsMapFlagsReadOnly`: Specifies that CUDA will
-      not write to `resource`.
-
-    - :py:obj:`~.cudaGraphicsMapFlagsWriteDiscard`: Specifies CUDA will not
-      read from `resource` and will write over the entire contents of
-      `resource`, so none of the data previously stored in `resource` will
-      be preserved.
-
-    If `resource` is presently mapped for access by CUDA then
-    :py:obj:`~.cudaErrorUnknown` is returned. If `flags` is not one of the
-    above values then :py:obj:`~.cudaErrorInvalidValue` is returned.
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.cudaGraphicsResource_t`
-        Registered resource to set flags for
-    flags : unsigned int
-        Parameters for resource mapping
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cuGraphicsResourceSetMapFlags`
-    """
-    cdef cyruntime.cudaGraphicsResource_t cyresource
-    if resource is None:
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>0
-    elif isinstance(resource, (cudaGraphicsResource_t,)):
-        presource = int(resource)
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    else:
-        presource = int(cudaGraphicsResource_t(resource))
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    err = cyruntime.cudaGraphicsResourceSetMapFlags(cyresource, flags)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphicsMapResources' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphicsMapResources(int count, resources, stream):
-    """ Map graphics resources for access by CUDA.
-
-    Maps the `count` graphics resources in `resources` for access by CUDA.
-
-    The resources in `resources` may be accessed by CUDA until they are
-    unmapped. The graphics API from which `resources` were registered
-    should not access any resources while they are mapped by CUDA. If an
-    application does so, the results are undefined.
-
-    This function provides the synchronization guarantee that any graphics
-    calls issued before :py:obj:`~.cudaGraphicsMapResources()` will
-    complete before any subsequent CUDA work issued in `stream` begins.
-
-    If `resources` contains any duplicate entries then
-    :py:obj:`~.cudaErrorInvalidResourceHandle` is returned. If any of
-    `resources` are presently mapped for access by CUDA then
-    :py:obj:`~.cudaErrorUnknown` is returned.
-
-    Parameters
-    ----------
-    count : int
-        Number of resources to map
-    resources : :py:obj:`~.cudaGraphicsResource_t`
-        Resources to map for CUDA
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream for synchronization
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaGraphicsUnmapResources`, :py:obj:`~.cuGraphicsMapResources`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaGraphicsResource_t *cyresources
-    if resources is None:
-        cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>NULL
-    elif isinstance(resources, (cudaGraphicsResource_t,)):
-        presources = resources.getPtr()
-        cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>presources
-    elif isinstance(resources, (int)):
-        cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>resources
-    else:
-        raise TypeError("Argument 'resources' is not instance of type (expected <class 'int, runtime.cudaGraphicsResource_t'>, found " + str(type(resources)))
-    err = cyruntime.cudaGraphicsMapResources(count, cyresources, cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphicsUnmapResources' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphicsUnmapResources(int count, resources, stream):
-    """ Unmap graphics resources.
-
-    Unmaps the `count` graphics resources in `resources`.
-
-    Once unmapped, the resources in `resources` may not be accessed by CUDA
-    until they are mapped again.
-
-    This function provides the synchronization guarantee that any CUDA work
-    issued in `stream` before :py:obj:`~.cudaGraphicsUnmapResources()` will
-    complete before any subsequently issued graphics work begins.
-
-    If `resources` contains any duplicate entries then
-    :py:obj:`~.cudaErrorInvalidResourceHandle` is returned. If any of
-    `resources` are not presently mapped for access by CUDA then
-    :py:obj:`~.cudaErrorUnknown` is returned.
-
-    Parameters
-    ----------
-    count : int
-        Number of resources to unmap
-    resources : :py:obj:`~.cudaGraphicsResource_t`
-        Resources to unmap
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream for synchronization
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaGraphicsResource_t *cyresources
-    if resources is None:
-        cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>NULL
-    elif isinstance(resources, (cudaGraphicsResource_t,)):
-        presources = resources.getPtr()
-        cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>presources
-    elif isinstance(resources, (int)):
-        cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>resources
-    else:
-        raise TypeError("Argument 'resources' is not instance of type (expected <class 'int, runtime.cudaGraphicsResource_t'>, found " + str(type(resources)))
-    err = cyruntime.cudaGraphicsUnmapResources(count, cyresources, cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphicsResourceGetMappedPointer' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphicsResourceGetMappedPointer(resource):
-    """ Get an device pointer through which to access a mapped graphics resource.
-
-    Returns in `*devPtr` a pointer through which the mapped graphics
-    resource `resource` may be accessed. Returns in `*size` the size of the
-    memory in bytes which may be accessed from that pointer. The value set
-    in `devPtr` may change every time that `resource` is mapped.
-
-    If `resource` is not a buffer then it cannot be accessed via a pointer
-    and :py:obj:`~.cudaErrorUnknown` is returned. If `resource` is not
-    mapped then :py:obj:`~.cudaErrorUnknown` is returned.
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.cudaGraphicsResource_t`
-        None
-
-    Returns
-    -------
-    cudaError_t
-
-    devPtr : Any
-        None
-    size : int
-        None
-    """
-    cdef cyruntime.cudaGraphicsResource_t cyresource
-    if resource is None:
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>0
-    elif isinstance(resource, (cudaGraphicsResource_t,)):
-        presource = int(resource)
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    else:
-        presource = int(cudaGraphicsResource_t(resource))
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    cdef void_ptr devPtr = 0
-    cdef size_t size = 0
-    err = cyruntime.cudaGraphicsResourceGetMappedPointer(<void**>&devPtr, &size, cyresource)
-    return (cudaError_t(err), devPtr, size)
-{{endif}}
-
-{{if 'cudaGraphicsSubResourceGetMappedArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphicsSubResourceGetMappedArray(resource, unsigned int arrayIndex, unsigned int mipLevel):
-    """ Get an array through which to access a subresource of a mapped graphics resource.
-
-    Returns in `*array` an array through which the subresource of the
-    mapped graphics resource `resource` which corresponds to array index
-    `arrayIndex` and mipmap level `mipLevel` may be accessed. The value set
-    in `array` may change every time that `resource` is mapped.
-
-    If `resource` is not a texture then it cannot be accessed via an array
-    and :py:obj:`~.cudaErrorUnknown` is returned. If `arrayIndex` is not a
-    valid array index for `resource` then :py:obj:`~.cudaErrorInvalidValue`
-    is returned. If `mipLevel` is not a valid mipmap level for `resource`
-    then :py:obj:`~.cudaErrorInvalidValue` is returned. If `resource` is
-    not mapped then :py:obj:`~.cudaErrorUnknown` is returned.
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.cudaGraphicsResource_t`
-        Mapped resource to access
-    arrayIndex : unsigned int
-        Array index for array textures or cubemap face index as defined by
-        :py:obj:`~.cudaGraphicsCubeFace` for cubemap textures for the
-        subresource to access
-    mipLevel : unsigned int
-        Mipmap level for the subresource to access
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
-    array : :py:obj:`~.cudaArray_t`
-        Returned array through which a subresource of `resource` may be
-        accessed
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`
-    """
-    cdef cyruntime.cudaGraphicsResource_t cyresource
-    if resource is None:
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>0
-    elif isinstance(resource, (cudaGraphicsResource_t,)):
-        presource = int(resource)
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    else:
-        presource = int(cudaGraphicsResource_t(resource))
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    cdef cudaArray_t array = cudaArray_t()
-    err = cyruntime.cudaGraphicsSubResourceGetMappedArray(<cyruntime.cudaArray_t*>array._ptr, cyresource, arrayIndex, mipLevel)
-    return (cudaError_t(err), array)
-{{endif}}
-
-{{if 'cudaGraphicsResourceGetMappedMipmappedArray' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphicsResourceGetMappedMipmappedArray(resource):
-    """ Get a mipmapped array through which to access a mapped graphics resource.
-
-    Returns in `*mipmappedArray` a mipmapped array through which the mapped
-    graphics resource `resource` may be accessed. The value set in
-    `mipmappedArray` may change every time that `resource` is mapped.
-
-    If `resource` is not a texture then it cannot be accessed via an array
-    and :py:obj:`~.cudaErrorUnknown` is returned. If `resource` is not
-    mapped then :py:obj:`~.cudaErrorUnknown` is returned.
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.cudaGraphicsResource_t`
-        Mapped resource to access
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
-    mipmappedArray : :py:obj:`~.cudaMipmappedArray_t`
-        Returned mipmapped array through which `resource` may be accessed
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsResourceGetMappedMipmappedArray`
-    """
-    cdef cyruntime.cudaGraphicsResource_t cyresource
-    if resource is None:
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>0
-    elif isinstance(resource, (cudaGraphicsResource_t,)):
-        presource = int(resource)
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    else:
-        presource = int(cudaGraphicsResource_t(resource))
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    cdef cudaMipmappedArray_t mipmappedArray = cudaMipmappedArray_t()
-    err = cyruntime.cudaGraphicsResourceGetMappedMipmappedArray(<cyruntime.cudaMipmappedArray_t*>mipmappedArray._ptr, cyresource)
-    return (cudaError_t(err), mipmappedArray)
-{{endif}}
-
-{{if 'cudaGetChannelDesc' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetChannelDesc(array):
-    """ Get the channel descriptor of an array.
-
-    Returns in `*desc` the channel descriptor of the CUDA array `array`.
-
-    Parameters
-    ----------
-    array : :py:obj:`~.cudaArray_const_t`
-        Memory array on device
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    desc : :py:obj:`~.cudaChannelFormatDesc`
-        Channel format
-
-    See Also
-    --------
-    :py:obj:`~.cudaCreateChannelDesc (C API)`, :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cudaCreateSurfaceObject`
-    """
-    cdef cyruntime.cudaArray_const_t cyarray
-    if array is None:
-        cyarray = <cyruntime.cudaArray_const_t><void_ptr>0
-    elif isinstance(array, (cudaArray_const_t,)):
-        parray = int(array)
-        cyarray = <cyruntime.cudaArray_const_t><void_ptr>parray
-    else:
-        parray = int(cudaArray_const_t(array))
-        cyarray = <cyruntime.cudaArray_const_t><void_ptr>parray
-    cdef cudaChannelFormatDesc desc = cudaChannelFormatDesc()
-    with nogil:
-        err = cyruntime.cudaGetChannelDesc(<cyruntime.cudaChannelFormatDesc*>desc._ptr, cyarray)
-
-    return (cudaError_t(err), desc)
-{{endif}}
-
-{{if 'cudaCreateChannelDesc' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaCreateChannelDesc(int x, int y, int z, int w, f not None : cudaChannelFormatKind):
-    """ Returns a channel descriptor using the specified format.
-
-    Returns a channel descriptor with format `f` and number of bits of each
-    component `x`, `y`, `z`, and `w`. The :py:obj:`~.cudaChannelFormatDesc`
-    is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where :py:obj:`~.cudaChannelFormatKind` is one of
-    :py:obj:`~.cudaChannelFormatKindSigned`,
-    :py:obj:`~.cudaChannelFormatKindUnsigned`, or
-    :py:obj:`~.cudaChannelFormatKindFloat`.
-
-    Parameters
-    ----------
-    x : int
-        X component
-    y : int
-        Y component
-    z : int
-        Z component
-    w : int
-        W component
-    f : :py:obj:`~.cudaChannelFormatKind`
-        Channel format
-
-    Returns
-    -------
-    cudaError_t.cudaSuccess
-        cudaError_t.cudaSuccess
-    :py:obj:`~.cudaChannelFormatDesc`
-        Channel descriptor with format `f`
-
-    See Also
-    --------
-    cudaCreateChannelDesc (C++ API), :py:obj:`~.cudaGetChannelDesc`, :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cudaCreateSurfaceObject`
-    """
-    cdef cyruntime.cudaChannelFormatKind cyf = f.value
-    err = cyruntime.cudaCreateChannelDesc(x, y, z, w, cyf)
-    cdef cudaChannelFormatDesc wrapper = cudaChannelFormatDesc()
-    wrapper._ptr[0] = err
-    return (cudaError_t.cudaSuccess, wrapper)
-{{endif}}
-
-{{if 'cudaCreateTextureObject' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaCreateTextureObject(pResDesc : Optional[cudaResourceDesc], pTexDesc : Optional[cudaTextureDesc], pResViewDesc : Optional[cudaResourceViewDesc]):
-    """ Creates a texture object.
-
-    Creates a texture object and returns it in `pTexObject`. `pResDesc`
-    describes the data to texture from. `pTexDesc` describes how the data
-    should be sampled. `pResViewDesc` is an optional argument that
-    specifies an alternate format for the data described by `pResDesc`, and
-    also describes the subresource region to restrict access to when
-    texturing. `pResViewDesc` can only be specified if the type of resource
-    is a CUDA array or a CUDA mipmapped array not in a block compressed
-    format.
-
-    Texture objects are only supported on devices of compute capability 3.0
-    or higher. Additionally, a texture object is an opaque value, and, as
-    such, should only be accessed through CUDA API calls.
-
-    The :py:obj:`~.cudaResourceDesc` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.cudaResourceDesc.resType` specifies the type of resource
-      to texture from. CUresourceType is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    If :py:obj:`~.cudaResourceDesc.resType` is set to
-    :py:obj:`~.cudaResourceTypeArray`,
-    :py:obj:`~.cudaResourceDesc`::res::array::array must be set to a valid
-    CUDA array handle.
-
-    If :py:obj:`~.cudaResourceDesc.resType` is set to
-    :py:obj:`~.cudaResourceTypeMipmappedArray`,
-    :py:obj:`~.cudaResourceDesc`::res::mipmap::mipmap must be set to a
-    valid CUDA mipmapped array handle and
-    :py:obj:`~.cudaTextureDesc.normalizedCoords` must be set to true.
-
-    If :py:obj:`~.cudaResourceDesc.resType` is set to
-    :py:obj:`~.cudaResourceTypeLinear`,
-    :py:obj:`~.cudaResourceDesc`::res::linear::devPtr must be set to a
-    valid device pointer, that is aligned to
-    :py:obj:`~.cudaDeviceProp.textureAlignment`.
-    :py:obj:`~.cudaResourceDesc`::res::linear::desc describes the format
-    and the number of components per array element.
-    :py:obj:`~.cudaResourceDesc`::res::linear::sizeInBytes specifies the
-    size of the array in bytes. The total number of elements in the linear
-    address range cannot exceed
-    :py:obj:`~.cudaDeviceProp.maxTexture1DLinear`. The number of elements
-    is computed as (sizeInBytes / sizeof(desc)).
-
-    If :py:obj:`~.cudaResourceDesc.resType` is set to
-    :py:obj:`~.cudaResourceTypePitch2D`,
-    :py:obj:`~.cudaResourceDesc`::res::pitch2D::devPtr must be set to a
-    valid device pointer, that is aligned to
-    :py:obj:`~.cudaDeviceProp.textureAlignment`.
-    :py:obj:`~.cudaResourceDesc`::res::pitch2D::desc describes the format
-    and the number of components per array element.
-    :py:obj:`~.cudaResourceDesc`::res::pitch2D::width and
-    :py:obj:`~.cudaResourceDesc`::res::pitch2D::height specify the width
-    and height of the array in elements, and cannot exceed
-    :py:obj:`~.cudaDeviceProp.maxTexture2DLinear`[0] and
-    :py:obj:`~.cudaDeviceProp.maxTexture2DLinear`[1] respectively.
-    :py:obj:`~.cudaResourceDesc`::res::pitch2D::pitchInBytes specifies the
-    pitch between two rows in bytes and has to be aligned to
-    :py:obj:`~.cudaDeviceProp.texturePitchAlignment`. Pitch cannot exceed
-    :py:obj:`~.cudaDeviceProp.maxTexture2DLinear`[2].
-
-    The :py:obj:`~.cudaTextureDesc` struct is defined as
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where
-
-    - :py:obj:`~.cudaTextureDesc.addressMode` specifies the addressing mode
-      for each dimension of the texture data.
-      :py:obj:`~.cudaTextureAddressMode` is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - This is ignored if :py:obj:`~.cudaResourceDesc.resType` is
-      :py:obj:`~.cudaResourceTypeLinear`. Also, if
-      :py:obj:`~.cudaTextureDesc.normalizedCoords` is set to zero,
-      :py:obj:`~.cudaAddressModeWrap` and :py:obj:`~.cudaAddressModeMirror`
-      won't be supported and will be switched to
-      :py:obj:`~.cudaAddressModeClamp`.
-
-    - :py:obj:`~.cudaTextureDesc.filterMode` specifies the filtering mode
-      to be used when fetching from the texture.
-      :py:obj:`~.cudaTextureFilterMode` is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - This is ignored if :py:obj:`~.cudaResourceDesc.resType` is
-      :py:obj:`~.cudaResourceTypeLinear`.
-
-    - :py:obj:`~.cudaTextureDesc.readMode` specifies whether integer data
-      should be converted to floating point or not.
-      :py:obj:`~.cudaTextureReadMode` is defined as:
-
-    - **View CUDA Toolkit Documentation for a C++ code example**
-
-    - Note that this applies only to 8-bit and 16-bit integer formats.
-      32-bit integer format would not be promoted, regardless of whether or
-      not this :py:obj:`~.cudaTextureDesc.readMode` is set
-      :py:obj:`~.cudaReadModeNormalizedFloat` is specified.
-
-    - :py:obj:`~.cudaTextureDesc.sRGB` specifies whether sRGB to linear
-      conversion should be performed during texture fetch.
-
-    - :py:obj:`~.cudaTextureDesc.borderColor` specifies the float values of
-      color. where: :py:obj:`~.cudaTextureDesc.borderColor`[0] contains
-      value of 'R', :py:obj:`~.cudaTextureDesc.borderColor`[1] contains
-      value of 'G', :py:obj:`~.cudaTextureDesc.borderColor`[2] contains
-      value of 'B', :py:obj:`~.cudaTextureDesc.borderColor`[3] contains
-      value of 'A' Note that application using integer border color values
-      will need to <reinterpret_cast> these values to float. The values are
-      set only when the addressing mode specified by
-      :py:obj:`~.cudaTextureDesc.addressMode` is cudaAddressModeBorder.
-
-    - :py:obj:`~.cudaTextureDesc.normalizedCoords` specifies whether the
-      texture coordinates will be normalized or not.
-
-    - :py:obj:`~.cudaTextureDesc.maxAnisotropy` specifies the maximum
-      anistropy ratio to be used when doing anisotropic filtering. This
-      value will be clamped to the range [1,16].
-
-    - :py:obj:`~.cudaTextureDesc.mipmapFilterMode` specifies the filter
-      mode when the calculated mipmap level lies between two defined mipmap
-      levels.
-
-    - :py:obj:`~.cudaTextureDesc.mipmapLevelBias` specifies the offset to
-      be applied to the calculated mipmap level.
-
-    - :py:obj:`~.cudaTextureDesc.minMipmapLevelClamp` specifies the lower
-      end of the mipmap level range to clamp access to.
-
-    - :py:obj:`~.cudaTextureDesc.maxMipmapLevelClamp` specifies the upper
-      end of the mipmap level range to clamp access to.
-
-    - :py:obj:`~.cudaTextureDesc.disableTrilinearOptimization` specifies
-      whether the trilinear filtering optimizations will be disabled.
-
-    - :py:obj:`~.cudaTextureDesc.seamlessCubemap` specifies whether
-      seamless cube map filtering is enabled. This flag can only be
-      specified if the underlying resource is a CUDA array or a CUDA
-      mipmapped array that was created with the flag
-      :py:obj:`~.cudaArrayCubemap`. When seamless cube map filtering is
-      enabled, texture address modes specified by
-      :py:obj:`~.cudaTextureDesc.addressMode` are ignored. Instead, if the
-      :py:obj:`~.cudaTextureDesc.filterMode` is set to
-      :py:obj:`~.cudaFilterModePoint` the address mode
-      :py:obj:`~.cudaAddressModeClamp` will be applied for all dimensions.
-      If the :py:obj:`~.cudaTextureDesc.filterMode` is set to
-      :py:obj:`~.cudaFilterModeLinear` seamless cube map filtering will be
-      performed when sampling along the cube face borders.
-
-    The :py:obj:`~.cudaResourceViewDesc` struct is defined as
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    where:
-
-    - :py:obj:`~.cudaResourceViewDesc.format` specifies how the data
-      contained in the CUDA array or CUDA mipmapped array should be
-      interpreted. Note that this can incur a change in size of the texture
-      data. If the resource view format is a block compressed format, then
-      the underlying CUDA array or CUDA mipmapped array has to have a
-      32-bit unsigned integer format with 2 or 4 channels, depending on the
-      block compressed format. For ex., BC1 and BC4 require the underlying
-      CUDA array to have a 32-bit unsigned int with 2 channels. The other
-      BC formats require the underlying resource to have the same 32-bit
-      unsigned int format but with 4 channels.
-
-    - :py:obj:`~.cudaResourceViewDesc.width` specifies the new width of the
-      texture data. If the resource view format is a block compressed
-      format, this value has to be 4 times the original width of the
-      resource. For non block compressed formats, this value has to be
-      equal to that of the original resource.
-
-    - :py:obj:`~.cudaResourceViewDesc.height` specifies the new height of
-      the texture data. If the resource view format is a block compressed
-      format, this value has to be 4 times the original height of the
-      resource. For non block compressed formats, this value has to be
-      equal to that of the original resource.
-
-    - :py:obj:`~.cudaResourceViewDesc.depth` specifies the new depth of the
-      texture data. This value has to be equal to that of the original
-      resource.
-
-    - :py:obj:`~.cudaResourceViewDesc.firstMipmapLevel` specifies the most
-      detailed mipmap level. This will be the new mipmap level zero. For
-      non-mipmapped resources, this value has to be
-      zero.:py:obj:`~.cudaTextureDesc.minMipmapLevelClamp` and
-      :py:obj:`~.cudaTextureDesc.maxMipmapLevelClamp` will be relative to
-      this value. For ex., if the firstMipmapLevel is set to 2, and a
-      minMipmapLevelClamp of 1.2 is specified, then the actual minimum
-      mipmap level clamp will be 3.2.
-
-    - :py:obj:`~.cudaResourceViewDesc.lastMipmapLevel` specifies the least
-      detailed mipmap level. For non-mipmapped resources, this value has to
-      be zero.
-
-    - :py:obj:`~.cudaResourceViewDesc.firstLayer` specifies the first layer
-      index for layered textures. This will be the new layer zero. For non-
-      layered resources, this value has to be zero.
-
-    - :py:obj:`~.cudaResourceViewDesc.lastLayer` specifies the last layer
-      index for layered textures. For non-layered resources, this value has
-      to be zero.
-
-    Parameters
-    ----------
-    pResDesc : :py:obj:`~.cudaResourceDesc`
-        Resource descriptor
-    pTexDesc : :py:obj:`~.cudaTextureDesc`
-        Texture descriptor
-    pResViewDesc : :py:obj:`~.cudaResourceViewDesc`
-        Resource view descriptor
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pTexObject : :py:obj:`~.cudaTextureObject_t`
-        Texture object to create
-
-    See Also
-    --------
-    :py:obj:`~.cudaDestroyTextureObject`, :py:obj:`~.cuTexObjectCreate`
-    """
-    cdef cudaTextureObject_t pTexObject = cudaTextureObject_t()
-    cdef cyruntime.cudaResourceDesc* cypResDesc_ptr = pResDesc._ptr if pResDesc != None else NULL
-    cdef cyruntime.cudaTextureDesc* cypTexDesc_ptr = pTexDesc._ptr if pTexDesc != None else NULL
-    cdef cyruntime.cudaResourceViewDesc* cypResViewDesc_ptr = pResViewDesc._ptr if pResViewDesc != None else NULL
-    err = cyruntime.cudaCreateTextureObject(<cyruntime.cudaTextureObject_t*>pTexObject._ptr, cypResDesc_ptr, cypTexDesc_ptr, cypResViewDesc_ptr)
-    return (cudaError_t(err), pTexObject)
-{{endif}}
-
-{{if 'cudaDestroyTextureObject' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDestroyTextureObject(texObject):
-    """ Destroys a texture object.
-
-    Destroys the texture object specified by `texObject`.
-
-    Parameters
-    ----------
-    texObject : :py:obj:`~.cudaTextureObject_t`
-        Texture object to destroy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectDestroy`
-    """
-    cdef cyruntime.cudaTextureObject_t cytexObject
-    if texObject is None:
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>0
-    elif isinstance(texObject, (cudaTextureObject_t,)):
-        ptexObject = int(texObject)
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
-    else:
-        ptexObject = int(cudaTextureObject_t(texObject))
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
-    with nogil:
-        err = cyruntime.cudaDestroyTextureObject(cytexObject)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGetTextureObjectResourceDesc' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetTextureObjectResourceDesc(texObject):
-    """ Returns a texture object's resource descriptor.
-
-    Returns the resource descriptor for the texture object specified by
-    `texObject`.
-
-    Parameters
-    ----------
-    texObject : :py:obj:`~.cudaTextureObject_t`
-        Texture object
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pResDesc : :py:obj:`~.cudaResourceDesc`
-        Resource descriptor
-
-    See Also
-    --------
-    :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetResourceDesc`
-    """
-    cdef cyruntime.cudaTextureObject_t cytexObject
-    if texObject is None:
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>0
-    elif isinstance(texObject, (cudaTextureObject_t,)):
-        ptexObject = int(texObject)
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
-    else:
-        ptexObject = int(cudaTextureObject_t(texObject))
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
-    cdef cudaResourceDesc pResDesc = cudaResourceDesc()
-    with nogil:
-        err = cyruntime.cudaGetTextureObjectResourceDesc(<cyruntime.cudaResourceDesc*>pResDesc._ptr, cytexObject)
-
-    return (cudaError_t(err), pResDesc)
-{{endif}}
-
-{{if 'cudaGetTextureObjectTextureDesc' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetTextureObjectTextureDesc(texObject):
-    """ Returns a texture object's texture descriptor.
-
-    Returns the texture descriptor for the texture object specified by
-    `texObject`.
-
-    Parameters
-    ----------
-    texObject : :py:obj:`~.cudaTextureObject_t`
-        Texture object
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pTexDesc : :py:obj:`~.cudaTextureDesc`
-        Texture descriptor
-
-    See Also
-    --------
-    :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetTextureDesc`
-    """
-    cdef cyruntime.cudaTextureObject_t cytexObject
-    if texObject is None:
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>0
-    elif isinstance(texObject, (cudaTextureObject_t,)):
-        ptexObject = int(texObject)
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
-    else:
-        ptexObject = int(cudaTextureObject_t(texObject))
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
-    cdef cudaTextureDesc pTexDesc = cudaTextureDesc()
-    with nogil:
-        err = cyruntime.cudaGetTextureObjectTextureDesc(<cyruntime.cudaTextureDesc*>pTexDesc._ptr, cytexObject)
-
-    return (cudaError_t(err), pTexDesc)
-{{endif}}
-
-{{if 'cudaGetTextureObjectResourceViewDesc' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetTextureObjectResourceViewDesc(texObject):
-    """ Returns a texture object's resource view descriptor.
-
-    Returns the resource view descriptor for the texture object specified
-    by `texObject`. If no resource view was specified,
-    :py:obj:`~.cudaErrorInvalidValue` is returned.
-
-    Parameters
-    ----------
-    texObject : :py:obj:`~.cudaTextureObject_t`
-        Texture object
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pResViewDesc : :py:obj:`~.cudaResourceViewDesc`
-        Resource view descriptor
-
-    See Also
-    --------
-    :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetResourceViewDesc`
-    """
-    cdef cyruntime.cudaTextureObject_t cytexObject
-    if texObject is None:
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>0
-    elif isinstance(texObject, (cudaTextureObject_t,)):
-        ptexObject = int(texObject)
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
-    else:
-        ptexObject = int(cudaTextureObject_t(texObject))
-        cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
-    cdef cudaResourceViewDesc pResViewDesc = cudaResourceViewDesc()
-    err = cyruntime.cudaGetTextureObjectResourceViewDesc(<cyruntime.cudaResourceViewDesc*>pResViewDesc._ptr, cytexObject)
-    return (cudaError_t(err), pResViewDesc)
-{{endif}}
-
-{{if 'cudaCreateSurfaceObject' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaCreateSurfaceObject(pResDesc : Optional[cudaResourceDesc]):
-    """ Creates a surface object.
-
-    Creates a surface object and returns it in `pSurfObject`. `pResDesc`
-    describes the data to perform surface load/stores on.
-    :py:obj:`~.cudaResourceDesc.resType` must be
-    :py:obj:`~.cudaResourceTypeArray` and
-    :py:obj:`~.cudaResourceDesc`::res::array::array must be set to a valid
-    CUDA array handle.
-
-    Surface objects are only supported on devices of compute capability 3.0
-    or higher. Additionally, a surface object is an opaque value, and, as
-    such, should only be accessed through CUDA API calls.
-
-    Parameters
-    ----------
-    pResDesc : :py:obj:`~.cudaResourceDesc`
-        Resource descriptor
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidChannelDescriptor`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-    pSurfObject : :py:obj:`~.cudaSurfaceObject_t`
-        Surface object to create
-
-    See Also
-    --------
-    :py:obj:`~.cudaDestroySurfaceObject`, :py:obj:`~.cuSurfObjectCreate`
-    """
-    cdef cudaSurfaceObject_t pSurfObject = cudaSurfaceObject_t()
-    cdef cyruntime.cudaResourceDesc* cypResDesc_ptr = pResDesc._ptr if pResDesc != None else NULL
-    with nogil:
-        err = cyruntime.cudaCreateSurfaceObject(<cyruntime.cudaSurfaceObject_t*>pSurfObject._ptr, cypResDesc_ptr)
-
-    return (cudaError_t(err), pSurfObject)
-{{endif}}
-
-{{if 'cudaDestroySurfaceObject' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDestroySurfaceObject(surfObject):
-    """ Destroys a surface object.
-
-    Destroys the surface object specified by `surfObject`.
-
-    Parameters
-    ----------
-    surfObject : :py:obj:`~.cudaSurfaceObject_t`
-        Surface object to destroy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaCreateSurfaceObject`, :py:obj:`~.cuSurfObjectDestroy`
-    """
-    cdef cyruntime.cudaSurfaceObject_t cysurfObject
-    if surfObject is None:
-        cysurfObject = <cyruntime.cudaSurfaceObject_t><void_ptr>0
-    elif isinstance(surfObject, (cudaSurfaceObject_t,)):
-        psurfObject = int(surfObject)
-        cysurfObject = <cyruntime.cudaSurfaceObject_t><void_ptr>psurfObject
-    else:
-        psurfObject = int(cudaSurfaceObject_t(surfObject))
-        cysurfObject = <cyruntime.cudaSurfaceObject_t><void_ptr>psurfObject
-    with nogil:
-        err = cyruntime.cudaDestroySurfaceObject(cysurfObject)
-
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGetSurfaceObjectResourceDesc' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetSurfaceObjectResourceDesc(surfObject):
-    """ Returns a surface object's resource descriptor Returns the resource descriptor for the surface object specified by `surfObject`.
-
-    Parameters
-    ----------
-    surfObject : :py:obj:`~.cudaSurfaceObject_t`
-        Surface object
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pResDesc : :py:obj:`~.cudaResourceDesc`
-        Resource descriptor
-
-    See Also
-    --------
-    :py:obj:`~.cudaCreateSurfaceObject`, :py:obj:`~.cuSurfObjectGetResourceDesc`
-    """
-    cdef cyruntime.cudaSurfaceObject_t cysurfObject
-    if surfObject is None:
-        cysurfObject = <cyruntime.cudaSurfaceObject_t><void_ptr>0
-    elif isinstance(surfObject, (cudaSurfaceObject_t,)):
-        psurfObject = int(surfObject)
-        cysurfObject = <cyruntime.cudaSurfaceObject_t><void_ptr>psurfObject
-    else:
-        psurfObject = int(cudaSurfaceObject_t(surfObject))
-        cysurfObject = <cyruntime.cudaSurfaceObject_t><void_ptr>psurfObject
-    cdef cudaResourceDesc pResDesc = cudaResourceDesc()
-    err = cyruntime.cudaGetSurfaceObjectResourceDesc(<cyruntime.cudaResourceDesc*>pResDesc._ptr, cysurfObject)
-    return (cudaError_t(err), pResDesc)
-{{endif}}
-
-{{if 'cudaDriverGetVersion' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDriverGetVersion():
-    """ Returns the latest version of CUDA supported by the driver.
-
-    Returns in `*driverVersion` the latest version of CUDA supported by the
-    driver. The version is returned as (1000 * major + 10 * minor). For
-    example, CUDA 9.2 would be represented by 9020. If no driver is
-    installed, then 0 is returned as the driver version.
-
-    This function automatically returns :py:obj:`~.cudaErrorInvalidValue`
-    if `driverVersion` is NULL.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    driverVersion : int
-        Returns the CUDA driver version.
-
-    See Also
-    --------
-    :py:obj:`~.cudaRuntimeGetVersion`, :py:obj:`~.cuDriverGetVersion`
-    """
-    cdef int driverVersion = 0
-    err = cyruntime.cudaDriverGetVersion(&driverVersion)
-    return (cudaError_t(err), driverVersion)
-{{endif}}
-
-{{if 'cudaRuntimeGetVersion' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaRuntimeGetVersion():
-    """ Returns the CUDA Runtime version.
-
-    Returns in `*runtimeVersion` the version number of the current CUDA
-    Runtime instance. The version is returned as (1000 * major + 10 *
-    minor). For example, CUDA 9.2 would be represented by 9020.
-
-    As of CUDA 12.0, this function no longer initializes CUDA. The purpose
-    of this API is solely to return a compile-time constant stating the
-    CUDA Toolkit version in the above format.
-
-    This function automatically returns :py:obj:`~.cudaErrorInvalidValue`
-    if the `runtimeVersion` argument is NULL.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    runtimeVersion : int
-        Returns the CUDA Runtime version.
-
-    See Also
-    --------
-    :py:obj:`~.cudaDriverGetVersion`, :py:obj:`~.cuDriverGetVersion`
-    """
-    cdef int runtimeVersion = 0
-    err = cyruntime.cudaRuntimeGetVersion(&runtimeVersion)
-    return (cudaError_t(err), runtimeVersion)
-{{endif}}
-
-{{if 'cudaGraphCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphCreate(unsigned int flags):
-    """ Creates a graph.
-
-    Creates an empty graph, which is returned via `pGraph`.
-
-    Parameters
-    ----------
-    flags : unsigned int
-        Graph creation flags, must be 0
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-    pGraph : :py:obj:`~.cudaGraph_t`
-        Returns newly created graph
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphDestroy`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphClone`
-    """
-    cdef cudaGraph_t pGraph = cudaGraph_t()
-    err = cyruntime.cudaGraphCreate(<cyruntime.cudaGraph_t*>pGraph._ptr, flags)
-    return (cudaError_t(err), pGraph)
-{{endif}}
-
-{{if 'cudaGraphAddKernelNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddKernelNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, pNodeParams : Optional[cudaKernelNodeParams]):
-    """ Creates a kernel execution node and adds it to a graph.
-
-    Creates a new kernel execution node and adds it to `graph` with
-    `numDependencies` dependencies specified via `pDependencies` and
-    arguments specified in `pNodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `pDependencies` may not have any duplicate entries.
-    A handle to the new node will be returned in `pGraphNode`.
-
-    The :py:obj:`~.cudaKernelNodeParams` structure is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    When the graph is launched, the node will invoke kernel `func` on a
-    (`gridDim.x` x `gridDim.y` x `gridDim.z`) grid of blocks. Each block
-    contains (`blockDim.x` x `blockDim.y` x `blockDim.z`) threads.
-
-    `sharedMem` sets the amount of dynamic shared memory that will be
-    available to each thread block.
-
-    Kernel parameters to `func` can be specified in one of two ways:
-
-    1) Kernel parameters can be specified via `kernelParams`. If the kernel
-    has N parameters, then `kernelParams` needs to be an array of N
-    pointers. Each pointer, from `kernelParams`[0] to `kernelParams`[N-1],
-    points to the region of memory from which the actual parameter will be
-    copied. The number of kernel parameters and their offsets and sizes do
-    not need to be specified as that information is retrieved directly from
-    the kernel's image.
-
-    2) Kernel parameters can also be packaged by the application into a
-    single buffer that is passed in via `extra`. This places the burden on
-    the application of knowing each kernel parameter's size and
-    alignment/padding within the buffer. The `extra` parameter exists to
-    allow this function to take additional less commonly used arguments.
-    `extra` specifies a list of names of extra settings and their
-    corresponding values. Each extra setting name is immediately followed
-    by the corresponding value. The list must be terminated with either
-    NULL or CU_LAUNCH_PARAM_END.
-
-    - :py:obj:`~.CU_LAUNCH_PARAM_END`, which indicates the end of the
-      `extra` array;
-
-    - :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`, which specifies that the
-      next value in `extra` will be a pointer to a buffer containing all
-      the kernel parameters for launching kernel `func`;
-
-    - :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_SIZE`, which specifies that the
-      next value in `extra` will be a pointer to a size_t containing the
-      size of the buffer specified with
-      :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`;
-
-    The error :py:obj:`~.cudaErrorInvalidValue` will be returned if kernel
-    parameters are specified with both `kernelParams` and `extra` (i.e.
-    both `kernelParams` and `extra` are non-NULL).
-
-    The `kernelParams` or `extra` array, as well as the argument values it
-    points to, are copied during this call.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    pNodeParams : :py:obj:`~.cudaKernelNodeParams`
-        Parameters for the GPU execution node
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphKernelNodeGetParams`, :py:obj:`~.cudaGraphKernelNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-
-    Notes
-    -----
-    Kernels launched using graphs must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
-    """
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._ptr if pNodeParams != None else NULL
-    err = cyruntime.cudaGraphAddKernelNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cypNodeParams_ptr)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphKernelNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphKernelNodeGetParams(node):
-    """ Returns a kernel node's parameters.
-
-    Returns the parameters of kernel node `node` in `pNodeParams`. The
-    `kernelParams` or `extra` array returned in `pNodeParams`, as well as
-    the argument values it points to, are owned by the node. This memory
-    remains valid until the node is destroyed or its parameters are
-    modified, and should not be modified directly. Use
-    :py:obj:`~.cudaGraphKernelNodeSetParams` to update the parameters of
-    this node.
-
-    The params will contain either `kernelParams` or `extra`, according to
-    which of these was most recently set on the node.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`
-    pNodeParams : :py:obj:`~.cudaKernelNodeParams`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeSetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cudaKernelNodeParams pNodeParams = cudaKernelNodeParams()
-    err = cyruntime.cudaGraphKernelNodeGetParams(cynode, <cyruntime.cudaKernelNodeParams*>pNodeParams._ptr)
-    return (cudaError_t(err), pNodeParams)
-{{endif}}
-
-{{if 'cudaGraphKernelNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphKernelNodeSetParams(node, pNodeParams : Optional[cudaKernelNodeParams]):
-    """ Sets a kernel node's parameters.
-
-    Sets the parameters of kernel node `node` to `pNodeParams`.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    pNodeParams : :py:obj:`~.cudaKernelNodeParams`
-        Parameters to copy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorMemoryAllocation`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeGetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._ptr if pNodeParams != None else NULL
-    err = cyruntime.cudaGraphKernelNodeSetParams(cynode, cypNodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphKernelNodeCopyAttributes' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphKernelNodeCopyAttributes(hSrc, hDst):
-    """ Copies attributes from source node to destination node.
-
-    Copies attributes from source node `src` to destination node `dst`.
-    Both node must have the same context.
-
-    Parameters
-    ----------
-    dst : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Destination node
-    src : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Source node For list of attributes see
-        :py:obj:`~.cudaKernelNodeAttrID`
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidContext`
-
-    See Also
-    --------
-    :py:obj:`~.cudaAccessPolicyWindow`
-    """
-    cdef cyruntime.cudaGraphNode_t cyhDst
-    if hDst is None:
-        cyhDst = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hDst, (cudaGraphNode_t,driver.CUgraphNode)):
-        phDst = int(hDst)
-        cyhDst = <cyruntime.cudaGraphNode_t><void_ptr>phDst
-    else:
-        phDst = int(cudaGraphNode_t(hDst))
-        cyhDst = <cyruntime.cudaGraphNode_t><void_ptr>phDst
-    cdef cyruntime.cudaGraphNode_t cyhSrc
-    if hSrc is None:
-        cyhSrc = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hSrc, (cudaGraphNode_t,driver.CUgraphNode)):
-        phSrc = int(hSrc)
-        cyhSrc = <cyruntime.cudaGraphNode_t><void_ptr>phSrc
-    else:
-        phSrc = int(cudaGraphNode_t(hSrc))
-        cyhSrc = <cyruntime.cudaGraphNode_t><void_ptr>phSrc
-    err = cyruntime.cudaGraphKernelNodeCopyAttributes(cyhSrc, cyhDst)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphKernelNodeGetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphKernelNodeGetAttribute(hNode, attr not None : cudaKernelNodeAttrID):
-    """ Queries node attribute.
-
-    Queries attribute `attr` from node `hNode` and stores it in
-    corresponding member of `value_out`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-
-    attr : :py:obj:`~.cudaKernelNodeAttrID`
-
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-    value_out : :py:obj:`~.cudaKernelNodeAttrValue`
-
-
-    See Also
-    --------
-    :py:obj:`~.cudaAccessPolicyWindow`
-    """
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cyruntime.cudaKernelNodeAttrID cyattr = attr.value
-    cdef cudaKernelNodeAttrValue value_out = cudaKernelNodeAttrValue()
-    err = cyruntime.cudaGraphKernelNodeGetAttribute(cyhNode, cyattr, <cyruntime.cudaKernelNodeAttrValue*>value_out._ptr)
-    return (cudaError_t(err), value_out)
-{{endif}}
-
-{{if 'cudaGraphKernelNodeSetAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphKernelNodeSetAttribute(hNode, attr not None : cudaKernelNodeAttrID, value : Optional[cudaKernelNodeAttrValue]):
-    """ Sets node attribute.
-
-    Sets attribute `attr` on node `hNode` from corresponding attribute of
-    `value`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-
-    attr : :py:obj:`~.cudaKernelNodeAttrID`
-
-    value : :py:obj:`~.cudaKernelNodeAttrValue`
-
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
-
-    See Also
-    --------
-    :py:obj:`~.cudaAccessPolicyWindow`
-    """
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cyruntime.cudaKernelNodeAttrID cyattr = attr.value
-    cdef cyruntime.cudaKernelNodeAttrValue* cyvalue_ptr = value._ptr if value != None else NULL
-    err = cyruntime.cudaGraphKernelNodeSetAttribute(cyhNode, cyattr, cyvalue_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphAddMemcpyNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddMemcpyNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, pCopyParams : Optional[cudaMemcpy3DParms]):
-    """ Creates a memcpy node and adds it to a graph.
-
-    Creates a new memcpy node and adds it to `graph` with `numDependencies`
-    dependencies specified via `pDependencies`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `pDependencies` may not have any duplicate entries.
-    A handle to the new node will be returned in `pGraphNode`.
-
-    When the graph is launched, the node will perform the memcpy described
-    by `pCopyParams`. See :py:obj:`~.cudaMemcpy3D()` for a description of
-    the structure and its restrictions.
-
-    Memcpy nodes have some additional restrictions with regards to managed
-    memory, if the system contains at least one device which has a zero
-    value for the device attribute
-    :py:obj:`~.cudaDevAttrConcurrentManagedAccess`.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    pCopyParams : :py:obj:`~.cudaMemcpy3DParms`
-        Parameters for the memory copy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphAddMemcpyNodeToSymbol`, :py:obj:`~.cudaGraphAddMemcpyNodeFromSymbol`, :py:obj:`~.cudaGraphAddMemcpyNode1D`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-    """
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    cdef cyruntime.cudaMemcpy3DParms* cypCopyParams_ptr = pCopyParams._ptr if pCopyParams != None else NULL
-    err = cyruntime.cudaGraphAddMemcpyNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cypCopyParams_ptr)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphAddMemcpyNode1D' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddMemcpyNode1D(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, dst, src, size_t count, kind not None : cudaMemcpyKind):
-    """ Creates a 1D memcpy node and adds it to a graph.
-
-    Creates a new 1D memcpy node and adds it to `graph` with
-    `numDependencies` dependencies specified via `pDependencies`. It is
-    possible for `numDependencies` to be 0, in which case the node will be
-    placed at the root of the graph. `pDependencies` may not have any
-    duplicate entries. A handle to the new node will be returned in
-    `pGraphNode`.
-
-    When the graph is launched, the node will copy `count` bytes from the
-    memory area pointed to by `src` to the memory area pointed to by `dst`,
-    where `kind` specifies the direction of the copy, and must be one of
-    :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
-    :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. Launching a memcpy node with dst and src
-    pointers that do not match the direction of the copy results in an
-    undefined behavior.
-
-    Memcpy nodes have some additional restrictions with regards to managed
-    memory, if the system contains at least one device which has a zero
-    value for the device attribute
-    :py:obj:`~.cudaDevAttrConcurrentManagedAccess`.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    dst : Any
-        Destination memory address
-    src : Any
-        Source memory address
-    count : size_t
-        Size in bytes to copy
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-    """
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    err = cyruntime.cudaGraphAddMemcpyNode1D(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cydst_ptr, cysrc_ptr, count, cykind)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphMemcpyNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphMemcpyNodeGetParams(node):
-    """ Returns a memcpy node's parameters.
-
-    Returns the parameters of memcpy node `node` in `pNodeParams`.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pNodeParams : :py:obj:`~.cudaMemcpy3DParms`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cudaMemcpy3DParms pNodeParams = cudaMemcpy3DParms()
-    err = cyruntime.cudaGraphMemcpyNodeGetParams(cynode, <cyruntime.cudaMemcpy3DParms*>pNodeParams._ptr)
-    return (cudaError_t(err), pNodeParams)
-{{endif}}
-
-{{if 'cudaGraphMemcpyNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphMemcpyNodeSetParams(node, pNodeParams : Optional[cudaMemcpy3DParms]):
-    """ Sets a memcpy node's parameters.
-
-    Sets the parameters of memcpy node `node` to `pNodeParams`.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    pNodeParams : :py:obj:`~.cudaMemcpy3DParms`
-        Parameters to copy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphMemcpyNodeSetParamsToSymbol`, :py:obj:`~.cudaGraphMemcpyNodeSetParamsFromSymbol`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaMemcpy3DParms* cypNodeParams_ptr = pNodeParams._ptr if pNodeParams != None else NULL
-    err = cyruntime.cudaGraphMemcpyNodeSetParams(cynode, cypNodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphMemcpyNodeSetParams1D' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphMemcpyNodeSetParams1D(node, dst, src, size_t count, kind not None : cudaMemcpyKind):
-    """ Sets a memcpy node's parameters to perform a 1-dimensional copy.
-
-    Sets the parameters of memcpy node `node` to the copy described by the
-    provided parameters.
-
-    When the graph is launched, the node will copy `count` bytes from the
-    memory area pointed to by `src` to the memory area pointed to by `dst`,
-    where `kind` specifies the direction of the copy, and must be one of
-    :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
-    :py:obj:`~.cudaMemcpyDeviceToHost`,
-    :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
-    Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
-    type of transfer is inferred from the pointer values. However,
-    :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
-    unified virtual addressing. Launching a memcpy node with dst and src
-    pointers that do not match the direction of the copy results in an
-    undefined behavior.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    dst : Any
-        Destination memory address
-    src : Any
-        Source memory address
-    count : size_t
-        Size in bytes to copy
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    err = cyruntime.cudaGraphMemcpyNodeSetParams1D(cynode, cydst_ptr, cysrc_ptr, count, cykind)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphAddMemsetNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddMemsetNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, pMemsetParams : Optional[cudaMemsetParams]):
-    """ Creates a memset node and adds it to a graph.
-
-    Creates a new memset node and adds it to `graph` with `numDependencies`
-    dependencies specified via `pDependencies`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `pDependencies` may not have any duplicate entries.
-    A handle to the new node will be returned in `pGraphNode`.
-
-    The element size must be 1, 2, or 4 bytes. When the graph is launched,
-    the node will perform the memset described by `pMemsetParams`.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    pMemsetParams : :py:obj:`~.cudaMemsetParams`
-        Parameters for the memory set
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphMemsetNodeGetParams`, :py:obj:`~.cudaGraphMemsetNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`
-    """
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    cdef cyruntime.cudaMemsetParams* cypMemsetParams_ptr = pMemsetParams._ptr if pMemsetParams != None else NULL
-    err = cyruntime.cudaGraphAddMemsetNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cypMemsetParams_ptr)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphMemsetNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphMemsetNodeGetParams(node):
-    """ Returns a memset node's parameters.
-
-    Returns the parameters of memset node `node` in `pNodeParams`.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pNodeParams : :py:obj:`~.cudaMemsetParams`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeSetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cudaMemsetParams pNodeParams = cudaMemsetParams()
-    err = cyruntime.cudaGraphMemsetNodeGetParams(cynode, <cyruntime.cudaMemsetParams*>pNodeParams._ptr)
-    return (cudaError_t(err), pNodeParams)
-{{endif}}
-
-{{if 'cudaGraphMemsetNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphMemsetNodeSetParams(node, pNodeParams : Optional[cudaMemsetParams]):
-    """ Sets a memset node's parameters.
-
-    Sets the parameters of memset node `node` to `pNodeParams`.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    pNodeParams : :py:obj:`~.cudaMemsetParams`
-        Parameters to copy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeGetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr = pNodeParams._ptr if pNodeParams != None else NULL
-    err = cyruntime.cudaGraphMemsetNodeSetParams(cynode, cypNodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphAddHostNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddHostNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, pNodeParams : Optional[cudaHostNodeParams]):
-    """ Creates a host execution node and adds it to a graph.
-
-    Creates a new CPU execution node and adds it to `graph` with
-    `numDependencies` dependencies specified via `pDependencies` and
-    arguments specified in `pNodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `pDependencies` may not have any duplicate entries.
-    A handle to the new node will be returned in `pGraphNode`.
-
-    When the graph is launched, the node will invoke the specified CPU
-    function. Host nodes are not supported under MPS with pre-Volta GPUs.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    pNodeParams : :py:obj:`~.cudaHostNodeParams`
-        Parameters for the host node
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphHostNodeGetParams`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-    """
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._ptr if pNodeParams != None else NULL
-    err = cyruntime.cudaGraphAddHostNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cypNodeParams_ptr)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphHostNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphHostNodeGetParams(node):
-    """ Returns a host node's parameters.
-
-    Returns the parameters of host node `node` in `pNodeParams`.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pNodeParams : :py:obj:`~.cudaHostNodeParams`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeSetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cudaHostNodeParams pNodeParams = cudaHostNodeParams()
-    err = cyruntime.cudaGraphHostNodeGetParams(cynode, <cyruntime.cudaHostNodeParams*>pNodeParams._ptr)
-    return (cudaError_t(err), pNodeParams)
-{{endif}}
-
-{{if 'cudaGraphHostNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphHostNodeSetParams(node, pNodeParams : Optional[cudaHostNodeParams]):
-    """ Sets a host node's parameters.
-
-    Sets the parameters of host node `node` to `nodeParams`.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    pNodeParams : :py:obj:`~.cudaHostNodeParams`
-        Parameters to copy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeGetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._ptr if pNodeParams != None else NULL
-    err = cyruntime.cudaGraphHostNodeSetParams(cynode, cypNodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphAddChildGraphNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddChildGraphNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, childGraph):
-    """ Creates a child graph node and adds it to a graph.
-
-    Creates a new node which executes an embedded graph, and adds it to
-    `graph` with `numDependencies` dependencies specified via
-    `pDependencies`. It is possible for `numDependencies` to be 0, in which
-    case the node will be placed at the root of the graph. `pDependencies`
-    may not have any duplicate entries. A handle to the new node will be
-    returned in `pGraphNode`.
-
-    If `hGraph` contains allocation or free nodes, this call will return an
-    error.
-
-    The node executes an embedded child graph. The child graph is cloned in
-    this call.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    childGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph to clone into this node
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphClone`
-    """
-    cdef cyruntime.cudaGraph_t cychildGraph
-    if childGraph is None:
-        cychildGraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(childGraph, (cudaGraph_t,driver.CUgraph)):
-        pchildGraph = int(childGraph)
-        cychildGraph = <cyruntime.cudaGraph_t><void_ptr>pchildGraph
-    else:
-        pchildGraph = int(cudaGraph_t(childGraph))
-        cychildGraph = <cyruntime.cudaGraph_t><void_ptr>pchildGraph
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    err = cyruntime.cudaGraphAddChildGraphNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cychildGraph)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphChildGraphNodeGetGraph' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphChildGraphNodeGetGraph(node):
-    """ Gets a handle to the embedded graph of a child graph node.
-
-    Gets a handle to the embedded graph in a child graph node. This call
-    does not clone the graph. Changes to the graph will be reflected in the
-    node, and the node retains ownership of the graph.
-
-    Allocation and free nodes cannot be added to the returned graph.
-    Attempting to do so will return an error.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the embedded graph for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pGraph : :py:obj:`~.cudaGraph_t`
-        Location to store a handle to the graph
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphNodeFindInClone`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cudaGraph_t pGraph = cudaGraph_t()
-    err = cyruntime.cudaGraphChildGraphNodeGetGraph(cynode, <cyruntime.cudaGraph_t*>pGraph._ptr)
-    return (cudaError_t(err), pGraph)
-{{endif}}
-
-{{if 'cudaGraphAddEmptyNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddEmptyNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies):
-    """ Creates an empty node and adds it to a graph.
-
-    Creates a new node which performs no operation, and adds it to `graph`
-    with `numDependencies` dependencies specified via `pDependencies`. It
-    is possible for `numDependencies` to be 0, in which case the node will
-    be placed at the root of the graph. `pDependencies` may not have any
-    duplicate entries. A handle to the new node will be returned in
-    `pGraphNode`.
-
-    An empty node performs no operation during execution, but can be used
-    for transitive ordering. For example, a phased execution graph with 2
-    groups of n nodes with a barrier between them can be represented using
-    an empty node and 2*n dependency edges, rather than no empty node and
-    n^2 dependency edges.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-    """
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    err = cyruntime.cudaGraphAddEmptyNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphAddEventRecordNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddEventRecordNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, event):
-    """ Creates an event record node and adds it to a graph.
-
-    Creates a new event record node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies` and event
-    specified in `event`. It is possible for `numDependencies` to be 0, in
-    which case the node will be placed at the root of the graph.
-    `dependencies` may not have any duplicate entries. A handle to the new
-    node will be returned in `phGraphNode`.
-
-    Each launch of the graph will record `event` to capture execution of
-    the node's dependencies.
-
-    These nodes may not be used in loops or conditionals.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event for the node
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    phGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-    """
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    err = cyruntime.cudaGraphAddEventRecordNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cyevent)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphEventRecordNodeGetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphEventRecordNodeGetEvent(node):
-    """ Returns the event associated with an event record node.
-
-    Returns the event of event record node `hNode` in `event_out`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the event for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    event_out : :py:obj:`~.cudaEvent_t`
-        Pointer to return the event
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cudaEvent_t event_out = cudaEvent_t()
-    err = cyruntime.cudaGraphEventRecordNodeGetEvent(cynode, <cyruntime.cudaEvent_t*>event_out._ptr)
-    return (cudaError_t(err), event_out)
-{{endif}}
-
-{{if 'cudaGraphEventRecordNodeSetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphEventRecordNodeSetEvent(node, event):
-    """ Sets an event record node's event.
-
-    Sets the event of event record node `hNode` to `event`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the event for
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to use
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
-    """
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    err = cyruntime.cudaGraphEventRecordNodeSetEvent(cynode, cyevent)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphAddEventWaitNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddEventWaitNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, event):
-    """ Creates an event wait node and adds it to a graph.
-
-    Creates a new event wait node and adds it to `hGraph` with
-    `numDependencies` dependencies specified via `dependencies` and event
-    specified in `event`. It is possible for `numDependencies` to be 0, in
-    which case the node will be placed at the root of the graph.
-    `dependencies` may not have any duplicate entries. A handle to the new
-    node will be returned in `phGraphNode`.
-
-    The graph node will wait for all work captured in `event`. See
-    :py:obj:`~.cuEventRecord()` for details on what is captured by an
-    event. The synchronization will be performed efficiently on the device
-    when applicable. `event` may be from a different context or device than
-    the launch stream.
-
-    These nodes may not be used in loops or conditionals.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    dependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event for the node
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    phGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-    """
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    err = cyruntime.cudaGraphAddEventWaitNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cyevent)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphEventWaitNodeGetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphEventWaitNodeGetEvent(node):
-    """ Returns the event associated with an event wait node.
-
-    Returns the event of event wait node `hNode` in `event_out`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the event for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    event_out : :py:obj:`~.cudaEvent_t`
-        Pointer to return the event
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cudaEvent_t event_out = cudaEvent_t()
-    err = cyruntime.cudaGraphEventWaitNodeGetEvent(cynode, <cyruntime.cudaEvent_t*>event_out._ptr)
-    return (cudaError_t(err), event_out)
-{{endif}}
-
-{{if 'cudaGraphEventWaitNodeSetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphEventWaitNodeSetEvent(node, event):
-    """ Sets an event wait node's event.
-
-    Sets the event of event wait node `hNode` to `event`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the event for
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Event to use
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
-    """
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    err = cyruntime.cudaGraphEventWaitNodeSetEvent(cynode, cyevent)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphAddExternalSemaphoresSignalNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddExternalSemaphoresSignalNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, nodeParams : Optional[cudaExternalSemaphoreSignalNodeParams]):
-    """ Creates an external semaphore signal node and adds it to a graph.
-
-    Creates a new external semaphore signal node and adds it to `graph`
-    with `numDependencies` dependencies specified via `dependencies` and
-    arguments specified in `nodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `dependencies` may not have any duplicate entries. A
-    handle to the new node will be returned in `pGraphNode`.
-
-    Performs a signal operation on a set of externally allocated semaphore
-    objects when the node is launched. The operation(s) will occur after
-    all of the node's dependencies have completed.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
-        Parameters for the node
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeGetParams`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-    """
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cyruntime.cudaGraphAddExternalSemaphoresSignalNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cynodeParams_ptr)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresSignalNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExternalSemaphoresSignalNodeGetParams(hNode):
-    """ Returns an external semaphore signal node's parameters.
-
-    Returns the parameters of an external semaphore signal node `hNode` in
-    `params_out`. The `extSemArray` and `paramsArray` returned in
-    `params_out`, are owned by the node. This memory remains valid until
-    the node is destroyed or its parameters are modified, and should not be
-    modified directly. Use
-    :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams` to update
-    the parameters of this node.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    params_out : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
-    """
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cudaExternalSemaphoreSignalNodeParams params_out = cudaExternalSemaphoreSignalNodeParams()
-    err = cyruntime.cudaGraphExternalSemaphoresSignalNodeGetParams(cyhNode, <cyruntime.cudaExternalSemaphoreSignalNodeParams*>params_out._ptr)
-    return (cudaError_t(err), params_out)
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams : Optional[cudaExternalSemaphoreSignalNodeParams]):
-    """ Sets an external semaphore signal node's parameters.
-
-    Sets the parameters of an external semaphore signal node `hNode` to
-    `nodeParams`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    nodeParams : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
-        Parameters to copy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
-    """
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cyruntime.cudaGraphExternalSemaphoresSignalNodeSetParams(cyhNode, cynodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphAddExternalSemaphoresWaitNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddExternalSemaphoresWaitNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, nodeParams : Optional[cudaExternalSemaphoreWaitNodeParams]):
-    """ Creates an external semaphore wait node and adds it to a graph.
-
-    Creates a new external semaphore wait node and adds it to `graph` with
-    `numDependencies` dependencies specified via `dependencies` and
-    arguments specified in `nodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `dependencies` may not have any duplicate entries. A
-    handle to the new node will be returned in `pGraphNode`.
-
-    Performs a wait operation on a set of externally allocated semaphore
-    objects when the node is launched. The node's dependencies will not be
-    launched until the wait operation has completed.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
-        Parameters for the node
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeGetParams`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-    """
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cyruntime.cudaGraphAddExternalSemaphoresWaitNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cynodeParams_ptr)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresWaitNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExternalSemaphoresWaitNodeGetParams(hNode):
-    """ Returns an external semaphore wait node's parameters.
-
-    Returns the parameters of an external semaphore wait node `hNode` in
-    `params_out`. The `extSemArray` and `paramsArray` returned in
-    `params_out`, are owned by the node. This memory remains valid until
-    the node is destroyed or its parameters are modified, and should not be
-    modified directly. Use
-    :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams` to update
-    the parameters of this node.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    params_out : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
-    """
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cudaExternalSemaphoreWaitNodeParams params_out = cudaExternalSemaphoreWaitNodeParams()
-    err = cyruntime.cudaGraphExternalSemaphoresWaitNodeGetParams(cyhNode, <cyruntime.cudaExternalSemaphoreWaitNodeParams*>params_out._ptr)
-    return (cudaError_t(err), params_out)
-{{endif}}
-
-{{if 'cudaGraphExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams : Optional[cudaExternalSemaphoreWaitNodeParams]):
-    """ Sets an external semaphore wait node's parameters.
-
-    Sets the parameters of an external semaphore wait node `hNode` to
-    `nodeParams`.
-
-    Parameters
-    ----------
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    nodeParams : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
-        Parameters to copy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
-    """
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cyruntime.cudaGraphExternalSemaphoresWaitNodeSetParams(cyhNode, cynodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphAddMemAllocNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddMemAllocNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, nodeParams : Optional[cudaMemAllocNodeParams]):
-    """ Creates an allocation node and adds it to a graph.
-
-    Creates a new allocation node and adds it to `graph` with
-    `numDependencies` dependencies specified via `pDependencies` and
-    arguments specified in `nodeParams`. It is possible for
-    `numDependencies` to be 0, in which case the node will be placed at the
-    root of the graph. `pDependencies` may not have any duplicate entries.
-    A handle to the new node will be returned in `pGraphNode`.
-
-    When :py:obj:`~.cudaGraphAddMemAllocNode` creates an allocation node,
-    it returns the address of the allocation in `nodeParams.dptr`. The
-    allocation's address remains fixed across instantiations and launches.
-
-    If the allocation is freed in the same graph, by creating a free node
-    using :py:obj:`~.cudaGraphAddMemFreeNode`, the allocation can be
-    accessed by nodes ordered after the allocation node but before the free
-    node. These allocations cannot be freed outside the owning graph, and
-    they can only be freed once in the owning graph.
-
-    If the allocation is not freed in the same graph, then it can be
-    accessed not only by nodes in the graph which are ordered after the
-    allocation node, but also by stream operations ordered after the
-    graph's execution but before the allocation is freed.
-
-    Allocations which are not freed in the same graph can be freed by:
-
-    - passing the allocation to :py:obj:`~.cudaMemFreeAsync` or
-      :py:obj:`~.cudaMemFree`;
-
-    - launching a graph with a free node for that allocation; or
-
-    - specifying :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`
-      during instantiation, which makes each launch behave as though it
-      called :py:obj:`~.cudaMemFreeAsync` for every unfreed allocation.
-
-    It is not possible to free an allocation in both the owning graph and
-    another graph. If the allocation is freed in the same graph, a free
-    node cannot be added to another graph. If the allocation is freed in
-    another graph, a free node can no longer be added to the owning graph.
-
-    The following restrictions apply to graphs which contain allocation
-    and/or memory free nodes:
-
-    - Nodes and edges of the graph cannot be deleted.
-
-    - The graph cannot be used in a child node.
-
-    - Only one instantiation of the graph may exist at any point in time.
-
-    - The graph cannot be cloned.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.cudaMemAllocNodeParams`
-        Parameters for the node
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaGraphMemAllocNodeGetParams`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-    """
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    cdef cyruntime.cudaMemAllocNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cyruntime.cudaGraphAddMemAllocNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cynodeParams_ptr)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphMemAllocNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphMemAllocNodeGetParams(node):
-    """ Returns a memory alloc node's parameters.
-
-    Returns the parameters of a memory alloc node `hNode` in `params_out`.
-    The `poolProps` and `accessDescs` returned in `params_out`, are owned
-    by the node. This memory remains valid until the node is destroyed. The
-    returned parameters must not be modified.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    params_out : :py:obj:`~.cudaMemAllocNodeParams`
-        Pointer to return the parameters
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphMemFreeNodeGetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cudaMemAllocNodeParams params_out = cudaMemAllocNodeParams()
-    err = cyruntime.cudaGraphMemAllocNodeGetParams(cynode, <cyruntime.cudaMemAllocNodeParams*>params_out._ptr)
-    return (cudaError_t(err), params_out)
-{{endif}}
-
-{{if 'cudaGraphAddMemFreeNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddMemFreeNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, dptr):
-    """ Creates a memory free node and adds it to a graph.
-
-    Creates a new memory free node and adds it to `graph` with
-    `numDependencies` dependencies specified via `pDependencies` and
-    address specified in `dptr`. It is possible for `numDependencies` to be
-    0, in which case the node will be placed at the root of the graph.
-    `pDependencies` may not have any duplicate entries. A handle to the new
-    node will be returned in `pGraphNode`.
-
-    :py:obj:`~.cudaGraphAddMemFreeNode` will return
-    :py:obj:`~.cudaErrorInvalidValue` if the user attempts to free:
-
-    - an allocation twice in the same graph.
-
-    - an address that was not returned by an allocation node.
-
-    - an invalid address.
-
-    The following restrictions apply to graphs which contain allocation
-    and/or memory free nodes:
-
-    - Nodes and edges of the graph cannot be deleted.
-
-    - The graph cannot be used in a child node.
-
-    - Only one instantiation of the graph may exist at any point in time.
-
-    - The graph cannot be cloned.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    dptr : Any
-        Address of memory to free
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphMemFreeNodeGetParams`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-    """
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    cydptr = utils.HelperInputVoidPtr(dptr)
-    cdef void* cydptr_ptr = <void*><void_ptr>cydptr.cptr
-    err = cyruntime.cudaGraphAddMemFreeNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cydptr_ptr)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphMemFreeNodeGetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphMemFreeNodeGetParams(node):
-    """ Returns a memory free node's parameters.
-
-    Returns the address of a memory free node `hNode` in `dptr_out`.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to get the parameters for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    dptr_out : Any
-        Pointer to return the device address
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaGraphMemFreeNodeGetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef void_ptr dptr_out = 0
-    cdef void* cydptr_out_ptr = <void*>&dptr_out
-    err = cyruntime.cudaGraphMemFreeNodeGetParams(cynode, cydptr_out_ptr)
-    return (cudaError_t(err), dptr_out)
-{{endif}}
-
-{{if 'cudaDeviceGraphMemTrim' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGraphMemTrim(int device):
-    """ Free unused memory that was cached on the specified device for use with graphs back to the OS.
-
-    Blocks which are not in use by a graph that is either currently
-    executing or scheduled to execute are freed back to the operating
-    system.
-
-    Parameters
-    ----------
-    device : int
-        The device for which cached memory should be freed.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
-    """
-    err = cyruntime.cudaDeviceGraphMemTrim(device)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaDeviceGetGraphMemAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceGetGraphMemAttribute(int device, attr not None : cudaGraphMemAttributeType):
-    """ Query asynchronous allocation attributes related to graphs.
-
-    Valid attributes are:
-
-    - :py:obj:`~.cudaGraphMemAttrUsedMemCurrent`: Amount of memory, in
-      bytes, currently associated with graphs
-
-    - :py:obj:`~.cudaGraphMemAttrUsedMemHigh`: High watermark of memory, in
-      bytes, associated with graphs since the last time it was reset. High
-      watermark can only be reset to zero.
-
-    - :py:obj:`~.cudaGraphMemAttrReservedMemCurrent`: Amount of memory, in
-      bytes, currently allocated for use by the CUDA graphs asynchronous
-      allocator.
-
-    - :py:obj:`~.cudaGraphMemAttrReservedMemHigh`: High watermark of
-      memory, in bytes, currently allocated for use by the CUDA graphs
-      asynchronous allocator.
-
-    Parameters
-    ----------
-    device : int
-        Specifies the scope of the query
-    attr : :py:obj:`~.cudaGraphMemAttributeType`
-        attribute to get
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
-    value : Any
-        retrieved value
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
-    """
-    cdef cyruntime.cudaGraphMemAttributeType cyattr = attr.value
-    cdef utils.HelperCUgraphMem_attribute cyvalue = utils.HelperCUgraphMem_attribute(attr, 0, is_getter=True)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    err = cyruntime.cudaDeviceGetGraphMemAttribute(device, cyattr, cyvalue_ptr)
-    return (cudaError_t(err), cyvalue.pyObj())
-{{endif}}
-
-{{if 'cudaDeviceSetGraphMemAttribute' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaDeviceSetGraphMemAttribute(int device, attr not None : cudaGraphMemAttributeType, value):
-    """ Set asynchronous allocation attributes related to graphs.
-
-    Valid attributes are:
-
-    - :py:obj:`~.cudaGraphMemAttrUsedMemHigh`: High watermark of memory, in
-      bytes, associated with graphs since the last time it was reset. High
-      watermark can only be reset to zero.
-
-    - :py:obj:`~.cudaGraphMemAttrReservedMemHigh`: High watermark of
-      memory, in bytes, currently allocated for use by the CUDA graphs
-      asynchronous allocator.
-
-    Parameters
-    ----------
-    device : int
-        Specifies the scope of the query
-    attr : :py:obj:`~.cudaGraphMemAttributeType`
-        attribute to get
-    value : Any
-        pointer to value to set
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
-
-    See Also
-    --------
-    :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
-    """
-    cdef cyruntime.cudaGraphMemAttributeType cyattr = attr.value
-    cdef utils.HelperCUgraphMem_attribute cyvalue = utils.HelperCUgraphMem_attribute(attr, value, is_getter=False)
-    cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
-    err = cyruntime.cudaDeviceSetGraphMemAttribute(device, cyattr, cyvalue_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphClone' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphClone(originalGraph):
-    """ Clones a graph.
-
-    This function creates a copy of `originalGraph` and returns it in
-    `pGraphClone`. All parameters are copied into the cloned graph. The
-    original graph may be modified after this call without affecting the
-    clone.
-
-    Child graph nodes in the original graph are recursively copied into the
-    clone.
-
-    Parameters
-    ----------
-    originalGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to clone
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
-    pGraphClone : :py:obj:`~.cudaGraph_t`
-        Returns newly created cloned graph
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphNodeFindInClone`
-    """
-    cdef cyruntime.cudaGraph_t cyoriginalGraph
-    if originalGraph is None:
-        cyoriginalGraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(originalGraph, (cudaGraph_t,driver.CUgraph)):
-        poriginalGraph = int(originalGraph)
-        cyoriginalGraph = <cyruntime.cudaGraph_t><void_ptr>poriginalGraph
-    else:
-        poriginalGraph = int(cudaGraph_t(originalGraph))
-        cyoriginalGraph = <cyruntime.cudaGraph_t><void_ptr>poriginalGraph
-    cdef cudaGraph_t pGraphClone = cudaGraph_t()
-    err = cyruntime.cudaGraphClone(<cyruntime.cudaGraph_t*>pGraphClone._ptr, cyoriginalGraph)
-    return (cudaError_t(err), pGraphClone)
-{{endif}}
-
-{{if 'cudaGraphNodeFindInClone' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphNodeFindInClone(originalNode, clonedGraph):
-    """ Finds a cloned version of a node.
-
-    This function returns the node in `clonedGraph` corresponding to
-    `originalNode` in the original graph.
-
-    `clonedGraph` must have been cloned from `originalGraph` via
-    :py:obj:`~.cudaGraphClone`. `originalNode` must have been in
-    `originalGraph` at the time of the call to :py:obj:`~.cudaGraphClone`,
-    and the corresponding cloned node in `clonedGraph` must not have been
-    removed. The cloned node is then returned via `pClonedNode`.
-
-    Parameters
-    ----------
-    originalNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Handle to the original node
-    clonedGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Cloned graph to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pNode : :py:obj:`~.cudaGraphNode_t`
-        Returns handle to the cloned node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphClone`
-    """
-    cdef cyruntime.cudaGraph_t cyclonedGraph
-    if clonedGraph is None:
-        cyclonedGraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(clonedGraph, (cudaGraph_t,driver.CUgraph)):
-        pclonedGraph = int(clonedGraph)
-        cyclonedGraph = <cyruntime.cudaGraph_t><void_ptr>pclonedGraph
-    else:
-        pclonedGraph = int(cudaGraph_t(clonedGraph))
-        cyclonedGraph = <cyruntime.cudaGraph_t><void_ptr>pclonedGraph
-    cdef cyruntime.cudaGraphNode_t cyoriginalNode
-    if originalNode is None:
-        cyoriginalNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(originalNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        poriginalNode = int(originalNode)
-        cyoriginalNode = <cyruntime.cudaGraphNode_t><void_ptr>poriginalNode
-    else:
-        poriginalNode = int(cudaGraphNode_t(originalNode))
-        cyoriginalNode = <cyruntime.cudaGraphNode_t><void_ptr>poriginalNode
-    cdef cudaGraphNode_t pNode = cudaGraphNode_t()
-    err = cyruntime.cudaGraphNodeFindInClone(<cyruntime.cudaGraphNode_t*>pNode._ptr, cyoriginalNode, cyclonedGraph)
-    return (cudaError_t(err), pNode)
-{{endif}}
-
-{{if 'cudaGraphNodeGetType' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphNodeGetType(node):
-    """ Returns a node's type.
-
-    Returns the node type of `node` in `pType`.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pType : :py:obj:`~.cudaGraphNodeType`
-        Pointer to return the node type
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphKernelNodeGetParams`, :py:obj:`~.cudaGraphKernelNodeSetParams`, :py:obj:`~.cudaGraphHostNodeGetParams`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemsetNodeGetParams`, :py:obj:`~.cudaGraphMemsetNodeSetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphNodeType pType
-    err = cyruntime.cudaGraphNodeGetType(cynode, &pType)
-    return (cudaError_t(err), cudaGraphNodeType(pType))
-{{endif}}
-
-{{if 'cudaGraphGetNodes' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphGetNodes(graph, size_t numNodes = 0):
-    """ Returns a graph's nodes.
-
-    Returns a list of `graph's` nodes. `nodes` may be NULL, in which case
-    this function will return the number of nodes in `numNodes`. Otherwise,
-    `numNodes` entries will be filled in. If `numNodes` is higher than the
-    actual number of nodes, the remaining entries in `nodes` will be set to
-    NULL, and the number of nodes actually obtained will be returned in
-    `numNodes`.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to query
-    numNodes : int
-        See description
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    nodes : List[:py:obj:`~.cudaGraphNode_t`]
-        Pointer to return the nodes
-    numNodes : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetType`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
-    """
-    cdef size_t _graph_length = numNodes
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cyruntime.cudaGraphNode_t* cynodes = NULL
-    pynodes = []
-    if _graph_length != 0:
-        cynodes = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
-        if cynodes is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-    err = cyruntime.cudaGraphGetNodes(cygraph, cynodes, &numNodes)
-    if cudaError_t(err) == cudaError_t(0):
-        pynodes = [cudaGraphNode_t(init_value=<void_ptr>cynodes[idx]) for idx in range(_graph_length)]
-    if cynodes is not NULL:
-        free(cynodes)
-    return (cudaError_t(err), pynodes, numNodes)
-{{endif}}
-
-{{if 'cudaGraphGetRootNodes' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphGetRootNodes(graph, size_t pNumRootNodes = 0):
-    """ Returns a graph's root nodes.
-
-    Returns a list of `graph's` root nodes. `pRootNodes` may be NULL, in
-    which case this function will return the number of root nodes in
-    `pNumRootNodes`. Otherwise, `pNumRootNodes` entries will be filled in.
-    If `pNumRootNodes` is higher than the actual number of root nodes, the
-    remaining entries in `pRootNodes` will be set to NULL, and the number
-    of nodes actually obtained will be returned in `pNumRootNodes`.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to query
-    pNumRootNodes : int
-        See description
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pRootNodes : List[:py:obj:`~.cudaGraphNode_t`]
-        Pointer to return the root nodes
-    pNumRootNodes : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetType`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
-    """
-    cdef size_t _graph_length = pNumRootNodes
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cyruntime.cudaGraphNode_t* cypRootNodes = NULL
-    pypRootNodes = []
-    if _graph_length != 0:
-        cypRootNodes = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
-        if cypRootNodes is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-    err = cyruntime.cudaGraphGetRootNodes(cygraph, cypRootNodes, &pNumRootNodes)
-    if cudaError_t(err) == cudaError_t(0):
-        pypRootNodes = [cudaGraphNode_t(init_value=<void_ptr>cypRootNodes[idx]) for idx in range(_graph_length)]
-    if cypRootNodes is not NULL:
-        free(cypRootNodes)
-    return (cudaError_t(err), pypRootNodes, pNumRootNodes)
-{{endif}}
-
-{{if 'cudaGraphGetEdges' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphGetEdges(graph, size_t numEdges = 0):
-    """ Returns a graph's dependency edges.
-
-    Returns a list of `graph's` dependency edges. Edges are returned via
-    corresponding indices in `from` and `to`; that is, the node in `to`[i]
-    has a dependency on the node in `from`[i]. `from` and `to` may both be
-    NULL, in which case this function only returns the number of edges in
-    `numEdges`. Otherwise, `numEdges` entries will be filled in. If
-    `numEdges` is higher than the actual number of edges, the remaining
-    entries in `from` and `to` will be set to NULL, and the number of edges
-    actually returned will be written to `numEdges`.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to get the edges from
-    numEdges : int
-        See description
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    from : List[:py:obj:`~.cudaGraphNode_t`]
-        Location to return edge endpoints
-    to : List[:py:obj:`~.cudaGraphNode_t`]
-        Location to return edge endpoints
-    numEdges : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
-    """
-    cdef size_t _graph_length = numEdges
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
-    pyfrom_ = []
-    if _graph_length != 0:
-        cyfrom_ = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-    cdef cyruntime.cudaGraphNode_t* cyto = NULL
-    pyto = []
-    if _graph_length != 0:
-        cyto = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-    err = cyruntime.cudaGraphGetEdges(cygraph, cyfrom_, cyto, &numEdges)
-    if cudaError_t(err) == cudaError_t(0):
-        pyfrom_ = [cudaGraphNode_t(init_value=<void_ptr>cyfrom_[idx]) for idx in range(_graph_length)]
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if cudaError_t(err) == cudaError_t(0):
-        pyto = [cudaGraphNode_t(init_value=<void_ptr>cyto[idx]) for idx in range(_graph_length)]
-    if cyto is not NULL:
-        free(cyto)
-    return (cudaError_t(err), pyfrom_, pyto, numEdges)
-{{endif}}
-
-{{if 'cudaGraphGetEdges_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphGetEdges_v2(graph, size_t numEdges = 0):
-    """ Returns a graph's dependency edges (12.3+)
-
-    Returns a list of `graph's` dependency edges. Edges are returned via
-    corresponding indices in `from`, `to` and `edgeData`; that is, the node
-    in `to`[i] has a dependency on the node in `from`[i] with data
-    `edgeData`[i]. `from` and `to` may both be NULL, in which case this
-    function only returns the number of edges in `numEdges`. Otherwise,
-    `numEdges` entries will be filled in. If `numEdges` is higher than the
-    actual number of edges, the remaining entries in `from` and `to` will
-    be set to NULL, and the number of edges actually returned will be
-    written to `numEdges`. `edgeData` may alone be NULL, in which case the
-    edges must all have default (zeroed) edge data. Attempting a losst
-    query via NULL `edgeData` will result in
-    :py:obj:`~.cudaErrorLossyQuery`. If `edgeData` is non-NULL then `from`
-    and `to` must be as well.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to get the edges from
-    numEdges : int
-        See description
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLossyQuery`, :py:obj:`~.cudaErrorInvalidValue`
-    from : List[:py:obj:`~.cudaGraphNode_t`]
-        Location to return edge endpoints
-    to : List[:py:obj:`~.cudaGraphNode_t`]
-        Location to return edge endpoints
-    edgeData : List[:py:obj:`~.cudaGraphEdgeData`]
-        Optional location to return edge data
-    numEdges : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
-    """
-    cdef size_t _graph_length = numEdges
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
-    pyfrom_ = []
-    if _graph_length != 0:
-        cyfrom_ = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-    cdef cyruntime.cudaGraphNode_t* cyto = NULL
-    pyto = []
-    if _graph_length != 0:
-        cyto = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-    cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
-    pyedgeData = []
-    if _graph_length != 0:
-        cyedgeData = <cyruntime.cudaGraphEdgeData*>calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData))
-        if cyedgeData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
-    err = cyruntime.cudaGraphGetEdges_v2(cygraph, cyfrom_, cyto, cyedgeData, &numEdges)
-    if cudaError_t(err) == cudaError_t(0):
-        pyfrom_ = [cudaGraphNode_t(init_value=<void_ptr>cyfrom_[idx]) for idx in range(_graph_length)]
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if cudaError_t(err) == cudaError_t(0):
-        pyto = [cudaGraphNode_t(init_value=<void_ptr>cyto[idx]) for idx in range(_graph_length)]
-    if cyto is not NULL:
-        free(cyto)
-    if cudaError_t(err) == cudaError_t(0):
-        pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
-    if cyedgeData is not NULL:
-        free(cyedgeData)
-    return (cudaError_t(err), pyfrom_, pyto, pyedgeData, numEdges)
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependencies' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphNodeGetDependencies(node, size_t pNumDependencies = 0):
-    """ Returns a node's dependencies.
-
-    Returns a list of `node's` dependencies. `pDependencies` may be NULL,
-    in which case this function will return the number of dependencies in
-    `pNumDependencies`. Otherwise, `pNumDependencies` entries will be
-    filled in. If `pNumDependencies` is higher than the actual number of
-    dependencies, the remaining entries in `pDependencies` will be set to
-    NULL, and the number of nodes actually obtained will be returned in
-    `pNumDependencies`.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to query
-    pNumDependencies : int
-        See description
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Pointer to return the dependencies
-    pNumDependencies : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeGetDependentNodes`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`
-    """
-    cdef size_t _graph_length = pNumDependencies
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    pypDependencies = []
-    if _graph_length != 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-    err = cyruntime.cudaGraphNodeGetDependencies(cynode, cypDependencies, &pNumDependencies)
-    if cudaError_t(err) == cudaError_t(0):
-        pypDependencies = [cudaGraphNode_t(init_value=<void_ptr>cypDependencies[idx]) for idx in range(_graph_length)]
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pypDependencies, pNumDependencies)
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependencies_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphNodeGetDependencies_v2(node, size_t pNumDependencies = 0):
-    """ Returns a node's dependencies (12.3+)
-
-    Returns a list of `node's` dependencies. `pDependencies` may be NULL,
-    in which case this function will return the number of dependencies in
-    `pNumDependencies`. Otherwise, `pNumDependencies` entries will be
-    filled in. If `pNumDependencies` is higher than the actual number of
-    dependencies, the remaining entries in `pDependencies` will be set to
-    NULL, and the number of nodes actually obtained will be returned in
-    `pNumDependencies`.
-
-    Note that if an edge has non-zero (non-default) edge data and
-    `edgeData` is NULL, this API will return
-    :py:obj:`~.cudaErrorLossyQuery`. If `edgeData` is non-NULL, then
-    `pDependencies` must be as well.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to query
-    pNumDependencies : int
-        See description
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLossyQuery`, :py:obj:`~.cudaErrorInvalidValue`
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Pointer to return the dependencies
-    edgeData : List[:py:obj:`~.cudaGraphEdgeData`]
-        Optional array to return edge data for each dependency
-    pNumDependencies : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeGetDependentNodes`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`
-    """
-    cdef size_t _graph_length = pNumDependencies
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    pypDependencies = []
-    if _graph_length != 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-    cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
-    pyedgeData = []
-    if _graph_length != 0:
-        cyedgeData = <cyruntime.cudaGraphEdgeData*>calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData))
-        if cyedgeData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
-    err = cyruntime.cudaGraphNodeGetDependencies_v2(cynode, cypDependencies, cyedgeData, &pNumDependencies)
-    if cudaError_t(err) == cudaError_t(0):
-        pypDependencies = [cudaGraphNode_t(init_value=<void_ptr>cypDependencies[idx]) for idx in range(_graph_length)]
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    if cudaError_t(err) == cudaError_t(0):
-        pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
-    if cyedgeData is not NULL:
-        free(cyedgeData)
-    return (cudaError_t(err), pypDependencies, pyedgeData, pNumDependencies)
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependentNodes' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphNodeGetDependentNodes(node, size_t pNumDependentNodes = 0):
-    """ Returns a node's dependent nodes.
-
-    Returns a list of `node's` dependent nodes. `pDependentNodes` may be
-    NULL, in which case this function will return the number of dependent
-    nodes in `pNumDependentNodes`. Otherwise, `pNumDependentNodes` entries
-    will be filled in. If `pNumDependentNodes` is higher than the actual
-    number of dependent nodes, the remaining entries in `pDependentNodes`
-    will be set to NULL, and the number of nodes actually obtained will be
-    returned in `pNumDependentNodes`.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to query
-    pNumDependentNodes : int
-        See description
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pDependentNodes : List[:py:obj:`~.cudaGraphNode_t`]
-        Pointer to return the dependent nodes
-    pNumDependentNodes : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`
-    """
-    cdef size_t _graph_length = pNumDependentNodes
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphNode_t* cypDependentNodes = NULL
-    pypDependentNodes = []
-    if _graph_length != 0:
-        cypDependentNodes = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependentNodes is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-    err = cyruntime.cudaGraphNodeGetDependentNodes(cynode, cypDependentNodes, &pNumDependentNodes)
-    if cudaError_t(err) == cudaError_t(0):
-        pypDependentNodes = [cudaGraphNode_t(init_value=<void_ptr>cypDependentNodes[idx]) for idx in range(_graph_length)]
-    if cypDependentNodes is not NULL:
-        free(cypDependentNodes)
-    return (cudaError_t(err), pypDependentNodes, pNumDependentNodes)
-{{endif}}
-
-{{if 'cudaGraphNodeGetDependentNodes_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphNodeGetDependentNodes_v2(node, size_t pNumDependentNodes = 0):
-    """ Returns a node's dependent nodes (12.3+)
-
-    Returns a list of `node's` dependent nodes. `pDependentNodes` may be
-    NULL, in which case this function will return the number of dependent
-    nodes in `pNumDependentNodes`. Otherwise, `pNumDependentNodes` entries
-    will be filled in. If `pNumDependentNodes` is higher than the actual
-    number of dependent nodes, the remaining entries in `pDependentNodes`
-    will be set to NULL, and the number of nodes actually obtained will be
-    returned in `pNumDependentNodes`.
-
-    Note that if an edge has non-zero (non-default) edge data and
-    `edgeData` is NULL, this API will return
-    :py:obj:`~.cudaErrorLossyQuery`. If `edgeData` is non-NULL, then
-    `pDependentNodes` must be as well.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to query
-    pNumDependentNodes : int
-        See description
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLossyQuery`, :py:obj:`~.cudaErrorInvalidValue`
-    pDependentNodes : List[:py:obj:`~.cudaGraphNode_t`]
-        Pointer to return the dependent nodes
-    edgeData : List[:py:obj:`~.cudaGraphEdgeData`]
-        Optional pointer to return edge data for dependent nodes
-    pNumDependentNodes : int
-        See description
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`
-    """
-    cdef size_t _graph_length = pNumDependentNodes
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphNode_t* cypDependentNodes = NULL
-    pypDependentNodes = []
-    if _graph_length != 0:
-        cypDependentNodes = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependentNodes is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-    cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
-    pyedgeData = []
-    if _graph_length != 0:
-        cyedgeData = <cyruntime.cudaGraphEdgeData*>calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData))
-        if cyedgeData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
-    err = cyruntime.cudaGraphNodeGetDependentNodes_v2(cynode, cypDependentNodes, cyedgeData, &pNumDependentNodes)
-    if cudaError_t(err) == cudaError_t(0):
-        pypDependentNodes = [cudaGraphNode_t(init_value=<void_ptr>cypDependentNodes[idx]) for idx in range(_graph_length)]
-    if cypDependentNodes is not NULL:
-        free(cypDependentNodes)
-    if cudaError_t(err) == cudaError_t(0):
-        pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
-    if cyedgeData is not NULL:
-        free(cyedgeData)
-    return (cudaError_t(err), pypDependentNodes, pyedgeData, pNumDependentNodes)
-{{endif}}
-
-{{if 'cudaGraphAddDependencies' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddDependencies(graph, from_ : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], to : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies):
-    """ Adds dependency edges to a graph.
-
-    The number of dependencies to be added is defined by `numDependencies`
-    Elements in `pFrom` and `pTo` at corresponding indices define a
-    dependency. Each node in `pFrom` and `pTo` must belong to `graph`.
-
-    If `numDependencies` is 0, elements in `pFrom` and `pTo` will be
-    ignored. Specifying an existing dependency will return an error.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which dependencies are added
-    from : List[:py:obj:`~.cudaGraphNode_t`]
-        Array of nodes that provide the dependencies
-    to : List[:py:obj:`~.cudaGraphNode_t`]
-        Array of dependent nodes
-    numDependencies : size_t
-        Number of dependencies to be added
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphRemoveDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
-    """
-    to = [] if to is None else to
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to):
-        raise TypeError("Argument 'to' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    from_ = [] if from_ is None else from_
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_):
-        raise TypeError("Argument 'from_' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
-    if len(from_) > 0:
-        cyfrom_ = <cyruntime.cudaGraphNode_t*> calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(from_)):
-                cyfrom_[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>from_[idx])._ptr[0]
-    cdef cyruntime.cudaGraphNode_t* cyto = NULL
-    if len(to) > 0:
-        cyto = <cyruntime.cudaGraphNode_t*> calloc(len(to), sizeof(cyruntime.cudaGraphNode_t))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(to)):
-                cyto[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>to[idx])._ptr[0]
-    if numDependencies > <size_t>len(from_): raise RuntimeError("List is too small: " + str(len(from_)) + " < " + str(numDependencies))
-    if numDependencies > <size_t>len(to): raise RuntimeError("List is too small: " + str(len(to)) + " < " + str(numDependencies))
-    err = cyruntime.cudaGraphAddDependencies(cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>from_[0])._ptr if len(from_) == 1 else cyfrom_, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>to[0])._ptr if len(to) == 1 else cyto, numDependencies)
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if cyto is not NULL:
-        free(cyto)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphAddDependencies_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddDependencies_v2(graph, from_ : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], to : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], edgeData : Optional[Tuple[cudaGraphEdgeData] | List[cudaGraphEdgeData]], size_t numDependencies):
-    """ Adds dependency edges to a graph. (12.3+)
-
-    The number of dependencies to be added is defined by `numDependencies`
-    Elements in `pFrom` and `pTo` at corresponding indices define a
-    dependency. Each node in `pFrom` and `pTo` must belong to `graph`.
-
-    If `numDependencies` is 0, elements in `pFrom` and `pTo` will be
-    ignored. Specifying an existing dependency will return an error.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which dependencies are added
-    from : List[:py:obj:`~.cudaGraphNode_t`]
-        Array of nodes that provide the dependencies
-    to : List[:py:obj:`~.cudaGraphNode_t`]
-        Array of dependent nodes
-    edgeData : List[:py:obj:`~.cudaGraphEdgeData`]
-        Optional array of edge data. If NULL, default (zeroed) edge data is
-        assumed.
-    numDependencies : size_t
-        Number of dependencies to be added
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphRemoveDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
-    """
-    edgeData = [] if edgeData is None else edgeData
-    if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData):
-        raise TypeError("Argument 'edgeData' is not instance of type (expected Tuple[cyruntime.cudaGraphEdgeData,] or List[cyruntime.cudaGraphEdgeData,]")
-    to = [] if to is None else to
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to):
-        raise TypeError("Argument 'to' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    from_ = [] if from_ is None else from_
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_):
-        raise TypeError("Argument 'from_' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
-    if len(from_) > 0:
-        cyfrom_ = <cyruntime.cudaGraphNode_t*> calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(from_)):
-                cyfrom_[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>from_[idx])._ptr[0]
-    cdef cyruntime.cudaGraphNode_t* cyto = NULL
-    if len(to) > 0:
-        cyto = <cyruntime.cudaGraphNode_t*> calloc(len(to), sizeof(cyruntime.cudaGraphNode_t))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(to)):
-                cyto[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>to[idx])._ptr[0]
-    cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
-    if len(edgeData) > 0:
-        cyedgeData = <cyruntime.cudaGraphEdgeData*> calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData))
-        if cyedgeData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
-        for idx in range(len(edgeData)):
-            string.memcpy(&cyedgeData[idx], (<cudaGraphEdgeData>edgeData[idx])._ptr, sizeof(cyruntime.cudaGraphEdgeData))
-    err = cyruntime.cudaGraphAddDependencies_v2(cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>from_[0])._ptr if len(from_) == 1 else cyfrom_, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>to[0])._ptr if len(to) == 1 else cyto, (<cudaGraphEdgeData>edgeData[0])._ptr if len(edgeData) == 1 else cyedgeData, numDependencies)
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if cyto is not NULL:
-        free(cyto)
-    if cyedgeData is not NULL:
-        free(cyedgeData)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphRemoveDependencies' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphRemoveDependencies(graph, from_ : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], to : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies):
-    """ Removes dependency edges from a graph.
-
-    The number of `pDependencies` to be removed is defined by
-    `numDependencies`. Elements in `pFrom` and `pTo` at corresponding
-    indices define a dependency. Each node in `pFrom` and `pTo` must belong
-    to `graph`.
-
-    If `numDependencies` is 0, elements in `pFrom` and `pTo` will be
-    ignored. Specifying a non-existing dependency will return an error.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph from which to remove dependencies
-    from : List[:py:obj:`~.cudaGraphNode_t`]
-        Array of nodes that provide the dependencies
-    to : List[:py:obj:`~.cudaGraphNode_t`]
-        Array of dependent nodes
-    numDependencies : size_t
-        Number of dependencies to be removed
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
-    """
-    to = [] if to is None else to
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to):
-        raise TypeError("Argument 'to' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    from_ = [] if from_ is None else from_
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_):
-        raise TypeError("Argument 'from_' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
-    if len(from_) > 0:
-        cyfrom_ = <cyruntime.cudaGraphNode_t*> calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(from_)):
-                cyfrom_[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>from_[idx])._ptr[0]
-    cdef cyruntime.cudaGraphNode_t* cyto = NULL
-    if len(to) > 0:
-        cyto = <cyruntime.cudaGraphNode_t*> calloc(len(to), sizeof(cyruntime.cudaGraphNode_t))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(to)):
-                cyto[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>to[idx])._ptr[0]
-    if numDependencies > <size_t>len(from_): raise RuntimeError("List is too small: " + str(len(from_)) + " < " + str(numDependencies))
-    if numDependencies > <size_t>len(to): raise RuntimeError("List is too small: " + str(len(to)) + " < " + str(numDependencies))
-    err = cyruntime.cudaGraphRemoveDependencies(cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>from_[0])._ptr if len(from_) == 1 else cyfrom_, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>to[0])._ptr if len(to) == 1 else cyto, numDependencies)
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if cyto is not NULL:
-        free(cyto)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphRemoveDependencies_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphRemoveDependencies_v2(graph, from_ : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], to : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], edgeData : Optional[Tuple[cudaGraphEdgeData] | List[cudaGraphEdgeData]], size_t numDependencies):
-    """ Removes dependency edges from a graph. (12.3+)
-
-    The number of `pDependencies` to be removed is defined by
-    `numDependencies`. Elements in `pFrom` and `pTo` at corresponding
-    indices define a dependency. Each node in `pFrom` and `pTo` must belong
-    to `graph`.
-
-    If `numDependencies` is 0, elements in `pFrom` and `pTo` will be
-    ignored. Specifying an edge that does not exist in the graph, with data
-    matching `edgeData`, results in an error. `edgeData` is nullable, which
-    is equivalent to passing default (zeroed) data for each edge.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph from which to remove dependencies
-    from : List[:py:obj:`~.cudaGraphNode_t`]
-        Array of nodes that provide the dependencies
-    to : List[:py:obj:`~.cudaGraphNode_t`]
-        Array of dependent nodes
-    edgeData : List[:py:obj:`~.cudaGraphEdgeData`]
-        Optional array of edge data. If NULL, edge data is assumed to be
-        default (zeroed).
-    numDependencies : size_t
-        Number of dependencies to be removed
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
-    """
-    edgeData = [] if edgeData is None else edgeData
-    if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData):
-        raise TypeError("Argument 'edgeData' is not instance of type (expected Tuple[cyruntime.cudaGraphEdgeData,] or List[cyruntime.cudaGraphEdgeData,]")
-    to = [] if to is None else to
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to):
-        raise TypeError("Argument 'to' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    from_ = [] if from_ is None else from_
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_):
-        raise TypeError("Argument 'from_' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
-    if len(from_) > 0:
-        cyfrom_ = <cyruntime.cudaGraphNode_t*> calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t))
-        if cyfrom_ is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(from_)):
-                cyfrom_[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>from_[idx])._ptr[0]
-    cdef cyruntime.cudaGraphNode_t* cyto = NULL
-    if len(to) > 0:
-        cyto = <cyruntime.cudaGraphNode_t*> calloc(len(to), sizeof(cyruntime.cudaGraphNode_t))
-        if cyto is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(to)):
-                cyto[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>to[idx])._ptr[0]
-    cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
-    if len(edgeData) > 0:
-        cyedgeData = <cyruntime.cudaGraphEdgeData*> calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData))
-        if cyedgeData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
-        for idx in range(len(edgeData)):
-            string.memcpy(&cyedgeData[idx], (<cudaGraphEdgeData>edgeData[idx])._ptr, sizeof(cyruntime.cudaGraphEdgeData))
-    err = cyruntime.cudaGraphRemoveDependencies_v2(cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>from_[0])._ptr if len(from_) == 1 else cyfrom_, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>to[0])._ptr if len(to) == 1 else cyto, (<cudaGraphEdgeData>edgeData[0])._ptr if len(edgeData) == 1 else cyedgeData, numDependencies)
-    if cyfrom_ is not NULL:
-        free(cyfrom_)
-    if cyto is not NULL:
-        free(cyto)
-    if cyedgeData is not NULL:
-        free(cyedgeData)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphDestroyNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphDestroyNode(node):
-    """ Remove a node from the graph.
-
-    Removes `node` from its graph. This operation also severs any
-    dependencies of other nodes on `node` and vice versa.
-
-    Dependencies cannot be removed from graphs which contain allocation or
-    free nodes. Any attempt to do so will return an error.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to remove
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    err = cyruntime.cudaGraphDestroyNode(cynode)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphInstantiate' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphInstantiate(graph, unsigned long long flags):
-    """ Creates an executable graph from a graph.
-
-    Instantiates `graph` as an executable graph. The graph is validated for
-    any structural constraints or intra-node constraints which were not
-    previously validated. If instantiation is successful, a handle to the
-    instantiated graph is returned in `pGraphExec`.
-
-    The `flags` parameter controls the behavior of instantiation and
-    subsequent graph launches. Valid flags are:
-
-    - :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, which
-      configures a graph containing memory allocation nodes to
-      automatically free any unfreed memory allocations before the graph is
-      relaunched.
-
-    - :py:obj:`~.cudaGraphInstantiateFlagDeviceLaunch`, which configures
-      the graph for launch from the device. If this flag is passed, the
-      executable graph handle returned can be used to launch the graph from
-      both the host and device. This flag cannot be used in conjunction
-      with :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`.
-
-    - :py:obj:`~.cudaGraphInstantiateFlagUseNodePriority`, which causes the
-      graph to use the priorities from the per-node attributes rather than
-      the priority of the launch stream during execution. Note that
-      priorities are only available on kernel nodes, and are copied from
-      stream priority during stream capture.
-
-    If `graph` contains any allocation or free nodes, there can be at most
-    one executable graph in existence for that graph at a time. An attempt
-    to instantiate a second executable graph before destroying the first
-    with :py:obj:`~.cudaGraphExecDestroy` will result in an error. The same
-    also applies if `graph` contains any device-updatable kernel nodes.
-
-    Graphs instantiated for launch on the device have additional
-    restrictions which do not apply to host graphs:
-
-    - The graph's nodes must reside on a single device.
-
-    - The graph can only contain kernel nodes, memcpy nodes, memset nodes,
-      and child graph nodes.
-
-    - The graph cannot be empty and must contain at least one kernel,
-      memcpy, or memset node. Operation-specific restrictions are outlined
-      below.
-
-    - Kernel nodes:
-
-      - Use of CUDA Dynamic Parallelism is not permitted.
-
-      - Cooperative launches are permitted as long as MPS is not in use.
-
-    - Memcpy nodes:
-
-      - Only copies involving device memory and/or pinned device-mapped
-        host memory are permitted.
-
-      - Copies involving CUDA arrays are not permitted.
-
-      - Both operands must be accessible from the current device, and the
-        current device must match the device of other nodes in the graph.
-
-    If `graph` is not instantiated for launch on the device but contains
-    kernels which call device-side :py:obj:`~.cudaGraphLaunch()` from
-    multiple devices, this will result in an error.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to instantiate
-    flags : unsigned long long
-        Flags to control instantiation. See
-        :py:obj:`~.CUgraphInstantiate_flags`.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pGraphExec : :py:obj:`~.cudaGraphExec_t`
-        Returns instantiated graph
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy`
-    """
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t()
-    err = cyruntime.cudaGraphInstantiate(<cyruntime.cudaGraphExec_t*>pGraphExec._ptr, cygraph, flags)
-    return (cudaError_t(err), pGraphExec)
-{{endif}}
-
-{{if 'cudaGraphInstantiateWithFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphInstantiateWithFlags(graph, unsigned long long flags):
-    """ Creates an executable graph from a graph.
-
-    Instantiates `graph` as an executable graph. The graph is validated for
-    any structural constraints or intra-node constraints which were not
-    previously validated. If instantiation is successful, a handle to the
-    instantiated graph is returned in `pGraphExec`.
-
-    The `flags` parameter controls the behavior of instantiation and
-    subsequent graph launches. Valid flags are:
-
-    - :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, which
-      configures a graph containing memory allocation nodes to
-      automatically free any unfreed memory allocations before the graph is
-      relaunched.
-
-    - :py:obj:`~.cudaGraphInstantiateFlagDeviceLaunch`, which configures
-      the graph for launch from the device. If this flag is passed, the
-      executable graph handle returned can be used to launch the graph from
-      both the host and device. This flag can only be used on platforms
-      which support unified addressing. This flag cannot be used in
-      conjunction with
-      :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`.
-
-    - :py:obj:`~.cudaGraphInstantiateFlagUseNodePriority`, which causes the
-      graph to use the priorities from the per-node attributes rather than
-      the priority of the launch stream during execution. Note that
-      priorities are only available on kernel nodes, and are copied from
-      stream priority during stream capture.
-
-    If `graph` contains any allocation or free nodes, there can be at most
-    one executable graph in existence for that graph at a time. An attempt
-    to instantiate a second executable graph before destroying the first
-    with :py:obj:`~.cudaGraphExecDestroy` will result in an error. The same
-    also applies if `graph` contains any device-updatable kernel nodes.
-
-    If `graph` contains kernels which call device-side
-    :py:obj:`~.cudaGraphLaunch()` from multiple devices, this will result
-    in an error.
-
-    Graphs instantiated for launch on the device have additional
-    restrictions which do not apply to host graphs:
-
-    - The graph's nodes must reside on a single device.
-
-    - The graph can only contain kernel nodes, memcpy nodes, memset nodes,
-      and child graph nodes.
-
-    - The graph cannot be empty and must contain at least one kernel,
-      memcpy, or memset node. Operation-specific restrictions are outlined
-      below.
-
-    - Kernel nodes:
-
-      - Use of CUDA Dynamic Parallelism is not permitted.
-
-      - Cooperative launches are permitted as long as MPS is not in use.
-
-    - Memcpy nodes:
-
-      - Only copies involving device memory and/or pinned device-mapped
-        host memory are permitted.
-
-      - Copies involving CUDA arrays are not permitted.
-
-      - Both operands must be accessible from the current device, and the
-        current device must match the device of other nodes in the graph.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to instantiate
-    flags : unsigned long long
-        Flags to control instantiation. See
-        :py:obj:`~.CUgraphInstantiate_flags`.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pGraphExec : :py:obj:`~.cudaGraphExec_t`
-        Returns instantiated graph
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy`
-    """
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t()
-    err = cyruntime.cudaGraphInstantiateWithFlags(<cyruntime.cudaGraphExec_t*>pGraphExec._ptr, cygraph, flags)
-    return (cudaError_t(err), pGraphExec)
-{{endif}}
-
-{{if 'cudaGraphInstantiateWithParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphInstantiateWithParams(graph, instantiateParams : Optional[cudaGraphInstantiateParams]):
-    """ Creates an executable graph from a graph.
-
-    Instantiates `graph` as an executable graph according to the
-    `instantiateParams` structure. The graph is validated for any
-    structural constraints or intra-node constraints which were not
-    previously validated. If instantiation is successful, a handle to the
-    instantiated graph is returned in `pGraphExec`.
-
-    `instantiateParams` controls the behavior of instantiation and
-    subsequent graph launches, as well as returning more detailed
-    information in the event of an error.
-    :py:obj:`~.cudaGraphInstantiateParams` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    The `flags` field controls the behavior of instantiation and subsequent
-    graph launches. Valid flags are:
-
-    - :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, which
-      configures a graph containing memory allocation nodes to
-      automatically free any unfreed memory allocations before the graph is
-      relaunched.
-
-    - :py:obj:`~.cudaGraphInstantiateFlagUpload`, which will perform an
-      upload of the graph into `uploadStream` once the graph has been
-      instantiated.
-
-    - :py:obj:`~.cudaGraphInstantiateFlagDeviceLaunch`, which configures
-      the graph for launch from the device. If this flag is passed, the
-      executable graph handle returned can be used to launch the graph from
-      both the host and device. This flag can only be used on platforms
-      which support unified addressing. This flag cannot be used in
-      conjunction with
-      :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`.
-
-    - :py:obj:`~.cudaGraphInstantiateFlagUseNodePriority`, which causes the
-      graph to use the priorities from the per-node attributes rather than
-      the priority of the launch stream during execution. Note that
-      priorities are only available on kernel nodes, and are copied from
-      stream priority during stream capture.
-
-    If `graph` contains any allocation or free nodes, there can be at most
-    one executable graph in existence for that graph at a time. An attempt
-    to instantiate a second executable graph before destroying the first
-    with :py:obj:`~.cudaGraphExecDestroy` will result in an error. The same
-    also applies if `graph` contains any device-updatable kernel nodes.
-
-    If `graph` contains kernels which call device-side
-    :py:obj:`~.cudaGraphLaunch()` from multiple devices, this will result
-    in an error.
-
-    Graphs instantiated for launch on the device have additional
-    restrictions which do not apply to host graphs:
-
-    - The graph's nodes must reside on a single device.
-
-    - The graph can only contain kernel nodes, memcpy nodes, memset nodes,
-      and child graph nodes.
-
-    - The graph cannot be empty and must contain at least one kernel,
-      memcpy, or memset node. Operation-specific restrictions are outlined
-      below.
-
-    - Kernel nodes:
-
-      - Use of CUDA Dynamic Parallelism is not permitted.
-
-      - Cooperative launches are permitted as long as MPS is not in use.
-
-    - Memcpy nodes:
-
-      - Only copies involving device memory and/or pinned device-mapped
-        host memory are permitted.
-
-      - Copies involving CUDA arrays are not permitted.
-
-      - Both operands must be accessible from the current device, and the
-        current device must match the device of other nodes in the graph.
-
-    In the event of an error, the `result_out` and `errNode_out` fields
-    will contain more information about the nature of the error. Possible
-    error reporting includes:
-
-    - :py:obj:`~.cudaGraphInstantiateError`, if passed an invalid value or
-      if an unexpected error occurred which is described by the return
-      value of the function. `errNode_out` will be set to NULL.
-
-    - :py:obj:`~.cudaGraphInstantiateInvalidStructure`, if the graph
-      structure is invalid. `errNode_out` will be set to one of the
-      offending nodes.
-
-    - :py:obj:`~.cudaGraphInstantiateNodeOperationNotSupported`, if the
-      graph is instantiated for device launch but contains a node of an
-      unsupported node type, or a node which performs unsupported
-      operations, such as use of CUDA dynamic parallelism within a kernel
-      node. `errNode_out` will be set to this node.
-
-    - :py:obj:`~.cudaGraphInstantiateMultipleDevicesNotSupported`, if the
-      graph is instantiated for device launch but a node’s device differs
-      from that of another node. This error can also be returned if a graph
-      is not instantiated for device launch and it contains kernels which
-      call device-side :py:obj:`~.cudaGraphLaunch()` from multiple devices.
-      `errNode_out` will be set to this node.
-
-    If instantiation is successful, `result_out` will be set to
-    :py:obj:`~.cudaGraphInstantiateSuccess`, and `hErrNode_out` will be set
-    to NULL.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to instantiate
-    instantiateParams : :py:obj:`~.cudaGraphInstantiateParams`
-        Instantiation parameters
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    pGraphExec : :py:obj:`~.cudaGraphExec_t`
-        Returns instantiated graph
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphExecDestroy`
-    """
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t()
-    cdef cyruntime.cudaGraphInstantiateParams* cyinstantiateParams_ptr = instantiateParams._ptr if instantiateParams != None else NULL
-    err = cyruntime.cudaGraphInstantiateWithParams(<cyruntime.cudaGraphExec_t*>pGraphExec._ptr, cygraph, cyinstantiateParams_ptr)
-    return (cudaError_t(err), pGraphExec)
-{{endif}}
-
-{{if 'cudaGraphExecGetFlags' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecGetFlags(graphExec):
-    """ Query the instantiation flags of an executable graph.
-
-    Returns the flags that were passed to instantiation for the given
-    executable graph. :py:obj:`~.cudaGraphInstantiateFlagUpload` will not
-    be returned by this API as it does not affect the resulting executable
-    graph.
-
-    Parameters
-    ----------
-    graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph to query
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    flags : unsigned long long
-        Returns the instantiation flags
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphInstantiateWithParams`
-    """
-    cdef cyruntime.cudaGraphExec_t cygraphExec
-    if graphExec is None:
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        pgraphExec = int(graphExec)
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
-    else:
-        pgraphExec = int(cudaGraphExec_t(graphExec))
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
-    cdef unsigned long long flags = 0
-    err = cyruntime.cudaGraphExecGetFlags(cygraphExec, &flags)
-    return (cudaError_t(err), flags)
-{{endif}}
-
-{{if 'cudaGraphExecKernelNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecKernelNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaKernelNodeParams]):
-    """ Sets the parameters for a kernel node in the given graphExec.
-
-    Sets the parameters of a kernel node in an executable graph
-    `hGraphExec`. The node is identified by the corresponding node `node`
-    in the non-executable graph, from which the executable graph was
-    instantiated.
-
-    `node` must not have been removed from the original graph. All
-    `nodeParams` fields may change, but the following restrictions apply to
-    `func` updates:
-
-    - The owning device of the function cannot change.
-
-    - A node whose function originally did not use CUDA dynamic parallelism
-      cannot be updated to a function which uses CDP
-
-    - A node whose function originally did not make device-side update
-      calls cannot be updated to a function which makes device-side update
-      calls.
-
-    - If `hGraphExec` was not instantiated for device launch, a node whose
-      function originally did not use device-side
-      :py:obj:`~.cudaGraphLaunch()` cannot be updated to a function which
-      uses device-side :py:obj:`~.cudaGraphLaunch()` unless the node
-      resides on the same device as nodes which contained such calls at
-      instantiate-time. If no such calls were present at instantiation,
-      these updates cannot be performed at all.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `node` is also not modified by this call.
-
-    If `node` is a device-updatable kernel node, the next upload/launch of
-    `hGraphExec` will overwrite any previous device-side updates.
-    Additionally, applying host updates to a device-updatable kernel node
-    while it is being updated from the device will result in undefined
-    behavior.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        kernel node from the graph from which graphExec was instantiated
-    pNodeParams : :py:obj:`~.cudaKernelNodeParams`
-        Updated Parameters to set
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._ptr if pNodeParams != None else NULL
-    err = cyruntime.cudaGraphExecKernelNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphExecMemcpyNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecMemcpyNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaMemcpy3DParms]):
-    """ Sets the parameters for a memcpy node in the given graphExec.
-
-    Updates the work represented by `node` in `hGraphExec` as though `node`
-    had contained `pNodeParams` at instantiation. `node` must remain in the
-    graph which was used to instantiate `hGraphExec`. Changed edges to and
-    from `node` are ignored.
-
-    The source and destination memory in `pNodeParams` must be allocated
-    from the same contexts as the original source and destination memory.
-    Both the instantiation-time memory operands and the memory operands in
-    `pNodeParams` must be 1-dimensional. Zero-length operations are not
-    supported.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `node` is also not modified by this call.
-
-    Returns :py:obj:`~.cudaErrorInvalidValue` if the memory operands'
-    mappings changed or either the original or new memory operands are
-    multidimensional.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Memcpy node from the graph which was used to instantiate graphExec
-    pNodeParams : :py:obj:`~.cudaMemcpy3DParms`
-        Updated Parameters to set
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParamsToSymbol`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParamsFromSymbol`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    cdef cyruntime.cudaMemcpy3DParms* cypNodeParams_ptr = pNodeParams._ptr if pNodeParams != None else NULL
-    err = cyruntime.cudaGraphExecMemcpyNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphExecMemcpyNodeSetParams1D' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecMemcpyNodeSetParams1D(hGraphExec, node, dst, src, size_t count, kind not None : cudaMemcpyKind):
-    """ Sets the parameters for a memcpy node in the given graphExec to perform a 1-dimensional copy.
-
-    Updates the work represented by `node` in `hGraphExec` as though `node`
-    had contained the given params at instantiation. `node` must remain in
-    the graph which was used to instantiate `hGraphExec`. Changed edges to
-    and from `node` are ignored.
-
-    `src` and `dst` must be allocated from the same contexts as the
-    original source and destination memory. The instantiation-time memory
-    operands must be 1-dimensional. Zero-length operations are not
-    supported.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `node` is also not modified by this call.
-
-    Returns :py:obj:`~.cudaErrorInvalidValue` if the memory operands'
-    mappings changed or the original memory operands are multidimensional.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Memcpy node from the graph which was used to instantiate graphExec
-    dst : Any
-        Destination memory address
-    src : Any
-        Source memory address
-    count : size_t
-        Size in bytes to copy
-    kind : :py:obj:`~.cudaMemcpyKind`
-        Type of transfer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemcpyNode1D`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    cydst = utils.HelperInputVoidPtr(dst)
-    cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
-    cysrc = utils.HelperInputVoidPtr(src)
-    cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
-    cdef cyruntime.cudaMemcpyKind cykind = kind.value
-    err = cyruntime.cudaGraphExecMemcpyNodeSetParams1D(cyhGraphExec, cynode, cydst_ptr, cysrc_ptr, count, cykind)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphExecMemsetNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecMemsetNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaMemsetParams]):
-    """ Sets the parameters for a memset node in the given graphExec.
-
-    Updates the work represented by `node` in `hGraphExec` as though `node`
-    had contained `pNodeParams` at instantiation. `node` must remain in the
-    graph which was used to instantiate `hGraphExec`. Changed edges to and
-    from `node` are ignored.
-
-    Zero sized operations are not supported.
-
-    The new destination pointer in `pNodeParams` must be to the same kind
-    of allocation as the original destination pointer and have the same
-    context association and device mapping as the original destination
-    pointer.
-
-    Both the value and pointer address may be updated.   Changing other
-    aspects of the memset (width, height, element size or pitch) may cause
-    the update to be rejected. Specifically, for 2d memsets, all dimension
-    changes are rejected. For 1d memsets, changes in height are explicitly
-    rejected and other changes are oportunistically allowed if the
-    resulting work maps onto the work resources already allocated for the
-    node.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `node` is also not modified by this call.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Memset node from the graph which was used to instantiate graphExec
-    pNodeParams : :py:obj:`~.cudaMemsetParams`
-        Updated Parameters to set
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr = pNodeParams._ptr if pNodeParams != None else NULL
-    err = cyruntime.cudaGraphExecMemsetNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphExecHostNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecHostNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaHostNodeParams]):
-    """ Sets the parameters for a host node in the given graphExec.
-
-    Updates the work represented by `node` in `hGraphExec` as though `node`
-    had contained `pNodeParams` at instantiation. `node` must remain in the
-    graph which was used to instantiate `hGraphExec`. Changed edges to and
-    from `node` are ignored.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `node` is also not modified by this call.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Host node from the graph which was used to instantiate graphExec
-    pNodeParams : :py:obj:`~.cudaHostNodeParams`
-        Updated Parameters to set
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._ptr if pNodeParams != None else NULL
-    err = cyruntime.cudaGraphExecHostNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphExecChildGraphNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecChildGraphNodeSetParams(hGraphExec, node, childGraph):
-    """ Updates node parameters in the child graph node in the given graphExec.
-
-    Updates the work represented by `node` in `hGraphExec` as though the
-    nodes contained in `node's` graph had the parameters contained in
-    `childGraph's` nodes at instantiation. `node` must remain in the graph
-    which was used to instantiate `hGraphExec`. Changed edges to and from
-    `node` are ignored.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `node` is also not modified by this call.
-
-    The topology of `childGraph`, as well as the node insertion order, must
-    match that of the graph contained in `node`. See
-    :py:obj:`~.cudaGraphExecUpdate()` for a list of restrictions on what
-    can be updated in an instantiated graph. The update is recursive, so
-    child graph nodes contained within the top level child graph will also
-    be updated.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Host node from the graph which was used to instantiate graphExec
-    childGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph supplying the updated parameters
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaGraph_t cychildGraph
-    if childGraph is None:
-        cychildGraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(childGraph, (cudaGraph_t,driver.CUgraph)):
-        pchildGraph = int(childGraph)
-        cychildGraph = <cyruntime.cudaGraph_t><void_ptr>pchildGraph
-    else:
-        pchildGraph = int(cudaGraph_t(childGraph))
-        cychildGraph = <cyruntime.cudaGraph_t><void_ptr>pchildGraph
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    err = cyruntime.cudaGraphExecChildGraphNodeSetParams(cyhGraphExec, cynode, cychildGraph)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphExecEventRecordNodeSetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event):
-    """ Sets the event for an event record node in the given graphExec.
-
-    Sets the event of an event record node in an executable graph
-    `hGraphExec`. The node is identified by the corresponding node `hNode`
-    in the non-executable graph, from which the executable graph was
-    instantiated.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Event record node from the graph from which graphExec was
-        instantiated
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Updated event to use
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    err = cyruntime.cudaGraphExecEventRecordNodeSetEvent(cyhGraphExec, cyhNode, cyevent)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphExecEventWaitNodeSetEvent' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event):
-    """ Sets the event for an event wait node in the given graphExec.
-
-    Sets the event of an event wait node in an executable graph
-    `hGraphExec`. The node is identified by the corresponding node `hNode`
-    in the non-executable graph, from which the executable graph was
-    instantiated.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Event wait node from the graph from which graphExec was
-        instantiated
-    event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
-        Updated event to use
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaEvent_t cyevent
-    if event is None:
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>0
-    elif isinstance(event, (cudaEvent_t,driver.CUevent)):
-        pevent = int(event)
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    else:
-        pevent = int(cudaEvent_t(event))
-        cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    err = cyruntime.cudaGraphExecEventWaitNodeSetEvent(cyhGraphExec, cyhNode, cyevent)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphExecExternalSemaphoresSignalNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodeParams : Optional[cudaExternalSemaphoreSignalNodeParams]):
-    """ Sets the parameters for an external semaphore signal node in the given graphExec.
-
-    Sets the parameters of an external semaphore signal node in an
-    executable graph `hGraphExec`. The node is identified by the
-    corresponding node `hNode` in the non-executable graph, from which the
-    executable graph was instantiated.
-
-    `hNode` must not have been removed from the original graph.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    Changing `nodeParams->numExtSems` is not supported.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        semaphore signal node from the graph from which graphExec was
-        instantiated
-    nodeParams : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
-        Updated Parameters to set
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cyruntime.cudaGraphExecExternalSemaphoresSignalNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphExecExternalSemaphoresWaitNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodeParams : Optional[cudaExternalSemaphoreWaitNodeParams]):
-    """ Sets the parameters for an external semaphore wait node in the given graphExec.
-
-    Sets the parameters of an external semaphore wait node in an executable
-    graph `hGraphExec`. The node is identified by the corresponding node
-    `hNode` in the non-executable graph, from which the executable graph
-    was instantiated.
-
-    `hNode` must not have been removed from the original graph.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    Changing `nodeParams->numExtSems` is not supported.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        semaphore wait node from the graph from which graphExec was
-        instantiated
-    nodeParams : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
-        Updated Parameters to set
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cyruntime.cudaGraphExecExternalSemaphoresWaitNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphNodeSetEnabled' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphNodeSetEnabled(hGraphExec, hNode, unsigned int isEnabled):
-    """ Enables or disables the specified node in the given graphExec.
-
-    Sets `hNode` to be either enabled or disabled. Disabled nodes are
-    functionally equivalent to empty nodes until they are reenabled.
-    Existing node parameters are not affected by disabling/enabling the
-    node.
-
-    The node is identified by the corresponding node `hNode` in the non-
-    executable graph, from which the executable graph was instantiated.
-
-    `hNode` must not have been removed from the original graph.
-
-    The modifications only affect future launches of `hGraphExec`. Already
-    enqueued or running launches of `hGraphExec` are not affected by this
-    call. `hNode` is also not modified by this call.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node from the graph from which graphExec was instantiated
-    isEnabled : unsigned int
-        Node is enabled if != 0, otherwise the node is disabled
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeGetEnabled`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` :py:obj:`~.cudaGraphLaunch`
-
-    Notes
-    -----
-    Currently only kernel, memset and memcpy nodes are supported.
-    """
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    err = cyruntime.cudaGraphNodeSetEnabled(cyhGraphExec, cyhNode, isEnabled)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphNodeGetEnabled' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphNodeGetEnabled(hGraphExec, hNode):
-    """ Query whether a node in the given graphExec is enabled.
-
-    Sets isEnabled to 1 if `hNode` is enabled, or 0 if `hNode` is disabled.
-
-    The node is identified by the corresponding node `hNode` in the non-
-    executable graph, from which the executable graph was instantiated.
-
-    `hNode` must not have been removed from the original graph.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to set the specified node
-    hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node from the graph from which graphExec was instantiated
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-    isEnabled : unsigned int
-        Location to return the enabled status of the node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphNodeSetEnabled`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` :py:obj:`~.cudaGraphLaunch`
-
-    Notes
-    -----
-    Currently only kernel, memset and memcpy nodes are supported.
-    """
-    cdef cyruntime.cudaGraphNode_t cyhNode
-    if hNode is None:
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
-        phNode = int(hNode)
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    else:
-        phNode = int(cudaGraphNode_t(hNode))
-        cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    cdef unsigned int isEnabled = 0
-    err = cyruntime.cudaGraphNodeGetEnabled(cyhGraphExec, cyhNode, &isEnabled)
-    return (cudaError_t(err), isEnabled)
-{{endif}}
-
-{{if 'cudaGraphExecUpdate' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecUpdate(hGraphExec, hGraph):
-    """ Check whether an executable graph can be updated with a graph and perform the update if possible.
-
-    Updates the node parameters in the instantiated graph specified by
-    `hGraphExec` with the node parameters in a topologically identical
-    graph specified by `hGraph`.
-
-    Limitations:
-
-    - Kernel nodes:
-
-      - The owning context of the function cannot change.
-
-      - A node whose function originally did not use CUDA dynamic
-        parallelism cannot be updated to a function which uses CDP.
-
-      - A node whose function originally did not make device-side update
-        calls cannot be updated to a function which makes device-side
-        update calls.
-
-      - A cooperative node cannot be updated to a non-cooperative node, and
-        vice-versa.
-
-      - If the graph was instantiated with
-        cudaGraphInstantiateFlagUseNodePriority, the priority attribute
-        cannot change. Equality is checked on the originally requested
-        priority values, before they are clamped to the device's supported
-        range.
-
-      - If `hGraphExec` was not instantiated for device launch, a node
-        whose function originally did not use device-side
-        :py:obj:`~.cudaGraphLaunch()` cannot be updated to a function which
-        uses device-side :py:obj:`~.cudaGraphLaunch()` unless the node
-        resides on the same device as nodes which contained such calls at
-        instantiate-time. If no such calls were present at instantiation,
-        these updates cannot be performed at all.
-
-      - Neither `hGraph` nor `hGraphExec` may contain device-updatable
-        kernel nodes.
-
-    - Memset and memcpy nodes:
-
-      - The CUDA device(s) to which the operand(s) was allocated/mapped
-        cannot change.
-
-      - The source/destination memory must be allocated from the same
-        contexts as the original source/destination memory.
-
-      - For 2d memsets, only address and assinged value may be updated.
-
-      - For 1d memsets, updating dimensions is also allowed, but may fail
-        if the resulting operation doesn't map onto the work resources
-        already allocated for the node.
-
-    - Additional memcpy node restrictions:
-
-      - Changing either the source or destination memory type(i.e.
-        CU_MEMORYTYPE_DEVICE, CU_MEMORYTYPE_ARRAY, etc.) is not supported.
-
-    - Conditional nodes:
-
-      - Changing node parameters is not supported.
-
-      - Changeing parameters of nodes within the conditional body graph is
-        subject to the rules above.
-
-      - Conditional handle flags and default values are updated as part of
-        the graph update.
-
-    Note: The API may add further restrictions in future releases. The
-    return code should always be checked.
-
-    cudaGraphExecUpdate sets the result member of `resultInfo` to
-    cudaGraphExecUpdateErrorTopologyChanged under the following conditions:
-
-    - The count of nodes directly in `hGraphExec` and `hGraph` differ, in
-      which case resultInfo->errorNode is set to NULL.
-
-    - `hGraph` has more exit nodes than `hGraph`, in which case
-      resultInfo->errorNode is set to one of the exit nodes in hGraph.
-
-    - A node in `hGraph` has a different number of dependencies than the
-      node from `hGraphExec` it is paired with, in which case
-      resultInfo->errorNode is set to the node from `hGraph`.
-
-    - A node in `hGraph` has a dependency that does not match with the
-      corresponding dependency of the paired node from `hGraphExec`.
-      resultInfo->errorNode will be set to the node from `hGraph`.
-      resultInfo->errorFromNode will be set to the mismatched dependency.
-      The dependencies are paired based on edge order and a dependency does
-      not match when the nodes are already paired based on other edges
-      examined in the graph.
-
-    cudaGraphExecUpdate sets `the` result member of `resultInfo` to:
-
-    - cudaGraphExecUpdateError if passed an invalid value.
-
-    - cudaGraphExecUpdateErrorTopologyChanged if the graph topology changed
-
-    - cudaGraphExecUpdateErrorNodeTypeChanged if the type of a node
-      changed, in which case `hErrorNode_out` is set to the node from
-      `hGraph`.
-
-    - cudaGraphExecUpdateErrorFunctionChanged if the function of a kernel
-      node changed (CUDA driver < 11.2)
-
-    - cudaGraphExecUpdateErrorUnsupportedFunctionChange if the func field
-      of a kernel changed in an unsupported way(see note above), in which
-      case `hErrorNode_out` is set to the node from `hGraph`
-
-    - cudaGraphExecUpdateErrorParametersChanged if any parameters to a node
-      changed in a way that is not supported, in which case
-      `hErrorNode_out` is set to the node from `hGraph`
-
-    - cudaGraphExecUpdateErrorAttributesChanged if any attributes of a node
-      changed in a way that is not supported, in which case
-      `hErrorNode_out` is set to the node from `hGraph`
-
-    - cudaGraphExecUpdateErrorNotSupported if something about a node is
-      unsupported, like the node's type or configuration, in which case
-      `hErrorNode_out` is set to the node from `hGraph`
-
-    If the update fails for a reason not listed above, the result member of
-    `resultInfo` will be set to cudaGraphExecUpdateError. If the update
-    succeeds, the result member will be set to cudaGraphExecUpdateSuccess.
-
-    cudaGraphExecUpdate returns cudaSuccess when the updated was performed
-    successfully. It returns cudaErrorGraphExecUpdateFailure if the graph
-    update was not performed because it included changes which violated
-    constraints specific to instantiated graph update.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The instantiated graph to be updated
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph containing the updated parameters
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorGraphExecUpdateFailure`,
-    resultInfo : :py:obj:`~.cudaGraphExecUpdateResultInfo`
-        the error info structure
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaGraph_t cyhGraph
-    if hGraph is None:
-        cyhGraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(hGraph, (cudaGraph_t,driver.CUgraph)):
-        phGraph = int(hGraph)
-        cyhGraph = <cyruntime.cudaGraph_t><void_ptr>phGraph
-    else:
-        phGraph = int(cudaGraph_t(hGraph))
-        cyhGraph = <cyruntime.cudaGraph_t><void_ptr>phGraph
-    cdef cyruntime.cudaGraphExec_t cyhGraphExec
-    if hGraphExec is None:
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        phGraphExec = int(hGraphExec)
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    else:
-        phGraphExec = int(cudaGraphExec_t(hGraphExec))
-        cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
-    cdef cudaGraphExecUpdateResultInfo resultInfo = cudaGraphExecUpdateResultInfo()
-    err = cyruntime.cudaGraphExecUpdate(cyhGraphExec, cyhGraph, <cyruntime.cudaGraphExecUpdateResultInfo*>resultInfo._ptr)
-    return (cudaError_t(err), resultInfo)
-{{endif}}
-
-{{if 'cudaGraphUpload' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphUpload(graphExec, stream):
-    """ Uploads an executable graph in a stream.
-
-    Uploads `hGraphExec` to the device in `hStream` without executing it.
-    Uploads of the same `hGraphExec` will be serialized. Each upload is
-    ordered behind both any previous work in `hStream` and any previous
-    launches of `hGraphExec`. Uses memory cached by `stream` to back the
-    allocations owned by `graphExec`.
-
-    Parameters
-    ----------
-    hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        Executable graph to upload
-    hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to upload the graph
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaGraphExec_t cygraphExec
-    if graphExec is None:
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        pgraphExec = int(graphExec)
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
-    else:
-        pgraphExec = int(cudaGraphExec_t(graphExec))
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
-    err = cyruntime.cudaGraphUpload(cygraphExec, cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphLaunch' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphLaunch(graphExec, stream):
-    """ Launches an executable graph in a stream.
-
-    Executes `graphExec` in `stream`. Only one instance of `graphExec` may
-    be executing at a time. Each launch is ordered behind both any previous
-    work in `stream` and any previous launches of `graphExec`. To execute a
-    graph concurrently, it must be instantiated multiple times into
-    multiple executable graphs.
-
-    If any allocations created by `graphExec` remain unfreed (from a
-    previous launch) and `graphExec` was not instantiated with
-    :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, the launch will
-    fail with :py:obj:`~.cudaErrorInvalidValue`.
-
-    Parameters
-    ----------
-    graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        Executable graph to launch
-    stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
-        Stream in which to launch the graph
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphExecDestroy`
-    """
-    cdef cyruntime.cudaStream_t cystream
-    if stream is None:
-        cystream = <cyruntime.cudaStream_t><void_ptr>0
-    elif isinstance(stream, (cudaStream_t,driver.CUstream)):
-        pstream = int(stream)
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    else:
-        pstream = int(cudaStream_t(stream))
-        cystream = <cyruntime.cudaStream_t><void_ptr>pstream
-    cdef cyruntime.cudaGraphExec_t cygraphExec
-    if graphExec is None:
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        pgraphExec = int(graphExec)
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
-    else:
-        pgraphExec = int(cudaGraphExec_t(graphExec))
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
-    err = cyruntime.cudaGraphLaunch(cygraphExec, cystream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphExecDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecDestroy(graphExec):
-    """ Destroys an executable graph.
-
-    Destroys the executable graph specified by `graphExec`.
-
-    Parameters
-    ----------
-    graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        Executable graph to destroy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphLaunch`
-    """
-    cdef cyruntime.cudaGraphExec_t cygraphExec
-    if graphExec is None:
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        pgraphExec = int(graphExec)
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
-    else:
-        pgraphExec = int(cudaGraphExec_t(graphExec))
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
-    err = cyruntime.cudaGraphExecDestroy(cygraphExec)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphDestroy' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphDestroy(graph):
-    """ Destroys a graph.
-
-    Destroys the graph specified by `graph`, as well as all of its nodes.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to destroy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphCreate`
-    """
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    err = cyruntime.cudaGraphDestroy(cygraph)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphDebugDotPrint' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphDebugDotPrint(graph, char* path, unsigned int flags):
-    """ Write a DOT file describing graph structure.
-
-    Using the provided `graph`, write to `path` a DOT formatted description
-    of the graph. By default this includes the graph topology, node types,
-    node id, kernel names and memcpy direction. `flags` can be specified to
-    write more detailed information about each node type such as parameter
-    values, kernel attributes, node and function handles.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph to create a DOT file from
-    path : bytes
-        The path to write the DOT file to
-    flags : unsigned int
-        Flags from cudaGraphDebugDotFlags for specifying which additional
-        node information to write
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOperatingSystem`
-    """
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    err = cyruntime.cudaGraphDebugDotPrint(cygraph, path, flags)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaUserObjectCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaUserObjectCreate(ptr, destroy, unsigned int initialRefcount, unsigned int flags):
-    """ Create a user object.
-
-    Create a user object with the specified destructor callback and initial
-    reference count. The initial references are owned by the caller.
-
-    Destructor callbacks cannot make CUDA API calls and should avoid
-    blocking behavior, as they are executed by a shared internal thread.
-    Another thread may be signaled to perform such actions, if it does not
-    block forward progress of tasks scheduled through CUDA.
-
-    See CUDA User Objects in the CUDA C++ Programming Guide for more
-    information on user objects.
-
-    Parameters
-    ----------
-    ptr : Any
-        The pointer to pass to the destroy function
-    destroy : :py:obj:`~.cudaHostFn_t`
-        Callback to free the user object when it is no longer in use
-    initialRefcount : unsigned int
-        The initial refcount to create the object with, typically 1. The
-        initial references are owned by the calling thread.
-    flags : unsigned int
-        Currently it is required to pass
-        :py:obj:`~.cudaUserObjectNoDestructorSync`, which is the only
-        defined flag. This indicates that the destroy callback cannot be
-        waited on by any CUDA API. Users requiring synchronization of the
-        callback should signal its completion manually.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    object_out : :py:obj:`~.cudaUserObject_t`
-        Location to return the user object handle
-
-    See Also
-    --------
-    :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
-    """
-    cdef cyruntime.cudaHostFn_t cydestroy
-    if destroy is None:
-        cydestroy = <cyruntime.cudaHostFn_t><void_ptr>0
-    elif isinstance(destroy, (cudaHostFn_t,)):
-        pdestroy = int(destroy)
-        cydestroy = <cyruntime.cudaHostFn_t><void_ptr>pdestroy
-    else:
-        pdestroy = int(cudaHostFn_t(destroy))
-        cydestroy = <cyruntime.cudaHostFn_t><void_ptr>pdestroy
-    cdef cudaUserObject_t object_out = cudaUserObject_t()
-    cyptr = utils.HelperInputVoidPtr(ptr)
-    cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
-    err = cyruntime.cudaUserObjectCreate(<cyruntime.cudaUserObject_t*>object_out._ptr, cyptr_ptr, cydestroy, initialRefcount, flags)
-    return (cudaError_t(err), object_out)
-{{endif}}
-
-{{if 'cudaUserObjectRetain' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaUserObjectRetain(object, unsigned int count):
-    """ Retain a reference to a user object.
-
-    Retains new references to a user object. The new references are owned
-    by the caller.
-
-    See CUDA User Objects in the CUDA C++ Programming Guide for more
-    information on user objects.
-
-    Parameters
-    ----------
-    object : :py:obj:`~.cudaUserObject_t`
-        The object to retain
-    count : unsigned int
-        The number of references to retain, typically 1. Must be nonzero
-        and not larger than INT_MAX.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaUserObjectCreate`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
-    """
-    cdef cyruntime.cudaUserObject_t cyobject
-    if object is None:
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>0
-    elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
-        pobject = int(object)
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
-    else:
-        pobject = int(cudaUserObject_t(object))
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
-    err = cyruntime.cudaUserObjectRetain(cyobject, count)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaUserObjectRelease' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaUserObjectRelease(object, unsigned int count):
-    """ Release a reference to a user object.
-
-    Releases user object references owned by the caller. The object's
-    destructor is invoked if the reference count reaches zero.
-
-    It is undefined behavior to release references not owned by the caller,
-    or to use a user object handle after all references are released.
-
-    See CUDA User Objects in the CUDA C++ Programming Guide for more
-    information on user objects.
-
-    Parameters
-    ----------
-    object : :py:obj:`~.cudaUserObject_t`
-        The object to release
-    count : unsigned int
-        The number of references to release, typically 1. Must be nonzero
-        and not larger than INT_MAX.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaUserObjectCreate`, :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
-    """
-    cdef cyruntime.cudaUserObject_t cyobject
-    if object is None:
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>0
-    elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
-        pobject = int(object)
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
-    else:
-        pobject = int(cudaUserObject_t(object))
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
-    err = cyruntime.cudaUserObjectRelease(cyobject, count)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphRetainUserObject' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphRetainUserObject(graph, object, unsigned int count, unsigned int flags):
-    """ Retain a reference to a user object from a graph.
-
-    Creates or moves user object references that will be owned by a CUDA
-    graph.
-
-    See CUDA User Objects in the CUDA C++ Programming Guide for more
-    information on user objects.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph to associate the reference with
-    object : :py:obj:`~.cudaUserObject_t`
-        The user object to retain a reference for
-    count : unsigned int
-        The number of references to add to the graph, typically 1. Must be
-        nonzero and not larger than INT_MAX.
-    flags : unsigned int
-        The optional flag :py:obj:`~.cudaGraphUserObjectMove` transfers
-        references from the calling thread, rather than create new
-        references. Pass 0 to create new references.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaUserObjectCreate` :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
-    """
-    cdef cyruntime.cudaUserObject_t cyobject
-    if object is None:
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>0
-    elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
-        pobject = int(object)
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
-    else:
-        pobject = int(cudaUserObject_t(object))
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    err = cyruntime.cudaGraphRetainUserObject(cygraph, cyobject, count, flags)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphReleaseUserObject' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphReleaseUserObject(graph, object, unsigned int count):
-    """ Release a user object reference from a graph.
-
-    Releases user object references owned by a graph.
-
-    See CUDA User Objects in the CUDA C++ Programming Guide for more
-    information on user objects.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        The graph that will release the reference
-    object : :py:obj:`~.cudaUserObject_t`
-        The user object to release a reference for
-    count : unsigned int
-        The number of references to release, typically 1. Must be nonzero
-        and not larger than INT_MAX.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-
-    See Also
-    --------
-    :py:obj:`~.cudaUserObjectCreate` :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphCreate`
-    """
-    cdef cyruntime.cudaUserObject_t cyobject
-    if object is None:
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>0
-    elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
-        pobject = int(object)
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
-    else:
-        pobject = int(cudaUserObject_t(object))
-        cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    err = cyruntime.cudaGraphReleaseUserObject(cygraph, cyobject, count)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphAddNode' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddNode(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], size_t numDependencies, nodeParams : Optional[cudaGraphNodeParams]):
-    """ Adds a node of arbitrary type to a graph.
-
-    Creates a new node in `graph` described by `nodeParams` with
-    `numDependencies` dependencies specified via `pDependencies`.
-    `numDependencies` may be 0. `pDependencies` may be null if
-    `numDependencies` is 0. `pDependencies` may not have any duplicate
-    entries.
-
-    `nodeParams` is a tagged union. The node type should be specified in
-    the `typename` field, and type-specific parameters in the corresponding
-    union member. All unused bytes - that is, `reserved0` and all bytes
-    past the utilized union member - must be set to zero. It is recommended
-    to use brace initialization or memset to ensure all bytes are
-    initialized.
-
-    Note that for some node types, `nodeParams` may contain "out
-    parameters" which are modified during the call, such as
-    `nodeParams->alloc.dptr`.
-
-    A handle to the new node will be returned in `phGraphNode`.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.cudaGraphNodeParams`
-        Specification of the node
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorNotSupported`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphExecNodeSetParams`
-    """
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cyruntime.cudaGraphAddNode(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, numDependencies, cynodeParams_ptr)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphAddNode_v2' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphAddNode_v2(graph, pDependencies : Optional[Tuple[cudaGraphNode_t] | List[cudaGraphNode_t]], dependencyData : Optional[Tuple[cudaGraphEdgeData] | List[cudaGraphEdgeData]], size_t numDependencies, nodeParams : Optional[cudaGraphNodeParams]):
-    """ Adds a node of arbitrary type to a graph (12.3+)
-
-    Creates a new node in `graph` described by `nodeParams` with
-    `numDependencies` dependencies specified via `pDependencies`.
-    `numDependencies` may be 0. `pDependencies` may be null if
-    `numDependencies` is 0. `pDependencies` may not have any duplicate
-    entries.
-
-    `nodeParams` is a tagged union. The node type should be specified in
-    the `typename` field, and type-specific parameters in the corresponding
-    union member. All unused bytes - that is, `reserved0` and all bytes
-    past the utilized union member - must be set to zero. It is recommended
-    to use brace initialization or memset to ensure all bytes are
-    initialized.
-
-    Note that for some node types, `nodeParams` may contain "out
-    parameters" which are modified during the call, such as
-    `nodeParams->alloc.dptr`.
-
-    A handle to the new node will be returned in `phGraphNode`.
-
-    Parameters
-    ----------
-    graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph to which to add the node
-    pDependencies : List[:py:obj:`~.cudaGraphNode_t`]
-        Dependencies of the node
-    dependencyData : List[:py:obj:`~.cudaGraphEdgeData`]
-        Optional edge data for the dependencies. If NULL, the data is
-        assumed to be default (zeroed) for all dependencies.
-    numDependencies : size_t
-        Number of dependencies
-    nodeParams : :py:obj:`~.cudaGraphNodeParams`
-        Specification of the node
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorNotSupported`
-    pGraphNode : :py:obj:`~.cudaGraphNode_t`
-        Returns newly created node
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphExecNodeSetParams`
-    """
-    dependencyData = [] if dependencyData is None else dependencyData
-    if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData):
-        raise TypeError("Argument 'dependencyData' is not instance of type (expected Tuple[cyruntime.cudaGraphEdgeData,] or List[cyruntime.cudaGraphEdgeData,]")
-    pDependencies = [] if pDependencies is None else pDependencies
-    if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
-        raise TypeError("Argument 'pDependencies' is not instance of type (expected Tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or List[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
-    cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
-    if len(pDependencies) > 0:
-        cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
-        if cypDependencies is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
-        else:
-            for idx in range(len(pDependencies)):
-                cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._ptr[0]
-    cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL
-    if len(dependencyData) > 0:
-        cydependencyData = <cyruntime.cudaGraphEdgeData*> calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData))
-        if cydependencyData is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
-        for idx in range(len(dependencyData)):
-            string.memcpy(&cydependencyData[idx], (<cudaGraphEdgeData>dependencyData[idx])._ptr, sizeof(cyruntime.cudaGraphEdgeData))
-    if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
-    if numDependencies > <size_t>len(dependencyData): raise RuntimeError("List is too small: " + str(len(dependencyData)) + " < " + str(numDependencies))
-    cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cyruntime.cudaGraphAddNode_v2(<cyruntime.cudaGraphNode_t*>pGraphNode._ptr, cygraph, <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._ptr if len(pDependencies) == 1 else cypDependencies, (<cudaGraphEdgeData>dependencyData[0])._ptr if len(dependencyData) == 1 else cydependencyData, numDependencies, cynodeParams_ptr)
-    if cypDependencies is not NULL:
-        free(cypDependencies)
-    if cydependencyData is not NULL:
-        free(cydependencyData)
-    return (cudaError_t(err), pGraphNode)
-{{endif}}
-
-{{if 'cudaGraphNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphNodeSetParams(node, nodeParams : Optional[cudaGraphNodeParams]):
-    """ Update's a graph node's parameters.
-
-    Sets the parameters of graph node `node` to `nodeParams`. The node type
-    specified by `nodeParams->type` must match the type of `node`.
-    `nodeParams` must be fully initialized and all unused bytes (reserved,
-    padding) zeroed.
-
-    Modifying parameters is not supported for node types
-    cudaGraphNodeTypeMemAlloc and cudaGraphNodeTypeMemFree.
-
-    Parameters
-    ----------
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Node to set the parameters for
-    nodeParams : :py:obj:`~.cudaGraphNodeParams`
-        Parameters to copy
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorNotSupported`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExecNodeSetParams`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cyruntime.cudaGraphNodeSetParams(cynode, cynodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphExecNodeSetParams' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphExecNodeSetParams(graphExec, node, nodeParams : Optional[cudaGraphNodeParams]):
-    """ Update's a graph node's parameters in an instantiated graph.
-
-    Sets the parameters of a node in an executable graph `graphExec`. The
-    node is identified by the corresponding node `node` in the non-
-    executable graph from which the executable graph was instantiated.
-    `node` must not have been removed from the original graph.
-
-    The modifications only affect future launches of `graphExec`. Already
-    enqueued or running launches of `graphExec` are not affected by this
-    call. `node` is also not modified by this call.
-
-    Allowed changes to parameters on executable graphs are as follows:
-
-    **View CUDA Toolkit Documentation for a table example**
-
-    Parameters
-    ----------
-    graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
-        The executable graph in which to update the specified node
-    node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
-        Corresponding node from the graph from which graphExec was
-        instantiated
-    nodeParams : :py:obj:`~.cudaGraphNodeParams`
-        Updated Parameters to set
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorNotSupported`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphNodeSetParams` :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
-    """
-    cdef cyruntime.cudaGraphNode_t cynode
-    if node is None:
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>0
-    elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
-        pnode = int(node)
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    else:
-        pnode = int(cudaGraphNode_t(node))
-        cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
-    cdef cyruntime.cudaGraphExec_t cygraphExec
-    if graphExec is None:
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>0
-    elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
-        pgraphExec = int(graphExec)
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
-    else:
-        pgraphExec = int(cudaGraphExec_t(graphExec))
-        cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
-    cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._ptr if nodeParams != None else NULL
-    err = cyruntime.cudaGraphExecNodeSetParams(cygraphExec, cynode, cynodeParams_ptr)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if 'cudaGraphConditionalHandleCreate' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGraphConditionalHandleCreate(graph, unsigned int defaultLaunchValue, unsigned int flags):
-    """ Create a conditional handle.
-
-    Creates a conditional handle associated with `hGraph`.
-
-    The conditional handle must be associated with a conditional node in
-    this graph or one of its children.
-
-    Handles not associated with a conditional node may cause graph
-    instantiation to fail.
-
-    Parameters
-    ----------
-    hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
-        Graph which will contain the conditional node using this handle.
-    defaultLaunchValue : unsigned int
-        Optional initial value for the conditional variable.
-    flags : unsigned int
-        Currently must be cudaGraphCondAssignDefault or 0.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`
-    pHandle_out : :py:obj:`~.cudaGraphConditionalHandle`
-        Pointer used to return the handle to the caller.
-
-    See Also
-    --------
-    :py:obj:`~.cuGraphAddNode`,
-    """
-    cdef cyruntime.cudaGraph_t cygraph
-    if graph is None:
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>0
-    elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
-        pgraph = int(graph)
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    else:
-        pgraph = int(cudaGraph_t(graph))
-        cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
-    cdef cudaGraphConditionalHandle pHandle_out = cudaGraphConditionalHandle()
-    err = cyruntime.cudaGraphConditionalHandleCreate(<cyruntime.cudaGraphConditionalHandle*>pHandle_out._ptr, cygraph, defaultLaunchValue, flags)
-    return (cudaError_t(err), pHandle_out)
-{{endif}}
-
-{{if 'cudaGetDriverEntryPoint' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetDriverEntryPoint(char* symbol, unsigned long long flags):
-    """ Returns the requested driver API function pointer.
-
-    Returns in `**funcPtr` the address of the CUDA driver function for the
-    requested flags.
-
-    For a requested driver symbol, if the CUDA version in which the driver
-    symbol was introduced is less than or equal to the CUDA runtime
-    version, the API will return the function pointer to the corresponding
-    versioned driver function.
-
-    The pointer returned by the API should be cast to a function pointer
-    matching the requested driver function's definition in the API header
-    file. The function pointer typedef can be picked up from the
-    corresponding typedefs header file. For example, cudaTypedefs.h
-    consists of function pointer typedefs for driver APIs defined in
-    cuda.h.
-
-    The API will return :py:obj:`~.cudaSuccess` and set the returned
-    `funcPtr` if the requested driver function is valid and supported on
-    the platform.
-
-    The API will return :py:obj:`~.cudaSuccess` and set the returned
-    `funcPtr` to NULL if the requested driver function is not supported on
-    the platform, no ABI compatible driver function exists for the CUDA
-    runtime version or if the driver symbol is invalid.
-
-    It will also set the optional `driverStatus` to one of the values in
-    :py:obj:`~.cudaDriverEntryPointQueryResult` with the following
-    meanings:
-
-    - :py:obj:`~.cudaDriverEntryPointSuccess` - The requested symbol was
-      succesfully found based on input arguments and `pfn` is valid
-
-    - :py:obj:`~.cudaDriverEntryPointSymbolNotFound` - The requested symbol
-      was not found
-
-    - :py:obj:`~.cudaDriverEntryPointVersionNotSufficent` - The requested
-      symbol was found but is not supported by the current runtime version
-      (CUDART_VERSION)
-
-    The requested flags can be:
-
-    - :py:obj:`~.cudaEnableDefault`: This is the default mode. This is
-      equivalent to :py:obj:`~.cudaEnablePerThreadDefaultStream` if the
-      code is compiled with --default-stream per-thread compilation flag or
-      the macro CUDA_API_PER_THREAD_DEFAULT_STREAM is defined;
-      :py:obj:`~.cudaEnableLegacyStream` otherwise.
-
-    - :py:obj:`~.cudaEnableLegacyStream`: This will enable the search for
-      all driver symbols that match the requested driver symbol name except
-      the corresponding per-thread versions.
-
-    - :py:obj:`~.cudaEnablePerThreadDefaultStream`: This will enable the
-      search for all driver symbols that match the requested driver symbol
-      name including the per-thread versions. If a per-thread version is
-      not found, the API will return the legacy version of the driver
-      function.
-
-    Parameters
-    ----------
-    symbol : bytes
-        The base name of the driver API function to look for. As an
-        example, for the driver API :py:obj:`~.cuMemAlloc_v2`, `symbol`
-        would be cuMemAlloc. Note that the API will use the CUDA runtime
-        version to return the address to the most recent ABI compatible
-        driver symbol, :py:obj:`~.cuMemAlloc` or :py:obj:`~.cuMemAlloc_v2`.
-    flags : unsigned long long
-        Flags to specify search options.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
-    funcPtr : Any
-        Location to return the function pointer to the requested driver
-        function
-    driverStatus : :py:obj:`~.cudaDriverEntryPointQueryResult`
-        Optional location to store the status of finding the symbol from
-        the driver. See :py:obj:`~.cudaDriverEntryPointQueryResult` for
-        possible values.
-
-    See Also
-    --------
-    :py:obj:`~.cuGetProcAddress`
-    """
-    cdef void_ptr funcPtr = 0
-    cdef cyruntime.cudaDriverEntryPointQueryResult driverStatus
-    err = cyruntime.cudaGetDriverEntryPoint(symbol, <void**>&funcPtr, flags, &driverStatus)
-    return (cudaError_t(err), funcPtr, cudaDriverEntryPointQueryResult(driverStatus))
-{{endif}}
-
-{{if 'cudaGetDriverEntryPointByVersion' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetDriverEntryPointByVersion(char* symbol, unsigned int cudaVersion, unsigned long long flags):
-    """ Returns the requested driver API function pointer by CUDA version.
-
-    Returns in `**funcPtr` the address of the CUDA driver function for the
-    requested flags and CUDA driver version.
-
-    The CUDA version is specified as (1000 * major + 10 * minor), so CUDA
-    11.2 should be specified as 11020. For a requested driver symbol, if
-    the specified CUDA version is greater than or equal to the CUDA version
-    in which the driver symbol was introduced, this API will return the
-    function pointer to the corresponding versioned function.
-
-    The pointer returned by the API should be cast to a function pointer
-    matching the requested driver function's definition in the API header
-    file. The function pointer typedef can be picked up from the
-    corresponding typedefs header file. For example, cudaTypedefs.h
-    consists of function pointer typedefs for driver APIs defined in
-    cuda.h.
-
-    For the case where the CUDA version requested is greater than the CUDA
-    Toolkit installed, there may not be an appropriate function pointer
-    typedef in the corresponding header file and may need a custom typedef
-    to match the driver function signature returned. This can be done by
-    getting the typedefs from a later toolkit or creating appropriately
-    matching custom function typedefs.
-
-    The API will return :py:obj:`~.cudaSuccess` and set the returned
-    `funcPtr` if the requested driver function is valid and supported on
-    the platform.
-
-    The API will return :py:obj:`~.cudaSuccess` and set the returned
-    `funcPtr` to NULL if the requested driver function is not supported on
-    the platform, no ABI compatible driver function exists for the
-    requested version or if the driver symbol is invalid.
-
-    It will also set the optional `driverStatus` to one of the values in
-    :py:obj:`~.cudaDriverEntryPointQueryResult` with the following
-    meanings:
-
-    - :py:obj:`~.cudaDriverEntryPointSuccess` - The requested symbol was
-      succesfully found based on input arguments and `pfn` is valid
-
-    - :py:obj:`~.cudaDriverEntryPointSymbolNotFound` - The requested symbol
-      was not found
-
-    - :py:obj:`~.cudaDriverEntryPointVersionNotSufficent` - The requested
-      symbol was found but is not supported by the specified version
-      `cudaVersion`
-
-    The requested flags can be:
-
-    - :py:obj:`~.cudaEnableDefault`: This is the default mode. This is
-      equivalent to :py:obj:`~.cudaEnablePerThreadDefaultStream` if the
-      code is compiled with --default-stream per-thread compilation flag or
-      the macro CUDA_API_PER_THREAD_DEFAULT_STREAM is defined;
-      :py:obj:`~.cudaEnableLegacyStream` otherwise.
-
-    - :py:obj:`~.cudaEnableLegacyStream`: This will enable the search for
-      all driver symbols that match the requested driver symbol name except
-      the corresponding per-thread versions.
-
-    - :py:obj:`~.cudaEnablePerThreadDefaultStream`: This will enable the
-      search for all driver symbols that match the requested driver symbol
-      name including the per-thread versions. If a per-thread version is
-      not found, the API will return the legacy version of the driver
-      function.
-
-    Parameters
-    ----------
-    symbol : bytes
-        The base name of the driver API function to look for. As an
-        example, for the driver API :py:obj:`~.cuMemAlloc_v2`, `symbol`
-        would be cuMemAlloc.
-    cudaVersion : unsigned int
-        The CUDA version to look for the requested driver symbol
-    flags : unsigned long long
-        Flags to specify search options.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
-    funcPtr : Any
-        Location to return the function pointer to the requested driver
-        function
-    driverStatus : :py:obj:`~.cudaDriverEntryPointQueryResult`
-        Optional location to store the status of finding the symbol from
-        the driver. See :py:obj:`~.cudaDriverEntryPointQueryResult` for
-        possible values.
-
-    See Also
-    --------
-    :py:obj:`~.cuGetProcAddress`
-    """
-    cdef void_ptr funcPtr = 0
-    cdef cyruntime.cudaDriverEntryPointQueryResult driverStatus
-    err = cyruntime.cudaGetDriverEntryPointByVersion(symbol, <void**>&funcPtr, cudaVersion, flags, &driverStatus)
-    return (cudaError_t(err), funcPtr, cudaDriverEntryPointQueryResult(driverStatus))
-{{endif}}
-
-{{if 'cudaGetExportTable' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetExportTable(pExportTableId : Optional[cudaUUID_t]):
-    """"""
-    cdef void_ptr ppExportTable = 0
-    cdef cyruntime.cudaUUID_t* cypExportTableId_ptr = pExportTableId._ptr if pExportTableId != None else NULL
-    err = cyruntime.cudaGetExportTable(<const void**>&ppExportTable, cypExportTableId_ptr)
-    return (cudaError_t(err), ppExportTable)
-{{endif}}
-
-{{if 'cudaGetKernel' in found_functions}}
-
-@cython.embedsignature(True)
-def cudaGetKernel(entryFuncAddr):
-    """ Get pointer to device kernel that matches entry function `entryFuncAddr`.
-
-    Returns in `kernelPtr` the device kernel corresponding to the entry
-    function `entryFuncAddr`.
-
-    Parameters
-    ----------
-    entryFuncAddr : Any
-        Address of device entry function to search kernel for
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`
-    kernelPtr : :py:obj:`~.cudaKernel_t`
-        Returns the device kernel
-
-    See Also
-    --------
-    cudaGetKernel (C++ API)
-    """
-    cdef cudaKernel_t kernelPtr = cudaKernel_t()
-    cyentryFuncAddr = utils.HelperInputVoidPtr(entryFuncAddr)
-    cdef void* cyentryFuncAddr_ptr = <void*><void_ptr>cyentryFuncAddr.cptr
-    err = cyruntime.cudaGetKernel(<cyruntime.cudaKernel_t*>kernelPtr._ptr, cyentryFuncAddr_ptr)
-    return (cudaError_t(err), kernelPtr)
-{{endif}}
-
-{{if 'make_cudaPitchedPtr' in found_functions}}
-
-@cython.embedsignature(True)
-def make_cudaPitchedPtr(d, size_t p, size_t xsz, size_t ysz):
-    """ Returns a :py:obj:`~.cudaPitchedPtr` based on input parameters.
-
-    Returns a :py:obj:`~.cudaPitchedPtr` based on the specified input
-    parameters `d`, `p`, `xsz`, and `ysz`.
-
-    Parameters
-    ----------
-    d : Any
-        Pointer to allocated memory
-    p : size_t
-        Pitch of allocated memory in bytes
-    xsz : size_t
-        Logical width of allocation in elements
-    ysz : size_t
-        Logical height of allocation in elements
-
-    Returns
-    -------
-    cudaError_t.cudaSuccess
-        cudaError_t.cudaSuccess
-    :py:obj:`~.cudaPitchedPtr`
-        :py:obj:`~.cudaPitchedPtr` specified by `d`, `p`, `xsz`, and `ysz`
-
-    See Also
-    --------
-    make_cudaExtent, make_cudaPos
-    """
-    cyd = utils.HelperInputVoidPtr(d)
-    cdef void* cyd_ptr = <void*><void_ptr>cyd.cptr
-    err = cyruntime.make_cudaPitchedPtr(cyd_ptr, p, xsz, ysz)
-    cdef cudaPitchedPtr wrapper = cudaPitchedPtr()
-    wrapper._ptr[0] = err
-    return wrapper
-{{endif}}
-
-{{if 'make_cudaPos' in found_functions}}
-
-@cython.embedsignature(True)
-def make_cudaPos(size_t x, size_t y, size_t z):
-    """ Returns a :py:obj:`~.cudaPos` based on input parameters.
-
-    Returns a :py:obj:`~.cudaPos` based on the specified input parameters
-    `x`, `y`, and `z`.
-
-    Parameters
-    ----------
-    x : size_t
-        X position
-    y : size_t
-        Y position
-    z : size_t
-        Z position
-
-    Returns
-    -------
-    cudaError_t.cudaSuccess
-        cudaError_t.cudaSuccess
-    :py:obj:`~.cudaPos`
-        :py:obj:`~.cudaPos` specified by `x`, `y`, and `z`
-
-    See Also
-    --------
-    make_cudaExtent, make_cudaPitchedPtr
-    """
-    err = cyruntime.make_cudaPos(x, y, z)
-    cdef cudaPos wrapper = cudaPos()
-    wrapper._ptr[0] = err
-    return wrapper
-{{endif}}
-
-{{if 'make_cudaExtent' in found_functions}}
-
-@cython.embedsignature(True)
-def make_cudaExtent(size_t w, size_t h, size_t d):
-    """ Returns a :py:obj:`~.cudaExtent` based on input parameters.
-
-    Returns a :py:obj:`~.cudaExtent` based on the specified input
-    parameters `w`, `h`, and `d`.
-
-    Parameters
-    ----------
-    w : size_t
-        Width in elements when referring to array memory, in bytes when
-        referring to linear memory
-    h : size_t
-        Height in elements
-    d : size_t
-        Depth in elements
-
-    Returns
-    -------
-    cudaError_t.cudaSuccess
-        cudaError_t.cudaSuccess
-    :py:obj:`~.cudaExtent`
-        :py:obj:`~.cudaExtent` specified by `w`, `h`, and `d`
-
-    See Also
-    --------
-    make_cudaPitchedPtr, make_cudaPos
-    """
-    err = cyruntime.make_cudaExtent(w, h, d)
-    cdef cudaExtent wrapper = cudaExtent()
-    wrapper._ptr[0] = err
-    return wrapper
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaGraphicsEGLRegisterImage(image, unsigned int flags):
-    """ Registers an EGL image.
-
-    Registers the EGLImageKHR specified by `image` for access by CUDA. A
-    handle to the registered object is returned as `pCudaResource`.
-    Additional Mapping/Unmapping is not required for the registered
-    resource and :py:obj:`~.cudaGraphicsResourceGetMappedEglFrame` can be
-    directly called on the `pCudaResource`.
-
-    The application will be responsible for synchronizing access to shared
-    objects. The application must ensure that any pending operation which
-    access the objects have completed before passing control to CUDA. This
-    may be accomplished by issuing and waiting for glFinish command on all
-    GLcontexts (for OpenGL and likewise for other APIs). The application
-    will be also responsible for ensuring that any pending operation on the
-    registered CUDA resource has completed prior to executing subsequent
-    commands in other APIs accesing the same memory objects. This can be
-    accomplished by calling cuCtxSynchronize or cuEventSynchronize
-    (preferably).
-
-    The surface's intended usage is specified using `flags`, as follows:
-
-    - :py:obj:`~.cudaGraphicsRegisterFlagsNone`: Specifies no hints about
-      how this resource will be used. It is therefore assumed that this
-      resource will be read from and written to by CUDA. This is the
-      default value.
-
-    - :py:obj:`~.cudaGraphicsRegisterFlagsReadOnly`: Specifies that CUDA
-      will not write to this resource.
-
-    - :py:obj:`~.cudaGraphicsRegisterFlagsWriteDiscard`: Specifies that
-      CUDA will not read from this resource and will write over the entire
-      contents of the resource, so none of the data previously stored in
-      the resource will be preserved.
-
-    The EGLImageKHR is an object which can be used to create EGLImage
-    target resource. It is defined as a void pointer. typedef void*
-    EGLImageKHR
-
-    Parameters
-    ----------
-    image : :py:obj:`~.EGLImageKHR`
-        An EGLImageKHR image which can be used to create target resource.
-    flags : unsigned int
-        Map flags
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
-    pCudaResource : :py:obj:`~.cudaGraphicsResource`
-        Pointer to the returned object handle
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsResourceGetMappedEglFrame`, :py:obj:`~.cuGraphicsEGLRegisterImage`
-    """
-    cdef cyruntime.EGLImageKHR cyimage
-    if image is None:
-        cyimage = <cyruntime.EGLImageKHR><void_ptr>0
-    elif isinstance(image, (EGLImageKHR,)):
-        pimage = int(image)
-        cyimage = <cyruntime.EGLImageKHR><void_ptr>pimage
-    else:
-        pimage = int(EGLImageKHR(image))
-        cyimage = <cyruntime.EGLImageKHR><void_ptr>pimage
-    cdef cudaGraphicsResource_t pCudaResource = cudaGraphicsResource_t()
-    err = cyruntime.cudaGraphicsEGLRegisterImage(pCudaResource._ptr, cyimage, flags)
-    return (cudaError_t(err), pCudaResource)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaEGLStreamConsumerConnect(eglStream):
-    """ Connect CUDA to EGLStream as a consumer.
-
-    Connect CUDA as a consumer to EGLStreamKHR specified by `eglStream`.
-
-    The EGLStreamKHR is an EGL object that transfers a sequence of image
-    frames from one API to another.
-
-    Parameters
-    ----------
-    eglStream : :py:obj:`~.EGLStreamKHR`
-        EGLStreamKHR handle
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
-    conn : :py:obj:`~.cudaEglStreamConnection`
-        Pointer to the returned connection handle
-
-    See Also
-    --------
-    :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerConnect`
-    """
-    cdef cyruntime.EGLStreamKHR cyeglStream
-    if eglStream is None:
-        cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>0
-    elif isinstance(eglStream, (EGLStreamKHR,)):
-        peglStream = int(eglStream)
-        cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
-    else:
-        peglStream = int(EGLStreamKHR(eglStream))
-        cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
-    cdef cudaEglStreamConnection conn = cudaEglStreamConnection()
-    err = cyruntime.cudaEGLStreamConsumerConnect(<cyruntime.cudaEglStreamConnection*>conn._ptr, cyeglStream)
-    return (cudaError_t(err), conn)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaEGLStreamConsumerConnectWithFlags(eglStream, unsigned int flags):
-    """ Connect CUDA to EGLStream as a consumer with given flags.
-
-    Connect CUDA as a consumer to EGLStreamKHR specified by `stream` with
-    specified `flags` defined by :py:obj:`~.cudaEglResourceLocationFlags`.
-
-    The flags specify whether the consumer wants to access frames from
-    system memory or video memory. Default is
-    :py:obj:`~.cudaEglResourceLocationVidmem`.
-
-    Parameters
-    ----------
-    eglStream : :py:obj:`~.EGLStreamKHR`
-        EGLStreamKHR handle
-    flags : unsigned int
-        Flags denote intended location - system or video.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
-    conn : :py:obj:`~.cudaEglStreamConnection`
-        Pointer to the returned connection handle
-
-    See Also
-    --------
-    :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerConnectWithFlags`
-    """
-    cdef cyruntime.EGLStreamKHR cyeglStream
-    if eglStream is None:
-        cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>0
-    elif isinstance(eglStream, (EGLStreamKHR,)):
-        peglStream = int(eglStream)
-        cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
-    else:
-        peglStream = int(EGLStreamKHR(eglStream))
-        cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
-    cdef cudaEglStreamConnection conn = cudaEglStreamConnection()
-    err = cyruntime.cudaEGLStreamConsumerConnectWithFlags(<cyruntime.cudaEglStreamConnection*>conn._ptr, cyeglStream, flags)
-    return (cudaError_t(err), conn)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaEGLStreamConsumerDisconnect(conn):
-    """ Disconnect CUDA as a consumer to EGLStream .
-
-    Disconnect CUDA as a consumer to EGLStreamKHR.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.cudaEglStreamConnection`
-        Conection to disconnect.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
-
-    See Also
-    --------
-    :py:obj:`~.cudaEGLStreamConsumerConnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerDisconnect`
-    """
-    cdef cyruntime.cudaEglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
-        pconn = conn.getPtr()
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
-    err = cyruntime.cudaEGLStreamConsumerDisconnect(cyconn)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, unsigned int timeout):
-    """ Acquire an image frame from the EGLStream with CUDA as a consumer.
-
-    Acquire an image frame from EGLStreamKHR.
-    :py:obj:`~.cudaGraphicsResourceGetMappedEglFrame` can be called on
-    `pCudaResource` to get :py:obj:`~.cudaEglFrame`.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.cudaEglStreamConnection`
-        Connection on which to acquire
-    pCudaResource : :py:obj:`~.cudaGraphicsResource_t`
-        CUDA resource on which the EGLStream frame will be mapped for use.
-    pStream : :py:obj:`~.cudaStream_t`
-        CUDA stream for synchronization and any data migrations implied by
-        :py:obj:`~.cudaEglResourceLocationFlags`.
-    timeout : unsigned int
-        Desired timeout in usec.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`, :py:obj:`~.cudaErrorLaunchTimeout`
-
-    See Also
-    --------
-    :py:obj:`~.cudaEGLStreamConsumerConnect`, :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`
-    """
-    cdef cyruntime.cudaStream_t *cypStream
-    if pStream is None:
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
-    elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
-        ppStream = pStream.getPtr()
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
-    elif isinstance(pStream, (int)):
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
-    else:
-        raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
-    cdef cyruntime.cudaGraphicsResource_t *cypCudaResource
-    if pCudaResource is None:
-        cypCudaResource = <cyruntime.cudaGraphicsResource_t*><void_ptr>NULL
-    elif isinstance(pCudaResource, (cudaGraphicsResource_t,)):
-        ppCudaResource = pCudaResource.getPtr()
-        cypCudaResource = <cyruntime.cudaGraphicsResource_t*><void_ptr>ppCudaResource
-    elif isinstance(pCudaResource, (int)):
-        cypCudaResource = <cyruntime.cudaGraphicsResource_t*><void_ptr>pCudaResource
-    else:
-        raise TypeError("Argument 'pCudaResource' is not instance of type (expected <class 'int, runtime.cudaGraphicsResource_t'>, found " + str(type(pCudaResource)))
-    cdef cyruntime.cudaEglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
-        pconn = conn.getPtr()
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
-    err = cyruntime.cudaEGLStreamConsumerAcquireFrame(cyconn, cypCudaResource, cypStream, timeout)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream):
-    """ Releases the last frame acquired from the EGLStream.
-
-    Release the acquired image frame specified by `pCudaResource` to
-    EGLStreamKHR.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.cudaEglStreamConnection`
-        Connection on which to release
-    pCudaResource : :py:obj:`~.cudaGraphicsResource_t`
-        CUDA resource whose corresponding frame is to be released
-    pStream : :py:obj:`~.cudaStream_t`
-        CUDA stream on which release will be done.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
-
-    See Also
-    --------
-    :py:obj:`~.cudaEGLStreamConsumerConnect`, :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`
-    """
-    cdef cyruntime.cudaStream_t *cypStream
-    if pStream is None:
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
-    elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
-        ppStream = pStream.getPtr()
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
-    elif isinstance(pStream, (int)):
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
-    else:
-        raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
-    cdef cyruntime.cudaGraphicsResource_t cypCudaResource
-    if pCudaResource is None:
-        cypCudaResource = <cyruntime.cudaGraphicsResource_t><void_ptr>0
-    elif isinstance(pCudaResource, (cudaGraphicsResource_t,)):
-        ppCudaResource = int(pCudaResource)
-        cypCudaResource = <cyruntime.cudaGraphicsResource_t><void_ptr>ppCudaResource
-    else:
-        ppCudaResource = int(cudaGraphicsResource_t(pCudaResource))
-        cypCudaResource = <cyruntime.cudaGraphicsResource_t><void_ptr>ppCudaResource
-    cdef cyruntime.cudaEglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
-        pconn = conn.getPtr()
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
-    err = cyruntime.cudaEGLStreamConsumerReleaseFrame(cyconn, cypCudaResource, cypStream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaEGLStreamProducerConnect(eglStream, width, height):
-    """ Connect CUDA to EGLStream as a producer.
-
-    Connect CUDA as a producer to EGLStreamKHR specified by `stream`.
-
-    The EGLStreamKHR is an EGL object that transfers a sequence of image
-    frames from one API to another.
-
-    Parameters
-    ----------
-    eglStream : :py:obj:`~.EGLStreamKHR`
-        EGLStreamKHR handle
-    width : :py:obj:`~.EGLint`
-        width of the image to be submitted to the stream
-    height : :py:obj:`~.EGLint`
-        height of the image to be submitted to the stream
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
-    conn : :py:obj:`~.cudaEglStreamConnection`
-        Pointer to the returned connection handle
-
-    See Also
-    --------
-    :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`, :py:obj:`~.cuEGLStreamProducerConnect`
-    """
-    cdef cyruntime.EGLint cyheight
-    if height is None:
-        cyheight = <cyruntime.EGLint><void_ptr>0
-    elif isinstance(height, (EGLint,)):
-        pheight = int(height)
-        cyheight = <cyruntime.EGLint><void_ptr>pheight
-    else:
-        pheight = int(EGLint(height))
-        cyheight = <cyruntime.EGLint><void_ptr>pheight
-    cdef cyruntime.EGLint cywidth
-    if width is None:
-        cywidth = <cyruntime.EGLint><void_ptr>0
-    elif isinstance(width, (EGLint,)):
-        pwidth = int(width)
-        cywidth = <cyruntime.EGLint><void_ptr>pwidth
-    else:
-        pwidth = int(EGLint(width))
-        cywidth = <cyruntime.EGLint><void_ptr>pwidth
-    cdef cyruntime.EGLStreamKHR cyeglStream
-    if eglStream is None:
-        cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>0
-    elif isinstance(eglStream, (EGLStreamKHR,)):
-        peglStream = int(eglStream)
-        cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
-    else:
-        peglStream = int(EGLStreamKHR(eglStream))
-        cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
-    cdef cudaEglStreamConnection conn = cudaEglStreamConnection()
-    err = cyruntime.cudaEGLStreamProducerConnect(<cyruntime.cudaEglStreamConnection*>conn._ptr, cyeglStream, cywidth, cyheight)
-    return (cudaError_t(err), conn)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaEGLStreamProducerDisconnect(conn):
-    """ Disconnect CUDA as a producer to EGLStream .
-
-    Disconnect CUDA as a producer to EGLStreamKHR.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.cudaEglStreamConnection`
-        Conection to disconnect.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
-
-    See Also
-    --------
-    :py:obj:`~.cudaEGLStreamProducerConnect`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`, :py:obj:`~.cuEGLStreamProducerDisconnect`
-    """
-    cdef cyruntime.cudaEglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
-        pconn = conn.getPtr()
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
-    err = cyruntime.cudaEGLStreamProducerDisconnect(cyconn)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaEGLStreamProducerPresentFrame(conn, eglframe not None : cudaEglFrame, pStream):
-    """ Present a CUDA eglFrame to the EGLStream with CUDA as a producer.
-
-    The :py:obj:`~.cudaEglFrame` is defined as:
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    For :py:obj:`~.cudaEglFrame` of type :py:obj:`~.cudaEglFrameTypePitch`,
-    the application may present sub-region of a memory allocation. In that
-    case, :py:obj:`~.cudaPitchedPtr.ptr` will specify the start address of
-    the sub-region in the allocation and :py:obj:`~.cudaEglPlaneDesc` will
-    specify the dimensions of the sub-region.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.cudaEglStreamConnection`
-        Connection on which to present the CUDA array
-    eglframe : :py:obj:`~.cudaEglFrame`
-        CUDA Eglstream Proucer Frame handle to be sent to the consumer over
-        EglStream.
-    pStream : :py:obj:`~.cudaStream_t`
-        CUDA stream on which to present the frame.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
-
-    See Also
-    --------
-    :py:obj:`~.cudaEGLStreamProducerConnect`, :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`, :py:obj:`~.cuEGLStreamProducerPresentFrame`
-    """
-    cdef cyruntime.cudaStream_t *cypStream
-    if pStream is None:
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
-    elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
-        ppStream = pStream.getPtr()
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
-    elif isinstance(pStream, (int)):
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
-    else:
-        raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
-    cdef cyruntime.cudaEglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
-        pconn = conn.getPtr()
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
-    err = cyruntime.cudaEGLStreamProducerPresentFrame(cyconn, eglframe._ptr[0], cypStream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaEGLStreamProducerReturnFrame(conn, eglframe : Optional[cudaEglFrame], pStream):
-    """ Return the CUDA eglFrame to the EGLStream last released by the consumer.
-
-    This API can potentially return cudaErrorLaunchTimeout if the consumer
-    has not returned a frame to EGL stream. If timeout is returned the
-    application can retry.
-
-    Parameters
-    ----------
-    conn : :py:obj:`~.cudaEglStreamConnection`
-        Connection on which to present the CUDA array
-    eglframe : :py:obj:`~.cudaEglFrame`
-        CUDA Eglstream Proucer Frame handle returned from the consumer over
-        EglStream.
-    pStream : :py:obj:`~.cudaStream_t`
-        CUDA stream on which to return the frame.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLaunchTimeout`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
-
-    See Also
-    --------
-    :py:obj:`~.cudaEGLStreamProducerConnect`, :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`, :py:obj:`~.cuEGLStreamProducerReturnFrame`
-    """
-    cdef cyruntime.cudaStream_t *cypStream
-    if pStream is None:
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
-    elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
-        ppStream = pStream.getPtr()
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
-    elif isinstance(pStream, (int)):
-        cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
-    else:
-        raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
-    cdef cyruntime.cudaEglStreamConnection *cyconn
-    if conn is None:
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
-    elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
-        pconn = conn.getPtr()
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
-    elif isinstance(conn, (int)):
-        cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
-    else:
-        raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
-    cdef cyruntime.cudaEglFrame* cyeglframe_ptr = eglframe._ptr if eglframe != None else NULL
-    err = cyruntime.cudaEGLStreamProducerReturnFrame(cyconn, cyeglframe_ptr, cypStream)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaGraphicsResourceGetMappedEglFrame(resource, unsigned int index, unsigned int mipLevel):
-    """ Get an eglFrame through which to access a registered EGL graphics resource.
-
-    Returns in `*eglFrame` an eglFrame pointer through which the registered
-    graphics resource `resource` may be accessed. This API can only be
-    called for EGL graphics resources.
-
-    The :py:obj:`~.cudaEglFrame` is defined as
-
-    **View CUDA Toolkit Documentation for a C++ code example**
-
-    Parameters
-    ----------
-    resource : :py:obj:`~.cudaGraphicsResource_t`
-        Registered resource to access.
-    index : unsigned int
-        Index for cubemap surfaces.
-    mipLevel : unsigned int
-        Mipmap level for the subresource to access.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
-    eglFrame : :py:obj:`~.cudaEglFrame`
-        Returned eglFrame.
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsResourceGetMappedEglFrame`
-
-    Notes
-    -----
-    Note that in case of multiplanar `*eglFrame`, pitch of only first plane (unsigned int :py:obj:`~.cudaEglPlaneDesc.pitch`) is to be considered by the application.
-    """
-    cdef cyruntime.cudaGraphicsResource_t cyresource
-    if resource is None:
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>0
-    elif isinstance(resource, (cudaGraphicsResource_t,)):
-        presource = int(resource)
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    else:
-        presource = int(cudaGraphicsResource_t(resource))
-        cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
-    cdef cudaEglFrame eglFrame = cudaEglFrame()
-    err = cyruntime.cudaGraphicsResourceGetMappedEglFrame(<cyruntime.cudaEglFrame*>eglFrame._ptr, cyresource, index, mipLevel)
-    return (cudaError_t(err), eglFrame)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaEventCreateFromEGLSync(eglSync, unsigned int flags):
-    """ Creates an event from EGLSync object.
-
-    Creates an event *phEvent from an EGLSyncKHR eglSync with the flages
-    specified via `flags`. Valid flags include:
-
-    - :py:obj:`~.cudaEventDefault`: Default event creation flag.
-
-    - :py:obj:`~.cudaEventBlockingSync`: Specifies that the created event
-      should use blocking synchronization. A CPU thread that uses
-      :py:obj:`~.cudaEventSynchronize()` to wait on an event created with
-      this flag will block until the event has actually been completed.
-
-    :py:obj:`~.cudaEventRecord` and TimingData are not supported for events
-    created from EGLSync.
-
-    The EGLSyncKHR is an opaque handle to an EGL sync object. typedef void*
-    EGLSyncKHR
-
-    Parameters
-    ----------
-    eglSync : :py:obj:`~.EGLSyncKHR`
-        Opaque handle to EGLSync object
-    flags : unsigned int
-        Event creation flags
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorMemoryAllocation`
-    phEvent : :py:obj:`~.cudaEvent_t`
-        Returns newly created event
-
-    See Also
-    --------
-    :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`
-    """
-    cdef cyruntime.EGLSyncKHR cyeglSync
-    if eglSync is None:
-        cyeglSync = <cyruntime.EGLSyncKHR><void_ptr>0
-    elif isinstance(eglSync, (EGLSyncKHR,)):
-        peglSync = int(eglSync)
-        cyeglSync = <cyruntime.EGLSyncKHR><void_ptr>peglSync
-    else:
-        peglSync = int(EGLSyncKHR(eglSync))
-        cyeglSync = <cyruntime.EGLSyncKHR><void_ptr>peglSync
-    cdef cudaEvent_t phEvent = cudaEvent_t()
-    err = cyruntime.cudaEventCreateFromEGLSync(<cyruntime.cudaEvent_t*>phEvent._ptr, cyeglSync, flags)
-    return (cudaError_t(err), phEvent)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaProfilerStart():
-    """ Enable profiling.
-
-    Enables profile collection by the active profiling tool for the current
-    context. If profiling is already enabled, then
-    :py:obj:`~.cudaProfilerStart()` has no effect.
-
-    cudaProfilerStart and cudaProfilerStop APIs are used to
-    programmatically control the profiling granularity by allowing
-    profiling to be done only on selective pieces of code.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`
-
-    See Also
-    --------
-    :py:obj:`~.cudaProfilerStop`, :py:obj:`~.cuProfilerStart`
-    """
-    err = cyruntime.cudaProfilerStart()
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaProfilerStop():
-    """ Disable profiling.
-
-    Disables profile collection by the active profiling tool for the
-    current context. If profiling is already disabled, then
-    :py:obj:`~.cudaProfilerStop()` has no effect.
-
-    cudaProfilerStart and cudaProfilerStop APIs are used to
-    programmatically control the profiling granularity by allowing
-    profiling to be done only on selective pieces of code.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`
-
-    See Also
-    --------
-    :py:obj:`~.cudaProfilerStart`, :py:obj:`~.cuProfilerStop`
-    """
-    err = cyruntime.cudaProfilerStop()
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaGLGetDevices(unsigned int cudaDeviceCount, deviceList not None : cudaGLDeviceList):
-    """ Gets the CUDA devices associated with the current OpenGL context.
-
-    Returns in `*pCudaDeviceCount` the number of CUDA-compatible devices
-    corresponding to the current OpenGL context. Also returns in
-    `*pCudaDevices` at most `cudaDeviceCount` of the CUDA-compatible
-    devices corresponding to the current OpenGL context. If any of the GPUs
-    being used by the current OpenGL context are not CUDA capable then the
-    call will return cudaErrorNoDevice.
-
-    Parameters
-    ----------
-    cudaDeviceCount : unsigned int
-        The size of the output device array `pCudaDevices`
-    deviceList : cudaGLDeviceList
-        The set of devices to return. This set may be cudaGLDeviceListAll
-        for all devices, cudaGLDeviceListCurrentFrame for the devices used
-        to render the current frame (in SLI), or cudaGLDeviceListNextFrame
-        for the devices used to render the next frame (in SLI).
-
-    Returns
-    -------
-    cudaError_t
-        cudaSuccess
-        cudaErrorNoDevice
-        cudaErrorInvalidGraphicsContext
-        cudaErrorUnknown
-    pCudaDeviceCount : unsigned int
-        Returned number of CUDA devices corresponding to the current OpenGL
-        context
-    pCudaDevices : List[int]
-        Returned CUDA devices corresponding to the current OpenGL context
-
-    See Also
-    --------
-    ~.cudaGraphicsUnregisterResource
-    ~.cudaGraphicsMapResources
-    ~.cudaGraphicsSubResourceGetMappedArray
-    ~.cudaGraphicsResourceGetMappedPointer
-    ~.cuGLGetDevices
-
-    Notes
-    -----
-    This function is not supported on Mac OS X.
-
-    """
-    cdef unsigned int pCudaDeviceCount = 0
-    cdef int* cypCudaDevices = NULL
-    pypCudaDevices = []
-    if cudaDeviceCount != 0:
-        cypCudaDevices = <int*>calloc(cudaDeviceCount, sizeof(int))
-        if cypCudaDevices is NULL:
-            raise MemoryError('Failed to allocate length x size memory: ' + str(cudaDeviceCount) + 'x' + str(sizeof(int)))
-    cdef cyruntime.cudaGLDeviceList cydeviceList = deviceList.value
-    err = cyruntime.cudaGLGetDevices(&pCudaDeviceCount, cypCudaDevices, cudaDeviceCount, cydeviceList)
-    if cudaError_t(err) == cudaError_t(0):
-        pypCudaDevices = [<void_ptr>cypCudaDevices[idx] for idx in range(cudaDeviceCount)]
-    if cypCudaDevices is not NULL:
-        free(cypCudaDevices)
-    return (cudaError_t(err), pCudaDeviceCount, pypCudaDevices)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaGraphicsGLRegisterImage(image, target, unsigned int flags):
-    """ Register an OpenGL texture or renderbuffer object.
-
-    Registers the texture or renderbuffer object specified by `image` for
-    access by CUDA. A handle to the registered object is returned as
-    `resource`.
-
-    `target` must match the type of the object, and must be one of
-    :py:obj:`~.GL_TEXTURE_2D`, :py:obj:`~.GL_TEXTURE_RECTANGLE`,
-    :py:obj:`~.GL_TEXTURE_CUBE_MAP`, :py:obj:`~.GL_TEXTURE_3D`,
-    :py:obj:`~.GL_TEXTURE_2D_ARRAY`, or :py:obj:`~.GL_RENDERBUFFER`.
-
-    The register flags `flags` specify the intended usage, as follows:
-
-    - :py:obj:`~.cudaGraphicsRegisterFlagsNone`: Specifies no hints about
-      how this resource will be used. It is therefore assumed that this
-      resource will be read from and written to by CUDA. This is the
-      default value.
-
-    - :py:obj:`~.cudaGraphicsRegisterFlagsReadOnly`: Specifies that CUDA
-      will not write to this resource.
-
-    - :py:obj:`~.cudaGraphicsRegisterFlagsWriteDiscard`: Specifies that
-      CUDA will not read from this resource and will write over the entire
-      contents of the resource, so none of the data previously stored in
-      the resource will be preserved.
-
-    - :py:obj:`~.cudaGraphicsRegisterFlagsSurfaceLoadStore`: Specifies that
-      CUDA will bind this resource to a surface reference.
-
-    - :py:obj:`~.cudaGraphicsRegisterFlagsTextureGather`: Specifies that
-      CUDA will perform texture gather operations on this resource.
-
-    The following image formats are supported. For brevity's sake, the list
-    is abbreviated. For ex., {GL_R, GL_RG} X {8, 16} would expand to the
-    following 4 formats {GL_R8, GL_R16, GL_RG8, GL_RG16} :
-
-    - GL_RED, GL_RG, GL_RGBA, GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA,
-      GL_INTENSITY
-
-    - {GL_R, GL_RG, GL_RGBA} X {8, 16, 16F, 32F, 8UI, 16UI, 32UI, 8I, 16I,
-      32I}
-
-    - {GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY} X {8, 16,
-      16F_ARB, 32F_ARB, 8UI_EXT, 16UI_EXT, 32UI_EXT, 8I_EXT, 16I_EXT,
-      32I_EXT}
-
-    The following image classes are currently disallowed:
-
-    - Textures with borders
-
-    - Multisampled renderbuffers
-
-    Parameters
-    ----------
-    image : :py:obj:`~.GLuint`
-        name of texture or renderbuffer object to be registered
-    target : :py:obj:`~.GLenum`
-        Identifies the type of object specified by `image`
-    flags : unsigned int
-        Register flags
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`, :py:obj:`~.cudaErrorUnknown`
-    resource : :py:obj:`~.cudaGraphicsResource`
-        Pointer to the returned object handle
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsGLRegisterImage`
-    """
-    cdef cyruntime.GLenum cytarget
-    if target is None:
-        cytarget = <cyruntime.GLenum><void_ptr>0
-    elif isinstance(target, (GLenum,)):
-        ptarget = int(target)
-        cytarget = <cyruntime.GLenum><void_ptr>ptarget
-    else:
-        ptarget = int(GLenum(target))
-        cytarget = <cyruntime.GLenum><void_ptr>ptarget
-    cdef cyruntime.GLuint cyimage
-    if image is None:
-        cyimage = <cyruntime.GLuint><void_ptr>0
-    elif isinstance(image, (GLuint,)):
-        pimage = int(image)
-        cyimage = <cyruntime.GLuint><void_ptr>pimage
-    else:
-        pimage = int(GLuint(image))
-        cyimage = <cyruntime.GLuint><void_ptr>pimage
-    cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
-    err = cyruntime.cudaGraphicsGLRegisterImage(resource._ptr, cyimage, cytarget, flags)
-    return (cudaError_t(err), resource)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaGraphicsGLRegisterBuffer(buffer, unsigned int flags):
-    """ Registers an OpenGL buffer object.
-
-    Registers the buffer object specified by `buffer` for access by CUDA. A
-    handle to the registered object is returned as `resource`. The register
-    flags `flags` specify the intended usage, as follows:
-
-    - :py:obj:`~.cudaGraphicsRegisterFlagsNone`: Specifies no hints about
-      how this resource will be used. It is therefore assumed that this
-      resource will be read from and written to by CUDA. This is the
-      default value.
-
-    - :py:obj:`~.cudaGraphicsRegisterFlagsReadOnly`: Specifies that CUDA
-      will not write to this resource.
-
-    - :py:obj:`~.cudaGraphicsRegisterFlagsWriteDiscard`: Specifies that
-      CUDA will not read from this resource and will write over the entire
-      contents of the resource, so none of the data previously stored in
-      the resource will be preserved.
-
-    Parameters
-    ----------
-    buffer : :py:obj:`~.GLuint`
-        name of buffer object to be registered
-    flags : unsigned int
-        Register flags
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`, :py:obj:`~.cudaErrorUnknown`
-    resource : :py:obj:`~.cudaGraphicsResource`
-        Pointer to the returned object handle
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsGLRegisterBuffer`
-    """
-    cdef cyruntime.GLuint cybuffer
-    if buffer is None:
-        cybuffer = <cyruntime.GLuint><void_ptr>0
-    elif isinstance(buffer, (GLuint,)):
-        pbuffer = int(buffer)
-        cybuffer = <cyruntime.GLuint><void_ptr>pbuffer
-    else:
-        pbuffer = int(GLuint(buffer))
-        cybuffer = <cyruntime.GLuint><void_ptr>pbuffer
-    cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
-    err = cyruntime.cudaGraphicsGLRegisterBuffer(resource._ptr, cybuffer, flags)
-    return (cudaError_t(err), resource)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaVDPAUGetDevice(vdpDevice, vdpGetProcAddress):
-    """ Gets the CUDA device associated with a VdpDevice.
-
-    Returns the CUDA device associated with a VdpDevice, if applicable.
-
-    Parameters
-    ----------
-    vdpDevice : :py:obj:`~.VdpDevice`
-        A VdpDevice handle
-    vdpGetProcAddress : :py:obj:`~.VdpGetProcAddress`
-        VDPAU's VdpGetProcAddress function pointer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`
-    device : int
-        Returns the device associated with vdpDevice, or -1 if the device
-        associated with vdpDevice is not a compute device.
-
-    See Also
-    --------
-    :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cuVDPAUGetDevice`
-    """
-    cdef cyruntime.VdpGetProcAddress *cyvdpGetProcAddress
-    if vdpGetProcAddress is None:
-        cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>NULL
-    elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)):
-        pvdpGetProcAddress = vdpGetProcAddress.getPtr()
-        cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>pvdpGetProcAddress
-    elif isinstance(vdpGetProcAddress, (int)):
-        cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>vdpGetProcAddress
-    else:
-        raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected <class 'int, runtime.VdpGetProcAddress'>, found " + str(type(vdpGetProcAddress)))
-    cdef cyruntime.VdpDevice cyvdpDevice
-    if vdpDevice is None:
-        cyvdpDevice = <cyruntime.VdpDevice><void_ptr>0
-    elif isinstance(vdpDevice, (VdpDevice,)):
-        pvdpDevice = int(vdpDevice)
-        cyvdpDevice = <cyruntime.VdpDevice><void_ptr>pvdpDevice
-    else:
-        pvdpDevice = int(VdpDevice(vdpDevice))
-        cyvdpDevice = <cyruntime.VdpDevice><void_ptr>pvdpDevice
-    cdef int device = 0
-    err = cyruntime.cudaVDPAUGetDevice(&device, cyvdpDevice, cyvdpGetProcAddress)
-    return (cudaError_t(err), device)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaVDPAUSetVDPAUDevice(int device, vdpDevice, vdpGetProcAddress):
-    """ Sets a CUDA device to use VDPAU interoperability.
-
-    Records `vdpDevice` as the VdpDevice for VDPAU interoperability with
-    the CUDA device `device` and sets `device` as the current device for
-    the calling host thread.
-
-    This function will immediately initialize the primary context on
-    `device` if needed.
-
-    If `device` has already been initialized then this call will fail with
-    the error :py:obj:`~.cudaErrorSetOnActiveProcess`. In this case it is
-    necessary to reset `device` using :py:obj:`~.cudaDeviceReset()` before
-    VDPAU interoperability on `device` may be enabled.
-
-    Parameters
-    ----------
-    device : int
-        Device to use for VDPAU interoperability
-    vdpDevice : :py:obj:`~.VdpDevice`
-        The VdpDevice to interoperate with
-    vdpGetProcAddress : :py:obj:`~.VdpGetProcAddress`
-        VDPAU's VdpGetProcAddress function pointer
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorSetOnActiveProcess`
-
-    See Also
-    --------
-    :py:obj:`~.cudaGraphicsVDPAURegisterVideoSurface`, :py:obj:`~.cudaGraphicsVDPAURegisterOutputSurface`, :py:obj:`~.cudaDeviceReset`
-    """
-    cdef cyruntime.VdpGetProcAddress *cyvdpGetProcAddress
-    if vdpGetProcAddress is None:
-        cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>NULL
-    elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)):
-        pvdpGetProcAddress = vdpGetProcAddress.getPtr()
-        cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>pvdpGetProcAddress
-    elif isinstance(vdpGetProcAddress, (int)):
-        cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>vdpGetProcAddress
-    else:
-        raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected <class 'int, runtime.VdpGetProcAddress'>, found " + str(type(vdpGetProcAddress)))
-    cdef cyruntime.VdpDevice cyvdpDevice
-    if vdpDevice is None:
-        cyvdpDevice = <cyruntime.VdpDevice><void_ptr>0
-    elif isinstance(vdpDevice, (VdpDevice,)):
-        pvdpDevice = int(vdpDevice)
-        cyvdpDevice = <cyruntime.VdpDevice><void_ptr>pvdpDevice
-    else:
-        pvdpDevice = int(VdpDevice(vdpDevice))
-        cyvdpDevice = <cyruntime.VdpDevice><void_ptr>pvdpDevice
-    err = cyruntime.cudaVDPAUSetVDPAUDevice(device, cyvdpDevice, cyvdpGetProcAddress)
-    return (cudaError_t(err),)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaGraphicsVDPAURegisterVideoSurface(vdpSurface, unsigned int flags):
-    """ Register a VdpVideoSurface object.
-
-    Registers the VdpVideoSurface specified by `vdpSurface` for access by
-    CUDA. A handle to the registered object is returned as `resource`. The
-    surface's intended usage is specified using `flags`, as follows:
-
-    - :py:obj:`~.cudaGraphicsMapFlagsNone`: Specifies no hints about how
-      this resource will be used. It is therefore assumed that this
-      resource will be read from and written to by CUDA. This is the
-      default value.
-
-    - :py:obj:`~.cudaGraphicsMapFlagsReadOnly`: Specifies that CUDA will
-      not write to this resource.
-
-    - :py:obj:`~.cudaGraphicsMapFlagsWriteDiscard`: Specifies that CUDA
-      will not read from this resource and will write over the entire
-      contents of the resource, so none of the data previously stored in
-      the resource will be preserved.
-
-    Parameters
-    ----------
-    vdpSurface : :py:obj:`~.VdpVideoSurface`
-        VDPAU object to be registered
-    flags : unsigned int
-        Map flags
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
-    resource : :py:obj:`~.cudaGraphicsResource`
-        Pointer to the returned object handle
-
-    See Also
-    --------
-    :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsVDPAURegisterVideoSurface`
-    """
-    cdef cyruntime.VdpVideoSurface cyvdpSurface
-    if vdpSurface is None:
-        cyvdpSurface = <cyruntime.VdpVideoSurface><void_ptr>0
-    elif isinstance(vdpSurface, (VdpVideoSurface,)):
-        pvdpSurface = int(vdpSurface)
-        cyvdpSurface = <cyruntime.VdpVideoSurface><void_ptr>pvdpSurface
-    else:
-        pvdpSurface = int(VdpVideoSurface(vdpSurface))
-        cyvdpSurface = <cyruntime.VdpVideoSurface><void_ptr>pvdpSurface
-    cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
-    err = cyruntime.cudaGraphicsVDPAURegisterVideoSurface(resource._ptr, cyvdpSurface, flags)
-    return (cudaError_t(err), resource)
-{{endif}}
-
-{{if True}}
-
-@cython.embedsignature(True)
-def cudaGraphicsVDPAURegisterOutputSurface(vdpSurface, unsigned int flags):
-    """ Register a VdpOutputSurface object.
-
-    Registers the VdpOutputSurface specified by `vdpSurface` for access by
-    CUDA. A handle to the registered object is returned as `resource`. The
-    surface's intended usage is specified using `flags`, as follows:
-
-    - :py:obj:`~.cudaGraphicsMapFlagsNone`: Specifies no hints about how
-      this resource will be used. It is therefore assumed that this
-      resource will be read from and written to by CUDA. This is the
-      default value.
-
-    - :py:obj:`~.cudaGraphicsMapFlagsReadOnly`: Specifies that CUDA will
-      not write to this resource.
-
-    - :py:obj:`~.cudaGraphicsMapFlagsWriteDiscard`: Specifies that CUDA
-      will not read from this resource and will write over the entire
-      contents of the resource, so none of the data previously stored in
-      the resource will be preserved.
-
-    Parameters
-    ----------
-    vdpSurface : :py:obj:`~.VdpOutputSurface`
-        VDPAU object to be registered
-    flags : unsigned int
-        Map flags
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
-    resource : :py:obj:`~.cudaGraphicsResource`
-        Pointer to the returned object handle
-
-    See Also
-    --------
-    :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsVDPAURegisterOutputSurface`
-    """
-    cdef cyruntime.VdpOutputSurface cyvdpSurface
-    if vdpSurface is None:
-        cyvdpSurface = <cyruntime.VdpOutputSurface><void_ptr>0
-    elif isinstance(vdpSurface, (VdpOutputSurface,)):
-        pvdpSurface = int(vdpSurface)
-        cyvdpSurface = <cyruntime.VdpOutputSurface><void_ptr>pvdpSurface
-    else:
-        pvdpSurface = int(VdpOutputSurface(vdpSurface))
-        cyvdpSurface = <cyruntime.VdpOutputSurface><void_ptr>pvdpSurface
-    cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
-    err = cyruntime.cudaGraphicsVDPAURegisterOutputSurface(resource._ptr, cyvdpSurface, flags)
-    return (cudaError_t(err), resource)
-{{endif}}
-
-
-@cython.embedsignature(True)
-def getLocalRuntimeVersion():
-    """ Returns the CUDA Runtime version of local shared library.
-
-    Returns in `*runtimeVersion` the version number of the current CUDA
-    Runtime instance. The version is returned as (1000 * major + 10 *
-    minor). For example, CUDA 9.2 would be represented by 9020.
-
-    As of CUDA 12.0, this function no longer initializes CUDA. The purpose
-    of this API is solely to return a compile-time constant stating the
-    CUDA Toolkit version in the above format.
-
-    This function automatically returns :py:obj:`~.cudaErrorInvalidValue`
-    if the `runtimeVersion` argument is NULL.
-
-    Returns
-    -------
-    cudaError_t
-        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
-    runtimeVersion : int
-        Returns the CUDA Runtime version.
-
-    See Also
-    --------
-    :py:obj:`~.cudaDriverGetVersion`, :py:obj:`~.cuDriverGetVersion`
-    """
-    cdef int runtimeVersion = 0
-    err = cyruntime.getLocalRuntimeVersion(&runtimeVersion)
-    return (cudaError_t(err), runtimeVersion)
-
-
-@cython.embedsignature(True)
-def sizeof(objType):
-    """ Returns the size of provided CUDA Python structure in bytes
-
-    Parameters
-    ----------
-    objType : Any
-        CUDA Python object
-
-    Returns
-    -------
-    lowered_name : int
-        The size of `objType` in bytes
-    """
-    {{if 'struct dim3' in found_types}}
-    if objType == dim3:
-        return sizeof(cyruntime.dim3){{endif}}
-    {{if 'struct cudaChannelFormatDesc' in found_types}}
-    if objType == cudaChannelFormatDesc:
-        return sizeof(cyruntime.cudaChannelFormatDesc){{endif}}
-    {{if 'cudaArray_t' in found_types}}
-    if objType == cudaArray_t:
-        return sizeof(cyruntime.cudaArray_t){{endif}}
-    {{if 'cudaArray_const_t' in found_types}}
-    if objType == cudaArray_const_t:
-        return sizeof(cyruntime.cudaArray_const_t){{endif}}
-    {{if 'cudaMipmappedArray_t' in found_types}}
-    if objType == cudaMipmappedArray_t:
-        return sizeof(cyruntime.cudaMipmappedArray_t){{endif}}
-    {{if 'cudaMipmappedArray_const_t' in found_types}}
-    if objType == cudaMipmappedArray_const_t:
-        return sizeof(cyruntime.cudaMipmappedArray_const_t){{endif}}
-    {{if 'struct cudaArraySparseProperties' in found_types}}
-    if objType == cudaArraySparseProperties:
-        return sizeof(cyruntime.cudaArraySparseProperties){{endif}}
-    {{if 'struct cudaArrayMemoryRequirements' in found_types}}
-    if objType == cudaArrayMemoryRequirements:
-        return sizeof(cyruntime.cudaArrayMemoryRequirements){{endif}}
-    {{if 'struct cudaPitchedPtr' in found_types}}
-    if objType == cudaPitchedPtr:
-        return sizeof(cyruntime.cudaPitchedPtr){{endif}}
-    {{if 'struct cudaExtent' in found_types}}
-    if objType == cudaExtent:
-        return sizeof(cyruntime.cudaExtent){{endif}}
-    {{if 'struct cudaPos' in found_types}}
-    if objType == cudaPos:
-        return sizeof(cyruntime.cudaPos){{endif}}
-    {{if 'struct cudaMemcpy3DParms' in found_types}}
-    if objType == cudaMemcpy3DParms:
-        return sizeof(cyruntime.cudaMemcpy3DParms){{endif}}
-    {{if 'struct cudaMemcpyNodeParams' in found_types}}
-    if objType == cudaMemcpyNodeParams:
-        return sizeof(cyruntime.cudaMemcpyNodeParams){{endif}}
-    {{if 'struct cudaMemcpy3DPeerParms' in found_types}}
-    if objType == cudaMemcpy3DPeerParms:
-        return sizeof(cyruntime.cudaMemcpy3DPeerParms){{endif}}
-    {{if 'struct cudaMemsetParams' in found_types}}
-    if objType == cudaMemsetParams:
-        return sizeof(cyruntime.cudaMemsetParams){{endif}}
-    {{if 'struct cudaMemsetParamsV2' in found_types}}
-    if objType == cudaMemsetParamsV2:
-        return sizeof(cyruntime.cudaMemsetParamsV2){{endif}}
-    {{if 'struct cudaAccessPolicyWindow' in found_types}}
-    if objType == cudaAccessPolicyWindow:
-        return sizeof(cyruntime.cudaAccessPolicyWindow){{endif}}
-    {{if 'cudaHostFn_t' in found_types}}
-    if objType == cudaHostFn_t:
-        return sizeof(cyruntime.cudaHostFn_t){{endif}}
-    {{if 'struct cudaHostNodeParams' in found_types}}
-    if objType == cudaHostNodeParams:
-        return sizeof(cyruntime.cudaHostNodeParams){{endif}}
-    {{if 'struct cudaHostNodeParamsV2' in found_types}}
-    if objType == cudaHostNodeParamsV2:
-        return sizeof(cyruntime.cudaHostNodeParamsV2){{endif}}
-    {{if 'struct cudaResourceDesc' in found_types}}
-    if objType == cudaResourceDesc:
-        return sizeof(cyruntime.cudaResourceDesc){{endif}}
-    {{if 'struct cudaResourceViewDesc' in found_types}}
-    if objType == cudaResourceViewDesc:
-        return sizeof(cyruntime.cudaResourceViewDesc){{endif}}
-    {{if 'struct cudaPointerAttributes' in found_types}}
-    if objType == cudaPointerAttributes:
-        return sizeof(cyruntime.cudaPointerAttributes){{endif}}
-    {{if 'struct cudaFuncAttributes' in found_types}}
-    if objType == cudaFuncAttributes:
-        return sizeof(cyruntime.cudaFuncAttributes){{endif}}
-    {{if 'struct cudaMemLocation' in found_types}}
-    if objType == cudaMemLocation:
-        return sizeof(cyruntime.cudaMemLocation){{endif}}
-    {{if 'struct cudaMemAccessDesc' in found_types}}
-    if objType == cudaMemAccessDesc:
-        return sizeof(cyruntime.cudaMemAccessDesc){{endif}}
-    {{if 'struct cudaMemPoolProps' in found_types}}
-    if objType == cudaMemPoolProps:
-        return sizeof(cyruntime.cudaMemPoolProps){{endif}}
-    {{if 'struct cudaMemPoolPtrExportData' in found_types}}
-    if objType == cudaMemPoolPtrExportData:
-        return sizeof(cyruntime.cudaMemPoolPtrExportData){{endif}}
-    {{if 'struct cudaMemAllocNodeParams' in found_types}}
-    if objType == cudaMemAllocNodeParams:
-        return sizeof(cyruntime.cudaMemAllocNodeParams){{endif}}
-    {{if 'struct cudaMemAllocNodeParamsV2' in found_types}}
-    if objType == cudaMemAllocNodeParamsV2:
-        return sizeof(cyruntime.cudaMemAllocNodeParamsV2){{endif}}
-    {{if 'struct cudaMemFreeNodeParams' in found_types}}
-    if objType == cudaMemFreeNodeParams:
-        return sizeof(cyruntime.cudaMemFreeNodeParams){{endif}}
-    {{if 'struct CUuuid_st' in found_types}}
-    if objType == CUuuid_st:
-        return sizeof(cyruntime.CUuuid_st){{endif}}
-    {{if 'CUuuid' in found_types}}
-    if objType == CUuuid:
-        return sizeof(cyruntime.CUuuid){{endif}}
-    {{if 'cudaUUID_t' in found_types}}
-    if objType == cudaUUID_t:
-        return sizeof(cyruntime.cudaUUID_t){{endif}}
-    {{if 'struct cudaDeviceProp' in found_types}}
-    if objType == cudaDeviceProp:
-        return sizeof(cyruntime.cudaDeviceProp){{endif}}
-    {{if 'struct cudaIpcEventHandle_st' in found_types}}
-    if objType == cudaIpcEventHandle_st:
-        return sizeof(cyruntime.cudaIpcEventHandle_st){{endif}}
-    {{if 'cudaIpcEventHandle_t' in found_types}}
-    if objType == cudaIpcEventHandle_t:
-        return sizeof(cyruntime.cudaIpcEventHandle_t){{endif}}
-    {{if 'struct cudaIpcMemHandle_st' in found_types}}
-    if objType == cudaIpcMemHandle_st:
-        return sizeof(cyruntime.cudaIpcMemHandle_st){{endif}}
-    {{if 'cudaIpcMemHandle_t' in found_types}}
-    if objType == cudaIpcMemHandle_t:
-        return sizeof(cyruntime.cudaIpcMemHandle_t){{endif}}
-    {{if 'struct cudaMemFabricHandle_st' in found_types}}
-    if objType == cudaMemFabricHandle_st:
-        return sizeof(cyruntime.cudaMemFabricHandle_st){{endif}}
-    {{if 'cudaMemFabricHandle_t' in found_types}}
-    if objType == cudaMemFabricHandle_t:
-        return sizeof(cyruntime.cudaMemFabricHandle_t){{endif}}
-    {{if 'struct cudaExternalMemoryHandleDesc' in found_types}}
-    if objType == cudaExternalMemoryHandleDesc:
-        return sizeof(cyruntime.cudaExternalMemoryHandleDesc){{endif}}
-    {{if 'struct cudaExternalMemoryBufferDesc' in found_types}}
-    if objType == cudaExternalMemoryBufferDesc:
-        return sizeof(cyruntime.cudaExternalMemoryBufferDesc){{endif}}
-    {{if 'struct cudaExternalMemoryMipmappedArrayDesc' in found_types}}
-    if objType == cudaExternalMemoryMipmappedArrayDesc:
-        return sizeof(cyruntime.cudaExternalMemoryMipmappedArrayDesc){{endif}}
-    {{if 'struct cudaExternalSemaphoreHandleDesc' in found_types}}
-    if objType == cudaExternalSemaphoreHandleDesc:
-        return sizeof(cyruntime.cudaExternalSemaphoreHandleDesc){{endif}}
-    {{if 'struct cudaExternalSemaphoreSignalParams' in found_types}}
-    if objType == cudaExternalSemaphoreSignalParams:
-        return sizeof(cyruntime.cudaExternalSemaphoreSignalParams){{endif}}
-    {{if 'struct cudaExternalSemaphoreWaitParams' in found_types}}
-    if objType == cudaExternalSemaphoreWaitParams:
-        return sizeof(cyruntime.cudaExternalSemaphoreWaitParams){{endif}}
-    {{if 'cudaStream_t' in found_types}}
-    if objType == cudaStream_t:
-        return sizeof(cyruntime.cudaStream_t){{endif}}
-    {{if 'cudaEvent_t' in found_types}}
-    if objType == cudaEvent_t:
-        return sizeof(cyruntime.cudaEvent_t){{endif}}
-    {{if 'cudaGraphicsResource_t' in found_types}}
-    if objType == cudaGraphicsResource_t:
-        return sizeof(cyruntime.cudaGraphicsResource_t){{endif}}
-    {{if 'cudaExternalMemory_t' in found_types}}
-    if objType == cudaExternalMemory_t:
-        return sizeof(cyruntime.cudaExternalMemory_t){{endif}}
-    {{if 'cudaExternalSemaphore_t' in found_types}}
-    if objType == cudaExternalSemaphore_t:
-        return sizeof(cyruntime.cudaExternalSemaphore_t){{endif}}
-    {{if 'cudaGraph_t' in found_types}}
-    if objType == cudaGraph_t:
-        return sizeof(cyruntime.cudaGraph_t){{endif}}
-    {{if 'cudaGraphNode_t' in found_types}}
-    if objType == cudaGraphNode_t:
-        return sizeof(cyruntime.cudaGraphNode_t){{endif}}
-    {{if 'cudaUserObject_t' in found_types}}
-    if objType == cudaUserObject_t:
-        return sizeof(cyruntime.cudaUserObject_t){{endif}}
-    {{if 'cudaGraphConditionalHandle' in found_types}}
-    if objType == cudaGraphConditionalHandle:
-        return sizeof(cyruntime.cudaGraphConditionalHandle){{endif}}
-    {{if 'cudaFunction_t' in found_types}}
-    if objType == cudaFunction_t:
-        return sizeof(cyruntime.cudaFunction_t){{endif}}
-    {{if 'cudaKernel_t' in found_types}}
-    if objType == cudaKernel_t:
-        return sizeof(cyruntime.cudaKernel_t){{endif}}
-    {{if 'cudaMemPool_t' in found_types}}
-    if objType == cudaMemPool_t:
-        return sizeof(cyruntime.cudaMemPool_t){{endif}}
-    {{if 'struct cudaKernelNodeParams' in found_types}}
-    if objType == cudaKernelNodeParams:
-        return sizeof(cyruntime.cudaKernelNodeParams){{endif}}
-    {{if 'struct cudaKernelNodeParamsV2' in found_types}}
-    if objType == cudaKernelNodeParamsV2:
-        return sizeof(cyruntime.cudaKernelNodeParamsV2){{endif}}
-    {{if 'struct cudaExternalSemaphoreSignalNodeParams' in found_types}}
-    if objType == cudaExternalSemaphoreSignalNodeParams:
-        return sizeof(cyruntime.cudaExternalSemaphoreSignalNodeParams){{endif}}
-    {{if 'struct cudaExternalSemaphoreSignalNodeParamsV2' in found_types}}
-    if objType == cudaExternalSemaphoreSignalNodeParamsV2:
-        return sizeof(cyruntime.cudaExternalSemaphoreSignalNodeParamsV2){{endif}}
-    {{if 'struct cudaExternalSemaphoreWaitNodeParams' in found_types}}
-    if objType == cudaExternalSemaphoreWaitNodeParams:
-        return sizeof(cyruntime.cudaExternalSemaphoreWaitNodeParams){{endif}}
-    {{if 'struct cudaExternalSemaphoreWaitNodeParamsV2' in found_types}}
-    if objType == cudaExternalSemaphoreWaitNodeParamsV2:
-        return sizeof(cyruntime.cudaExternalSemaphoreWaitNodeParamsV2){{endif}}
-    {{if 'struct cudaConditionalNodeParams' in found_types}}
-    if objType == cudaConditionalNodeParams:
-        return sizeof(cyruntime.cudaConditionalNodeParams){{endif}}
-    {{if 'struct cudaChildGraphNodeParams' in found_types}}
-    if objType == cudaChildGraphNodeParams:
-        return sizeof(cyruntime.cudaChildGraphNodeParams){{endif}}
-    {{if 'struct cudaEventRecordNodeParams' in found_types}}
-    if objType == cudaEventRecordNodeParams:
-        return sizeof(cyruntime.cudaEventRecordNodeParams){{endif}}
-    {{if 'struct cudaEventWaitNodeParams' in found_types}}
-    if objType == cudaEventWaitNodeParams:
-        return sizeof(cyruntime.cudaEventWaitNodeParams){{endif}}
-    {{if 'struct cudaGraphNodeParams' in found_types}}
-    if objType == cudaGraphNodeParams:
-        return sizeof(cyruntime.cudaGraphNodeParams){{endif}}
-    {{if 'struct cudaGraphEdgeData_st' in found_types}}
-    if objType == cudaGraphEdgeData_st:
-        return sizeof(cyruntime.cudaGraphEdgeData_st){{endif}}
-    {{if 'cudaGraphEdgeData' in found_types}}
-    if objType == cudaGraphEdgeData:
-        return sizeof(cyruntime.cudaGraphEdgeData){{endif}}
-    {{if 'cudaGraphExec_t' in found_types}}
-    if objType == cudaGraphExec_t:
-        return sizeof(cyruntime.cudaGraphExec_t){{endif}}
-    {{if 'struct cudaGraphInstantiateParams_st' in found_types}}
-    if objType == cudaGraphInstantiateParams_st:
-        return sizeof(cyruntime.cudaGraphInstantiateParams_st){{endif}}
-    {{if 'cudaGraphInstantiateParams' in found_types}}
-    if objType == cudaGraphInstantiateParams:
-        return sizeof(cyruntime.cudaGraphInstantiateParams){{endif}}
-    {{if 'struct cudaGraphExecUpdateResultInfo_st' in found_types}}
-    if objType == cudaGraphExecUpdateResultInfo_st:
-        return sizeof(cyruntime.cudaGraphExecUpdateResultInfo_st){{endif}}
-    {{if 'cudaGraphExecUpdateResultInfo' in found_types}}
-    if objType == cudaGraphExecUpdateResultInfo:
-        return sizeof(cyruntime.cudaGraphExecUpdateResultInfo){{endif}}
-    {{if 'cudaGraphDeviceNode_t' in found_types}}
-    if objType == cudaGraphDeviceNode_t:
-        return sizeof(cyruntime.cudaGraphDeviceNode_t){{endif}}
-    {{if 'struct cudaGraphKernelNodeUpdate' in found_types}}
-    if objType == cudaGraphKernelNodeUpdate:
-        return sizeof(cyruntime.cudaGraphKernelNodeUpdate){{endif}}
-    {{if 'struct cudaLaunchMemSyncDomainMap_st' in found_types}}
-    if objType == cudaLaunchMemSyncDomainMap_st:
-        return sizeof(cyruntime.cudaLaunchMemSyncDomainMap_st){{endif}}
-    {{if 'cudaLaunchMemSyncDomainMap' in found_types}}
-    if objType == cudaLaunchMemSyncDomainMap:
-        return sizeof(cyruntime.cudaLaunchMemSyncDomainMap){{endif}}
-    {{if 'union cudaLaunchAttributeValue' in found_types}}
-    if objType == cudaLaunchAttributeValue:
-        return sizeof(cyruntime.cudaLaunchAttributeValue){{endif}}
-    {{if 'struct cudaLaunchAttribute_st' in found_types}}
-    if objType == cudaLaunchAttribute_st:
-        return sizeof(cyruntime.cudaLaunchAttribute_st){{endif}}
-    {{if 'cudaLaunchAttribute' in found_types}}
-    if objType == cudaLaunchAttribute:
-        return sizeof(cyruntime.cudaLaunchAttribute){{endif}}
-    {{if 'cudaAsyncCallbackHandle_t' in found_types}}
-    if objType == cudaAsyncCallbackHandle_t:
-        return sizeof(cyruntime.cudaAsyncCallbackHandle_t){{endif}}
-    {{if 'struct cudaAsyncNotificationInfo' in found_types}}
-    if objType == cudaAsyncNotificationInfo:
-        return sizeof(cyruntime.cudaAsyncNotificationInfo){{endif}}
-    {{if 'cudaAsyncNotificationInfo_t' in found_types}}
-    if objType == cudaAsyncNotificationInfo_t:
-        return sizeof(cyruntime.cudaAsyncNotificationInfo_t){{endif}}
-    {{if 'cudaAsyncCallback' in found_types}}
-    if objType == cudaAsyncCallback:
-        return sizeof(cyruntime.cudaAsyncCallback){{endif}}
-    {{if 'cudaSurfaceObject_t' in found_types}}
-    if objType == cudaSurfaceObject_t:
-        return sizeof(cyruntime.cudaSurfaceObject_t){{endif}}
-    {{if 'struct cudaTextureDesc' in found_types}}
-    if objType == cudaTextureDesc:
-        return sizeof(cyruntime.cudaTextureDesc){{endif}}
-    {{if 'cudaTextureObject_t' in found_types}}
-    if objType == cudaTextureObject_t:
-        return sizeof(cyruntime.cudaTextureObject_t){{endif}}
-    {{if 'cudaStreamCallback_t' in found_types}}
-    if objType == cudaStreamCallback_t:
-        return sizeof(cyruntime.cudaStreamCallback_t){{endif}}
-    {{if True}}
-    if objType == GLenum:
-        return sizeof(cyruntime.GLenum){{endif}}
-    {{if True}}
-    if objType == GLuint:
-        return sizeof(cyruntime.GLuint){{endif}}
-    {{if True}}
-    if objType == EGLImageKHR:
-        return sizeof(cyruntime.EGLImageKHR){{endif}}
-    {{if True}}
-    if objType == EGLStreamKHR:
-        return sizeof(cyruntime.EGLStreamKHR){{endif}}
-    {{if True}}
-    if objType == EGLint:
-        return sizeof(cyruntime.EGLint){{endif}}
-    {{if True}}
-    if objType == EGLSyncKHR:
-        return sizeof(cyruntime.EGLSyncKHR){{endif}}
-    {{if True}}
-    if objType == VdpDevice:
-        return sizeof(cyruntime.VdpDevice){{endif}}
-    {{if True}}
-    if objType == VdpGetProcAddress:
-        return sizeof(cyruntime.VdpGetProcAddress){{endif}}
-    {{if True}}
-    if objType == VdpVideoSurface:
-        return sizeof(cyruntime.VdpVideoSurface){{endif}}
-    {{if True}}
-    if objType == VdpOutputSurface:
-        return sizeof(cyruntime.VdpOutputSurface){{endif}}
-    {{if True}}
-    if objType == cudaStreamAttrValue:
-        return sizeof(cyruntime.cudaStreamAttrValue){{endif}}
-    {{if True}}
-    if objType == cudaKernelNodeAttrValue:
-        return sizeof(cyruntime.cudaKernelNodeAttrValue){{endif}}
-    {{if True}}
-    if objType == cudaEglPlaneDesc_st:
-        return sizeof(cyruntime.cudaEglPlaneDesc_st){{endif}}
-    {{if True}}
-    if objType == cudaEglPlaneDesc:
-        return sizeof(cyruntime.cudaEglPlaneDesc){{endif}}
-    {{if True}}
-    if objType == cudaEglFrame_st:
-        return sizeof(cyruntime.cudaEglFrame_st){{endif}}
-    {{if True}}
-    if objType == cudaEglFrame:
-        return sizeof(cyruntime.cudaEglFrame){{endif}}
-    {{if True}}
-    if objType == cudaEglStreamConnection:
-        return sizeof(cyruntime.cudaEglStreamConnection){{endif}}
-    raise TypeError("Unknown type: " + str(objType))
diff --git a/cuda_bindings/cuda/ccuda.pxd b/cuda_bindings/cuda/ccuda.pxd
deleted file mode 100644
index 73f3fc5c..00000000
--- a/cuda_bindings/cuda/ccuda.pxd
+++ /dev/null
@@ -1,7 +0,0 @@
-from cuda.bindings.cydriver cimport *
-
-cdef extern from *:
-    """
-    #pragma message ( "The cuda.ccuda module is deprecated and will be removed in a future release, " \
-                      "please switch to use the cuda.bindings.cydriver module instead." )
-    """
diff --git a/cuda_bindings/cuda/ccuda.pyx b/cuda_bindings/cuda/ccuda.pyx
deleted file mode 100644
index 73f3fc5c..00000000
--- a/cuda_bindings/cuda/ccuda.pyx
+++ /dev/null
@@ -1,7 +0,0 @@
-from cuda.bindings.cydriver cimport *
-
-cdef extern from *:
-    """
-    #pragma message ( "The cuda.ccuda module is deprecated and will be removed in a future release, " \
-                      "please switch to use the cuda.bindings.cydriver module instead." )
-    """
diff --git a/cuda_bindings/cuda/ccudart.pxd b/cuda_bindings/cuda/ccudart.pxd
deleted file mode 100644
index b32eece8..00000000
--- a/cuda_bindings/cuda/ccudart.pxd
+++ /dev/null
@@ -1,7 +0,0 @@
-from cuda.bindings.cyruntime cimport *
-
-cdef extern from *:
-    """
-    #pragma message ( "The cuda.ccudart module is deprecated and will be removed in a future release, " \
-                      "please switch to use the cuda.bindings.cyruntime module instead." )
-    """
diff --git a/cuda_bindings/cuda/ccudart.pyx b/cuda_bindings/cuda/ccudart.pyx
deleted file mode 100644
index b32eece8..00000000
--- a/cuda_bindings/cuda/ccudart.pyx
+++ /dev/null
@@ -1,7 +0,0 @@
-from cuda.bindings.cyruntime cimport *
-
-cdef extern from *:
-    """
-    #pragma message ( "The cuda.ccudart module is deprecated and will be removed in a future release, " \
-                      "please switch to use the cuda.bindings.cyruntime module instead." )
-    """
diff --git a/cuda_bindings/cuda/cnvrtc.pxd b/cuda_bindings/cuda/cnvrtc.pxd
deleted file mode 100644
index d4034084..00000000
--- a/cuda_bindings/cuda/cnvrtc.pxd
+++ /dev/null
@@ -1,7 +0,0 @@
-from cuda.bindings.cynvrtc cimport *
-
-cdef extern from *:
-    """
-    #pragma message ( "The cuda.cnvrtc module is deprecated and will be removed in a future release, " \
-                      "please switch to use the cuda.bindings.cynvrtc module instead." )
-    """
diff --git a/cuda_bindings/cuda/cnvrtc.pyx b/cuda_bindings/cuda/cnvrtc.pyx
deleted file mode 100644
index d4034084..00000000
--- a/cuda_bindings/cuda/cnvrtc.pyx
+++ /dev/null
@@ -1,7 +0,0 @@
-from cuda.bindings.cynvrtc cimport *
-
-cdef extern from *:
-    """
-    #pragma message ( "The cuda.cnvrtc module is deprecated and will be removed in a future release, " \
-                      "please switch to use the cuda.bindings.cynvrtc module instead." )
-    """
diff --git a/cuda_bindings/cuda/cuda.pyx b/cuda_bindings/cuda/cuda.pyx
deleted file mode 100644
index f8b197f7..00000000
--- a/cuda_bindings/cuda/cuda.pyx
+++ /dev/null
@@ -1,14 +0,0 @@
-import warnings as _warnings
-
-from cuda.bindings.driver import *
-
-
-cdef extern from *:
-    """
-    #pragma message ( "The cuda.cuda module is deprecated and will be removed in a future release, " \
-                      "please switch to use the cuda.bindings.driver module instead." )
-    """
-
-
-_warnings.warn("The cuda.cuda module is deprecated and will be removed in a future release, "
-               "please switch to use the cuda.bindings.driver module instead.", DeprecationWarning, stacklevel=2)
diff --git a/cuda_bindings/cuda/cudart.pyx b/cuda_bindings/cuda/cudart.pyx
deleted file mode 100644
index 8c342df8..00000000
--- a/cuda_bindings/cuda/cudart.pyx
+++ /dev/null
@@ -1,14 +0,0 @@
-import warnings as _warnings
-
-from cuda.bindings.runtime import *
-
-
-cdef extern from *:
-    """
-    #pragma message ( "The cuda.cudart module is deprecated and will be removed in a future release, " \
-                      "please switch to use the cuda.bindings.runtime module instead." )
-    """
-
-
-_warnings.warn("The cuda.cudart module is deprecated and will be removed in a future release, "
-               "please switch to use the cuda.bindings.runtime module instead.", DeprecationWarning, stacklevel=2)
diff --git a/cuda_bindings/cuda/nvrtc.pyx b/cuda_bindings/cuda/nvrtc.pyx
deleted file mode 100644
index 4a9e048f..00000000
--- a/cuda_bindings/cuda/nvrtc.pyx
+++ /dev/null
@@ -1,14 +0,0 @@
-import warnings as _warnings
-
-from cuda.bindings.nvrtc import *
-
-
-cdef extern from *:
-    """
-    #pragma message ( "The cuda.nvrtc module is deprecated and will be removed in a future release, " \
-                      "please switch to use the cuda.bindings.nvrtc module instead." )
-    """
-
-
-_warnings.warn("The cuda.nvrtc module is deprecated and will be removed in a future release, "
-               "please switch to use the cuda.bindings.nvrtc module instead.", DeprecationWarning, stacklevel=2)
diff --git a/cuda_bindings/examples/0_Introduction/clock_nvrtc_test.py b/cuda_bindings/examples/0_Introduction/clock_nvrtc_test.py
deleted file mode 100644
index 49a7a1e0..00000000
--- a/cuda_bindings/examples/0_Introduction/clock_nvrtc_test.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import numpy as np
-from cuda import cuda
-from common import common
-from common.helper_cuda import checkCudaErrors, findCudaDevice
-
-clock_nvrtc = '''\
-extern "C" __global__  void timedReduction(const float *hinput, float *output, clock_t *timer)
-{
-    // __shared__ float shared[2 * blockDim.x];
-    extern __shared__ float shared[];
-
-    const int tid = threadIdx.x;
-    const int bid = blockIdx.x;
-
-    if (tid == 0) timer[bid] = clock();
-
-    // Copy hinput.
-    shared[tid] = hinput[tid];
-    shared[tid + blockDim.x] = hinput[tid + blockDim.x];
-
-    // Perform reduction to find minimum.
-    for (int d = blockDim.x; d > 0; d /= 2)
-    {
-        __syncthreads();
-
-        if (tid < d)
-        {
-            float f0 = shared[tid];
-            float f1 = shared[tid + d];
-
-            if (f1 < f0)
-            {
-                shared[tid] = f1;
-            }
-        }
-    }
-
-    // Write result.
-    if (tid == 0) output[bid] = shared[0];
-
-    __syncthreads();
-
-    if (tid == 0) timer[bid+gridDim.x] = clock();
-}
-'''
-
-NUM_BLOCKS = 64 
-NUM_THREADS  = 256
-
-def main():
-    print("CUDA Clock sample")
-
-    timer = np.empty(NUM_BLOCKS * 2, dtype='int64')
-    hinput = np.empty(NUM_THREADS * 2, dtype='float32')
-
-    for i in range(0, NUM_THREADS * 2):
-        hinput[i] = i
-
-    devID = findCudaDevice()
-    kernelHelper = common.KernelHelper(clock_nvrtc, devID)
-    kernel_addr = kernelHelper.getFunction(b'timedReduction')
-
-    dinput = checkCudaErrors(cuda.cuMemAlloc(np.dtype(np.float32).itemsize * NUM_THREADS * 2))
-    doutput = checkCudaErrors(cuda.cuMemAlloc(np.dtype(np.float32).itemsize * NUM_BLOCKS))
-    dtimer = checkCudaErrors(cuda.cuMemAlloc(np.dtype(np.int64).itemsize * NUM_BLOCKS * 2))
-    checkCudaErrors(cuda.cuMemcpyHtoD(dinput, hinput, np.dtype(np.float32).itemsize * NUM_THREADS * 2))
-
-
-
-    arr = ((dinput, doutput, dtimer),
-           (None, None, None))
-
-    checkCudaErrors(cuda.cuLaunchKernel(kernel_addr,
-                                        NUM_BLOCKS, 1, 1,  # grid dim
-                                        NUM_THREADS, 1, 1, # block dim
-                                        np.dtype(np.float32).itemsize * 2 *NUM_THREADS, 0, # shared mem, stream
-                                        arr, 0)) # arguments
-
-    checkCudaErrors(cuda.cuCtxSynchronize())
-    checkCudaErrors(cuda.cuMemcpyDtoH(timer, dtimer, np.dtype(np.int64).itemsize * NUM_BLOCKS * 2))
-    checkCudaErrors(cuda.cuMemFree(dinput))
-    checkCudaErrors(cuda.cuMemFree(doutput))
-    checkCudaErrors(cuda.cuMemFree(dtimer))
-
-    avgElapsedClocks = 0.0
-
-    for i in range(0,NUM_BLOCKS):
-        avgElapsedClocks += timer[i + NUM_BLOCKS] - timer[i]
-
-    avgElapsedClocks = avgElapsedClocks/NUM_BLOCKS;
-    print("Average clocks/block = {}".format(avgElapsedClocks))
-
-if __name__=="__main__":
-    main()
diff --git a/cuda_bindings/examples/0_Introduction/simpleCubemapTexture_test.py b/cuda_bindings/examples/0_Introduction/simpleCubemapTexture_test.py
deleted file mode 100644
index adb5a560..00000000
--- a/cuda_bindings/examples/0_Introduction/simpleCubemapTexture_test.py
+++ /dev/null
@@ -1,201 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import math
-import numpy as np
-import sys
-import time
-from cuda import cuda, cudart
-from common import common
-from common.helper_cuda import checkCudaErrors, findCudaDevice
-
-simpleCubemapTexture = '''\
-extern "C"
-__global__ void transformKernel(float *g_odata, int width, cudaTextureObject_t tex)
-{
-    // calculate this thread's data point
-    unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
-    unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
-
-    // 0.5f offset and division are necessary to access the original data points
-    // in the texture (such that bilinear interpolation will not be activated).
-    // For details, see also CUDA Programming Guide, Appendix D
-
-    float u = ((x+0.5f) / (float) width) * 2.f - 1.f;
-    float v = ((y+0.5f) / (float) width) * 2.f - 1.f;
-
-    float cx, cy, cz;
-
-    for (unsigned int face = 0; face < 6; face ++)
-    {
-        //Layer 0 is positive X face
-        if (face == 0)
-        {
-            cx = 1;
-            cy = -v;
-            cz = -u;
-        }
-        //Layer 1 is negative X face
-        else if (face == 1)
-        {
-            cx = -1;
-            cy = -v;
-            cz = u;
-        }
-        //Layer 2 is positive Y face
-        else if (face == 2)
-        {
-            cx = u;
-            cy = 1;
-            cz = v;
-        }
-        //Layer 3 is negative Y face
-        else if (face == 3)
-        {
-            cx = u;
-            cy = -1;
-            cz = -v;
-        }
-        //Layer 4 is positive Z face
-        else if (face == 4)
-        {
-            cx = u;
-            cy = -v;
-            cz = 1;
-        }
-        //Layer 4 is negative Z face
-        else if (face == 5)
-        {
-            cx = -u;
-            cy = -v;
-            cz = -1;
-        }
-
-        // read from texture, do expected transformation and write to global memory
-        g_odata[face*width*width + y*width + x] = -texCubemap<float>(tex, cx, cy, cz);
-    }
-}
-'''
-
-def main():
-    # Use command-line specified CUDA device, otherwise use device with highest Gflops/s
-    devID = findCudaDevice()
-
-    # Get number of SMs on this GPU
-    deviceProps = checkCudaErrors(cudart.cudaGetDeviceProperties(devID));
-    print("CUDA device [{}] has {} Multi-Processors SM {}.{}".format(deviceProps.name,
-                                                                     deviceProps.multiProcessorCount,
-                                                                     deviceProps.major,
-                                                                     deviceProps.minor))
-    if (deviceProps.major < 2):
-        print("{} requires SM 2.0 or higher for support of Texture Arrays.  Test will exit...".format(sSDKname))
-        sys.exit()
-
-    # Generate input data for layered texture
-    width = 64
-    num_faces = 6
-    num_layers = 1
-    cubemap_size = width * width * num_faces
-    size = cubemap_size * num_layers * np.dtype(np.float32).itemsize
-    h_data = np.zeros(cubemap_size * num_layers, dtype='float32')
-
-    for i in range(cubemap_size * num_layers):
-        h_data[i] = i
-
-    # This is the expected transformation of the input data (the expected output)
-    h_data_ref = np.zeros(cubemap_size * num_layers, dtype='float32')
-
-    for layer in range(num_layers):
-        for i in range(cubemap_size):
-            h_data_ref[layer*cubemap_size + i] = -h_data[layer*cubemap_size + i] + layer
-
-    # Allocate device memory for result
-    d_data = checkCudaErrors(cudart.cudaMalloc(size))
-
-    # Allocate array and copy image data
-    channelDesc = checkCudaErrors(cudart.cudaCreateChannelDesc(32, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindFloat))
-    cu_3darray = checkCudaErrors(cudart.cudaMalloc3DArray(channelDesc, cudart.make_cudaExtent(width, width, num_faces), cudart.cudaArrayCubemap))
-    myparms = cudart.cudaMemcpy3DParms()
-    myparms.srcPos = cudart.make_cudaPos(0,0,0)
-    myparms.dstPos = cudart.make_cudaPos(0,0,0)
-    myparms.srcPtr = cudart.make_cudaPitchedPtr(h_data, width * np.dtype(np.float32).itemsize, width, width)
-    myparms.dstArray = cu_3darray
-    myparms.extent = cudart.make_cudaExtent(width, width, num_faces)
-    myparms.kind = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
-    checkCudaErrors(cudart.cudaMemcpy3D(myparms))
-
-    texRes = cudart.cudaResourceDesc()
-    texRes.resType            = cudart.cudaResourceType.cudaResourceTypeArray
-    texRes.res.array.array    = cu_3darray
-
-    texDescr = cudart.cudaTextureDesc()
-    texDescr.normalizedCoords = True
-    texDescr.filterMode       = cudart.cudaTextureFilterMode.cudaFilterModeLinear
-    texDescr.addressMode[0] = cudart.cudaTextureAddressMode.cudaAddressModeWrap
-    texDescr.addressMode[1] = cudart.cudaTextureAddressMode.cudaAddressModeWrap
-    texDescr.addressMode[2] = cudart.cudaTextureAddressMode.cudaAddressModeWrap
-    texDescr.readMode = cudart.cudaTextureReadMode.cudaReadModeElementType
-
-    tex = checkCudaErrors(cudart.cudaCreateTextureObject(texRes, texDescr, None))
-    dimBlock = cudart.dim3()
-    dimBlock.x = 8
-    dimBlock.y = 8
-    dimBlock.z = 1
-    dimGrid = cudart.dim3()
-    dimGrid.x = width / dimBlock.x
-    dimGrid.y = width / dimBlock.y
-    dimGrid.z = 1
-
-    print("Covering Cubemap data array of {}~3 x {}: Grid size is {} x {}, each block has 8 x 8 threads".format(
-           width, num_layers, dimGrid.x, dimGrid.y))
-
-    kernelHelper = common.KernelHelper(simpleCubemapTexture, devID)
-    _transformKernel = kernelHelper.getFunction(b'transformKernel')
-    kernelArgs = ((d_data, width, tex),(ctypes.c_void_p, ctypes.c_int, None))
-    checkCudaErrors(cuda.cuLaunchKernel(_transformKernel,
-                                        dimGrid.x, dimGrid.y, dimGrid.z,         # grid dim
-                                        dimBlock.x, dimBlock.y, dimBlock.z,      # block dim
-                                        0, 0,                                    # shared mem and stream
-                                        kernelArgs, 0))                          # arguments
-
-    checkCudaErrors(cudart.cudaDeviceSynchronize())
-
-    start = time.time()
-
-    # Execute the kernel
-    checkCudaErrors(cuda.cuLaunchKernel(_transformKernel,
-                                        dimGrid.x, dimGrid.y, dimGrid.z,         # grid dim
-                                        dimBlock.x, dimBlock.y, dimBlock.z,      # block dim
-                                        0, 0,                                    # shared mem and stream
-                                        kernelArgs, 0))                          # arguments
-
-    checkCudaErrors(cudart.cudaDeviceSynchronize())
-    stop = time.time()
-    print("Processing time: {:.3f} msec".format(stop - start))
-    print("{:.2f} Mtexlookups/sec".format(cubemap_size / ((stop - start + 1) / 1000.0) / 1e6))
-
-    # Allocate mem for the result on host side
-    h_odata = np.zeros(cubemap_size * num_layers, dtype='float32')
-    # Copy result from device to host
-    checkCudaErrors(cudart.cudaMemcpy(h_odata, d_data, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost))
-
-    print("Comparing kernel output to expected data")
-    MIN_EPSILON_ERROR = 5.0e-3
-    for i in range(cubemap_size * num_layers):
-        d = h_odata[i] - h_data_ref[i]
-        if math.fabs(d) > MIN_EPSILON_ERROR:
-            print("Failed")
-            sys.exit(-1)
-    print("Passed")
-
-    checkCudaErrors(cudart.cudaDestroyTextureObject(tex))
-    checkCudaErrors(cudart.cudaFree(d_data))
-    checkCudaErrors(cudart.cudaFreeArray(cu_3darray))
-
-if __name__=="__main__":
-    main()
diff --git a/cuda_bindings/examples/0_Introduction/simpleP2P_test.py b/cuda_bindings/examples/0_Introduction/simpleP2P_test.py
deleted file mode 100644
index d4d17de1..00000000
--- a/cuda_bindings/examples/0_Introduction/simpleP2P_test.py
+++ /dev/null
@@ -1,206 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import numpy as np
-import sys
-from cuda import cuda, cudart
-from common import common
-from common.helper_cuda import checkCudaErrors
-
-simplep2p = '''\
-extern "C"
-__global__ void SimpleKernel(float *src, float *dst)
-{
-    // Just a dummy kernel, doing enough for us to verify that everything
-    // worked
-    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
-    dst[idx] = src[idx] * 2.0f;
-}
-'''
-
-def main():
-    print("Starting...")
-
-    # Number of GPUs
-    print("Checking for multiple GPUs...")
-    gpu_n = checkCudaErrors(cudart.cudaGetDeviceCount())
-    print("CUDA-capable device count: {}".format(gpu_n))
-
-    if gpu_n < 2:
-        print("Two or more GPUs with Peer-to-Peer access capability are required")
-        return
-
-    prop = [checkCudaErrors(cudart.cudaGetDeviceProperties(i)) for i in range(gpu_n)]
-    # Check possibility for peer access
-    print("\nChecking GPU(s) for support of peer to peer memory access...")
-
-    p2pCapableGPUs = [-1, -1]
-    for i in range(gpu_n):
-        p2pCapableGPUs[0] = i
-        for j in range(gpu_n):
-            if i == j:
-                continue
-            i_access_j = checkCudaErrors(cudart.cudaDeviceCanAccessPeer(i, j))
-            j_access_i = checkCudaErrors(cudart.cudaDeviceCanAccessPeer(j, i))
-            print("> Peer access from {} (GPU{}) -> {} (GPU{}) : {}\n".format(
-                    prop[i].name, i, prop[j].name, j, "Yes" if i_access_j else "No"))
-            print("> Peer access from {} (GPU{}) -> {} (GPU{}) : {}\n".format(
-                    prop[j].name, j, prop[i].name, i, "Yes" if i_access_j else "No"))
-            if i_access_j and j_access_i:
-                p2pCapableGPUs[1] = j
-                break
-        if p2pCapableGPUs[1] != -1:
-            break
-
-    if p2pCapableGPUs[0] == -1 or p2pCapableGPUs[1] == -1:
-        print("Two or more GPUs with Peer-to-Peer access capability are required.")
-        print("Peer to Peer access is not available amongst GPUs in the system, waiving test.")
-        return
-
-    # Use first pair of p2p capable GPUs detected
-    gpuid = [p2pCapableGPUs[0], p2pCapableGPUs[1]]
-
-    # Enable peer access
-    print("Enabling peer access between GPU{} and GPU{}...".format(gpuid[0], gpuid[1]))
-    checkCudaErrors(cudart.cudaSetDevice(gpuid[0]))
-    checkCudaErrors(cudart.cudaDeviceEnablePeerAccess(gpuid[1], 0))
-    checkCudaErrors(cudart.cudaSetDevice(gpuid[1]))
-    checkCudaErrors(cudart.cudaDeviceEnablePeerAccess(gpuid[0], 0))
-
-    # Allocate buffers
-    buf_size = 1024 * 1024 * 16 * np.dtype(np.float32).itemsize
-    print("Allocating buffers ({}MB on GPU{}, GPU{} and CPU Host)...".format(int(buf_size / 1024 / 1024), gpuid[0], gpuid[1]))
-    checkCudaErrors(cudart.cudaSetDevice(gpuid[0]))
-    g0 = checkCudaErrors(cudart.cudaMalloc(buf_size))
-    checkCudaErrors(cudart.cudaSetDevice(gpuid[1]))
-    g1 = checkCudaErrors(cudart.cudaMalloc(buf_size))
-    h0 = checkCudaErrors(cudart.cudaMallocHost(buf_size)) # Automatically portable with UVA
-
-    # Create CUDA event handles
-    print("Creating event handles...")
-    eventflags = cudart.cudaEventBlockingSync
-    start_event = checkCudaErrors(cudart.cudaEventCreateWithFlags(eventflags))
-    stop_event = checkCudaErrors(cudart.cudaEventCreateWithFlags(eventflags))
-
-    # P2P memcopy() benchmark
-    checkCudaErrors(cudart.cudaEventRecord(start_event, cudart.cudaStream_t(0)))
-
-    for i in range(100):
-        # With UVA we don't need to specify source and target devices, the
-        # runtime figures this out by itself from the pointers
-        # Ping-pong copy between GPUs
-        if i % 2 == 0:
-            checkCudaErrors(cudart.cudaMemcpy(g1, g0, buf_size, cudart.cudaMemcpyKind.cudaMemcpyDefault))
-        else:
-            checkCudaErrors(cudart.cudaMemcpy(g0, g1, buf_size, cudart.cudaMemcpyKind.cudaMemcpyDefault))
-
-    checkCudaErrors(cudart.cudaEventRecord(stop_event, cudart.cudaStream_t(0)))
-    checkCudaErrors(cudart.cudaEventSynchronize(stop_event))
-    time_memcpy = checkCudaErrors(cudart.cudaEventElapsedTime(start_event, stop_event))
-    print("cudaMemcpyPeer / cudaMemcpy between GPU{} and GPU{}: {:.2f}GB/s".format(gpuid[0], gpuid[1],
-            (1.0 / (time_memcpy / 1000.0)) * ((100.0 * buf_size)) / 1024.0 / 1024.0 / 1024.0))
-
-    # Prepare host buffer and copy to GPU 0
-    print("Preparing host buffer and memcpy to GPU{}...".format(gpuid[0]))
-
-    h0_local = (ctypes.c_float * int(buf_size / np.dtype(np.float32).itemsize)).from_address(h0)
-    for i in range(int(buf_size / np.dtype(np.float32).itemsize)):
-        h0_local[i] = i % 4096
-
-    checkCudaErrors(cudart.cudaSetDevice(gpuid[0]))
-    checkCudaErrors(cudart.cudaMemcpy(g0, h0, buf_size, cudart.cudaMemcpyKind.cudaMemcpyDefault))
-
-    # Kernel launch configuration
-    threads = cudart.dim3()
-    threads.x = 512
-    threads.y = 1
-    threads.z = 1
-    blocks = cudart.dim3()
-    blocks.x = (buf_size / np.dtype(np.float32).itemsize) / threads.x
-    blocks.y = 1
-    blocks.z = 1
-
-    # Run kernel on GPU 1, reading input from the GPU 0 buffer, writing
-    # output to the GPU 1 buffer
-    print("Run kernel on GPU{}, taking source data from GPU{} and writing to GPU{}...".format(
-          gpuid[1], gpuid[0], gpuid[1]))
-    checkCudaErrors(cudart.cudaSetDevice(gpuid[1]))
-
-    kernelHelper = [None]*2
-    _simpleKernel = [None]*2
-    kernelArgs = [None]*2
-
-    kernelHelper[1] = common.KernelHelper(simplep2p, gpuid[1])
-    _simpleKernel[1] = kernelHelper[1].getFunction(b'SimpleKernel')
-    kernelArgs[1] = ((g0, g1), (ctypes.c_void_p, ctypes.c_void_p))
-    checkCudaErrors(cuda.cuLaunchKernel(_simpleKernel[1],
-                                        blocks.x, blocks.y, blocks.z,
-                                        threads.x, threads.y, threads.z,
-                                        0, 0,
-                                        kernelArgs[1], 0))
-
-    checkCudaErrors(cudart.cudaDeviceSynchronize())
-
-    # Run kernel on GPU 0, reading input from the GPU 1 buffer, writing
-    # output to the GPU 0 buffer
-    print("Run kernel on GPU{}, taking source data from GPU{} and writing to GPU{}...".format(
-          gpuid[0], gpuid[1], gpuid[0]))
-    checkCudaErrors(cudart.cudaSetDevice(gpuid[0]))
-    kernelHelper[0] = common.KernelHelper(simplep2p, gpuid[0])
-    _simpleKernel[0] = kernelHelper[0].getFunction(b'SimpleKernel')
-    kernelArgs[0] = ((g1, g0), (ctypes.c_void_p, ctypes.c_void_p))
-    checkCudaErrors(cuda.cuLaunchKernel(_simpleKernel[0],
-                                        blocks.x, blocks.y, blocks.z,
-                                        threads.x, threads.y, threads.z,
-                                        0, 0,
-                                        kernelArgs[0], 0))
-
-    checkCudaErrors(cudart.cudaDeviceSynchronize())
-
-    # Copy data back to host and verify
-    print("Copy data back to host from GPU{} and verify results...".format(gpuid[0]))
-    checkCudaErrors(cudart.cudaMemcpy(h0, g0, buf_size, cudart.cudaMemcpyKind.cudaMemcpyDefault))
-
-    error_count = 0
-
-    for i in range(int(buf_size / np.dtype(np.float32).itemsize)):
-        # Re-generate input data and apply 2x '* 2.0f' computation of both
-        # kernel runs
-        if h0_local[i] != float(i % 4096) * 2.0 * 2.0:
-            print("Verification error @ element {}: val = {}, ref = {}\n".format(i, h0_local[i], (float(i%4096)*2.0*2.0)))
-            error_count += 1
-            if error_count > 10:
-                break
-
-    # Disable peer access (also unregisters memory for non-UVA cases)
-    print("Disabling peer access...")
-    checkCudaErrors(cudart.cudaSetDevice(gpuid[0]))
-    checkCudaErrors(cudart.cudaDeviceDisablePeerAccess(gpuid[1]))
-    checkCudaErrors(cudart.cudaSetDevice(gpuid[1]))
-    checkCudaErrors(cudart.cudaDeviceDisablePeerAccess(gpuid[0]))
-
-    # Cleanup and shutdown
-    print("Shutting down...")
-    checkCudaErrors(cudart.cudaEventDestroy(start_event))
-    checkCudaErrors(cudart.cudaEventDestroy(stop_event))
-    checkCudaErrors(cudart.cudaSetDevice(gpuid[0]))
-    checkCudaErrors(cudart.cudaFree(g0))
-    checkCudaErrors(cudart.cudaSetDevice(gpuid[1]))
-    checkCudaErrors(cudart.cudaFree(g1))
-    checkCudaErrors(cudart.cudaFreeHost(h0))
-
-    for i in range(gpu_n):
-        checkCudaErrors(cudart.cudaSetDevice(i))
-
-    if error_count != 0:
-        print("Test failed!")
-        sys.exit(-1)
-    print("Test passed!")
-
-if __name__=="__main__":
-    main()
diff --git a/cuda_bindings/examples/0_Introduction/simpleZeroCopy_test.py b/cuda_bindings/examples/0_Introduction/simpleZeroCopy_test.py
deleted file mode 100644
index 834eb32a..00000000
--- a/cuda_bindings/examples/0_Introduction/simpleZeroCopy_test.py
+++ /dev/null
@@ -1,157 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import math
-import numpy as np
-import random as rnd
-import sys
-from cuda import cuda, cudart
-from common import common
-from common.helper_cuda import checkCudaErrors
-from common.helper_string import checkCmdLineFlag
-
-simpleZeroCopy = '''\
-extern "C"
-__global__ void vectorAddGPU(float *a, float *b, float *c, int N)
-{
-    int idx = blockIdx.x*blockDim.x + threadIdx.x;
-
-    if (idx < N)
-    {
-        c[idx] = a[idx] + b[idx];
-    }
-}
-'''
-
-def main():
-    idev = 0
-    bPinGenericMemory = False
-
-    if checkCmdLineFlag("help"):
-        print("Usage:  simpleZeroCopy [OPTION]\n")
-        print("Options:")
-        print("  device=[device #]  Specify the device to be used")
-        print("  use_generic_memory (optional) use generic page-aligned for system memory")
-        return
-
-    # Get the device selected by the user or default to 0, and then set it.
-    if checkCmdLineFlag("device="):
-        deviceCount = cudart.cudaGetDeviceCount()
-        idev = int(getCmdLineArgumentInt("device="))
-
-        if idev >= deviceCount or idev < 0:
-            print("Device number {} is invalid, will use default CUDA device 0.".format(idev))
-            idev = 0
-
-    if checkCmdLineFlag("use_generic_memory"):
-        bPinGenericMemory = True
-
-    if bPinGenericMemory:
-        print("> Using Generic System Paged Memory (malloc)");
-    else:
-        print("> Using CUDA Host Allocated (cudaHostAlloc)");
-
-    checkCudaErrors(cudart.cudaSetDevice(idev))
-
-    # Verify the selected device supports mapped memory and set the device flags for mapping host memory.
-    deviceProp = checkCudaErrors(cudart.cudaGetDeviceProperties(idev))
-
-    if not deviceProp.canMapHostMemory:
-        print("Device {} does not support mapping CPU host memory!".format(idev))
-        return
-
-    checkCudaErrors(cudart.cudaSetDeviceFlags(cudart.cudaDeviceMapHost))
-
-    # Allocate mapped CPU memory
-
-    nelem = 1048576
-    num_bytes = nelem*np.dtype(np.float32).itemsize
-
-    if bPinGenericMemory:
-        a = np.empty(nelem, dtype=np.float32)
-        b = np.empty(nelem, dtype=np.float32)
-        c = np.empty(nelem, dtype=np.float32)
-
-        checkCudaErrors(cudart.cudaHostRegister(a, num_bytes, cudart.cudaHostRegisterMapped))
-        checkCudaErrors(cudart.cudaHostRegister(b, num_bytes, cudart.cudaHostRegisterMapped))
-        checkCudaErrors(cudart.cudaHostRegister(c, num_bytes, cudart.cudaHostRegisterMapped))
-    else:
-        flags = cudart.cudaHostAllocMapped
-        a_ptr = checkCudaErrors(cudart.cudaHostAlloc(num_bytes, flags))
-        b_ptr = checkCudaErrors(cudart.cudaHostAlloc(num_bytes, flags))
-        c_ptr = checkCudaErrors(cudart.cudaHostAlloc(num_bytes, flags))
-
-        a = (ctypes.c_float * nelem).from_address(a_ptr)
-        b = (ctypes.c_float * nelem).from_address(b_ptr)
-        c = (ctypes.c_float * nelem).from_address(c_ptr)
-
-    # Initialize the vectors
-    for n in range(nelem):
-        a[n] = rnd.random()
-        b[n] = rnd.random()
-
-    # Get the device pointers for the pinned CPU memory mapped into the GPU memory space
-    d_a = checkCudaErrors(cudart.cudaHostGetDevicePointer(a, 0))
-    d_b = checkCudaErrors(cudart.cudaHostGetDevicePointer(b, 0))
-    d_c = checkCudaErrors(cudart.cudaHostGetDevicePointer(c, 0))
-
-    # Call the GPU kernel using the CPU pointers residing in CPU mapped memory
-    print("> vectorAddGPU kernel will add vectors using mapped CPU memory...")
-    block = cudart.dim3()
-    block.x = 256
-    block.y = 1
-    block.z = 1
-    grid = cudart.dim3()
-    grid.x = math.ceil(nelem/float(block.x))
-    grid.y = 1
-    grid.z = 1
-    kernelHelper = common.KernelHelper(simpleZeroCopy, idev)
-    _vectorAddGPU = kernelHelper.getFunction(b'vectorAddGPU')
-    kernelArgs = ((d_a, d_b, d_c, nelem),(ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int))
-    checkCudaErrors(cuda.cuLaunchKernel(_vectorAddGPU,
-                                        grid.x, grid.y, grid.z,
-                                        block.x, block.y, block.z,
-                                        0, cuda.CU_STREAM_LEGACY,
-                                        kernelArgs, 0))
-    checkCudaErrors(cudart.cudaDeviceSynchronize())
-
-    print("> Checking the results from vectorAddGPU() ...");
-
-    # Compare the results
-    errorNorm = 0.0
-    refNorm = 0.0
-
-    for n in range(nelem):
-        ref = a[n] + b[n]
-        diff = c[n] - ref
-        errorNorm += diff*diff
-        refNorm += ref*ref
-
-    errorNorm = math.sqrt(errorNorm)
-    refNorm = math.sqrt(refNorm)
-
-    # Memory clean up
-
-    print("Releasing CPU memory...")
-
-    if bPinGenericMemory:
-        checkCudaErrors(cudart.cudaHostUnregister(a))
-        checkCudaErrors(cudart.cudaHostUnregister(b))
-        checkCudaErrors(cudart.cudaHostUnregister(c))
-    else:
-        checkCudaErrors(cudart.cudaFreeHost(a))
-        checkCudaErrors(cudart.cudaFreeHost(b))
-        checkCudaErrors(cudart.cudaFreeHost(c))
-
-    if errorNorm/refNorm >= 1.0e-7:
-        print("FAILED")
-        sys.exit(-1)
-    print("PASSED")
-
-if __name__=="__main__":
-    main()
diff --git a/cuda_bindings/examples/0_Introduction/systemWideAtomics_test.py b/cuda_bindings/examples/0_Introduction/systemWideAtomics_test.py
deleted file mode 100644
index f34f3195..00000000
--- a/cuda_bindings/examples/0_Introduction/systemWideAtomics_test.py
+++ /dev/null
@@ -1,236 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import numpy as np
-import sys
-import os
-from cuda import cuda, cudart
-from common import common
-from common.helper_cuda import checkCudaErrors, findCudaDevice
-
-systemWideAtomics =  '''\
-#define LOOP_NUM 50
-
-extern "C"
-__global__ void atomicKernel(int *atom_arr) {
-    unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
-
-    for (int i = 0; i < LOOP_NUM; i++) {
-        // Atomic addition
-        atomicAdd_system(&atom_arr[0], 10);
-
-        // Atomic exchange
-        atomicExch_system(&atom_arr[1], tid);
-
-        // Atomic maximum
-        atomicMax_system(&atom_arr[2], tid);
-
-        // Atomic minimum
-        atomicMin_system(&atom_arr[3], tid);
-
-        // Atomic increment (modulo 17+1)
-        atomicInc_system((unsigned int *)&atom_arr[4], 17);
-
-        // Atomic decrement
-        atomicDec_system((unsigned int *)&atom_arr[5], 137);
-
-        // Atomic compare-and-swap
-        atomicCAS_system(&atom_arr[6], tid - 1, tid);
-
-        // Bitwise atomic instructions
-
-        // Atomic AND
-        atomicAnd_system(&atom_arr[7], 2 * tid + 7);
-
-        // Atomic OR
-        atomicOr_system(&atom_arr[8], 1 << tid);
-
-        // Atomic XOR
-        atomicXor_system(&atom_arr[9], tid);
-  }
-}
-'''
-
-LOOP_NUM = 50 
-
-#! Compute reference data set
-#! Each element is multiplied with the number of threads / array length
-#! @param reference  reference data, computed but preallocated
-#! @param idata      input data as provided to device
-#! @param len        number of elements in reference / idata
-def verify(testData, length):
-    val = 0
-
-    for i in range(length * LOOP_NUM):
-        val += 10
-
-    if val != testData[0]:
-       print(f"atomicAdd failed val = {val} testData = {testData[0]}")
-       return False
-
-    val = 0
-    found = False
-    for i in range(length):
-        # second element should be a member of [0, len)
-        if i == testData[1]:
-            found = True
-            break
-
-    if not found:
-        print("atomicExch failed") 
-        return False
-
-    val = -(1 << 8)
-
-    for i in range(length):
-        # third element should be len-1
-        val = max(val, i)
-
-    if val != testData[2]:
-        print("atomicMax failed")
-        return False
-
-    val = 1 << 8
-
-    for i in range(length):
-        val = min(val, i)
-
-    if val != testData[3]:
-        print("atomicMin failed")
-        return False
-
-    limit = 17
-    val = 0
-
-    for i in range(length * LOOP_NUM):
-        val = 0 if val >= limit else val + 1
-
-    if val != testData[4]:
-        print("atomicInc failed")
-        return False
-
-    limit = 137
-    val = 0
-
-    for i in range(length * LOOP_NUM):
-        val = limit if (val == 0) or (val > limit) else val - 1
-
-    if val != testData[5]:
-        print("atomicDec failed")
-        return False
-
-    found = False
-
-    for i in range(length):
-        # seventh element should be a member of [0, len)
-        if i == testData[6]:
-            found = True
-            break
-
-    if not found:
-        print("atomicCAS failed")
-        return False
-
-    val = 0xff
-
-    for i in range(length):
-        # 8th element should be 1
-        val &= (2 * i + 7)
-
-    if val != testData[7]:
-        print("atomicAnd failed")
-        return False
-
-    # 9th element should be 0xff
-    val = -1
-    if val != testData[8]:
-        print("atomicOr failed")
-        return False
-
-    val = 0xff
-
-    for i in range(length):
-        # 11th element should be 0xff
-        val ^= i;
-
-    if val != testData[9]:
-        print("atomicXor failed")
-        return False
-
-    return True
-
-def main():
-    if os.name == 'nt':
-        print("Atomics not supported on Windows")
-        return
-
-    # set device
-    dev_id = findCudaDevice()
-    device_prop = checkCudaErrors(cudart.cudaGetDeviceProperties(dev_id))
-
-    if not device_prop.managedMemory:
-        # This samples requires being run on a device that supports Unified Memory
-        print("Unified Memory not supported on this device")
-        return
-
-    if device_prop.computeMode == cudart.cudaComputeMode.cudaComputeModeProhibited:
-        # This sample requires being run with a default or process exclusive mode
-        print("This sample requires a device in either default or process exclusive mode")
-        return
-    
-    if device_prop.major < 6:
-        print("Requires a minimum CUDA compute 6.0 capability, waiving testing.")
-        return
-
-    numThreads = 256
-    numBlocks = 64
-    numData = 10
-
-    if device_prop.pageableMemoryAccess:
-        print("CAN access pageable memory")
-        atom_arr_h = (ctypes.c_int * numData)(0)
-        atom_arr = ctypes.addressof(atom_arr_h)
-    else:
-        print("CANNOT access pageable memory")
-        atom_arr = checkCudaErrors(cudart.cudaMallocManaged(np.dtype(np.int32).itemsize * numData, cudart.cudaMemAttachGlobal))
-        atom_arr_h = (ctypes.c_int * numData).from_address(atom_arr)
-
-    for i in range(numData):
-        atom_arr_h[i] = 0
-
-    # To make the AND and XOR tests generate something other than 0...
-    atom_arr_h[7] = atom_arr_h[9] = 0xff
-
-    kernelHelper = common.KernelHelper(systemWideAtomics, dev_id)
-    _atomicKernel = kernelHelper.getFunction(b'atomicKernel')
-    kernelArgs = ((atom_arr,),
-                  (ctypes.c_void_p,))
-    checkCudaErrors(cuda.cuLaunchKernel(_atomicKernel,
-                                        numBlocks, 1, 1,                         # grid dim
-                                        numThreads, 1, 1,                        # block dim
-                                        0, cuda.CU_STREAM_LEGACY,                # shared mem and stream
-                                        kernelArgs, 0))                          # arguments
-    # NOTE: Python doesn't have an equivalent system atomic operations
-    # atomicKernel_CPU(atom_arr_h, numBlocks * numThreads)
-
-    checkCudaErrors(cudart.cudaDeviceSynchronize())
-
-    # Compute & verify reference solution
-    testResult = verify(atom_arr_h, numThreads * numBlocks)
-
-    if device_prop.pageableMemoryAccess:
-        pass
-    else:
-        checkCudaErrors(cudart.cudaFree(atom_arr))
-
-    print("systemWideAtomics completed, returned {}".format("OK" if testResult else "ERROR!"))
-    if not testResult:
-        sys.exit(-1)
-
-if __name__=="__main__":
-    main()
diff --git a/cuda_bindings/examples/0_Introduction/vectorAddDrv_test.py b/cuda_bindings/examples/0_Introduction/vectorAddDrv_test.py
deleted file mode 100644
index 32934040..00000000
--- a/cuda_bindings/examples/0_Introduction/vectorAddDrv_test.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import math
-import numpy as np
-from cuda import cuda
-from common import common
-from common.helper_cuda import checkCudaErrors, findCudaDeviceDRV
-
-vectorAddDrv = '''\
-/* Vector addition: C = A + B.
- *
- * This sample is a very basic sample that implements element by element
- * vector addition. It is the same as the sample illustrating Chapter 3
- * of the programming guide with some additions like error checking.
- *
- */
-
-// Device code
-extern "C" __global__ void VecAdd_kernel(const float *A, const float *B, float *C, int N)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-
-    if (i < N)
-        C[i] = A[i] + B[i];
-}
-'''
-
-def main():
-    print("Vector Addition (Driver API)")
-    N = 50000
-    devID = 0
-    size = N * np.dtype(np.float32).itemsize
-
-    # Initialize
-    checkCudaErrors(cuda.cuInit(0));
-
-    cuDevice = findCudaDeviceDRV()
-    # Create context
-    cuContext = checkCudaErrors(cuda.cuCtxCreate(0, cuDevice))
-
-    uvaSupported = checkCudaErrors(cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cuDevice))
-    if not uvaSupported:
-        print("Accessing pageable memory directly requires UVA")
-        return
-
-    kernelHelper = common.KernelHelper(vectorAddDrv, int(cuDevice))
-    _VecAdd_kernel = kernelHelper.getFunction(b'VecAdd_kernel')
-
-    # Allocate input vectors h_A and h_B in host memory
-    h_A = np.random.rand(size).astype(dtype=np.float32)
-    h_B = np.random.rand(size).astype(dtype=np.float32)
-    h_C = np.random.rand(size).astype(dtype=np.float32)
-
-    # Allocate vectors in device memory
-    d_A = checkCudaErrors(cuda.cuMemAlloc(size))
-    d_B = checkCudaErrors(cuda.cuMemAlloc(size))
-    d_C = checkCudaErrors(cuda.cuMemAlloc(size))
-
-    # Copy vectors from host memory to device memory
-    checkCudaErrors(cuda.cuMemcpyHtoD(d_A, h_A, size))
-    checkCudaErrors(cuda.cuMemcpyHtoD(d_B, h_B, size))
-
-    if True:
-        # Grid/Block configuration
-        threadsPerBlock = 256
-        blocksPerGrid   = (N + threadsPerBlock - 1) / threadsPerBlock
-
-        kernelArgs = ((d_A, d_B, d_C, N),
-                      (None, None, None, ctypes.c_int))
-
-        # Launch the CUDA kernel
-        checkCudaErrors(cuda.cuLaunchKernel(_VecAdd_kernel,
-                                            blocksPerGrid, 1, 1,
-                                            threadsPerBlock, 1, 1,
-                                            0, 0,
-                                            kernelArgs, 0))
-    else:
-        pass
-
-    # Copy result from device memory to host memory
-    # h_C contains the result in host memory
-    checkCudaErrors(cuda.cuMemcpyDtoH(h_C, d_C, size))
-
-    for i in range(N):
-        sum_all = h_A[i] + h_B[i]
-        if math.fabs(h_C[i] - sum_all) > 1e-7:
-            break
-
-    # Free device memory
-    checkCudaErrors(cuda.cuMemFree(d_A))
-    checkCudaErrors(cuda.cuMemFree(d_B))
-    checkCudaErrors(cuda.cuMemFree(d_C))
-
-    checkCudaErrors(cuda.cuCtxDestroy(cuContext))
-    print("{}".format("Result = PASS" if i+1 == N else "Result = FAIL"))
-    if i+1 != N:
-        sys.exit(-1)
-
-if __name__ == "__main__":
-    main()
diff --git a/cuda_bindings/examples/0_Introduction/vectorAddMMAP_test.py b/cuda_bindings/examples/0_Introduction/vectorAddMMAP_test.py
deleted file mode 100644
index 8af4a833..00000000
--- a/cuda_bindings/examples/0_Introduction/vectorAddMMAP_test.py
+++ /dev/null
@@ -1,267 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import math
-import numpy as np
-import sys
-from cuda import cuda
-from common import common
-from common.helper_cuda import checkCudaErrors, findCudaDeviceDRV
-
-vectorAddMMAP = '''\
-/* Vector addition: C = A + B.
- *
- * This sample is a very basic sample that implements element by element
- * vector addition. It is the same as the sample illustrating Chapter 3
- * of the programming guide with some additions like error checking.
- *
- */
-
-// Device code
-extern "C" __global__ void VecAdd_kernel(const float *A, const float *B, float *C, int N)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-
-    if (i < N)
-        C[i] = A[i] + B[i];
-}
-'''
-
-def round_up(x, y):
-    return int((x - 1)/y + 1) * y
-
-def getBackingDevices(cuDevice):
-    num_devices = checkCudaErrors(cuda.cuDeviceGetCount())
-
-    backingDevices = [cuDevice]
-    for dev in range(num_devices):
-        # The mapping device is already in the backingDevices vector
-        if int(dev) == int(cuDevice):
-            continue
-
-        # Only peer capable devices can map each others memory
-        capable = checkCudaErrors(cuda.cuDeviceCanAccessPeer(cuDevice, dev))
-        if not capable:
-            continue
-
-        # The device needs to support virtual address management for the required apis to work
-        attributeVal = checkCudaErrors(cuda.cuDeviceGetAttribute(
-                                            cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED,
-                                            cuDevice))
-        if attributeVal == 0:
-            continue
-
-        backingDevices.append(cuda.CUdevice(dev))
-    return backingDevices
-
-def simpleMallocMultiDeviceMmap(size, residentDevices, mappingDevices, align = 0):
-    min_granularity = 0
-
-    # Setup the properties common for all the chunks
-    # The allocations will be device pinned memory.
-    # This property structure describes the physical location where the memory will be allocated via cuMemCreate allong with additional properties
-    # In this case, the allocation will be pinnded device memory local to a given device.
-    prop = cuda.CUmemAllocationProp()
-    prop.type = cuda.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED
-    prop.location.type = cuda.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
-
-    # Get the minimum granularity needed for the resident devices
-    # (the max of the minimum granularity of each participating device)
-    for device in residentDevices:
-        prop.location.id = device
-        status, granularity = cuda.cuMemGetAllocationGranularity(prop, cuda.CUmemAllocationGranularity_flags.CU_MEM_ALLOC_GRANULARITY_MINIMUM)
-        if status != cuda.CUresult.CUDA_SUCCESS:
-            return status, None, None
-        if min_granularity < granularity:
-            min_granularity = granularity
-
-    # Get the minimum granularity needed for the accessing devices
-    # (the max of the minimum granularity of each participating device)
-    for device in mappingDevices:
-        prop.location.id = device
-        status, granularity = cuda.cuMemGetAllocationGranularity(prop, cuda.CUmemAllocationGranularity_flags.CU_MEM_ALLOC_GRANULARITY_MINIMUM)
-        if status != cuda.CUresult.CUDA_SUCCESS:
-            return status, None, None
-        if min_granularity < granularity:
-            min_granularity = granularity
-
-    # Round up the size such that we can evenly split it into a stripe size tha meets the granularity requirements
-    # Essentially size = N * residentDevices.size() * min_granularity is the requirement,
-    # since each piece of the allocation will be stripeSize = N * min_granularity
-    # and the min_granularity requirement applies to each stripeSize piece of the allocation.
-    size = round_up(size, len(residentDevices) * min_granularity)
-    stripeSize = size / len(residentDevices)
-
-    # Return the rounded up size to the caller for use in the free
-    allocationSize = size
-
-    # Reserve the required contiguous VA space for the allocations
-    status, dptr = cuda.cuMemAddressReserve(size, align, cuda.CUdeviceptr(0), 0)
-    if status != cuda.CUresult.CUDA_SUCCESS:
-        simpleFreeMultiDeviceMmap(dptr, size)
-        return status, None, None
-
-    # Create and map the backings on each gpu
-    # note: reusing CUmemAllocationProp prop from earlier with prop.type & prop.location.type already specified.
-    for idx in range(len(residentDevices)):
-        # Set the location for this chunk to this device
-        prop.location.id = residentDevices[idx]
-
-        # Create the allocation as a pinned allocation on this device
-        status, allocationHandle = cuda.cuMemCreate(stripeSize, prop, 0)
-        if status != cuda.CUresult.CUDA_SUCCESS:
-            simpleFreeMultiDeviceMmap(dptr, size)
-            return status, None, None
-
-        # Assign the chunk to the appropriate VA range and release the handle.
-        # After mapping the memory, it can be referenced by virtual address.
-        # Since we do not need to make any other mappings of this memory or export it,
-        # we no longer need and can release the allocationHandle.
-        # The allocation will be kept live until it is unmapped.
-        status, = cuda.cuMemMap(int(dptr) + (stripeSize * idx), stripeSize, 0, allocationHandle, 0)
-        
-        # the handle needs to be released even if the mapping failed.
-        status2, = cuda.cuMemRelease(allocationHandle)
-        if status != cuda.CUresult.CUDA_SUCCESS:
-            # cuMemRelease should not have failed here
-            # as the handle was just allocated successfully
-            # however return an error if it does.
-            status = status2
-
-        # Cleanup in case of any mapping failures.
-        if status != cuda.CUresult.CUDA_SUCCESS:
-            simpleFreeMultiDeviceMmap(dptr, size)
-            return status, None, None
-
-    # Each accessDescriptor will describe the mapping requirement for a single device
-    accessDescriptors = [cuda.CUmemAccessDesc()] * len(mappingDevices)
-
-    # Prepare the access descriptor array indicating where and how the backings should be visible.
-    for idx in range(len(mappingDevices)):
-        # Specify which device we are adding mappings for.
-        accessDescriptors[idx].location.type = cuda.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
-        accessDescriptors[idx].location.id = mappingDevices[idx]
-
-        # Specify both read and write access.
-        accessDescriptors[idx].flags = cuda.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE
-
-    # Apply the access descriptors to the whole VA range.
-    status, = cuda.cuMemSetAccess(dptr, size, accessDescriptors, len(accessDescriptors))
-    if status != cuda.CUresult.CUDA_SUCCESS:
-        simpleFreeMultiDeviceMmap(dptr, size)
-        return status, None, None
-
-    return (status, dptr, allocationSize)
-
-def simpleFreeMultiDeviceMmap(dptr, size):
-    # Unmap the mapped virtual memory region
-    # Since the handles to the mapped backing stores have already been released
-    # by cuMemRelease, and these are the only/last mappings referencing them,
-    # The backing stores will be freed.
-    # Since the memory has been unmapped after this call, accessing the specified
-    # va range will result in a fault (unitll it is remapped).
-    status = cuda.cuMemUnmap(dptr, size);
-    if status[0] != cuda.CUresult.CUDA_SUCCESS:
-        return status
-
-    # Free the virtual address region.  This allows the virtual address region
-    # to be reused by future cuMemAddressReserve calls.  This also allows the
-    # virtual address region to be used by other allocation made through
-    # opperating system calls like malloc & mmap.
-    status = cuda.cuMemAddressFree(dptr, size)
-    if status[0] != cuda.CUresult.CUDA_SUCCESS:
-        return status
-    return status
-
-def main():
-    print("Vector Addition (Driver API)")
-    N = 50000
-    size = N * np.dtype(np.float32).itemsize
-
-    # Initialize
-    checkCudaErrors(cuda.cuInit(0))
-
-    cuDevice = findCudaDeviceDRV()
-
-    # Check that the selected device supports virtual address management
-    attributeVal = checkCudaErrors(cuda.cuDeviceGetAttribute(
-                        cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED,
-                        cuDevice))
-    print("Device {} VIRTUAL ADDRESS MANAGEMENT SUPPORTED = {}.".format(cuDevice, attributeVal))
-    if not attributeVal:
-        print("Device {} doesn't support VIRTUAL ADDRESS MANAGEMENT.".format(cuDevice))
-        return
-
-    # The vector addition happens on cuDevice, so the allocations need to be mapped there.
-    mappingDevices = [cuDevice]
-
-    # Collect devices accessible by the mapping device (cuDevice) into the backingDevices vector.
-    backingDevices = getBackingDevices(cuDevice)
-
-    # Create context
-    cuContext = checkCudaErrors(cuda.cuCtxCreate(0, cuDevice))
-
-    kernelHelper = common.KernelHelper(vectorAddMMAP, int(cuDevice))
-    _VecAdd_kernel = kernelHelper.getFunction(b'VecAdd_kernel')
-
-    # Allocate input vectors h_A and h_B in host memory
-    h_A = np.random.rand(size).astype(dtype=np.float32)
-    h_B = np.random.rand(size).astype(dtype=np.float32)
-    h_C = np.random.rand(size).astype(dtype=np.float32)
-
-    # Allocate vectors in device memory
-    # note that a call to cuCtxEnablePeerAccess is not needed even though
-    # the backing devices and mapping device are not the same.
-    # This is because the cuMemSetAccess call explicitly specifies
-    # the cross device mapping.
-    # cuMemSetAccess is still subject to the constraints of cuDeviceCanAccessPeer
-    # for cross device mappings (hence why we checked cuDeviceCanAccessPeer earlier).
-    d_A, allocationSize = checkCudaErrors(simpleMallocMultiDeviceMmap(size, backingDevices, mappingDevices))
-    d_B, _ = checkCudaErrors(simpleMallocMultiDeviceMmap(size, backingDevices, mappingDevices))
-    d_C, _ = checkCudaErrors(simpleMallocMultiDeviceMmap(size, backingDevices, mappingDevices))
-
-    # Copy vectors from host memory to device memory
-    checkCudaErrors(cuda.cuMemcpyHtoD(d_A, h_A, size))
-    checkCudaErrors(cuda.cuMemcpyHtoD(d_B, h_B, size))
-
-    # Grid/Block configuration
-    threadsPerBlock = 256
-    blocksPerGrid   = (N + threadsPerBlock - 1) / threadsPerBlock
-
-    kernelArgs = ((d_A, d_B, d_C, N),
-                  (None, None, None, ctypes.c_int))
-
-    # Launch the CUDA kernel
-    checkCudaErrors(cuda.cuLaunchKernel(_VecAdd_kernel,
-                                        blocksPerGrid, 1, 1,
-                                        threadsPerBlock, 1, 1,
-                                        0, 0,
-                                        kernelArgs, 0))
-
-    # Copy result from device memory to host memory
-    # h_C contains the result in host memory
-    checkCudaErrors(cuda.cuMemcpyDtoH(h_C, d_C, size))
-
-    # Verify result
-    for i in range(N):
-        sum_all = h_A[i] + h_B[i]
-        if math.fabs(h_C[i] - sum_all) > 1e-7:
-            break
-
-    checkCudaErrors(simpleFreeMultiDeviceMmap(d_A, allocationSize))
-    checkCudaErrors(simpleFreeMultiDeviceMmap(d_B, allocationSize))
-    checkCudaErrors(simpleFreeMultiDeviceMmap(d_C, allocationSize))
-
-    checkCudaErrors(cuda.cuCtxDestroy(cuContext))
-
-    print("{}".format("Result = PASS" if i+1 == N else "Result = FAIL"))
-    if i+1 != N:
-        sys.exit(-1)
-
-if __name__ == "__main__":
-    main()
diff --git a/cuda_bindings/examples/2_Concepts_and_Techniques/streamOrderedAllocation_test.py b/cuda_bindings/examples/2_Concepts_and_Techniques/streamOrderedAllocation_test.py
deleted file mode 100644
index 84af8717..00000000
--- a/cuda_bindings/examples/2_Concepts_and_Techniques/streamOrderedAllocation_test.py
+++ /dev/null
@@ -1,210 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import math
-import numpy as np
-import random as rnd
-import sys
-from cuda import cuda, cudart
-from common import common
-from common.helper_cuda import checkCudaErrors, findCudaDevice
-from common.helper_string import checkCmdLineFlag
-
-streamOrderedAllocation = '''\
-/* Add two vectors on the GPU */
-extern "C"
-__global__ void vectorAddGPU(const float *a, const float *b, float *c, int N)
-{
-    int idx = blockIdx.x*blockDim.x + threadIdx.x;
-
-    if (idx < N) {
-        c[idx] =  a[idx] + b[idx];
-    }
-}
-'''
-
-MAX_ITER = 20
-
-def basicStreamOrderedAllocation(dev, nelem, a, b, c):
-    num_bytes = nelem*np.dtype(np.float32).itemsize
-
-    print("Starting basicStreamOrderedAllocation()")
-    checkCudaErrors(cudart.cudaSetDevice(dev))
-    stream = checkCudaErrors(cudart.cudaStreamCreateWithFlags(cudart.cudaStreamNonBlocking))
-
-    d_a = checkCudaErrors(cudart.cudaMallocAsync(num_bytes, stream))
-    d_b = checkCudaErrors(cudart.cudaMallocAsync(num_bytes, stream))
-    d_c = checkCudaErrors(cudart.cudaMallocAsync(num_bytes, stream))
-    checkCudaErrors(cudart.cudaMemcpyAsync(d_a, a, num_bytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, stream))
-    checkCudaErrors(cudart.cudaMemcpyAsync(d_b, b, num_bytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, stream))
-
-    block = cudart.dim3()
-    block.x = 256
-    block.y = 1
-    block.z = 1
-    grid = cudart.dim3()
-    grid.x = math.ceil(nelem/float(block.x))
-    grid.y = 1
-    grid.z = 1
-
-    kernelArgs = ((d_a, d_b, d_c, nelem),
-                  (ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int))
-    checkCudaErrors(cuda.cuLaunchKernel(_vectorAddGPU,
-                                        grid.x, grid.y, grid.z,    # grid dim
-                                        block.x, block.y, block.z, # block dim
-                                        0, stream,                 # shared mem and stream
-                                        kernelArgs, 0))            # arguments
-
-    checkCudaErrors(cudart.cudaFreeAsync(d_a, stream))
-    checkCudaErrors(cudart.cudaFreeAsync(d_b, stream))
-    checkCudaErrors(cudart.cudaMemcpyAsync(c, d_c, num_bytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, stream))
-    checkCudaErrors(cudart.cudaFreeAsync(d_c, stream))
-    checkCudaErrors(cudart.cudaStreamSynchronize(stream))
-
-    # Compare the results
-    print("> Checking the results from vectorAddGPU() ...");
-    errorNorm = 0.0
-    refNorm = 0.0
-
-    for n in range(nelem):
-        ref = a[n] + b[n]
-        diff = c[n] - ref
-        errorNorm += diff*diff
-        refNorm += ref*ref
-
-    errorNorm = math.sqrt(errorNorm)
-    refNorm = math.sqrt(refNorm)
-
-    if errorNorm/refNorm < 1.e-6:
-        print("basicStreamOrderedAllocation PASSED")
-    
-    checkCudaErrors(cudart.cudaStreamDestroy(stream))
-
-    return errorNorm/refNorm < 1.e-6
-
-# streamOrderedAllocationPostSync(): demonstrates If the application wants the memory to persist in the pool beyond
-# synchronization, then it sets the release threshold on the pool. This way, when the application reaches the "steady state",
-# it is no longer allocating/freeing memory from the OS.
-def streamOrderedAllocationPostSync(dev, nelem, a, b, c) :
-    num_bytes = nelem*np.dtype(np.float32).itemsize
-
-    print("Starting streamOrderedAllocationPostSync()")
-    checkCudaErrors(cudart.cudaSetDevice(dev))
-    stream = checkCudaErrors(cudart.cudaStreamCreateWithFlags(cudart.cudaStreamNonBlocking))
-    start = checkCudaErrors(cudart.cudaEventCreate())
-    end = checkCudaErrors(cudart.cudaEventCreate())
-
-    memPool = checkCudaErrors(cudart.cudaDeviceGetDefaultMemPool(dev))
-    thresholdVal = cuda.cuuint64_t(ctypes.c_uint64(-1).value)
-    # Set high release threshold on the default pool so that cudaFreeAsync will not actually release memory to the system.
-    # By default, the release threshold for a memory pool is set to zero. This implies that the CUDA driver is 
-    # allowed to release a memory chunk back to the system as long as it does not contain any active suballocations.
-    checkCudaErrors(cudart.cudaMemPoolSetAttribute(memPool, cudart.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold, thresholdVal));
-
-    # Record teh start event
-    checkCudaErrors(cudart.cudaEventRecord(start, stream))
-    for i in range(MAX_ITER):
-        d_a = checkCudaErrors(cudart.cudaMallocAsync(num_bytes, stream))
-        d_b = checkCudaErrors(cudart.cudaMallocAsync(num_bytes, stream))
-        d_c = checkCudaErrors(cudart.cudaMallocAsync(num_bytes, stream))
-        checkCudaErrors(cudart.cudaMemcpyAsync(d_a, a, num_bytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, stream))
-        checkCudaErrors(cudart.cudaMemcpyAsync(d_b, b, num_bytes, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, stream))
-
-        block = cudart.dim3()
-        block.x = 256
-        block.y = 1
-        block.z = 1
-        grid = cudart.dim3()
-        grid.x = math.ceil(nelem/float(block.x))
-        grid.y = 1
-        grid.z = 1
-
-        kernelArgs = ((d_a, d_b, d_c, nelem),
-                      (ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int))
-        checkCudaErrors(cuda.cuLaunchKernel(_vectorAddGPU,
-                                            grid.x, grid.y, grid.z,    # grid dim
-                                            block.x, block.y, block.z, # block dim
-                                            0, stream,                 # shared mem and stream
-                                            kernelArgs, 0))            # arguments
-
-        checkCudaErrors(cudart.cudaFreeAsync(d_a, stream))
-        checkCudaErrors(cudart.cudaFreeAsync(d_b, stream))
-        checkCudaErrors(cudart.cudaMemcpyAsync(c, d_c, num_bytes, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, stream))
-        checkCudaErrors(cudart.cudaFreeAsync(d_c, stream))
-        checkCudaErrors(cudart.cudaStreamSynchronize(stream))
-    checkCudaErrors(cudart.cudaEventRecord(end, stream))
-    # Wait for the end event to complete
-    checkCudaErrors(cudart.cudaEventSynchronize(end))
-
-    msecTotal = checkCudaErrors(cudart.cudaEventElapsedTime(start, end))
-    print("Total elapsed time = {} ms over {} iterations".format(msecTotal, MAX_ITER))
-
-    # Compare the results
-    print("> Checking the results from vectorAddGPU() ...")
-    errorNorm = 0.0
-    refNorm = 0.0
-
-    for n in range(nelem):
-        ref = a[n] + b[n]
-        diff = c[n] - ref
-        errorNorm += diff*diff
-        refNorm += ref*ref
-
-    errorNorm = math.sqrt(errorNorm)
-    refNorm = math.sqrt(refNorm)
-
-    if errorNorm/refNorm < 1.e-6:
-        print("streamOrderedAllocationPostSync PASSED")
-
-    checkCudaErrors(cudart.cudaStreamDestroy(stream))
-
-    return errorNorm/refNorm < 1.e-6
-
-def main():
-    cuda.cuInit(0)
-    if checkCmdLineFlag("help"):
-        print("Usage:  streamOrderedAllocation [OPTION]\n");
-        print("Options:");
-        print("  device=[device #]  Specify the device to be used");
-        return
-
-    dev = findCudaDevice()
-
-    version = checkCudaErrors(cudart.cudaDriverGetVersion())
-    if version < 11030:
-        isMemPoolSupported = False
-    else:
-        isMemPoolSupported = checkCudaErrors(cudart.cudaDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED, dev))
-    if not isMemPoolSupported:
-        print("Waiving execution as device does not support Memory Pools")
-        return
-
-    global _vectorAddGPU
-    kernelHelper = common.KernelHelper(streamOrderedAllocation, dev)
-    _vectorAddGPU = kernelHelper.getFunction(b'vectorAddGPU')
-
-    # Allocate CPU memory
-    nelem = 1048576
-    num_bytes = nelem*np.dtype(np.float32).itemsize
-
-    a = np.zeros(nelem, dtype='float32')
-    b = np.zeros(nelem, dtype='float32')
-    c = np.zeros(nelem, dtype='float32')
-    # Initialize the vectors
-    for i in range(nelem):
-        a[i] = rnd.random()
-        b[i] = rnd.random()
-
-    ret1 = basicStreamOrderedAllocation(dev, nelem, a, b, c)
-    ret2 = streamOrderedAllocationPostSync(dev, nelem, a, b, c)
-
-    if not ret1 or not ret2:
-        sys.exit(-1)
-
-if __name__=="__main__":
-    main()
diff --git a/cuda_bindings/examples/3_CUDA_Features/globalToShmemAsyncCopy_test.py b/cuda_bindings/examples/3_CUDA_Features/globalToShmemAsyncCopy_test.py
deleted file mode 100644
index bb6b5cb0..00000000
--- a/cuda_bindings/examples/3_CUDA_Features/globalToShmemAsyncCopy_test.py
+++ /dev/null
@@ -1,1075 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import math
-import numpy as np
-import sys
-import pytest
-from cuda import cuda, cudart
-from enum import Enum
-from common import common
-from common.helper_cuda import checkCudaErrors, findCudaDevice
-from common.helper_string import checkCmdLineFlag, getCmdLineArgumentInt
-
-blockSize = 16
-class kernels(Enum):
-    AsyncCopyMultiStageLargeChunk  = 0
-    AsyncCopyLargeChunk            = 1
-    AsyncCopyLargeChunkAWBarrier   = 2
-    AsyncCopyMultiStageSharedState = 3
-    AsyncCopyMultiStage            = 4
-    AsyncCopySingleStage           = 5
-    Naive                          = 6
-    NaiveLargeChunk                = 7
-
-kernelNames = ["AsyncCopyMultiStageLargeChunk", "AsyncCopyLargeChunk",
-               "AsyncCopyLargeChunkAWBarrier", "AsyncCopyMultiStageSharedState",
-               "AsyncCopyMultiStage", "AsyncCopySingleStage", "Naive", "NaiveLargeChunk"]
-     
-globalToShmemAsyncCopy = '''\
-#line __LINE__
-#if __CUDA_ARCH__ >= 700
-#include <cuda/barrier>
-#endif
-#include <cooperative_groups.h>
-#include <cooperative_groups/reduce.h>
-#include <cuda/pipeline>
-namespace cg = cooperative_groups;
-
-#define BLOCK_SIZE 16 
-#define BLOCK_SIZE_X 16
-
-// Multi Stage memcpy_async pipeline with large chunk copy
-extern "C"
-__global__ void MatrixMulAsyncCopyMultiStageLargeChunk(float* __restrict__ C, 
-                                                       const float* __restrict__ A,
-                                                       const float* __restrict__ B, int wA,
-                                                       int wB) {
-    // Requires BLOCK_SIZE % 4 == 0 
-
-    // Multi-stage pipeline version
-    constexpr size_t maxPipelineStages = 4;
-
-    // Declaration of the shared memory array As used to
-    // store the sub-matrix of A for each stage
-    __shared__ alignas(alignof(float4)) float As[maxPipelineStages][BLOCK_SIZE][BLOCK_SIZE];
-
-    // Declaration of the shared memory array Bs used to
-    // store the sub-matrix of B for each stage
-    __shared__ alignas(alignof(float4)) float Bs[maxPipelineStages][BLOCK_SIZE][BLOCK_SIZE];
-
-    float Csub = 0.0;
-
-    // Index of the first sub-matrix of A processed by the block
-    const int aBegin = wA * (BLOCK_SIZE) * blockIdx.y;
-
-    // Index of the last sub-matrix of A processed by the block
-    const int aEnd   = aBegin + wA - 1;
-
-    // Step size used to iterate through the sub-matrices of A
-    int aStep  = BLOCK_SIZE;
-
-    // Index of the first sub-matrix of B processed by the block
-    const int bBegin = BLOCK_SIZE * blockIdx.x;
-
-    // Step size used to iterate through the sub-matrices of B
-    int bStep  = BLOCK_SIZE * wB;
-
-    const int t4x = threadIdx.x * 4;
-    const auto shape4 = cuda::aligned_size_t<alignof(float4)>(sizeof(float4));
-
-    cuda::pipeline<cuda::thread_scope_thread> pipe = cuda::make_pipeline();
-
-    // Loop over all the sub-matrices of A and B
-    // required to compute the block sub-matrix
-    for (int a = aBegin, b = bBegin, i = 0, aStage = aBegin, bStage = bBegin, iStage = 0; a <= aEnd; a += aStep, b += bStep, ++i ) {
-        // Load the matrices from device memory to shared memory; each thread loads
-        // one element of each matrix
-        for ( ; aStage <= a + aStep * maxPipelineStages ; aStage += aStep, bStage += bStep, ++iStage )
-        {
-            pipe.producer_acquire();
-            if ( aStage <= aEnd && t4x < BLOCK_SIZE )
-            {
-                // Rotating buffer
-                const int j = iStage % maxPipelineStages;
-                cuda::memcpy_async(&As[j][threadIdx.y][t4x], &A[aStage + wA * threadIdx.y + t4x], shape4, pipe);
-                cuda::memcpy_async(&Bs[j][threadIdx.y][t4x], &B[aStage + wA * threadIdx.y + t4x], shape4, pipe);
-            }
-            pipe.producer_commit();
-        }
-
-        pipe.consumer_wait();
-        // Synchronize to make sure the matrices are loaded
-        __syncthreads();
-
-        // Rotating buffer
-        const int j = i % maxPipelineStages;
-
-        // Multiply the two matrices together;
-        // each thread computes one element
-        // of the block sub-matrix
-        #pragma unroll
-        for (int k = 0; k < BLOCK_SIZE; ++k) {
-            Csub += As[j][threadIdx.y][k] * Bs[j][k][threadIdx.x];
-        }
-        pipe.consumer_release();
-
-        // Don't have to synchronize because maxPipelineStages is greater than one
-        // therefore next iteration is loading to a different buffer.
-    }
-
-    // Write the block sub-matrix to device memory;
-    // each thread writes four element
-    int c = wB * BLOCK_SIZE * blockIdx.y + BLOCK_SIZE * blockIdx.x;    
-    C[c + wB * threadIdx.y + threadIdx.x] = Csub;
-}
-
-// Single Stage memcpy_async pipeline with Large copy chunk (float4)
-extern "C"
-__global__ void MatrixMulAsyncCopyLargeChunk(float* __restrict__ C, 
-                                                        const float* __restrict__ A,
-                                                        const float* __restrict__ B, int wA,
-                                                        int wB) {
-    // Requires BLOCK_SIZE % 4 == 0 
-
-    // Declaration of the shared memory array As used to
-    // store the sub-matrix of A
-    __shared__ alignas(alignof(float4)) float As[BLOCK_SIZE][BLOCK_SIZE];
-
-    // Declaration of the shared memory array Bs used to
-    // store the sub-matrix of B
-    __shared__ alignas(alignof(float4)) float Bs[BLOCK_SIZE][BLOCK_SIZE];
-
-    // Index of the first sub-matrix of A processed by the block
-    int aBegin = wA * BLOCK_SIZE * blockIdx.y;
-
-    // Index of the last sub-matrix of A processed by the block
-    int aEnd   = aBegin + wA - 1;
-
-    // Step size used to iterate through the sub-matrices of A
-    int aStep  = BLOCK_SIZE;
-
-    // Index of the first sub-matrix of B processed by the block
-    int bBegin = BLOCK_SIZE * blockIdx.x;
-
-    // Step size used to iterate through the sub-matrices of B
-    int bStep  = BLOCK_SIZE * wB;
-
-    // Single-stage pipeline version
-    float Csub = 0.0;
-
-    const int t4x = threadIdx.x * 4;
-    const auto shape4 = cuda::aligned_size_t<alignof(float4)>(sizeof(float4));
-    cuda::pipeline<cuda::thread_scope_thread> pipe = cuda::make_pipeline();
-
-    // Loop over all the sub-matrices of A and B
-    // required to compute the block sub-matrix
-    for (int a = aBegin, b = bBegin; a <= aEnd; a += aStep, b += bStep) {
-        // Load the matrices from device memory to shared memory; 
-        // a subset of threads loads a contiguous chunk of elements.
-
-        // Previously, per-thread:
-        // As[ty][tx] = A[a + wA * ty + tx];
-        // Bs[ty][tx] = B[b + wB * ty + tx];
-
-        // Now, one fourth of the threads load four elements of each matrix
-        if ( t4x < BLOCK_SIZE ) {
-
-            pipe.producer_acquire();
-
-            cuda::memcpy_async(&As[threadIdx.y][t4x], &A[a + wA * threadIdx.y + t4x], shape4, pipe);
-            cuda::memcpy_async(&Bs[threadIdx.y][t4x], &B[a + wA * threadIdx.y + t4x], shape4, pipe);
-
-            pipe.producer_commit();
-            pipe.consumer_wait();
-        }
-
-        // Synchronize to make sure the matrices are loaded
-        __syncthreads();
-
-        // Multiply the two matrices together;
-        // each thread computes one element
-        // of the block sub-matrix
-#pragma unroll
-        for (int k = 0; k < BLOCK_SIZE; ++k) {
-            Csub += As[threadIdx.y][k] * Bs[k][threadIdx.x];
-        }
-
-        pipe.consumer_release();
-
-        // Synchronize to make sure that the preceding
-        // computation is done before overwriting the
-        // shared memory sub-matrix buffers As and Bs in the next iteration.
-        __syncthreads();
-    }
-
-    // Write the block sub-matrix to device memory;
-    // each thread writes four element
-    int c = wB * BLOCK_SIZE * blockIdx.y + BLOCK_SIZE * blockIdx.x;
-    C[c + wB * threadIdx.y + threadIdx.x] = Csub;
-}
-
-// Single Stage memcpy_async pipeline with Large copy chunk (float4) using arrive-wait barrier
-extern "C"
-__global__ void MatrixMulAsyncCopyLargeChunkAWBarrier(float* __restrict__ C, 
-                                                      const float* __restrict__ A,
-                                                      const float* __restrict__ B, int wA,
-                                                      int wB) {
-#if __CUDA_ARCH__ >= 700
-#pragma diag_suppress static_var_with_dynamic_init
-    // Requires BLOCK_SIZE % 4 == 0 
-
-    __shared__ cuda::barrier<cuda::thread_scope_block> bar;
-
-    // Declaration of the shared memory array As used to
-    // store the sub-matrix of A
-    __shared__  alignas(alignof(float4)) float As[BLOCK_SIZE][BLOCK_SIZE];
-
-    // Declaration of the shared memory array Bs used to
-    // store the sub-matrix of B
-    __shared__ alignas(alignof(float4)) float Bs[BLOCK_SIZE][BLOCK_SIZE];
-
-    if (threadIdx.x == 0) {
-        init(&bar, blockDim.x*blockDim.y);
-    }
-    __syncthreads();
-
-    // Index of the first sub-matrix of A processed by the block
-    int aBegin = wA * BLOCK_SIZE * blockIdx.y;
-
-    // Index of the last sub-matrix of A processed by the block
-    int aEnd   = aBegin + wA - 1;
-
-    // Step size used to iterate through the sub-matrices of A
-    int aStep  = BLOCK_SIZE;
-
-    // Index of the first sub-matrix of B processed by the block
-    int bBegin = BLOCK_SIZE * blockIdx.x;
-
-    // Step size used to iterate through the sub-matrices of B
-    int bStep  = BLOCK_SIZE * wB;
-
-    float Csub = 0.0;
-
-    const int t4x = threadIdx.x * 4;
-
-    // Loop over all the sub-matrices of A and B
-    // required to compute the block sub-matrix
-    for (int a = aBegin, b = bBegin; a <= aEnd; a += aStep, b += bStep) {
-        // Load the matrices from device memory to shared memory; 
-        // a subset of threads loads a contiguous chunk of elements.
-
-        // Now, one fourth of the threads load four elements of each matrix
-        if ( t4x < BLOCK_SIZE ) {
-            float4 * const A4s = reinterpret_cast<float4*>(& As[threadIdx.y][t4x]);
-            float4 * const B4s = reinterpret_cast<float4*>(& Bs[threadIdx.y][t4x]);
-            const float4 * const A4  = reinterpret_cast<const float4*>(& A[a + wA * threadIdx.y + t4x]);
-            const float4 * const B4  = reinterpret_cast<const float4*>(& B[a + wA * threadIdx.y + t4x]);
-
-            cuda::memcpy_async(A4s, A4, sizeof(float4), bar);
-            cuda::memcpy_async(B4s, B4, sizeof(float4), bar);
-         }
-
-        // Synchronize to make sure the matrices are loaded
-        bar.arrive_and_wait();
-
-        // Multiply the two matrices together;
-        // each thread computes one element
-        // of the block sub-matrix
-#pragma unroll
-        for (int k = 0; k < BLOCK_SIZE; ++k) {
-            Csub += As[threadIdx.y][k] * Bs[k][threadIdx.x];
-        }
-
-        // Synchronize to make sure that the preceding
-        // computation is done before overwriting the
-        // shared memory sub-matrix buffers As and Bs in the next iteration.
-        bar.arrive_and_wait();
-    }
-
-    // Write the block sub-matrix to device memory;
-    // each thread writes four element
-    int c = wB * BLOCK_SIZE * blockIdx.y + BLOCK_SIZE * blockIdx.x;
-    C[c + wB * threadIdx.y + threadIdx.x] = Csub;
-#endif
-}
-
-// Single Stage memcpy_async pipeline with float copy
-extern "C"
- __global__ void MatrixMulAsyncCopySingleStage(float *C, const float *A,
-                                                        const float *B, int wA,
-                                                        int wB) {
-
-    // Declaration of the shared memory array As used to
-    // store the sub-matrix of A
-    __shared__ float As[BLOCK_SIZE][BLOCK_SIZE];
-
-    // Declaration of the shared memory array Bs used to
-    // store the sub-matrix of B
-    __shared__ float Bs[BLOCK_SIZE][BLOCK_SIZE];
-
-    // Index of the first sub-matrix of A processed by the block
-    int aBegin = wA * BLOCK_SIZE * blockIdx.y;
-
-    // Index of the last sub-matrix of A processed by the block
-    int aEnd   = aBegin + wA - 1;
-
-    // Step size used to iterate through the sub-matrices of A
-    int aStep  = BLOCK_SIZE;
-
-    // Index of the first sub-matrix of B processed by the block
-    int bBegin = BLOCK_SIZE * blockIdx.x;
-
-    // Step size used to iterate through the sub-matrices of B
-    int bStep  = BLOCK_SIZE * wB;
-
-    // Single-stage pipeline version
-    float Csub = 0.0;
-
-    cuda::pipeline<cuda::thread_scope_thread> pipe = cuda::make_pipeline();
-    const auto shape1 = cuda::aligned_size_t<alignof(float)>(sizeof(float));
-
-
-    // Loop over all the sub-matrices of A and B
-    // required to compute the block sub-matrix
-    for (int a = aBegin, b = bBegin; a <= aEnd; a += aStep, b += bStep) {
-        // Load the matrices from device memory to shared memory; each thread loads
-        // one element of each matrix
-        {
-            pipe.producer_acquire();
-
-            cuda::memcpy_async(&As[threadIdx.y][threadIdx.x], &A[a + wA * threadIdx.y + threadIdx.x], shape1, pipe);
-            cuda::memcpy_async(&Bs[threadIdx.y][threadIdx.x], &B[b + wB * threadIdx.y + threadIdx.x], shape1, pipe);
-
-            pipe.producer_commit();
-        }
-
-        pipe.consumer_wait();
-        // Synchronize to make sure the matrices are loaded
-        __syncthreads();
-
-        // Multiply the two matrices together;
-        // each thread computes one element
-        // of the block sub-matrix
-#pragma unroll
-        for (int k = 0; k < BLOCK_SIZE; ++k) {
-            Csub += As[threadIdx.y][k] * Bs[k][threadIdx.x];
-        }
-
-        // Synchronize to make sure that the preceding
-        // computation is done before overwriting the
-        // shared memory sub-matrix buffers As and Bs in the next iteration.
-        __syncthreads();
-    }
-
-    // Write the block sub-matrix to device memory;
-    // each thread writes four element
-    int c = wB * BLOCK_SIZE * blockIdx.y + BLOCK_SIZE * blockIdx.x;
-    C[c + wB * threadIdx.y + threadIdx.x] = Csub;
-}
-
-// Multi Stage memcpy_async thread_scope_thread pipeline with single-element async-copy
-extern "C"
-__global__ void MatrixMulAsyncCopyMultiStage(float* __restrict__ C, 
-                                                        const float* __restrict__ A,
-                                                        const float* __restrict__ B, int wA,
-                                                        int wB) {
-    // Multi-stage pipeline version
-    constexpr size_t maxPipelineStages = 4;
-
-    // Declaration of the shared memory array As used to
-    // store the sub-matrix of A for each stage
-    __shared__ float As[maxPipelineStages][BLOCK_SIZE][BLOCK_SIZE];
-
-    // Declaration of the shared memory array Bs used to
-    // store the sub-matrix of B for each stage
-    __shared__ float Bs[maxPipelineStages][BLOCK_SIZE][BLOCK_SIZE];
-
-    float Csub = 0.0;
-
-    // Index of the first sub-matrix of A processed by the block
-    const int aBegin = wA * BLOCK_SIZE * blockIdx.y;
-
-    // Index of the last sub-matrix of A processed by the block
-    const int aEnd   = aBegin + wA - 1;
-
-    // Step size used to iterate through the sub-matrices of A
-    int aStep  = BLOCK_SIZE;
-
-    // Index of the first sub-matrix of B processed by the block
-    const int bBegin = BLOCK_SIZE * blockIdx.x;
-
-    // Step size used to iterate through the sub-matrices of B
-    int bStep  = BLOCK_SIZE * wB;
-
-    cuda::pipeline<cuda::thread_scope_thread> pipe = cuda::make_pipeline();
-    const auto shape1 = cuda::aligned_size_t<alignof(float)>(sizeof(float));
-
-    // Loop over all the sub-matrices of A and B
-    // required to compute the block sub-matrix
-    for (int a = aBegin, b = bBegin, i = 0, aStage = aBegin, bStage = bBegin, iStage = 0; a <= aEnd; a += aStep, b += bStep, ++i ) {
-        // Load the matrices from device memory to shared memory; each thread loads
-        // one element of each matrix
-
-        for ( ; aStage <= a + aStep * maxPipelineStages ; aStage += aStep, bStage += bStep, ++iStage )
-        {
-            if ( aStage <= aEnd )
-            {
-                // Rotating buffer
-                const int j = iStage % maxPipelineStages;
-
-                pipe.producer_acquire();
-
-                cuda::memcpy_async(&As[j][threadIdx.y][threadIdx.x], &A[aStage + wA * threadIdx.y + threadIdx.x], shape1, pipe);
-                cuda::memcpy_async(&Bs[j][threadIdx.y][threadIdx.x], &B[bStage + wB * threadIdx.y + threadIdx.x], shape1, pipe);
-
-                pipe.producer_commit();
-            }
-        }
-        pipe.consumer_wait();
-
-        // Synchronize to make sure the matrices are loaded
-        __syncthreads();
-
-        const int j = i % maxPipelineStages;
-
-        // Multiply the two matrices together;
-        // each thread computes one element
-        // of the block sub-matrix
-        for (int k = 0; k < BLOCK_SIZE; ++k) {
-            Csub += As[j][threadIdx.y][k] * Bs[j][k][threadIdx.x];
-        }
-
-        pipe.consumer_release();
-        // Don't have to synchronize because maxPipelineStages is greater than one
-        // therefore next iteration is loading to a different buffer.
-    }
-
-    // Write the block sub-matrix to device memory;
-    // each thread writes four element
-    int c = wB * BLOCK_SIZE * blockIdx.y + BLOCK_SIZE * blockIdx.x;
-    C[c + wB * threadIdx.y + threadIdx.x] = Csub;
-}
-
-// Multi Stage shared state memcpy_async pipeline thread_scope_block
-// with parititioned producer & consumer, here we've 1 warp as producer
-// group which issues memcpy_async operations and rest all warps are part of
-// consumer group which perform gemm computation on the loaded matrices by producer.
-extern "C"
-__global__ void MatrixMulAsyncCopyMultiStageSharedState(float* __restrict__ C, 
-                                                        const float* __restrict__ A,
-                                                        const float* __restrict__ B, int wA,
-                                                        int wB) {
-    // Multi-stage pipeline version
-    constexpr size_t maxPipelineStages = 4;
-
-    // Declaration of the shared memory array As used to
-    // store the sub-matrix of A for each stage
-    __shared__ float As[maxPipelineStages][BLOCK_SIZE_X][BLOCK_SIZE_X];
-
-    // Declaration of the shared memory array Bs used to
-    // store the sub-matrix of B for each stage
-    __shared__ float Bs[maxPipelineStages][BLOCK_SIZE_X][BLOCK_SIZE_X];
-
-    float Csub = 0.0;
-
-    // Index of the first sub-matrix of A processed by the block
-    const int aBegin = wA * BLOCK_SIZE_X * blockIdx.y;
-
-    // Index of the last sub-matrix of A processed by the block
-    const int aEnd = aBegin + wA - 1;
-
-    // Step size used to iterate through the sub-matrices of A
-    constexpr int aStep  = BLOCK_SIZE_X;
-
-    // Index of the first sub-matrix of B processed by the block
-    const int bBegin = BLOCK_SIZE_X * blockIdx.x;
-
-    // Step size used to iterate through the sub-matrices of B
-    int bStep  = BLOCK_SIZE_X * wB;
-
-    auto cta = cg::this_thread_block();
-
-    const auto shape1 = cuda::aligned_size_t<alignof(float)>(sizeof(float));
-    __shared__ cuda::pipeline_shared_state<cuda::thread_scope_block, maxPipelineStages> shared_state;
-    constexpr int consumer_row_count =  BLOCK_SIZE_X;
-
-    const auto thread_role = (cta.thread_index().y < consumer_row_count)
-                                ? cuda::pipeline_role::consumer
-                                : cuda::pipeline_role::producer;
-    auto pipe = cuda::make_pipeline(cta, &shared_state, thread_role);
-
-    // Loop over all the sub-matrices of A and B
-    // required to compute the block sub-matrix
-    for (int a = aBegin, b = bBegin, i = 0, aStage = aBegin, bStage = bBegin, iStage = 0;
-                                                a <= aEnd; a += aStep, b += bStep, ++i) {
-        if (threadIdx.y >= consumer_row_count) {
-            // this is a whole producer warp because threadIdx.y >= 16 where 16 == consumer_row_count,
-            // which loads the matrices from device memory to shared memory; 
-            for (; aStage <= a + aStep * maxPipelineStages; aStage += aStep, bStage += bStep, ++iStage) {
-                if (aStage <= aEnd) {
-                    // Rotating buffer
-                    const int j = iStage % maxPipelineStages;
-                    const int strideRows = (blockDim.y - consumer_row_count);
-                    pipe.producer_acquire();
-                    for (int rowId = threadIdx.y - consumer_row_count; rowId < BLOCK_SIZE_X; rowId += strideRows) {
-                        cuda::memcpy_async(&As[j][rowId][threadIdx.x], 
-                                            &A[aStage + wA * rowId + threadIdx.x], shape1, pipe);
-                        cuda::memcpy_async(&Bs[j][rowId][threadIdx.x],
-                                            &B[bStage + wB * rowId + threadIdx.x], shape1, pipe);
-                    }
-                    pipe.producer_commit();
-                }
-            }
-        }
-        else {
-            // this is a whole set of consumer group because threadIdx.y < consumer_row_count where consumer_row_count == 16,
-            // which computes gemm operation on matrices loaded in shared memory by producer warp. 
-            const int j = i % maxPipelineStages;
-            // Synchronize consumer group to make sure the matrices are loaded by producer group.
-            pipe.consumer_wait();
-            // Multiply the two matrices together;
-            // each thread computes one element
-            // of the block sub-matrix
-            #pragma unroll
-            for (int k = 0; k < BLOCK_SIZE_X; ++k) {
-                Csub += As[j][threadIdx.y][k] * Bs[j][k][threadIdx.x];
-            }
-            pipe.consumer_release();
-        }
-    }
-
-    // Write the block sub-matrix to device memory;
-    // each thread writes four element
-    if (threadIdx.y < consumer_row_count)
-    {
-        const int c = wB * BLOCK_SIZE_X * blockIdx.y + BLOCK_SIZE_X * blockIdx.x;
-        C[c + wB * threadIdx.y + threadIdx.x] = Csub;
-    }
-}
-
-/**
- * Matrix multiplication (CUDA Kernel) on the device: C = A * B
- * wA is A's width and wB is B's width
- */
- extern "C"
- __global__ void MatrixMulNaive(float *C, float *A,
-                                                        float *B, int wA,
-                                                        int wB) {
-    // Declaration of the shared memory array As used to
-    // store the sub-matrix of A
-    __shared__ float As[BLOCK_SIZE][BLOCK_SIZE];
-
-    // Declaration of the shared memory array Bs used to
-    // store the sub-matrix of B
-    __shared__ float Bs[BLOCK_SIZE][BLOCK_SIZE];
-
-    // Index of the first sub-matrix of A processed by the block
-    int aBegin = wA * BLOCK_SIZE * blockIdx.y;
-
-    // Index of the last sub-matrix of A processed by the block
-    int aEnd   = aBegin + wA - 1;
-
-    // Step size used to iterate through the sub-matrices of A
-    int aStep  = BLOCK_SIZE;
-
-    // Index of the first sub-matrix of B processed by the block
-    int bBegin = BLOCK_SIZE * blockIdx.x;
-
-    // Step size used to iterate through the sub-matrices of B
-    int bStep  = BLOCK_SIZE * wB;
-
-    // Csub is used to store the element of the block sub-matrix
-    // that is computed by the thread
-    float Csub = 0;
-
-    // Loop over all the sub-matrices of A and B
-    // required to compute the block sub-matrix
-    for (int a = aBegin, b = bBegin;
-            a <= aEnd;
-            a += aStep, b += bStep) {
-
-        // Load the matrices from device memory
-        // to shared memory; each thread loads
-        // one element of each matrix
-        As[threadIdx.y][threadIdx.x] = A[a + wA * threadIdx.y + threadIdx.x];
-        Bs[threadIdx.y][threadIdx.x] = B[b + wB * threadIdx.y + threadIdx.x];
-
-        // Synchronize to make sure the matrices are loaded
-        __syncthreads();
-
-        // Multiply the two matrices together;
-        // each thread computes one element
-        // of the block sub-matrix
-#pragma unroll
-        for (int k = 0; k < BLOCK_SIZE; ++k) {
-            Csub += As[threadIdx.y][k] * Bs[k][threadIdx.x];
-        }
-
-        // Synchronize to make sure that the preceding
-        // computation is done before loading two new
-        // sub-matrices of A and B in the next iteration
-        __syncthreads();
-    }
-
-    // Write the block sub-matrix to device memory;
-    // each thread writes one element
-    int c = wB * BLOCK_SIZE * blockIdx.y + BLOCK_SIZE * blockIdx.x;
-    C[c + wB * threadIdx.y + threadIdx.x] = Csub;
-}
-
-extern "C"
-__global__ void MatrixMulNaiveLargeChunk(float *C, float *A,
-                                                        float *B, int wA,
-                                                        int wB) {
-    // Declaration of the shared memory array As used to
-    // store the sub-matrix of A
-    __shared__ alignas(alignof(float4)) float As[BLOCK_SIZE][BLOCK_SIZE];
-
-    // Declaration of the shared memory array Bs used to
-    // store the sub-matrix of B
-    __shared__ alignas(alignof(float4)) float Bs[BLOCK_SIZE][BLOCK_SIZE];
-
-    int t4x = threadIdx.x * 4 ;
-
-    // Index of the first sub-matrix of A processed by the block
-    int aBegin = wA * BLOCK_SIZE * blockIdx.y;
-
-    // Index of the last sub-matrix of A processed by the block
-    int aEnd   = aBegin + wA - 1;
-
-    // Step size used to iterate through the sub-matrices of A
-    int aStep  = BLOCK_SIZE;
-
-    // Index of the first sub-matrix of B processed by the block
-    int bBegin = BLOCK_SIZE * blockIdx.x;
-
-    // Step size used to iterate through the sub-matrices of B
-    int bStep  = BLOCK_SIZE * wB;
-
-    // Csub is used to store the element of the block sub-matrix
-    // that is computed by the thread
-    float Csub = 0;
-
-    // Loop over all the sub-matrices of A and B
-    // required to compute the block sub-matrix
-    for (int a = aBegin, b = bBegin;
-            a <= aEnd;
-            a += aStep, b += bStep) {
-
-        // Load the matrices from device memory
-        // to shared memory; 
-
-        // One fourth of the threads load four elements of each matrix
-        if ( t4x < BLOCK_SIZE ) {
-            float4 * const A4s = reinterpret_cast<float4*>(& As[threadIdx.y][t4x]);
-            float4 * const B4s = reinterpret_cast<float4*>(& Bs[threadIdx.y][t4x]);
-            const float4 * const A4 = reinterpret_cast<float4*>(& A[a + wA * threadIdx.y + t4x]);
-            const float4 * const B4 = reinterpret_cast<float4*>(& B[a + wA * threadIdx.y + t4x]);
-            *A4s = *A4 ;
-            *B4s = *B4 ;
-        }
-
-        // Synchronize to make sure the matrices are loaded
-        __syncthreads();
-
-        // Multiply the two matrices together;
-        // each thread computes one element
-        // of the block sub-matrix
-#pragma unroll
-        for (int k = 0; k < BLOCK_SIZE; ++k) {
-            Csub += As[threadIdx.y][k] * Bs[k][threadIdx.x];
-        }
-
-        // Synchronize to make sure that the preceding
-        // computation is done before loading two new
-        // sub-matrices of A and B in the next iteration
-        __syncthreads();
-    }
-
-    // Write the block sub-matrix to device memory;
-    // each thread writes one element
-    int c = wB * BLOCK_SIZE * blockIdx.y + BLOCK_SIZE * blockIdx.x;
-    C[c + wB * threadIdx.y + threadIdx.x] = Csub;
-}
-'''
-
-def ConstantInit(data, size, val):
-    p_data = (ctypes.c_float * size).from_address(data)
-    for i in range(size):
-        p_data[i] = val
-
-#
-# Run matrix multiplication using CUDA
-#
-def MatrixMultiply(dimsA, dimsB, kernel_number):
-    # Allocate host memory for matricies A and B
-    size_A = dimsA.x * dimsA.y
-    mem_size_A = np.dtype(np.float32).itemsize * size_A
-    h_A = checkCudaErrors(cudart.cudaMallocHost(mem_size_A))
-    size_B = dimsB.x * dimsB.y
-    mem_size_B = np.dtype(np.float32).itemsize * size_B
-    h_B = checkCudaErrors(cudart.cudaMallocHost(mem_size_B))
-
-    # Initialize host memory
-    valB = 2.10
-    ConstantInit(h_A, size_A, 1.0)
-    ConstantInit(h_B, size_B, valB)
-
-    # Allocate Device Memory
-
-    # Allocate host matrix C
-    dimsC = cudart.dim3()
-    dimsC.x = dimsB.x
-    dimsC.y = dimsA.y
-    dimsC.z = 1
-    mem_size_C = dimsC.x * dimsC.y * np.dtype(np.float32).itemsize
-    h_C = checkCudaErrors(cudart.cudaMallocHost(mem_size_C))
-
-    if h_C == 0:
-        print("Failed to allocate host matri C!")
-        exit(-1)
-
-    d_A = checkCudaErrors(cudart.cudaMalloc(mem_size_A))
-    d_B = checkCudaErrors(cudart.cudaMalloc(mem_size_B))
-    d_C = checkCudaErrors(cudart.cudaMalloc(mem_size_C))
-    # Allocate CUDA events that we'll use for timing
-    start = checkCudaErrors(cudart.cudaEventCreate())
-    stop = checkCudaErrors(cudart.cudaEventCreate())
-
-    stream = checkCudaErrors(cudart.cudaStreamCreateWithFlags(cudart.cudaStreamNonBlocking))
-
-    # Copy host memory to device
-    checkCudaErrors(cudart.cudaMemcpyAsync(d_A, h_A, mem_size_A, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, stream))
-    checkCudaErrors(cudart.cudaMemcpyAsync(d_B, h_B, mem_size_B, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, stream))
-    checkCudaErrors(cudart.cudaMemsetAsync(d_C, 0, mem_size_C, stream))
-
-    # Setup execution parameters
-    threads = cudart.dim3()
-    threads.x = threads.y = blockSize
-    threads.z = 1
-    grid = cudart.dim3()
-    grid.x = dimsB.x / threads.x
-    grid.y = dimsA.y / threads.y
-    grid.z = 1
-
-    # Here the block size is 16x18, where first 16 rows are consumer thread group
-    # and last 2 rows (1 warp) is producer thread group
-    threadsSharedStateKernel = cudart.dim3()
-    threadsSharedStateKernel.x = blockSize
-    threadsSharedStateKernel.y = blockSize + 2
-    threadsSharedStateKernel.z = 1
-    gridSharedStateKernel = cudart.dim3()
-    gridSharedStateKernel.x = dimsB.x / threadsSharedStateKernel.x
-    gridSharedStateKernel.y = dimsA.y / threadsSharedStateKernel.x
-
-    print("Running kernel = {} - {}".format(kernel_number, kernelNames[kernel_number.value]))
-    # Create and start timer
-    print("Computing result using CUDA Kernel...")
-
-    # Performs warmup operation using matrixMul CUDA kernel
-    kernelArguments = ((d_C, d_A, d_B, dimsA.x, dimsB.x),
-                       (ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_int))
-    if kernel_number == kernels.AsyncCopyMultiStageLargeChunk:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopyMultiStageLargeChunk, 
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.AsyncCopyLargeChunk:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopyLargeChunk,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.AsyncCopyLargeChunkAWBarrier:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopyLargeChunkAWBarrier,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.AsyncCopyMultiStageSharedState:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopyMultiStageSharedState,
-                                            gridSharedStateKernel.x, gridSharedStateKernel.y, gridSharedStateKernel.z,          # grid dim
-                                            threadsSharedStateKernel.x, threadsSharedStateKernel.y, threadsSharedStateKernel.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.AsyncCopyMultiStage:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopyMultiStage,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.AsyncCopySingleStage:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopySingleStage,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.Naive:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulNaive,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.NaiveLargeChunk:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulNaiveLargeChunk,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-
-    print('done')
-    checkCudaErrors(cudart.cudaStreamSynchronize(stream))
-
-
-    # Execute the kernel
-    nIter = 100
-
-    # Record the start event
-    checkCudaErrors(cudart.cudaEventRecord(start, stream))
-
-    if kernel_number == kernels.AsyncCopyMultiStageLargeChunk:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopyMultiStageLargeChunk, 
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.AsyncCopyLargeChunk:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopyLargeChunk,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.AsyncCopyLargeChunkAWBarrier:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopyLargeChunkAWBarrier,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.AsyncCopyMultiStageSharedState:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopyMultiStageSharedState,
-                                            gridSharedStateKernel.x, gridSharedStateKernel.y, gridSharedStateKernel.z,          # grid dim
-                                            threadsSharedStateKernel.x, threadsSharedStateKernel.y, threadsSharedStateKernel.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.AsyncCopyMultiStage:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopyMultiStage,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.AsyncCopySingleStage:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulAsyncCopySingleStage,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.Naive:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulNaive,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-    elif kernel_number == kernels.NaiveLargeChunk:
-        checkCudaErrors(cuda.cuLaunchKernel(_MatrixMulNaiveLargeChunk,
-                                            grid.x, grid.y, grid.z,          # grid dim
-                                            threads.x, threads.y, threads.z, # block dim
-                                            0,                               # shared mem
-                                            stream,                          # stream
-                                            kernelArguments, 0))             # arguments
-
-    # Record the stop event
-    checkCudaErrors(cudart.cudaEventRecord(stop, stream))
-
-    # Wait for the stop event to complete
-    checkCudaErrors(cudart.cudaEventSynchronize(stop))
-
-    msecTotal = checkCudaErrors(cudart.cudaEventElapsedTime(start, stop))
-
-    # Compute and print the performance
-    msecPerMatrixMul = msecTotal / nIter
-    flopsPerMatrixMul = 2.0 * dimsA.x * dimsA.y * dimsB.x
-    gigaFlops = (flopsPerMatrixMul * 1.0e-9) / (msecPerMatrixMul / 1000.0)
-
-    print("Performance= {:.2f} GFlop/s, Time= {:.2f} msec, Size= {:.0f} Ops, WorkgroupSize= {} threads/block".format(
-            gigaFlops,
-            msecPerMatrixMul,
-            flopsPerMatrixMul,
-            threads.x * threads.y))
-
-    # Copy result from device to host
-    checkCudaErrors(cudart.cudaMemcpyAsync(h_C, d_C, mem_size_C, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, stream))
-    checkCudaErrors(cudart.cudaStreamSynchronize(stream))
-
-    print("Checking computed result for correctness: ")
-    correct = True
-
-    # test relative error by the formula
-    # |<x, y>_cpu - <x,y>_gpu|/<|x|, |y|>  < eps
-    eps = 1.e-6
-
-    h_C_local = (ctypes.c_float * (dimsC.x * dimsC.y)).from_address(h_C)
-    for i in range(dimsC.x * dimsC.y):
-        abs_err = math.fabs(h_C_local[i] - (dimsA.x * valB))
-        dot_length = dimsA.x
-        abs_val = math.fabs(h_C_local[i])
-        rel_err = abs_err / abs_val / dot_length
-
-        if rel_err > eps:
-            print("Error! Matrix[{:.5f}]={:.8f} ref={:.8f} err term is > {}".format(i, h_C_local[i], dimsA.x * valB, rel_err))
-            correct = False
-
-    print("Result = PASS" if correct else "Result = FAIL")   
-
-    # Clean up memory
-    checkCudaErrors(cudart.cudaFreeHost(h_A))
-    checkCudaErrors(cudart.cudaFreeHost(h_B))
-    checkCudaErrors(cudart.cudaFreeHost(h_C))
-    checkCudaErrors(cudart.cudaFree(d_A))
-    checkCudaErrors(cudart.cudaFree(d_B))
-    checkCudaErrors(cudart.cudaFree(d_C))
-    checkCudaErrors(cudart.cudaEventDestroy(start))
-    checkCudaErrors(cudart.cudaEventDestroy(stop))
-    print("\nNOTE: The CUDA Samples are not meant for performance "\
-          "measurements. Results may vary when GPU Boost is enabled.");
-
-    if correct:
-        return 0
-    return -1
-
-def checkKernelCompiles():
-    kernel_headers = '''\
-    #line __LINE__
-    #if __CUDA_ARCH__ >= 700
-    #include <cuda/barrier>
-    #endif
-    #include <cooperative_groups.h>
-    #include <cooperative_groups/reduce.h>
-    #include <cuda/pipeline>
-    '''
-    try:
-        common.KernelHelper(kernel_headers, findCudaDevice())
-    except:
-        # Filters out test from automation for two reasons
-        # 1. Headers are not found
-        # 2. Incompatible device
-        return False
-    return True
-
-@pytest.mark.skipif(not checkKernelCompiles(), reason="Automation filter against incompatible kernel")
-def main():
-    print("[globalToShmemAsyncCopy] - Starting...")
-
-    version = checkCudaErrors(cuda.cuDriverGetVersion())
-    if version < 11010:
-        print("CUDA Toolkit 11.1 or greater is required")
-        return
-
-    if (checkCmdLineFlag("help") or checkCmdLineFlag("?")):
-        print("Usage device=n (n >= 0 for deviceID)")
-        print("      wA=WidthA hA=HeightA (Width x Height of Matrix A)")
-        print("      wB=WidthB hB=HeightB (Width x Height of Matrix B)")
-        print("      kernel=kernel_number (0 - AsyncCopyMultiStageLargeChunk; 1 - AsyncCopyLargeChunk)")
-        print("                            (2 - AsyncCopyLargeChunkAWBarrier; 3 - AsyncCopyMultiStageSharedState)")
-        print("                            (4 - AsyncCopyMultiStage; 5 - AsyncCopySingleStage; 6 - Naive without memcpy_async)")
-        print("                            (7 - NaiveLargeChunk without memcpy_async)")
-        print("  Note: Outer matrix dimensions of A & B matrices must be equal.")
-        return
-
-    # This will pick the best possible CUDA capable device, otherwise
-    # override the device ID based on input provided at the command line
-    devID = findCudaDevice()
-
-    matrixBlock = 32
-    dimsA = cudart.dim3()
-    dimsA.x = dimsA.y = 10 * 4 * matrixBlock
-    dimsA.z = 1
-    dimsB = cudart.dim3()
-    dimsB.x = dimsB.y = 10 * 4 * matrixBlock
-    dimsB.z = 1
-
-    # width of Matrix A
-    if checkCmdLineFlag("wA="):
-        dimsA.x = int(getCmdLineArgumentInt("wA="))
-
-    # height of Matrix A
-    if checkCmdLineFlag("hA="):
-        dimsA.y = int(getCmdLineArgumentInt("hA="))
-
-    # width of Matrix B
-    if checkCmdLineFlag("wB="):
-        dimsB.x = int(getCmdLineArgumentInt("wB="))
-
-    # height of Matrix B
-    if checkCmdLineFlag("hB="):
-        dimsB.y = int(getCmdLineArgumentInt("hB="))
-
-    if dimsA.x != dimsB.y:
-        print("Error: outer matrix dimensions must be equal. ({} != {})".format(dimsA.x, dimsB.y))
-        sys.exit(-1)
-
-    selected_kernel = kernels.AsyncCopyMultiStageLargeChunk
-
-    # kernel to run - default (AsyncCopyMultiStageLargeChunk == 0)
-    if checkCmdLineFlag("kernel="):
-        kernel_number = int(getCmdLineArgumentInt("kernel="))
-        if kernel_number < 8:
-            selected_kernel = kernels(kernel_number)
-        else:
-            print("Error: kernel number should be between 0 to 7, you have entered %d".format(kernel_number))
-            sys.exit(-1)
-
-    major = checkCudaErrors(cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, devID))
-    if major < 7:
-        print("globalToShmemAsyncCopy requires SM 7.0 or higher.  Exiting...")
-        return
-
-    print("MatrixA({},{}), MatrixB({},{})".format(dimsA.x, dimsA.y,
-                                                  dimsB.x, dimsB.y))
-
-    global _MatrixMulAsyncCopyMultiStageLargeChunk
-    global _MatrixMulAsyncCopyLargeChunk
-    global _MatrixMulAsyncCopyLargeChunkAWBarrier
-    global _MatrixMulAsyncCopyMultiStageSharedState
-    global _MatrixMulAsyncCopyMultiStage
-    global _MatrixMulAsyncCopySingleStage
-    global _MatrixMulNaive
-    global _MatrixMulNaiveLargeChunk
-    kernelHelper = common.KernelHelper(globalToShmemAsyncCopy, devID)
-    _MatrixMulAsyncCopyMultiStageLargeChunk = kernelHelper.getFunction(b'MatrixMulAsyncCopyMultiStageLargeChunk')
-    _MatrixMulAsyncCopyLargeChunk = kernelHelper.getFunction(b'MatrixMulAsyncCopyLargeChunk')
-    _MatrixMulAsyncCopyLargeChunkAWBarrier = kernelHelper.getFunction(b'MatrixMulAsyncCopyLargeChunkAWBarrier')
-    _MatrixMulAsyncCopyMultiStageSharedState = kernelHelper.getFunction(b'MatrixMulAsyncCopyMultiStageSharedState')
-    _MatrixMulAsyncCopyMultiStage = kernelHelper.getFunction(b'MatrixMulAsyncCopyMultiStage')
-    _MatrixMulAsyncCopySingleStage = kernelHelper.getFunction(b'MatrixMulAsyncCopySingleStage')
-    _MatrixMulNaive = kernelHelper.getFunction(b'MatrixMulNaive')
-    _MatrixMulNaiveLargeChunk = kernelHelper.getFunction(b'MatrixMulNaiveLargeChunk')
-
-    matrix_result = MatrixMultiply(dimsA, dimsB, selected_kernel)
-
-    if matrix_result != 0:
-        sys.exit(-1)
-
-if __name__ == "__main__":
-    main()
diff --git a/cuda_bindings/examples/3_CUDA_Features/simpleCudaGraphs_test.py b/cuda_bindings/examples/3_CUDA_Features/simpleCudaGraphs_test.py
deleted file mode 100644
index 7a895acb..00000000
--- a/cuda_bindings/examples/3_CUDA_Features/simpleCudaGraphs_test.py
+++ /dev/null
@@ -1,375 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import numpy as np
-import pytest
-import random as rnd
-from cuda import cuda, cudart
-from common import common
-from common.helper_cuda import checkCudaErrors, findCudaDevice
-
-THREADS_PER_BLOCK = 512
-GRAPH_LAUNCH_ITERATIONS = 3
-
-simpleCudaGraphs = '''\
-#include <cooperative_groups.h>
-#include <cuda_runtime.h>
-
-namespace cg = cooperative_groups;
-
-#define THREADS_PER_BLOCK 512
-#define GRAPH_LAUNCH_ITERATIONS 3
-
-extern "C"
-__global__ void reduce(float *inputVec, double *outputVec, size_t inputSize,
-                       size_t outputSize) {
-    __shared__ double tmp[THREADS_PER_BLOCK];
-
-    cg::thread_block cta = cg::this_thread_block();
-    size_t globaltid = blockIdx.x * blockDim.x + threadIdx.x;
-
-    double temp_sum = 0.0;
-    for (int i = globaltid; i < inputSize; i += gridDim.x * blockDim.x) {
-        temp_sum += (double)inputVec[i];
-    }
-    tmp[cta.thread_rank()] = temp_sum;
-
-    cg::sync(cta);
-
-    cg::thread_block_tile<32> tile32 = cg::tiled_partition<32>(cta);
-
-    double beta = temp_sum;
-    double temp;
-
-    for (int i = tile32.size() / 2; i > 0; i >>= 1) {
-        if (tile32.thread_rank() < i) {
-            temp = tmp[cta.thread_rank() + i];
-            beta += temp;
-            tmp[cta.thread_rank()] = beta;
-        }
-        cg::sync(tile32);
-    }
-    cg::sync(cta);
-
-    if (cta.thread_rank() == 0 && blockIdx.x < outputSize) {
-        beta = 0.0;
-        for (int i = 0; i < cta.size(); i += tile32.size()) {
-            beta += tmp[i];
-        }
-        outputVec[blockIdx.x] = beta;
-    }
-}
-
-extern "C"
-__global__ void reduceFinal(double *inputVec, double *result,
-                            size_t inputSize) {
-    __shared__ double tmp[THREADS_PER_BLOCK];
-
-    cg::thread_block cta = cg::this_thread_block();
-    size_t globaltid = blockIdx.x * blockDim.x + threadIdx.x;
-
-    double temp_sum = 0.0;
-    for (int i = globaltid; i < inputSize; i += gridDim.x * blockDim.x) {
-        temp_sum += (double)inputVec[i];
-    }
-    tmp[cta.thread_rank()] = temp_sum;
-
-    cg::sync(cta);
-
-    cg::thread_block_tile<32> tile32 = cg::tiled_partition<32>(cta);
-
-    // do reduction in shared mem
-    if ((blockDim.x >= 512) && (cta.thread_rank() < 256)) {
-        tmp[cta.thread_rank()] = temp_sum = temp_sum + tmp[cta.thread_rank() + 256];
-    }
-
-    cg::sync(cta);
-
-    if ((blockDim.x >= 256) && (cta.thread_rank() < 128)) {
-        tmp[cta.thread_rank()] = temp_sum = temp_sum + tmp[cta.thread_rank() + 128];
-    }
-
-    cg::sync(cta);
-
-    if ((blockDim.x >= 128) && (cta.thread_rank() < 64)) {
-        tmp[cta.thread_rank()] = temp_sum = temp_sum + tmp[cta.thread_rank() + 64];
-    }
-
-    cg::sync(cta);
-
-    if (cta.thread_rank() < 32) {
-          // Fetch final intermediate sum from 2nd warp
-          if (blockDim.x >= 64) temp_sum += tmp[cta.thread_rank() + 32];
-          // Reduce final warp using shuffle
-          for (int offset = tile32.size() / 2; offset > 0; offset /= 2) {
-                temp_sum += tile32.shfl_down(temp_sum, offset);
-          }
-    }
-    // write result for this block to global mem
-    if (cta.thread_rank() == 0) result[0] = temp_sum;
-}
-'''
-
-def init_input(a, size):
-    ctypes.c_float.from_address(a)
-    a_list = ctypes.pointer(ctypes.c_float.from_address(a))
-    for i in range(0, size):
-        a_list[i] = rnd.random()
-
-def cudaGraphsManual(inputVec_h, inputVec_d, outputVec_d, result_d, inputSize, numOfBlocks):
-    result_h = ctypes.c_double(0.0)
-    nodeDependencies = []
-
-    streamForGraph = checkCudaErrors(cudart.cudaStreamCreate())
-
-    kernelNodeParams = cuda.CUDA_KERNEL_NODE_PARAMS()
-    memcpyParams = cudart.cudaMemcpy3DParms()
-    memsetParams = cudart.cudaMemsetParams()
-
-    memcpyParams.srcArray = None
-    memcpyParams.srcPos = cudart.make_cudaPos(0, 0, 0)
-    memcpyParams.srcPtr = cudart.make_cudaPitchedPtr(inputVec_h, np.dtype(np.float32).itemsize * inputSize, inputSize, 1)
-    memcpyParams.dstArray = None
-    memcpyParams.dstPos = cudart.make_cudaPos(0, 0, 0)
-    memcpyParams.dstPtr = cudart.make_cudaPitchedPtr(inputVec_d, np.dtype(np.float32).itemsize * inputSize, inputSize, 1)
-    memcpyParams.extent = cudart.make_cudaExtent(np.dtype(np.float32).itemsize * inputSize, 1, 1)
-    memcpyParams.kind = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
-
-    memsetParams.dst = outputVec_d
-    memsetParams.value = 0
-    memsetParams.pitch = 0
-    memsetParams.elementSize = np.dtype(np.float32).itemsize # elementSize can be max 4 bytes
-    memsetParams.width = numOfBlocks * 2
-    memsetParams.height = 1
-
-    graph = checkCudaErrors(cudart.cudaGraphCreate(0))
-
-    memcpyNode = checkCudaErrors(cudart.cudaGraphAddMemcpyNode(graph, None, 0, memcpyParams))
-    memsetNode = checkCudaErrors(cudart.cudaGraphAddMemsetNode(graph, None, 0, memsetParams))
-
-    nodeDependencies.append(memsetNode)
-    nodeDependencies.append(memcpyNode)
-
-    kernelArgs = ((inputVec_d, outputVec_d, inputSize, numOfBlocks),
-                  (ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_uint))
-
-    kernelNodeParams.func = _reduce
-    kernelNodeParams.gridDimX = numOfBlocks
-    kernelNodeParams.gridDimY = kernelNodeParams.gridDimZ = 1
-    kernelNodeParams.blockDimX = THREADS_PER_BLOCK
-    kernelNodeParams.blockDimY = kernelNodeParams.blockDimZ = 1
-    kernelNodeParams.sharedMemBytes = 0
-    kernelNodeParams.kernelParams = kernelArgs
-    # kernelNodeParams.extra = None
-
-    kernelNode = checkCudaErrors(cuda.cuGraphAddKernelNode(graph, nodeDependencies, len(nodeDependencies), kernelNodeParams))
-
-    nodeDependencies.clear()
-    nodeDependencies.append(kernelNode)
-
-    memsetParams = cudart.cudaMemsetParams()
-    memsetParams.dst = result_d
-    memsetParams.value = 0
-    memsetParams.elementSize = np.dtype(np.float32).itemsize
-    memsetParams.width = 2
-    memsetParams.height = 1
-    memsetNode = checkCudaErrors(cudart.cudaGraphAddMemsetNode(graph, None, 0, memsetParams))
-
-    nodeDependencies.append(memsetNode)
-
-    kernelNodeParams = cuda.CUDA_KERNEL_NODE_PARAMS()
-    kernelNodeParams.func = _reduceFinal
-    kernelNodeParams.gridDimX = kernelNodeParams.gridDimY = kernelNodeParams.gridDimZ = 1
-    kernelNodeParams.blockDimX = THREADS_PER_BLOCK
-    kernelNodeParams.blockDimY = kernelNodeParams.blockDimZ = 1
-    kernelNodeParams.sharedMemBytes = 0
-    kernelArgs2 = ((outputVec_d, result_d, numOfBlocks),
-                   (ctypes.c_void_p, ctypes.c_void_p, ctypes.c_uint))
-    kernelNodeParams.kernelParams = kernelArgs2
-    # kernelNodeParams.extra = None
-
-    kernelNode = checkCudaErrors(cuda.cuGraphAddKernelNode(graph, nodeDependencies, len(nodeDependencies), kernelNodeParams))
-
-    nodeDependencies.clear()
-    nodeDependencies.append(kernelNode)
-
-    memcpyParams = cudart.cudaMemcpy3DParms()
-
-    memcpyParams.srcArray = None
-    memcpyParams.srcPos = cudart.make_cudaPos(0, 0, 0)
-    memcpyParams.srcPtr = cudart.make_cudaPitchedPtr(result_d, np.dtype(np.float64).itemsize, 1, 1)
-    memcpyParams.dstArray = None
-    memcpyParams.dstPos = cudart.make_cudaPos(0, 0, 0)
-    memcpyParams.dstPtr = cudart.make_cudaPitchedPtr(result_h, np.dtype(np.float64).itemsize, 1, 1)
-    memcpyParams.extent = cudart.make_cudaExtent(np.dtype(np.float64).itemsize, 1, 1)
-    memcpyParams.kind = cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    memcpyNode = checkCudaErrors(cudart.cudaGraphAddMemcpyNode(graph, nodeDependencies, len(nodeDependencies), memcpyParams))
-
-    nodeDependencies.clear()
-    nodeDependencies.append(memcpyNode)
-
-    # WIP: Host nodes
-
-    nodes, numNodes = checkCudaErrors(cudart.cudaGraphGetNodes(graph))
-    print("\nNum of nodes in the graph created manually = {}".format(numNodes))
-
-    graphExec = checkCudaErrors(cudart.cudaGraphInstantiate(graph, 0))
-
-    clonedGraph = checkCudaErrors(cudart.cudaGraphClone(graph))
-    clonedGraphExec = checkCudaErrors(cudart.cudaGraphInstantiate(clonedGraph, 0))
-
-    for i in range(GRAPH_LAUNCH_ITERATIONS):
-        checkCudaErrors(cudart.cudaGraphLaunch(graphExec, streamForGraph))
-
-    checkCudaErrors(cudart.cudaStreamSynchronize(streamForGraph))
-
-    print("Cloned Graph Output..")
-    for i in range(GRAPH_LAUNCH_ITERATIONS):
-        checkCudaErrors(cudart.cudaGraphLaunch(clonedGraphExec, streamForGraph))
-
-    checkCudaErrors(cudart.cudaStreamSynchronize(streamForGraph))
-
-    checkCudaErrors(cudart.cudaGraphExecDestroy(graphExec))
-    checkCudaErrors(cudart.cudaGraphExecDestroy(clonedGraphExec))
-    checkCudaErrors(cudart.cudaGraphDestroy(graph))
-    checkCudaErrors(cudart.cudaGraphDestroy(clonedGraph))
-    checkCudaErrors(cudart.cudaStreamDestroy(streamForGraph))
-
-def cudaGraphsUsingStreamCapture(inputVec_h, inputVec_d, outputVec_d, result_d, inputSize, numOfBlocks):
-    result_h = ctypes.c_double(0.0)
-
-    stream1 = checkCudaErrors(cudart.cudaStreamCreate())
-    stream2 = checkCudaErrors(cudart.cudaStreamCreate())
-    stream3 = checkCudaErrors(cudart.cudaStreamCreate())
-    streamForGraph = checkCudaErrors(cudart.cudaStreamCreate())
-
-    forkStreamEvent = checkCudaErrors(cudart.cudaEventCreate())
-    memsetEvent1 = checkCudaErrors(cudart.cudaEventCreate())
-    memsetEvent2 = checkCudaErrors(cudart.cudaEventCreate())
-
-    checkCudaErrors(cudart.cudaStreamBeginCapture(stream1, cudart.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal))
-
-    checkCudaErrors(cudart.cudaEventRecord(forkStreamEvent, stream1))
-    checkCudaErrors(cudart.cudaStreamWaitEvent(stream2, forkStreamEvent, 0))
-    checkCudaErrors(cudart.cudaStreamWaitEvent(stream3, forkStreamEvent, 0))
-
-    checkCudaErrors(cudart.cudaMemcpyAsync(inputVec_d, inputVec_h,
-                                           np.dtype(np.float32).itemsize * inputSize, cudart.cudaMemcpyKind.cudaMemcpyDefault,
-                                           stream1))
-
-    checkCudaErrors(cudart.cudaMemsetAsync(outputVec_d, 0, np.dtype(np.float64).itemsize * numOfBlocks, stream2))
-
-    checkCudaErrors(cudart.cudaEventRecord(memsetEvent1, stream2))
-
-    checkCudaErrors(cudart.cudaMemsetAsync(result_d, 0, np.dtype(np.float64).itemsize, stream3))
-    checkCudaErrors(cudart.cudaEventRecord(memsetEvent2, stream3))
-
-    checkCudaErrors(cudart.cudaStreamWaitEvent(stream1, memsetEvent1, 0))
-
-    kernelArgs = ((inputVec_d, outputVec_d, inputSize, numOfBlocks),
-                  (ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_uint))
-    checkCudaErrors(cuda.cuLaunchKernel(_reduce,
-                                        numOfBlocks, 1, 1,
-                                        THREADS_PER_BLOCK, 1, 1,
-                                        0, stream1,
-                                        kernelArgs, 0))
-
-    checkCudaErrors(cudart.cudaStreamWaitEvent(stream1, memsetEvent2, 0))
-
-    kernelArgs2 = ((outputVec_d, result_d, numOfBlocks),
-                   (ctypes.c_void_p, ctypes.c_void_p, ctypes.c_uint))
-    checkCudaErrors(cuda.cuLaunchKernel(_reduceFinal,
-                                        1, 1, 1,
-                                        THREADS_PER_BLOCK, 1, 1,
-                                        0, stream1,
-                                        kernelArgs2, 0))
-
-    checkCudaErrors(cudart.cudaMemcpyAsync(result_h, result_d, np.dtype(np.float64).itemsize,
-                                           cudart.cudaMemcpyKind.cudaMemcpyDefault, stream1))
-
-    # WIP: Host nodes
-
-    graph = checkCudaErrors(cudart.cudaStreamEndCapture(stream1))
-
-    nodes, numNodes = checkCudaErrors(cudart.cudaGraphGetNodes(graph))
-    print("\nNum of nodes in the graph created using stream capture API = {}".format(numNodes))
-
-    graphExec = checkCudaErrors(cudart.cudaGraphInstantiate(graph, 0))
-
-    clonedGraph = checkCudaErrors(cudart.cudaGraphClone(graph))
-    clonedGraphExec = checkCudaErrors(cudart.cudaGraphInstantiate(clonedGraph, 0))
-
-    for i in range(GRAPH_LAUNCH_ITERATIONS):
-        checkCudaErrors(cudart.cudaGraphLaunch(graphExec, streamForGraph))
-
-    checkCudaErrors(cudart.cudaStreamSynchronize(streamForGraph))
-
-    print("Cloned Graph Output..")
-    for i in range(GRAPH_LAUNCH_ITERATIONS):
-        checkCudaErrors(cudart.cudaGraphLaunch(clonedGraphExec, streamForGraph))
-
-    checkCudaErrors(cudart.cudaStreamSynchronize(streamForGraph))
-
-    checkCudaErrors(cudart.cudaGraphExecDestroy(graphExec))
-    checkCudaErrors(cudart.cudaGraphExecDestroy(clonedGraphExec))
-    checkCudaErrors(cudart.cudaGraphDestroy(graph))
-    checkCudaErrors(cudart.cudaGraphDestroy(clonedGraph))
-    checkCudaErrors(cudart.cudaStreamDestroy(stream1))
-    checkCudaErrors(cudart.cudaStreamDestroy(stream2))
-    checkCudaErrors(cudart.cudaStreamDestroy(streamForGraph))
-
-def checkKernelCompiles():
-    kernel_headers = '''\
-    #include <cooperative_groups.h>
-    '''
-    try:
-        common.KernelHelper(kernel_headers, findCudaDevice())
-    except:
-        # Filters out test from automation when CG header has issues compiling
-        # Automation issue is observed when CG headers are obtained through PYPI packages
-        # The problem is that these headers and their dependencies are segmented between
-        # multiple packages, and NVRTC requires that you specify the path to each segemented
-        # include path.
-        return False
-    return True
-
-@pytest.mark.skipif(not checkKernelCompiles(), reason="Automation filter against incompatible kernel")
-def main():
-    size = 1 << 24 # number of elements to reduce
-    maxBlocks = 512
-
-    # This will pick the best possible CUDA capable device
-    devID = findCudaDevice()
-
-    global _reduce
-    global _reduceFinal
-    kernelHelper = common.KernelHelper(simpleCudaGraphs, devID)
-    _reduce = kernelHelper.getFunction(b'reduce')
-    _reduceFinal = kernelHelper.getFunction(b'reduceFinal')
-
-    print("{} elements".format(size))
-    print("threads per block  = {}".format(THREADS_PER_BLOCK))
-    print("Graph Launch iterations = {}".format(GRAPH_LAUNCH_ITERATIONS))
-
-    inputVec_h = checkCudaErrors(cudart.cudaMallocHost(size * np.dtype(np.float32).itemsize))
-    inputVec_d = checkCudaErrors(cudart.cudaMalloc(size * np.dtype(np.float32).itemsize))
-    outputVec_d = checkCudaErrors(cudart.cudaMalloc(maxBlocks * np.dtype(np.float64).itemsize))
-    result_d = checkCudaErrors(cudart.cudaMalloc(np.dtype(np.float64).itemsize))
-
-    init_input(inputVec_h, size)
-
-    cudaGraphsManual(inputVec_h, inputVec_d, outputVec_d, result_d, size, maxBlocks)
-    cudaGraphsUsingStreamCapture(inputVec_h, inputVec_d, outputVec_d, result_d, size, maxBlocks)
-
-    checkCudaErrors(cudart.cudaFree(inputVec_d))
-    checkCudaErrors(cudart.cudaFree(outputVec_d))
-    checkCudaErrors(cudart.cudaFree(result_d))
-    checkCudaErrors(cudart.cudaFreeHost(inputVec_h))
-
-if __name__ == "__main__":
-    main()
diff --git a/cuda_bindings/examples/4_CUDA_Libraries/conjugateGradientMultiBlockCG_test.py b/cuda_bindings/examples/4_CUDA_Libraries/conjugateGradientMultiBlockCG_test.py
deleted file mode 100644
index 6f64066f..00000000
--- a/cuda_bindings/examples/4_CUDA_Libraries/conjugateGradientMultiBlockCG_test.py
+++ /dev/null
@@ -1,330 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import math
-import numpy as np
-import sys
-from cuda import cuda, cudart
-from common import common
-from common.helper_cuda import checkCudaErrors, findCudaDevice
-from random import random
-
-conjugateGradientMultiBlockCG = '''\
-#line __LINE__
-#include <cooperative_groups.h>
-#include <cooperative_groups/reduce.h>
-namespace cg = cooperative_groups;
-
-
-__device__ void gpuSpMV(int *I, int *J, float *val, int nnz, int num_rows,
-                        float alpha, float *inputVecX, float *outputVecY,
-                        cg::thread_block &cta, const cg::grid_group &grid) {
-  for (int i = grid.thread_rank(); i < num_rows; i += grid.size()) {
-    int row_elem = I[i];
-    int next_row_elem = I[i + 1];
-    int num_elems_this_row = next_row_elem - row_elem;
-
-    float output = 0.0;
-    for (int j = 0; j < num_elems_this_row; j++) {
-      // I or J or val arrays - can be put in shared memory
-      // as the access is random and reused in next calls of gpuSpMV function.
-      output += alpha * val[row_elem + j] * inputVecX[J[row_elem + j]];
-    }
-
-    outputVecY[i] = output;
-  }
-}
-
-__device__ void gpuSaxpy(float *x, float *y, float a, int size,
-                         const cg::grid_group &grid) {
-  for (int i = grid.thread_rank(); i < size; i += grid.size()) {
-    y[i] = a * x[i] + y[i];
-  }
-}
-
-__device__ void gpuDotProduct(float *vecA, float *vecB, double *result,
-                              int size, const cg::thread_block &cta,
-                              const cg::grid_group &grid) {
-  extern __shared__ double tmp[];
-
-  double temp_sum = 0.0;
-  for (int i = grid.thread_rank(); i < size; i += grid.size()) {
-    temp_sum += static_cast<double>(vecA[i] * vecB[i]);
-  }
-
-  cg::thread_block_tile<32> tile32 = cg::tiled_partition<32>(cta);
-
-  temp_sum = cg::reduce(tile32, temp_sum, cg::plus<double>());
-
-  if (tile32.thread_rank() == 0) {
-    tmp[tile32.meta_group_rank()] = temp_sum;
-  }
-
-  cg::sync(cta);
-
-  if (tile32.meta_group_rank() == 0) {
-     temp_sum = tile32.thread_rank() < tile32.meta_group_size() ? tmp[tile32.thread_rank()] : 0.0;
-     temp_sum = cg::reduce(tile32, temp_sum, cg::plus<double>());
-
-    if (tile32.thread_rank() == 0) {
-      atomicAdd(result, temp_sum);
-    }
-  }
-}
-
-__device__ void gpuCopyVector(float *srcA, float *destB, int size,
-                              const cg::grid_group &grid) {
-  for (int i = grid.thread_rank(); i < size; i += grid.size()) {
-    destB[i] = srcA[i];
-  }
-}
-
-__device__ void gpuScaleVectorAndSaxpy(const float *x, float *y, float a, float scale, int size,
-                         const cg::grid_group &grid) {
-  for (int i = grid.thread_rank(); i < size; i += grid.size()) {
-    y[i] = a * x[i] + scale * y[i];
-  }
-}
-
-extern "C" __global__ void gpuConjugateGradient(int *I, int *J, float *val,
-                                                float *x, float *Ax, float *p,
-                                                float *r, double *dot_result,
-                                                int nnz, int N, float tol) {
-  cg::thread_block cta = cg::this_thread_block();
-  cg::grid_group grid = cg::this_grid();
-
-  int max_iter = 10000;
-
-  float alpha = 1.0;
-  float alpham1 = -1.0;
-  float r0 = 0.0, r1, b, a, na;
-
-  gpuSpMV(I, J, val, nnz, N, alpha, x, Ax, cta, grid);
-
-  cg::sync(grid);
-
-  gpuSaxpy(Ax, r, alpham1, N, grid);
-
-  cg::sync(grid);
-
-  gpuDotProduct(r, r, dot_result, N, cta, grid);
-
-  cg::sync(grid);
-
-  r1 = *dot_result;
-
-  int k = 1;
-  while (r1 > tol * tol && k <= max_iter) {
-    if (k > 1) {
-      b = r1 / r0;
-      gpuScaleVectorAndSaxpy(r, p, alpha, b, N, grid);
-    } else {
-      gpuCopyVector(r, p, N, grid);
-    }
-
-    cg::sync(grid);
-
-    gpuSpMV(I, J, val, nnz, N, alpha, p, Ax, cta, grid);
-
-    if (threadIdx.x == 0 && blockIdx.x == 0) *dot_result = 0.0;
-
-    cg::sync(grid);
-
-    gpuDotProduct(p, Ax, dot_result, N, cta, grid);
-
-    cg::sync(grid);
-
-    a = r1 / *dot_result;
-
-    gpuSaxpy(p, x, a, N, grid);
-    na = -a;
-    gpuSaxpy(Ax, r, na, N, grid);
-
-    r0 = r1;
-
-    cg::sync(grid);
-    if (threadIdx.x == 0 && blockIdx.x == 0) *dot_result = 0.0;
-
-    cg::sync(grid);
-
-    gpuDotProduct(r, r, dot_result, N, cta, grid);
-
-    cg::sync(grid);
-
-    r1 = *dot_result;
-    k++;
-  }
-}
-'''
-
-def genTridiag(I, J, val, N, nz):
-    I[0] = 0 
-    J[0] = 0
-    J[1]= 0 
-
-    val[0] = float(random()) + 10.0
-    val[1] = float(random())
-
-    for i in range(1, N):
-        if i > 1:
-            I[i] = I[i - 1] + 3
-        else:
-            I[1] = 2
-
-        start = (i - 1) * 3 + 2
-        J[start] = i - 1
-        J[start + 1] = i
-
-        if i < N - 1:
-            J[start + 2] = i + 1
-
-        val[start] = val[start - 1]
-        val[start + 1] = float(random()) + 10.0
-
-        if i < N - 1:
-            val[start + 2] = float(random())
-    I[N] = nz
-
-THREADS_PER_BLOCK = 512
-sSDKname = "conjugateGradientMultiBlockCG";
-def main():
-    tol = 1e-5
-
-    print("Starting [%s]...\n" % sSDKname);
-
-    # WAIVE: Due to bug in NVRTC
-    return
-
-    # This will pick the best possible CUDA capable device
-    devID = findCudaDevice()
-    deviceProp = checkCudaErrors(cudart.cudaGetDeviceProperties(devID))
-
-    if not deviceProp.managedMemory:
-        # This sample requires being run on a device that supports Unified Memory
-        print("Unified Memory not supported on this device")
-        return
-
-    # This sample requires being run on a device that supports Cooperative Kernel
-    # Launch
-    if not deviceProp.cooperativeLaunch:
-        print("\nSelected GPU (%d) does not support Cooperative Kernel Launch, Waiving the run" %
-                (devID))
-        return
-
-    # Statistics about the GPU device
-    print("> GPU device has %d Multi-Processors, SM %d.%d compute capabilities\n" % 
-            (deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor))
-
-    # Get kernel
-    kernelHelper = common.KernelHelper(conjugateGradientMultiBlockCG, devID)
-    _gpuConjugateGradient = kernelHelper.getFunction(b'gpuConjugateGradient')
-
-    # Generate a random tridiagonal symmetric matrix in CSR format
-    N = 1048576
-    nz = (N - 2) * 3 + 4
-
-    I = checkCudaErrors(cudart.cudaMallocManaged(np.dtype(np.int32).itemsize * (N+1), cudart.cudaMemAttachGlobal))
-    J = checkCudaErrors(cudart.cudaMallocManaged(np.dtype(np.int32).itemsize * nz, cudart.cudaMemAttachGlobal))
-    val = checkCudaErrors(cudart.cudaMallocManaged(np.dtype(np.float32).itemsize * nz, cudart.cudaMemAttachGlobal))
-    I_local = (ctypes.c_int * (N + 1)).from_address(I)
-    J_local = (ctypes.c_int * nz).from_address(J)
-    val_local = (ctypes.c_float * nz).from_address(val)
-
-    genTridiag(I_local, J_local, val_local, N, nz)
-
-    x = checkCudaErrors(cudart.cudaMallocManaged(np.dtype(np.float32).itemsize * N, cudart.cudaMemAttachGlobal))
-    rhs = checkCudaErrors(cudart.cudaMallocManaged(np.dtype(np.float32).itemsize * N, cudart.cudaMemAttachGlobal))
-    dot_result = checkCudaErrors(cudart.cudaMallocManaged(np.dtype(np.float64).itemsize, cudart.cudaMemAttachGlobal))
-    x_local = (ctypes.c_float * N).from_address(x)
-    rhs_local = (ctypes.c_float * N).from_address(rhs)
-    dot_result_local = (ctypes.c_double).from_address(dot_result)
-    dot_result_local = 0
-
-    # temp memory for CG
-    r = checkCudaErrors(cudart.cudaMallocManaged(np.dtype(np.float32).itemsize * N, cudart.cudaMemAttachGlobal))
-    p = checkCudaErrors(cudart.cudaMallocManaged(np.dtype(np.float32).itemsize * N, cudart.cudaMemAttachGlobal))
-    Ax = checkCudaErrors(cudart.cudaMallocManaged(np.dtype(np.float32).itemsize * N, cudart.cudaMemAttachGlobal))
-    r_local = (ctypes.c_float * N).from_address(r)
-    p_local = (ctypes.c_float * N).from_address(p)
-    Ax_local = (ctypes.c_float * N).from_address(Ax)
-
-    checkCudaErrors(cudart.cudaDeviceSynchronize())
-
-    start = checkCudaErrors(cudart.cudaEventCreate())
-    stop = checkCudaErrors(cudart.cudaEventCreate())
-
-    for i in range(N):
-        r_local[i] = rhs_local[i] = 1.0
-        x_local[i] = 0.0
-
-    kernelArgs_value = (I, J, val, x,
-                        Ax, p, r, dot_result,
-                        nz, N, tol)
-    kernelArgs_types = (ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
-                        ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_void_p,
-                        ctypes.c_int, ctypes.c_int, ctypes.c_float)
-    kernelArgs = (kernelArgs_value, kernelArgs_types)
-
-    sMemSize = np.dtype(np.float64).itemsize * ((THREADS_PER_BLOCK/32) + 1)
-    numThreads = THREADS_PER_BLOCK
-    numBlocksPerSm = checkCudaErrors(cuda.cuOccupancyMaxActiveBlocksPerMultiprocessor(
-                                        _gpuConjugateGradient, numThreads, sMemSize))
-    numSms = deviceProp.multiProcessorCount
-    dimGrid = cudart.dim3()
-    dimGrid.x = numSms * numBlocksPerSm
-    dimGrid.y = 1
-    dimGrid.z = 1
-    dimBlock = cudart.dim3()
-    dimBlock.x = THREADS_PER_BLOCK
-    dimBlock.y = 1
-    dimBlock.z = 1
-
-    checkCudaErrors(cudart.cudaEventRecord(start, 0))
-    checkCudaErrors(cuda.cuLaunchCooperativeKernel(_gpuConjugateGradient,
-                                                   dimGrid.x, dimGrid.y, dimGrid.z,
-                                                   dimBlock.x, dimBlock.y, dimBlock.z,
-                                                   0, 0,
-                                                   kernelArgs))
-    checkCudaErrors(cudart.cudaEventRecord(stop, 0))
-    checkCudaErrors(cudart.cudaDeviceSynchronize())
-
-    time = checkCudaErrors(cudart.cudaEventElapsedTime(start, stop));
-
-    print("GPU Final, residual = %e, kernel execution time = %f ms" % 
-           (math.sqrt(dot_result_local), time))
-
-    err = 0.0
-    for i in range(N):
-        rsum = 0.0
-
-        for j in range(I_local[i], I_local[i+1]):
-            rsum += val_local[j] * x_local[J_local[j]]
-
-        diff = math.fabs(rsum - rhs_local[i])
-
-        if diff > err:
-            err = diff
-
-    checkCudaErrors(cudart.cudaFree(I))
-    checkCudaErrors(cudart.cudaFree(J))
-    checkCudaErrors(cudart.cudaFree(val))
-    checkCudaErrors(cudart.cudaFree(x))
-    checkCudaErrors(cudart.cudaFree(rhs))
-    checkCudaErrors(cudart.cudaFree(r))
-    checkCudaErrors(cudart.cudaFree(p))
-    checkCudaErrors(cudart.cudaFree(Ax))
-    checkCudaErrors(cudart.cudaFree(dot_result))
-    checkCudaErrors(cudart.cudaEventDestroy(start))
-    checkCudaErrors(cudart.cudaEventDestroy(stop))
-
-    print("Test Summary:  Error amount = %f" % err)
-    print("&&&& conjugateGradientMultiBlockCG %s\n" %
-          ("PASSED" if math.sqrt(dot_result_local) < tol else "FAILED"))
-
-    if math.sqrt(dot_result_local) >= tol:
-        sys.exit(-1)
diff --git a/cuda_bindings/examples/common/common.py b/cuda_bindings/examples/common/common.py
deleted file mode 100644
index c24322f9..00000000
--- a/cuda_bindings/examples/common/common.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import numpy as np
-import os
-from cuda import cuda, cudart, nvrtc
-from common.helper_cuda import checkCudaErrors
-
-class KernelHelper:
-    def __init__(self, code, devID):
-        prog = checkCudaErrors(nvrtc.nvrtcCreateProgram(str.encode(code), b'sourceCode.cu', 0, None, None))
-        CUDA_HOME = os.getenv('CUDA_HOME')
-        if CUDA_HOME == None:
-            CUDA_HOME = os.getenv('CUDA_PATH')
-        if CUDA_HOME == None:
-            raise RuntimeError('Environment variable CUDA_HOME or CUDA_PATH is not set')
-        include_dirs = os.path.join(CUDA_HOME, 'include')
-
-        # Initialize CUDA
-        checkCudaErrors(cudart.cudaFree(0))
-
-        major = checkCudaErrors(cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, devID))
-        minor = checkCudaErrors(cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, devID))
-        _, nvrtc_minor = checkCudaErrors(nvrtc.nvrtcVersion())
-        use_cubin = (nvrtc_minor >= 1)
-        prefix = 'sm' if use_cubin else 'compute'
-        arch_arg = bytes(f'--gpu-architecture={prefix}_{major}{minor}', 'ascii')
-
-        try:
-            opts = [b'--fmad=true', arch_arg, '--include-path={}'.format(include_dirs).encode('UTF-8'),
-                    b'--std=c++11', b'-default-device']
-            checkCudaErrors(nvrtc.nvrtcCompileProgram(prog, len(opts), opts))
-        except RuntimeError as err:
-            logSize = checkCudaErrors(nvrtc.nvrtcGetProgramLogSize(prog))
-            log = b' ' * logSize
-            checkCudaErrors(nvrtc.nvrtcGetProgramLog(prog, log))
-            print(log.decode())
-            print(err)
-            exit(-1)
-
-        if use_cubin:
-            dataSize = checkCudaErrors(nvrtc.nvrtcGetCUBINSize(prog))
-            data = b' ' * dataSize
-            checkCudaErrors(nvrtc.nvrtcGetCUBIN(prog, data))
-        else:
-            dataSize = checkCudaErrors(nvrtc.nvrtcGetPTXSize(prog))
-            data = b' ' * dataSize
-            checkCudaErrors(nvrtc.nvrtcGetPTX(prog, data))
-
-        self.module = checkCudaErrors(cuda.cuModuleLoadData(np.char.array(data)))
-
-    def getFunction(self, name):
-        return checkCudaErrors(cuda.cuModuleGetFunction(self.module, name))
diff --git a/cuda_bindings/examples/common/helper_cuda.py b/cuda_bindings/examples/common/helper_cuda.py
deleted file mode 100644
index cbd0d2da..00000000
--- a/cuda_bindings/examples/common/helper_cuda.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from cuda import cuda, cudart, nvrtc
-from common.helper_string import getCmdLineArgumentInt, checkCmdLineFlag
-
-def _cudaGetErrorEnum(error):
-    if isinstance(error, cuda.CUresult):
-        err, name = cuda.cuGetErrorName(error)
-        return name if err == cuda.CUresult.CUDA_SUCCESS else "<unknown>"
-    elif isinstance(error, cudart.cudaError_t):
-        return cudart.cudaGetErrorName(error)[1]
-    elif isinstance(error, nvrtc.nvrtcResult):
-        return nvrtc.nvrtcGetErrorString(error)[1]
-    else:
-        raise RuntimeError('Unknown error type: {}'.format(error))
-
-def checkCudaErrors(result):
-    if result[0].value:
-        raise RuntimeError("CUDA error code={}({})".format(result[0].value, _cudaGetErrorEnum(result[0])))
-    if len(result) == 1:
-        return None
-    elif len(result) == 2:
-        return result[1]
-    else:
-        return result[1:]
-
-def findCudaDevice():
-    devID = 0
-    if checkCmdLineFlag("device="):
-        devID = getCmdLineArgumentInt("device=")
-    checkCudaErrors(cudart.cudaSetDevice(devID))
-    return devID
-
-def findCudaDeviceDRV():
-    devID = 0
-    if checkCmdLineFlag("device="):
-        devID = getCmdLineArgumentInt("device=")
-    checkCudaErrors(cuda.cuInit(0))
-    cuDevice = checkCudaErrors(cuda.cuDeviceGet(devID))
-    return cuDevice
diff --git a/cuda_bindings/examples/common/helper_string.py b/cuda_bindings/examples/common/helper_string.py
deleted file mode 100644
index 1e0d65f1..00000000
--- a/cuda_bindings/examples/common/helper_string.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import sys
-
-def checkCmdLineFlag(stringRef):
-    k = 0
-    for i in sys.argv:
-        if stringRef == i and k < len(sys.argv) - 1:
-           return True
-        k += 1
-    return False
-
-def getCmdLineArgumentInt(stringRef):
-    k = 0
-    for i in sys.argv:
-        if stringRef == i and k < len(sys.argv) - 1:
-           return sys.argv[k+1]
-        k += 1
-    return 0
diff --git a/cuda_bindings/examples/extra/isoFDModelling_test.py b/cuda_bindings/examples/extra/isoFDModelling_test.py
deleted file mode 100644
index dd478182..00000000
--- a/cuda_bindings/examples/extra/isoFDModelling_test.py
+++ /dev/null
@@ -1,664 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import numpy as np
-import time
-from cuda import cuda, cudart
-from common import common
-from common.helper_cuda import checkCudaErrors
-
-isoPropagator = '''\
-extern "C"
-__global__ void injectSource(float *__restrict__ in, float *__restrict__ src, int it)
-{
-    if (threadIdx.x == 0)
-        in[0] = src[it];
-}
-
-extern "C"
-__global__ void createVelocity(float *__restrict__ vel, float vmult,  int nz,  int nx, int stride)
-{
-  int ix = blockIdx.x * blockDim.x + threadIdx.x;
-  int iy = blockIdx.y * blockDim.y + threadIdx.y;
-
-  int idx_out = iy * nx + ix;
-  for (int iz = 0; iz < nz ; iz++) {
-        vel[idx_out] = 3.0f * 3.0f * vmult;
-        idx_out += stride;
-    }
-}
-
-extern "C"
-__global__ void createSource(float *__restrict__ x, float dt, float freq, int nt)
-{
-    int istart = (int) (60.0f/dt); // start max at 30 ms
-    float pi2 = 2.0f * 3.141592654f;
-    float agauss = 0.5f * freq;
-
-    for ( int i=threadIdx.x; i < nt; ++ i) {
-        float arg = 1.0e-3 * fabsf(i - istart) * agauss;
-        x[i] = 1000.0f * expf(-2.0f * arg * arg) * cosf(pi2 * arg);
-    }
-}
-
-extern "C"
-__global__ void fwd_3D_orderX2k(float *g_curr_1, float *g_prev_1, float *g_vsq_1,
-                                int nz,  int dimx, int stride);
-
-#define radius 4
-#define diameter (2*radius+1)
-#define BDIMX 32
-#define BDIMY 16
-
-inline __device__ void advance(float2 *field, const int num_points) {
-    #pragma unroll
-    for (int i = 0; i < num_points; i++)
-        field[i] = field[i + 1];
-}
-
-__global__ void fwd_3D_orderX2k(float *g_curr_1, float *g_prev_1, float *g_vsq_1,
-                                int nz,  int nx, int stride) {
-    stride = stride / 2;
-    nx = nx / 2;
-    const float c_coeff[5]  = {-3.0f * 2.847222222f,
-                                1.600000f,
-                               -0.200000f,
-                                0.025396825f,
-                               -0.001785f};
-
-    float2 *g_prev = (float2 *)g_prev_1;
-    float2 *g_curr = (float2 *)g_curr_1;
-    float2 *g_vsq = (float2 *)g_vsq_1;
-    __shared__ float s_data[BDIMY + 2 * radius][2 * BDIMX + 2 * (radius + (radius % 2))];
-
-    int ix = blockIdx.x * blockDim.x + threadIdx.x;
-    int iy = blockIdx.y * blockDim.y + threadIdx.y;
-
-    int offset = -radius * stride;
-
-    int idx_out = iy * nx + ix;
-    int idx_in = idx_out + offset;
-
-    float2 local_input[diameter], tmp1, tmp2;
-
-    int tx = 2 * threadIdx.x + radius + (radius % 2);
-    int ty = threadIdx.y + radius;
-
-    #pragma unroll
-    for (int i = 1; i < diameter; i++) {
-        local_input[i] = g_curr[idx_in];
-        idx_in += stride;
-    }
-
-    for (int iz = 0; iz < nz ; iz++) {
-        advance(local_input, diameter - 1);
-        local_input[diameter - 1] = g_curr[idx_in];
-
-        // update the data slice in smem
-        s_data[ty][tx] = local_input[radius].x;
-        s_data[ty][tx + 1] = local_input[radius].y;
-
-        // halo above/below
-        if (threadIdx.y < radius) {
-            tmp1 = (g_curr[idx_out - radius * nx]);
-            s_data[threadIdx.y][tx] = tmp1.x;
-            s_data[threadIdx.y][tx + 1] = tmp1.y;
-        }
-
-        if (threadIdx.y >= radius && threadIdx.y < 2 * radius) {
-            tmp1 = (g_curr[idx_out + (BDIMY - radius) * nx]);
-            s_data[threadIdx.y + BDIMY][tx] = tmp1.x;
-            s_data[threadIdx.y + BDIMY][tx + 1] = tmp1.y;
-        }
-
-        // halo left/right
-        if (threadIdx.x < (radius + 1) / 2) {
-            tmp1 = (g_curr[idx_out - (radius + 1) / 2]);
-            s_data[ty][tx - radius - (radius % 2)] = tmp1.x;
-            s_data[ty][tx - radius - (radius % 2) + 1] = tmp1.y;
-
-            tmp2 = (g_curr[idx_out + BDIMX]);
-            s_data[ty][tx + 2 * BDIMX] = tmp2.x;
-            s_data[ty][tx + 2 * BDIMX + 1] = tmp2.y;
-        }
-        __syncthreads();
-
-        // compute the output values
-        float2 temp, div;
-
-        temp.x = 2.f * local_input[radius].x -  g_prev[idx_out].x;
-        temp.y = 2.f * local_input[radius].y -  g_prev[idx_out].y;
-
-        div.x = c_coeff[0] * local_input[radius].x;
-        div.y = c_coeff[0] * local_input[radius].y;
-
-        #pragma unroll
-        for (int d = 1; d <= radius; d++) {
-            div.x += c_coeff[d] * (local_input[radius + d].x + local_input[radius - d].x + s_data[ty - d][tx] +
-                                   s_data[ty + d][tx] + s_data[ty][tx - d] + s_data[ty][tx + d]);
-            div.y += c_coeff[d] * (local_input[radius + d].y + local_input[radius - d].y + s_data[ty - d][tx + 1] +
-                                   s_data[ty + d][tx + 1] + s_data[ty][tx - d + 1] + s_data[ty][tx + d + 1]);
-        }
-
-        g_prev[idx_out].x =  temp.x + div.x * g_vsq[idx_out].x;
-        g_prev[idx_out].y =  temp.y + div.y * g_vsq[idx_out].y;
-
-        __syncthreads();
-
-        idx_out += stride;
-        idx_in += stride;
-    }
-}
-'''
-
-display_graph = False
-verbose_prints = False
-
-def align_nx(nx, blk, nops):
-    n_align = (int)((nx - 1)/blk) + 1
-    n_align *= blk
-    n_align += 2*nops
-    n_align = (int)((n_align - 1) / 64) + 1
-    n_align *= 64
-    return (int)(n_align)
-
-def align_ny(ny, blk, nops):
-    n_align = (int)((ny - 1)/blk) + 1
-    n_align *= blk
-    n_align += 2*nops
-    return (int)(n_align)
-
-#
-# this class contains the input params
-#
-class params ():
-    def __init__(self):
-        self.BDIMX = 32 # tiles x y for fd operators
-        self.BDIMY = 16
-        self.FD_ORDER = 4
-        self.lead = 64 -  self.FD_ORDER
-        self.nx = align_nx(700, 2*self.BDIMX, self.FD_ORDER)
-        self.ny = align_ny(600, self.BDIMY, self.FD_ORDER)
-        self.blkx = (int) ((self.nx - 2*self.FD_ORDER) / (2*self.BDIMX))
-        self.blky = (int) ((self.ny - 2*self.FD_ORDER) / self.BDIMY)
-
-        self.nz = (int)(200)
-        self.delta = 25.0
-        self.dt = 0.3 * 1000.0 * self.delta  / 4500.0
-        self.tmax_propag = 1000.0
-        self.nt = int(self.tmax_propag / self.dt)
-        self.freqMax = 3.5* 1000.0  / (4.0 * self.delta)
-        print("dt= ",self.dt, " delta= ", self.delta, " nt= ", self.nt, " freq max= " , self.freqMax)
-
-#
-# this class contains all the kernels to be used bu propagator
-#
-class cudaKernels():
-    def __init__ (self, cntx):
-        checkCudaErrors(cuda.cuInit(0))
-        checkCudaErrors(cuda.cuCtxSetCurrent(cntx))
-        dev = checkCudaErrors(cuda.cuCtxGetDevice())
-
-        self.kernelHelper = common.KernelHelper(isoPropagator, int(dev))
-
-        # kernel to create a source fnction with some max frequency
-        self.creatSource = self.kernelHelper.getFunction(b'createSource')
-        # create a velocity to try things: just a sphere on the middle 4500 m/s and 2500 m/s all around
-        self.createVelocity = self.kernelHelper.getFunction(b'createVelocity')
-
-        # kernel to propagate the wavefield by 1 step in time
-        self.fdPropag = self.kernelHelper.getFunction(b'fwd_3D_orderX2k')
-
-        # kernel to propagate the wavefield by 1 step in time
-        self.injectSource = self.kernelHelper.getFunction(b'injectSource')
-
-#
-# this class contains: propagator, source creation, velocity creation
-# injection of data and domain exchange
-#
-class propagator:
-    def __init__(self, params, _dev):
-        print("init object for device ", _dev)
-        self.dev = _dev
-
-        checkCudaErrors(cuda.cuInit(0))
-        self.cuDevice = checkCudaErrors(cuda.cuDeviceGet(_dev))
-        self.context = checkCudaErrors(cuda.cuCtxCreate(0, self.cuDevice))
-        self.waveOut = 0
-        self.waveIn = 0
-        self.streamCenter = checkCudaErrors(cuda.cuStreamCreate(0))
-        self.streamHalo = checkCudaErrors(cuda.cuStreamCreate(0))
-        self.params = params
-
-    def __del__(self):
-        checkCudaErrors(cuda.cuCtxSetCurrent(self.context))
-        checkCudaErrors(cuda.cuStreamDestroy(self.streamHalo))
-        checkCudaErrors(cuda.cuStreamDestroy(self.streamCenter))
-        if self.waveIn != 0:
-            checkCudaErrors(cuda.cuMemFree(self.waveIn))
-        if self.waveOut != 0:
-            checkCudaErrors(cuda.cuMemFree(self.waveOut))
-        checkCudaErrors(cuda.cuCtxDestroy(self.context))
-
-    #
-    # swap waveIn with waveOut
-    #
-    def swap(self):
-        if verbose_prints:
-            print("swap in out ", int(self.waveIn), " " , int(self.waveOut))
-        i = int(self.waveIn)
-        j = int(self.waveOut)
-        a = i
-        i = j
-        j = a
-        self.waveIn = cuda.CUdeviceptr(i)
-        self.waveOut = cuda.CUdeviceptr(j)
-
-    #
-    # allocate the device memory
-    #
-    def allocate(self):
-        nel = self.params.nx * self.params.ny  *  self.params.nz
-        n = np.array( nel, dtype=np.uint32)
-
-        bufferSize = n * np.dtype(np.float32).itemsize
-        checkCudaErrors(cuda.cuCtxSetCurrent(self.context))
-
-        self.velocity = checkCudaErrors(cuda.cuMemAlloc(bufferSize))
-        checkCudaErrors(cuda.cuMemsetD32(self.velocity, 0, n))
-
-        nel += self.params.lead
-        n = np.array(nel, dtype=np.uint32) ## we need to align at the beginning of the tile
-
-        bufferSize = n * np.dtype(np.float32).itemsize
-        self.waveIn = checkCudaErrors(cuda.cuMemAlloc(bufferSize))
-        checkCudaErrors(cuda.cuMemsetD32(self.waveIn, 0, n))
-
-        self.waveOut = checkCudaErrors(cuda.cuMemAlloc(bufferSize))
-        checkCudaErrors(cuda.cuMemsetD32(self.waveOut, 0, n))
-
-        n = np.array(self.params.nt, dtype=np.uint32)
-        bufferSize = n * np.dtype(np.float32).itemsize
-        self.source = checkCudaErrors(cuda.cuMemAlloc(bufferSize))
-        checkCudaErrors(cuda.cuMemsetD32(self.source, 0, n))
-
-    #
-    # create source data
-    #
-    def createSource(self, kernel):
-        print("creating source on device ", self.dev)
-
-        buf = np.array([int(self.source)], dtype=np.uint64)
-        nt = np.array(self.params.nt, dtype=np.uint32)
-        dt = np.array(self.params.dt,  dtype=np.float32)
-        freq = np.array(self.params.freqMax, dtype=np.float32)
-
-        args = [buf, dt, freq, nt]
-        args = np.array([arg.ctypes.data for arg in args], dtype=np.uint64)
-        checkCudaErrors(cuda.cuCtxSetCurrent(self.context))
-        checkCudaErrors(cuda.cuLaunchKernel(kernel.creatSource,
-                                1, 1, 1,                        # grid dim
-                                1024, 1, 1,                     # block dim
-                                0, self.streamHalo,             # shared mem and stream
-                                args.ctypes.data, 0))     # arguments
-        checkCudaErrors(cuda.cuStreamSynchronize(self.streamHalo))
-
-    #
-    # inject source function: ony on the domain 0
-    #
-    def injectSource(self, kernel, iter):
-        checkCudaErrors(cuda.cuCtxSetCurrent(self.context))
-
-        if self.dev != 0:
-           return
-
-        wavein = np.array([int(self.waveIn)], dtype=np.uint64)
-        src = np.array([int(self.source)], dtype=np.uint64)
-        offset_sourceInject = self.params.lead + (int)(self.params.nz/2) * self.params.nx * self.params.ny + \
-                              (int)(self.params.ny/2)  * self.params.nx +  (int) (self.params.nx/2)
-        offset_sourceInject *= np.dtype(np.float32).itemsize
-
-        np_it = np.array(iter, dtype=np.uint32)
-
-        args = [wavein+offset_sourceInject, src, np_it]
-        args = np.array([arg.ctypes.data for arg in args], dtype=np.uint64)
-        checkCudaErrors(cuda.cuLaunchKernel(kernel.injectSource,
-                                1, 1, 1,                        # grid dim
-                                1, 1, 1,                        # block dim
-                                0, self.streamHalo,             # shared mem and stream
-                                args.ctypes.data, 0))     # arguments
-
-    #
-    # create velocity
-    #
-    def createVelocity(self, kernel):
-        print("running create velocity on device ", self.dev)
-
-        offset_velocity = self.params.FD_ORDER * self.params.nx * self.params.ny + \
-                          self.params.FD_ORDER * self.params.nx + self.params.FD_ORDER
-        offset_velocity *= np.dtype(np.float32).itemsize
-
-        vel = np.array([int(self.velocity)], dtype=np.uint64)
-        dx_dt2 = (self.params.dt * self.params.dt) / (self.params.delta * self.params.delta)
-
-        stride = self.params.nx * self.params.ny
-        np_dx_dt2 = np.array(dx_dt2, dtype=np.float32)
-        np_nz = np.array((self.params.nz-2*self.params.FD_ORDER), dtype=np.uint32)
-        np_nx = np.array(self.params.nx, dtype=np.uint32)
-        np_stride = np.array(stride, dtype=np.uint32)
-
-        args = [vel+  offset_velocity, np_dx_dt2, np_nz, np_nx, np_stride]
-        args = np.array([arg.ctypes.data for arg in args], dtype=np.uint64)
-
-        checkCudaErrors(cuda.cuCtxSetCurrent(self.context))
-
-        # do halo up
-        checkCudaErrors(cuda.cuLaunchKernel(kernel.createVelocity,
-                                            self.params.blkx, self.params.blky, 1,     # grid dim
-                                            2*self.params.BDIMX, self.params.BDIMY, 1, # block dim
-                                            0, self.streamHalo,                        # shared mem and stream
-                                            args.ctypes.data, 0))                # arguments
-        checkCudaErrors(cuda.cuStreamSynchronize(self.streamHalo))
-
-    #
-    # execute the center part of propagation
-    #
-    def executeCenter(self,  kernel):
-        if verbose_prints:
-            print("running center on device ", self.dev)
-        checkCudaErrors(cuda.cuCtxSetCurrent(self.context))
-        offset_velocity = 2* self.params.FD_ORDER * self.params.nx * self.params.ny + \
-                             self.params.FD_ORDER * self.params.nx + self.params.FD_ORDER
-
-        offset_wave = self.params.lead + offset_velocity
-
-        offset_wave *= np.dtype(np.float32).itemsize
-        offset_velocity *= np.dtype(np.float32).itemsize
-
-        wavein = np.array([int(self.waveIn)], dtype=np.uint64)
-        waveout = np.array([int(self.waveOut)], dtype=np.uint64)
-
-        vel = np.array([int(self.velocity)], dtype=np.uint64)
-        stride = self.params.nx * self.params.ny
-        np_nz = np.array(self.params.nz - 4*self.params.FD_ORDER, dtype=np.uint32)
-        np_nx = np.array(self.params.nx, dtype=np.uint32)
-        np_stride = np.array(stride, dtype=np.uint32)
-
-        args = [wavein+offset_wave, waveout+offset_wave, vel+offset_velocity, np_nz, np_nx, np_stride]
-        args = np.array([arg.ctypes.data for arg in args], dtype=np.uint64)
-
-        # do center propagation from 2 * fd_order to nz - 2 * fd_order
-        checkCudaErrors(cuda.cuLaunchKernel(kernel.fdPropag,
-                                self.params.blkx, self.params.blky, 1,   # grid dim
-                                self.params.BDIMX, self.params.BDIMY, 1, # block dim
-                                0, self.streamCenter,                    # shared mem and stream
-                                args.ctypes.data, 0))              # arguments
-
-    #
-    # execute the halo part of propagation
-    #
-    def executeHalo(self, kernel):
-        if verbose_prints:
-            print("running halos on device ", self.dev)
-        checkCudaErrors(cuda.cuCtxSetCurrent(self.context))
-
-        offset_velocity = self.params.FD_ORDER * self.params.nx * self.params.ny + \
-                          self.params.FD_ORDER * self.params.nx + self.params.FD_ORDER
-
-        offset_wave = self.params.lead + offset_velocity
-
-        offset_wave *= np.dtype(np.float32).itemsize
-        offset_velocity *= np.dtype(np.float32).itemsize
-
-        wavein = np.array([int(self.waveIn)], dtype=np.uint64)
-        waveout = np.array([int(self.waveOut)], dtype=np.uint64)
-
-        vel = np.array([int(self.velocity)], dtype=np.uint64)
-        stride = self.params.nx * self.params.ny
-        np_nz = np.array(self.params.FD_ORDER, dtype=np.uint32)
-        np_nx = np.array(self.params.nx, dtype=np.uint32)
-        np_stride = np.array(stride, dtype=np.uint32)
-
-        args = [wavein+offset_wave, waveout+offset_wave, vel+offset_velocity, np_nz, np_nx, np_stride]
-        args = np.array([arg.ctypes.data for arg in args], dtype=np.uint64)
-
-        # do halo up
-        checkCudaErrors(cuda.cuLaunchKernel(kernel.fdPropag,
-                                self.params.blkx, self.params.blky, 1,   # grid dim
-                                self.params.BDIMX, self.params.BDIMY, 1, # block dim
-                                0, self.streamHalo,                      # shared mem and stream
-                                args.ctypes.data, 0))              # arguments
-
-        # do halo down
-        offset_velocity = (self.params.nz - 2*self.params.FD_ORDER) * self.params.nx * self.params.ny + \
-                           self.params.FD_ORDER * self.params.nx + self.params.FD_ORDER
-        offset_wave = self.params.lead + offset_velocity
-
-        offset_wave *= np.dtype(np.float32).itemsize
-        offset_velocity *= np.dtype(np.float32).itemsize
-
-        args = [wavein+offset_wave, waveout+offset_wave, vel+offset_velocity, np_nz, np_nx, np_stride]
-        args = np.array([arg.ctypes.data for arg in args], dtype=np.uint64)
-        checkCudaErrors(cuda.cuLaunchKernel(kernel.fdPropag,
-                                self.params.blkx, self.params.blky, 1,   # grid dim
-                                self.params.BDIMX, self.params.BDIMY, 1, # block dim
-                                0, self.streamHalo,                      # shared mem and stream
-                                args.ctypes.data, 0))              # arguments
-
-    #
-    # exchange the halos
-    #
-    def exchangeHalo(self, propag):
-        if verbose_prints:
-            print("exchange  halos on device ", self.dev, "with dev ", propag.dev)
-        checkCudaErrors(cuda.cuCtxSetCurrent(self.context))
-
-        #
-        # the following variables don't change
-        #
-        nstride  = self.params.nx * self.params.ny
-
-        devS = self.context
-        devD = propag.context
-
-        n_exch = self.params.FD_ORDER * nstride
-        n_exch *= np.dtype(np.float32).itemsize
-
-        if self.dev < propag.dev:
-            # exchange up
-            offsetS = self.params.lead + (self.params.nz - 2*self.params.FD_ORDER) * nstride
-            offsetD = propag.params.lead
-
-            offsetS *= np.dtype(np.float32).itemsize
-            offsetD *= np.dtype(np.float32).itemsize
-
-            waveD = cuda.CUdeviceptr(int(propag.waveOut) + offsetD)
-            waveS = cuda.CUdeviceptr(int(self.waveOut) + offsetS)
-
-            checkCudaErrors(cuda.cuMemcpyPeerAsync(waveD, devD, waveS, devS, n_exch, self.streamHalo))
-        else:
-            # exchange down
-            offsetS = self.params.lead  + self.params.FD_ORDER * nstride
-            offsetD = propag.params.lead  + (propag.params.nz - propag.params.FD_ORDER) * nstride
-
-            offsetS *= np.dtype(np.float32).itemsize
-            offsetD *= np.dtype(np.float32).itemsize
-
-            waveD = cuda.CUdeviceptr(int(propag.waveOut) + offsetD)
-            waveS = cuda.CUdeviceptr(int(self.waveOut) + offsetS)
-
-            checkCudaErrors(cuda.cuMemcpyPeerAsync(waveD, devD, waveS, devS, n_exch, self.streamHalo))
-
-    #
-    # sync stream
-    #
-    def syncStream(self, stream):
-        checkCudaErrors(cuda.cuCtxSetCurrent(self.context))
-        checkCudaErrors(cuda.cuStreamSynchronize(stream))
-
-def main():
-    checkCudaErrors(cuda.cuInit(0))
-
-    # Number of GPUs
-    print("Checking for multiple GPUs...")
-    gpu_n = checkCudaErrors(cuda.cuDeviceGetCount())
-    print("CUDA-capable device count: {}".format(gpu_n))
-
-    if gpu_n < 2:
-        print("Two or more GPUs with Peer-to-Peer access capability are required")
-        return
-
-    prop = [checkCudaErrors(cudart.cudaGetDeviceProperties(i)) for i in range(gpu_n)]
-    # Check possibility for peer access
-    print("\nChecking GPU(s) for support of peer to peer memory access...")
-
-    p2pCapableGPUs = [-1, -1]
-    for i in range(gpu_n):
-        p2pCapableGPUs[0] = i
-        for j in range(gpu_n):
-            if i == j:
-                continue
-            i_access_j = checkCudaErrors(cudart.cudaDeviceCanAccessPeer(i, j))
-            j_access_i = checkCudaErrors(cudart.cudaDeviceCanAccessPeer(j, i))
-            print("> Peer access from {} (GPU{}) -> {} (GPU{}) : {}\n".format(
-                    prop[i].name, i, prop[j].name, j, "Yes" if i_access_j else "No"))
-            print("> Peer access from {} (GPU{}) -> {} (GPU{}) : {}\n".format(
-                    prop[j].name, j, prop[i].name, i, "Yes" if i_access_j else "No"))
-            if i_access_j and j_access_i:
-                p2pCapableGPUs[1] = j
-                break
-        if p2pCapableGPUs[1] != -1:
-            break
-
-    if p2pCapableGPUs[0] == -1 or p2pCapableGPUs[1] == -1:
-        print("Two or more GPUs with Peer-to-Peer access capability are required.")
-        print("Peer to Peer access is not available amongst GPUs in the system, waiving test.")
-        return
-
-    # Use first pair of p2p capable GPUs detected
-    gpuid = [p2pCapableGPUs[0], p2pCapableGPUs[1]]
-
-
-    #
-    # init device
-    #
-    pars = params()
-
-    #
-    # create propagators
-    #
-    propags = []
-    kerns   = []
-
-    #
-    # create kernels and propagators that are going to be used on device
-    #
-    for i in gpuid:
-        p = propagator(pars, i)
-        k = cudaKernels(p.context)
-        propags.append(p)
-        kerns.append(k)
-
-    # allocate resources in device
-    for propag, kern in zip(propags, kerns):
-        propag.allocate()
-        propag.createSource(kern)
-        propag.createVelocity(kern)
-
-    #
-    # loop over time iterations
-    #
-    start = time.time()
-    for it in range(pars.nt):
-
-        for propag in propags:
-            propag.syncStream(propag.streamHalo)
-
-        for propag, kern in zip(propags, kerns):
-            propag.injectSource(kern, it)
-
-        for propag, kern in zip(propags, kerns):
-            propag.executeHalo(kern)
-
-        for propag in propags:
-            propag.syncStream(propag.streamHalo)
-
-        propags[1].exchangeHalo(propags[0])
-
-        propags[0].exchangeHalo(propags[1])
-
-        for propag, kern in zip(propags, kerns):
-            propag.executeCenter(kern)
-
-        for propag in propags:
-            propag.syncStream(propag.streamCenter)
-
-        for propag in propags:
-            propag.swap()
-
-    end = time.time()
-    npoints = (pars.nz - 2 * pars.FD_ORDER) * (pars.blkx * 2 * pars.BDIMX) * (pars.blky * pars.BDIMY)
-
-    nops = 1.0e-9 * pars.nt * npoints / (end - start)
-
-    print("this code generates " , nops , " GPoints/sec / device ")
-
-    #
-    # get the result out of gpu
-    #
-    nz = 2 * (int)(pars.nz - 2 * pars.FD_ORDER)
-    print(" nz= ", nz, " nx= ", pars.nx)
-    hOut = np.zeros((nz, pars.nx), dtype='float32')
-
-    istart = 0
-    for propag in propags:
-        checkCudaErrors(cuda.cuCtxSetCurrent(propag.context))
-        offset = pars.lead + pars.FD_ORDER * pars.nx * pars.ny + \
-                 (int)(pars.ny/2) * pars.nx
-
-        for j in range(pars.nz- 2*pars.FD_ORDER):
-            ptr = cuda.CUdeviceptr(int(propag.waveOut) + offset*4)
-
-            checkCudaErrors(cuda.cuMemcpyDtoH(hOut[istart].ctypes.data, ptr,
-                                              pars.nx * np.dtype(np.float32).itemsize))
-            offset += pars.nx * pars.ny
-            istart += 1
-
-    #
-    #  delete kernels and propagatrs
-    #
-    for propag in propags:
-        del propag
-
-    if display_graph:
-        nrows = nz
-        ncols = pars.nx
-        dbz = hOut
-        dbz = np.reshape(dbz,(nrows, ncols))
-
-        ##
-        ## those are to plot results
-        ##
-        import matplotlib.pyplot as plt
-        import matplotlib.cm as cm
-        fig, ax = plt.subplots()
-        title = "test fd kernels up to " + str(pars.tmax_propag) + " ms "
-        plt.title(title, fontsize=20)
-        im = ax.imshow(dbz, interpolation='bilinear', cmap=plt.get_cmap('Greys'), aspect='auto',
-                       origin='upper',extent=[1, pars.nx, nz, 1],
-                       vmax=abs(dbz).max(), vmin=-abs(dbz).max())
-
-        fig.colorbar(im, ax=ax)
-
-        plt.show()
-
-    print("Done")
-
-if __name__ == "__main__":
-    display_graph = True
-    verbose_prints = True
-    main()
diff --git a/cuda_bindings/examples/extra/jit_program_test.py b/cuda_bindings/examples/extra/jit_program_test.py
deleted file mode 100644
index e55b48ec..00000000
--- a/cuda_bindings/examples/extra/jit_program_test.py
+++ /dev/null
@@ -1,165 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import ctypes
-import numpy as np
-from cuda import cuda, nvrtc
-
-def ASSERT_DRV(err):
-    if isinstance(err, cuda.CUresult):
-        if err != cuda.CUresult.CUDA_SUCCESS:
-            raise RuntimeError('Cuda Error: {}'.format(err))
-    elif isinstance(err, nvrtc.nvrtcResult):
-        if err != nvrtc.nvrtcResult.NVRTC_SUCCESS:
-            raise RuntimeError('Nvrtc Error: {}'.format(err))
-    else:
-        raise RuntimeError('Unknown error type: {}'.format(err))
-
-saxpy = '''\
-extern "C" __global__
-void saxpy(float a, float *x, float *y, float *out, size_t n)
-{
-    size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid < n) {
-        out[tid] = a * x[tid] + y[tid];
-    }
-}
-'''
-
-def main():
-    # Init
-    err, = cuda.cuInit(0)
-    ASSERT_DRV(err)
-
-    # Device
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-
-    # Ctx
-    err, context = cuda.cuCtxCreate(0, cuDevice)
-    ASSERT_DRV(err)
-
-    # Create program
-    err, prog = nvrtc.nvrtcCreateProgram(str.encode(saxpy), b'saxpy.cu', 0, None, None)
-    ASSERT_DRV(err)
-
-    # Get target architecture
-    err, major = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice)
-    ASSERT_DRV(err)
-    err, minor = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice)
-    ASSERT_DRV(err)
-    err, nvrtc_major, nvrtc_minor = nvrtc.nvrtcVersion()
-    ASSERT_DRV(err)
-    use_cubin = (nvrtc_minor >= 1)
-    prefix = 'sm' if use_cubin else 'compute'
-    arch_arg = bytes(f'--gpu-architecture={prefix}_{major}{minor}', 'ascii')
-
-    # Compile program
-    opts = [b'--fmad=false', arch_arg]
-    err, = nvrtc.nvrtcCompileProgram(prog, len(opts), opts)
-    ASSERT_DRV(err)
-
-    # Get log from compilation
-    err, logSize = nvrtc.nvrtcGetProgramLogSize(prog)
-    ASSERT_DRV(err)
-    log = b' ' * logSize
-    err, = nvrtc.nvrtcGetProgramLog(prog, log)
-    ASSERT_DRV(err)
-    print(log.decode())
-
-    # Get data from compilation
-    if use_cubin:
-        err, dataSize = nvrtc.nvrtcGetCUBINSize(prog)
-        ASSERT_DRV(err)
-        data = b' ' * dataSize
-        err, = nvrtc.nvrtcGetCUBIN(prog, data)
-        ASSERT_DRV(err)
-    else:
-        err, dataSize = nvrtc.nvrtcGetPTXSize(prog)
-        ASSERT_DRV(err)
-        data = b' ' * dataSize
-        err, = nvrtc.nvrtcGetPTX(prog, data)
-        ASSERT_DRV(err)
-
-    # Load data as module data and retrieve function
-    data = np.char.array(data)
-    err, module = cuda.cuModuleLoadData(data)
-    ASSERT_DRV(err)
-    err, kernel = cuda.cuModuleGetFunction(module, b'saxpy')
-    ASSERT_DRV(err)
-
-    # Test the kernel
-    NUM_THREADS = 128
-    NUM_BLOCKS = 32
-
-    a = np.float32(2.0)
-    n = np.array(NUM_THREADS * NUM_BLOCKS, dtype=np.uint32)
-    bufferSize = n * a.itemsize
-
-    err, dX = cuda.cuMemAlloc(bufferSize)
-    ASSERT_DRV(err)
-    err, dY = cuda.cuMemAlloc(bufferSize)
-    ASSERT_DRV(err)
-    err, dOut = cuda.cuMemAlloc(bufferSize)
-    ASSERT_DRV(err)
-
-    hX = np.random.rand(n).astype(dtype=np.float32)
-    hY = np.random.rand(n).astype(dtype=np.float32)
-    hOut = np.zeros(n).astype(dtype=np.float32)
-
-    err, stream = cuda.cuStreamCreate(0)
-    ASSERT_DRV(err)
-
-    err, = cuda.cuMemcpyHtoDAsync(dX, hX, bufferSize, stream)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemcpyHtoDAsync(dY, hY, bufferSize, stream)
-    ASSERT_DRV(err)
-
-    err, = cuda.cuStreamSynchronize(stream)
-    ASSERT_DRV(err)
-
-    # Assert values are different before running kernel
-    hZ = a * hX + hY
-    if np.allclose(hOut, hZ):
-        raise ValueError('Error inside tolerence for host-device vectors')
-
-    arg_values = (a, dX, dY, dOut, n)
-    arg_types = (ctypes.c_float, None, None, None, ctypes.c_size_t)
-    err, = cuda.cuLaunchKernel(kernel,
-                              NUM_BLOCKS, 1, 1,           # grid dim
-                              NUM_THREADS, 1, 1,          # block dim
-                              0, stream,                  # shared mem and stream
-                              (arg_values, arg_types), 0) # arguments
-    ASSERT_DRV(err)
-
-    err, = cuda.cuMemcpyDtoHAsync(hOut, dOut, bufferSize, stream)
-    ASSERT_DRV(err)
-    err, = cuda.cuStreamSynchronize(stream)
-    ASSERT_DRV(err)
-
-    # Assert values are same after running kernel
-    hZ = a * hX + hY
-    if not np.allclose(hOut, hZ):
-        raise ValueError('Error outside tolerence for host-device vectors')
-
-    err, = cuda.cuStreamDestroy(stream)
-    ASSERT_DRV(err)
-
-    err, = cuda.cuMemFree(dX)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(dY)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(dOut)
-    ASSERT_DRV(err)
-
-    err, = cuda.cuModuleUnload(module)
-    ASSERT_DRV(err)
-    err, = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
-
-if __name__=="__main__":
-    main()
diff --git a/cuda_bindings/examples/extra/numba_emm_plugin.py b/cuda_bindings/examples/extra/numba_emm_plugin.py
deleted file mode 100644
index a80c3bbf..00000000
--- a/cuda_bindings/examples/extra/numba_emm_plugin.py
+++ /dev/null
@@ -1,161 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-
-"""Numba EMM Plugin using the CUDA Python Driver API.
-
-This example provides an External Memory Management (EMM) Plugin for Numba (see
-https://numba.readthedocs.io/en/stable/cuda/external-memory.html) that uses the
-NVIDIA CUDA Python Driver API for all on-device allocations and frees. For
-other operations interacting with the driver, Numba uses its internal ctypes
-wrapper. This serves as an example of interoperability between the NVIDIA CUDA
-Python Driver API, and other implementations of driver API wrappers (in this
-case Numba's ctypes wrapper), and demonstrates an on-ramp to using the NVIDIA
-CUDA Python Driver API wrapper by showing that it can co-exist with other
-wrappers - it is not necessary to replace all wrappers in all libraries to
-start using the NVIDIA wrapper.
-
-The current version of Numba passes all tests using this plugin (with a small
-patch to recognize CUDA 11.3 as a supported version). The Numba test suite can
-be run with the plugin by executing:
-
-    NUMBA_CUDA_MEMORY_MANAGER=numba_emm_plugin \\
-        python -m numba.runtests numba.cuda.tests -vf -m
-
-when the directory containing this example is on the PYTHONPATH. When tests are
-run, the test summary is expected to be close to:
-
-    Ran 1121 tests in 159.572s
-
-    OK (skipped=17, expected failures=1)
-
-The number of tests may vary with changes between commits in Numba, but the
-main result is that there are no unexpected failures.
-
-This example can also be run standalone with:
-
-    python numba_emm_plugin.py
-
-in which case it sets up Numba to use the included EMM plugin, then creates and
-destroys a device array. When run standalone, the output may look like:
-
-    Free before creating device array: 50781159424
-    Free after creating device array: 50779062272
-    Free after freeing device array: 50781159424
-
-The initial value may vary, but the expectation is that 2097152 bytes (2MB)
-should be taken up by the device array creation, and the original value should
-be restored after freeing it.
-"""
-
-from numba import cuda
-from numba.cuda import (HostOnlyCUDAMemoryManager, GetIpcHandleMixin,
-                        MemoryPointer, MemoryInfo)
-
-from cuda import cuda as cuda_driver
-
-from ctypes import c_size_t
-
-
-# Python functions for allocation, deallocation, and memory info via the NVIDIA
-# CUDA Python Driver API
-
-def driver_alloc(size):
-    """
-    Allocate `size` bytes of device memory and return a device pointer to the
-    allocated memory.
-    """
-    err, ptr = cuda_driver.cuMemAlloc(size)
-    if err != cuda_driver.CUresult.CUDA_SUCCESS:
-        raise RuntimeError(f'Unexpected error code {err} from cuMemAlloc')
-    return ptr
-
-
-def driver_free(ptr):
-    """
-    Free device memory pointed to by `ptr`.
-    """
-    err, = cuda_driver.cuMemFree(ptr)
-    if err != cuda_driver.CUresult.CUDA_SUCCESS:
-        raise RuntimeError(f'Unexpected error code {err} from cuMemFree')
-
-
-def driver_memory_info():
-    """
-    Return the free and total amount of device memory in bytes as a tuple.
-    """
-    err, free, total = cuda_driver.cuMemGetInfo()
-    if err != cuda_driver.CUresult.CUDA_SUCCESS:
-        raise RuntimeError(f'Unexpected error code {err} from cuMemGetInfo')
-    return free, total
-
-
-# EMM Plugin implementation. For documentation of the methods implemented here,
-# see:
-#
-#    https://numba.readthedocs.io/en/stable/cuda/external-memory.html#numba.cuda.BaseCUDAMemoryManager
-
-class DriverEMMPlugin(GetIpcHandleMixin, HostOnlyCUDAMemoryManager):
-    def memalloc(self, size):
-        ptr = driver_alloc(size)
-        ctx = self.context
-        finalizer = make_finalizer(ptr)
-        # We wrap the pointer value in a c_size_t because Numba expects ctypes
-        # objects
-        wrapped_ptr = c_size_t(int(ptr))
-        return MemoryPointer(ctx, wrapped_ptr, size, finalizer=finalizer)
-
-    def initialize(self):
-        # No setup required to use the EMM Plugin in a given context
-        pass
-
-    def get_memory_info(self):
-        free, total = driver_memory_info()
-        return MemoryInfo(free=free, total=total)
-
-    @property
-    def interface_version(self):
-        return 1
-
-
-def make_finalizer(ptr):
-    def finalizer():
-        driver_free(ptr)
-
-    return finalizer
-
-
-# If NUMBA_CUDA_MEMORY_MANAGER is set to this module (e.g.
-# `NUMBA_CUDA_MEMORY_MANAGER=numba_emm_plugin`), then Numba will look at the
-# _numba_memory_manager global to determine what class to use for memory
-# management.
-
-_numba_memory_manager = DriverEMMPlugin
-
-
-def main():
-    """
-    A simple test / demonstration setting the memory manager and
-    allocating/deleting an array.
-    """
-
-    cuda.set_memory_manager(DriverEMMPlugin)
-    ctx = cuda.current_context()
-    print(f"Free before creating device array: {ctx.get_memory_info().free}")
-    x = cuda.device_array(1000)
-    print(f"Free after creating device array: {ctx.get_memory_info().free}")
-    del x
-    print(f"Free after freeing device array: {ctx.get_memory_info().free}")
-
-
-if __name__ == '__main__':
-    import argparse
-    formatter = argparse.RawDescriptionHelpFormatter
-    parser = argparse.ArgumentParser(description=__doc__,
-                                     formatter_class=formatter)
-    parser.parse_args()
-    main()
diff --git a/cuda_bindings/examples/pytest.ini b/cuda_bindings/examples/pytest.ini
deleted file mode 100644
index e105585d..00000000
--- a/cuda_bindings/examples/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-[pytest]
-python_files = *_test.py
-python_functions = main
-pythonpath = .
diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml
deleted file mode 100644
index f4c9c5bc..00000000
--- a/cuda_bindings/pyproject.toml
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright 2023-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-
-[build-system]
-requires = ["setuptools", "versioneer[toml]==0.29", "cython", "pyclibrary"]
-build-backend = "setuptools.build_meta"
-
-[project]
-name = "cuda-python"
-description = "Python bindings for CUDA"
-authors = [{name = "NVIDIA Corporation", email = "cuda-python-conduct@nvidia.com"},]
-license = {file = "LICENSE"}
-classifiers = [
-    "Intended Audience :: Developers",
-    "Topic :: Database",
-    "Topic :: Scientific/Engineering",
-    "License :: Other/Proprietary License",
-    "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
-    "Environment :: GPU :: NVIDIA CUDA",
-]
-dynamic = ["version"]
-dependencies = [
-  "pywin32; sys_platform == 'win32'",
-]
-
-[project.urls]
-Repository = "https://github.com/NVIDIA/cuda-python"
-Documentation = "https://nvidia.github.io/cuda-python/"
-
-# BETA
-# [tool.setuptools]
-# zip-safe = false
-
-# BETA
-# [tool.setuptools.packages.find]
-# where = ["cuda"]
-# include = ["cuda", "cuda.*"]
-
-# BETA
-# [tool.setuptools.package-data]
-# "*" = ["*.pxd", "*.pyx", "*.h", "*.cpp"]
-
-[tool.versioneer]
-VCS = "git"
-style = "pep440"
-versionfile_source = "cuda/bindings/_version.py"
-versionfile_build = "cuda/bindings/_version.py"
-tag_prefix = "v"
-parentdir_prefix = "cuda-python-"
diff --git a/cuda_bindings/requirements.txt b/cuda_bindings/requirements.txt
deleted file mode 100644
index 0cf074d5..00000000
--- a/cuda_bindings/requirements.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Cython>=3.0.0
-pytest>=6.2.4
-pytest-benchmark>=3.4.1
-numpy>=1.21.1
-pyclibrary>=0.1.7
-setuptools
-tomli; python_version < "3.11"
-versioneer==0.29
-wheel
-pywin32; sys_platform == 'win32'
diff --git a/cuda_bindings/setup.py b/cuda_bindings/setup.py
deleted file mode 100644
index fb9d7b95..00000000
--- a/cuda_bindings/setup.py
+++ /dev/null
@@ -1,270 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-
-import glob
-import os
-import platform
-import sys
-import sysconfig
-
-from Cython import Tempita
-from Cython.Build import cythonize
-from pyclibrary import CParser
-from setuptools import find_packages, setup
-from setuptools.extension import Extension
-from setuptools.command.build_ext import build_ext
-import versioneer
-
-
-# ----------------------------------------------------------------------
-# Fetch configuration options
-
-CUDA_HOME = os.environ.get("CUDA_HOME")
-if not CUDA_HOME:
-    CUDA_HOME = os.environ.get("CUDA_PATH")
-if not CUDA_HOME:
-    raise RuntimeError('Environment variable CUDA_HOME or CUDA_PATH is not set')
-
-CUDA_HOME = CUDA_HOME.split(os.pathsep)
-nthreads = int(os.environ.get("PARALLEL_LEVEL", "0") or "0")
-PARSER_CACHING = os.environ.get("CUDA_PYTHON_PARSER_CACHING", False)
-PARSER_CACHING = True if PARSER_CACHING else False
-
-# ----------------------------------------------------------------------
-# Parse user-provided CUDA headers
-
-header_dict = {
-    'driver' : ['cuda.h',
-                'cudaProfiler.h',
-                'cudaEGL.h',
-                'cudaGL.h',
-                'cudaVDPAU.h'],
-    'runtime' : ['driver_types.h',
-                 'vector_types.h',
-                 'cuda_runtime.h',
-                 'surface_types.h',
-                 'texture_types.h',
-                 'library_types.h',
-                 'cuda_runtime_api.h',
-                 'device_types.h',
-                 'driver_functions.h',
-                 'cuda_profiler_api.h',
-                 'cuda_egl_interop.h',
-                 'cuda_gl_interop.h',
-                 'cuda_vdpau_interop.h'],
-    'nvrtc' : ['nvrtc.h']}
-
-replace = {' __device_builtin__ ':' ',
-           'CUDARTAPI ':' ',
-           'typedef __device_builtin__ enum cudaError cudaError_t;' : 'typedef cudaError cudaError_t;',
-           'typedef __device_builtin__ enum cudaOutputMode cudaOutputMode_t;' : 'typedef cudaOutputMode cudaOutputMode_t;',
-           'typedef enum cudaError cudaError_t;' : 'typedef cudaError cudaError_t;',
-           'typedef enum cudaOutputMode cudaOutputMode_t;' : 'typedef cudaOutputMode cudaOutputMode_t;',
-           'typedef enum cudaDataType_t cudaDataType_t;' : '',
-           'typedef enum libraryPropertyType_t libraryPropertyType_t;' : '',
-           '  enum ' : '   ',
-           ', enum ' : ', ',
-           '\\(enum ' : '(',}
-
-found_types = []
-found_structs = {}
-found_unions = {}
-found_functions = []
-found_values = []
-
-include_path_list = [os.path.join(path, 'include') for path in CUDA_HOME]
-print(f'Parsing headers in "{include_path_list}" (Caching {PARSER_CACHING})')
-for library, header_list in header_dict.items():
-    header_paths = []
-    for header in header_list:
-        path_candidate = [os.path.join(path, header) for path in include_path_list]
-        for path in path_candidate:
-            if os.path.exists(path):
-                header_paths += [path]
-                break
-        if not os.path.exists(path):
-            print(f'Missing header {header}')
-
-    print(f'Parsing {library} headers')
-    parser = CParser(header_paths,
-                     cache='./cache_{}'.format(library.split('.')[0]) if PARSER_CACHING else None,
-                     replace=replace)
-
-    if library == 'driver':
-        CUDA_VERSION = parser.defs['macros']['CUDA_VERSION'] if 'CUDA_VERSION' in parser.defs['macros'] else 'Unknown'
-        print(f'Found CUDA_VERSION: {CUDA_VERSION}')
-
-    # Combine types with others since they sometimes get tangled
-    found_types += {key for key in parser.defs['types']}
-    found_types += {key for key in parser.defs['structs']}
-    found_structs.update(parser.defs['structs'])
-    found_types += {key for key in parser.defs['unions']}
-    found_unions.update(parser.defs['unions'])
-    found_types += {key for key in parser.defs['enums']}
-    found_functions += {key for key in parser.defs['functions']}
-    found_values += {key for key in parser.defs['values']}
-
-if len(found_functions) == 0:
-    raise RuntimeError(f'Parser found no functions. Is CUDA_HOME setup correctly? (CUDA_HOME="{CUDA_HOME}")')
-
-# Unwrap struct and union members
-def unwrapMembers(found_dict):
-    for key in found_dict:
-        members = [var for var, _, _ in found_dict[key]['members']]
-        found_dict[key]['members'] = members
-
-unwrapMembers(found_structs)
-unwrapMembers(found_unions)
-
-# ----------------------------------------------------------------------
-# Generate
-
-def fetch_input_files(path):
-    return [os.path.join(path, f) for f in os.listdir(path) if f.endswith('.in')]
-
-def generate_output(infile, local):
-    assert infile.endswith('.in')
-    outfile = infile[:-3]
-
-    with open(infile) as f:
-        pxdcontent = Tempita.Template(f.read()).substitute(local)
-
-    if os.path.exists(outfile):
-        with open(outfile) as f:
-            if f.read() == pxdcontent:
-                print(f'Skipping {infile} (No change)')
-                return
-    with open(outfile, "w") as f:
-        print(f'Generating {infile}')
-        f.write(pxdcontent)
-
-path_list = [os.path.join('cuda'),
-             os.path.join('cuda', 'bindings'),
-             os.path.join('cuda', 'bindings', '_bindings'),
-             os.path.join('cuda', 'bindings', '_lib'),
-             os.path.join('cuda', 'bindings', '_lib', 'cyruntime')]
-input_files = []
-for path in path_list:
-    input_files += fetch_input_files(path)
-
-for file in input_files:
-    generate_output(file, locals())
-
-# ----------------------------------------------------------------------
-# Prepare compile arguments
-
-# For Cython
-include_dirs = [
-    os.path.dirname(sysconfig.get_path("include")),
-] + include_path_list
-library_dirs = [sysconfig.get_path("platlib"), os.path.join(os.sys.prefix, "lib")]
-
-extra_compile_args = []
-extra_cythonize_kwargs = {}
-if sys.platform != 'win32':
-    extra_compile_args += [
-        '-std=c++14',
-        '-fpermissive',
-        '-Wno-deprecated-declarations',
-        '-D _GLIBCXX_ASSERTIONS',
-        '-fno-var-tracking-assignments'
-    ]
-    if '--debug' in sys.argv:
-        extra_cythonize_kwargs['gdb_debug'] = True
-        extra_compile_args += ['-g', '-O0']
-    else:
-        extra_compile_args += ['-O3']
-
-# For Setup
-extensions = []
-cmdclass = {}
-
-# ----------------------------------------------------------------------
-# Cythonize
-
-def prep_extensions(sources):
-    pattern = sources[0]
-    files = glob.glob(pattern)
-    exts = []
-    for pyx in files:
-        mod_name = pyx.replace(".pyx", "").replace(os.sep, ".").replace("/", ".")
-        exts.append(
-            Extension(
-                mod_name,
-                sources=[pyx, *sources[1:]],
-                include_dirs=include_dirs,
-                library_dirs=library_dirs,
-                runtime_library_dirs=[],
-                libraries=[],
-                language="c++",
-                extra_compile_args=extra_compile_args,
-            )
-        )
-    return exts
-
-
-def do_cythonize(extensions):
-    return cythonize(
-        extensions,
-        nthreads=nthreads,
-        compiler_directives=dict(
-            profile=True, language_level=3, embedsignature=True, binding=True
-        ),
-        **extra_cythonize_kwargs)
-
-
-sources_list = [
-    # private
-    ["cuda/bindings/_bindings/*.pyx", "cuda/bindings/_bindings/loader.cpp"],
-    # utils
-    ["cuda/bindings/_lib/*.pyx", "cuda/bindings/_lib/param_packer.cpp"],
-    ["cuda/bindings/_lib/cyruntime/*.pyx"],
-    # public
-    ["cuda/bindings/*.pyx"],
-    # public (deprecated, to be removed)
-    ["cuda/*.pyx"],
-    # tests
-    ["tests/*.pyx"],
-]
-
-for sources in sources_list:
-    extensions += prep_extensions(sources)
-
-# ---------------------------------------------------------------------
-# Custom build_ext command
-# Files are build in two steps:
-# 1) Cythonized (in the do_cythonize() command)
-# 2) Compiled to .o files as part of build_ext
-# This class is solely for passing the value of nthreads to build_ext
-
-class ParallelBuildExtensions(build_ext):
-    def initialize_options(self):
-        build_ext.initialize_options(self)
-        if nthreads > 0:
-            self.parallel = nthreads
-
-    def finalize_options(self):
-        build_ext.finalize_options(self)
-
-cmdclass = {"build_ext": ParallelBuildExtensions}
-cmdclass = versioneer.get_cmdclass(cmdclass)
-
-# ----------------------------------------------------------------------
-# Setup
-
-setup(
-    version=versioneer.get_version(),
-    ext_modules=do_cythonize(extensions),
-    packages=find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "tests"]),
-    package_data=dict.fromkeys(
-        find_packages(include=["cuda.cuda", "cuda.cuda.*", "cuda.cuda.bindings", "cuda.cuda.bindings._bindings", "cuda.cuda.bindings._lib", "cuda.cuda.bindings._lib.cyruntime", "tests"]),
-        ["*.pxd", "*.pyx", "*.py", "*.h", "*.cpp"],
-    ),
-    cmdclass=cmdclass,
-    zip_safe=False,
-)
diff --git a/cuda_bindings/tests/test_ccuda.pyx b/cuda_bindings/tests/test_ccuda.pyx
deleted file mode 100644
index 0d90ba90..00000000
--- a/cuda_bindings/tests/test_ccuda.pyx
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from libc.string cimport (
-    memset,
-    memcmp
-    )
-# TODO: update to new module once the old ones are removed, we use the
-# tests to cover backward compatibility.
-cimport cuda.ccuda as ccuda
-
-def test_ccuda_memcpy():
-    # Init CUDA
-    err = ccuda.cuInit(0)
-    assert(err == 0)
-
-    # Get device
-    cdef ccuda.CUdevice device
-    err = ccuda.cuDeviceGet(&device, 0)
-    assert(err == 0)
-
-    # Construct context
-    cdef ccuda.CUcontext ctx
-    err = ccuda.cuCtxCreate(&ctx, 0, device)
-    assert(err == 0)
-
-    # Allocate dev memory
-    cdef ccuda.CUdeviceptr dptr
-    err = ccuda.cuMemAlloc(&dptr, 1024)
-    assert(err == 0)
-
-    # Set h1 and h2 memory to be different
-    cdef char[1024] hptr1
-    memset(hptr1, 1, 1024)
-    cdef char[1024] hptr2
-    memset(hptr2, 2, 1024)
-    assert(memcmp(hptr1, hptr2, 1024) != 0)
-
-    # h1 to D
-    err = ccuda.cuMemcpyHtoD(dptr, <void*>hptr1, 1024)
-    assert(err == 0)
-
-    # D to h2
-    err = ccuda.cuMemcpyDtoH(<void*>hptr2, dptr, 1024)
-    assert(err == 0)
-
-    # Validate h1 == h2
-    assert(memcmp(hptr1, hptr2, 1024) == 0)
-
-    # Cleanup
-    err = ccuda.cuMemFree(dptr)
-    assert(err == 0)
-    err = ccuda.cuCtxDestroy(ctx)
-    assert(err == 0)
diff --git a/cuda_bindings/tests/test_ccudart.pyx b/cuda_bindings/tests/test_ccudart.pyx
deleted file mode 100644
index b228661c..00000000
--- a/cuda_bindings/tests/test_ccudart.pyx
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from libc.string cimport (
-    memset,
-    memcmp
-    )
-# TODO: update to new module once the old ones are removed, we use the
-# tests to cover backward compatibility.
-cimport cuda.ccudart as ccudart
-
-def test_ccudart_memcpy():
-    # Allocate dev memory
-    cdef void* dptr
-    err = ccudart.cudaMalloc(&dptr, 1024)
-    assert(err == ccudart.cudaSuccess)
-
-    # Set h1 and h2 memory to be different
-    cdef char[1024] hptr1
-    memset(hptr1, 1, 1024)
-    cdef char[1024] hptr2
-    memset(hptr2, 2, 1024)
-    assert(memcmp(hptr1, hptr2, 1024) != 0)
-
-    # h1 to D
-    err = ccudart.cudaMemcpy(dptr, <void*>hptr1, 1024, ccudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
-    assert(err == ccudart.cudaSuccess)
-
-    # D to h2
-    err = ccudart.cudaMemcpy(<void*>hptr2, dptr, 1024, ccudart.cudaMemcpyKind.cudaMemcpyDeviceToHost)
-    assert(err == ccudart.cudaSuccess)
-
-    # Validate h1 == h2
-    assert(memcmp(hptr1, hptr2, 1024) == 0)
-
-    # Cleanup
-    err = ccudart.cudaFree(dptr)
-    assert(err == ccudart.cudaSuccess)
-
-from cuda.ccudart cimport dim3
-from cuda.ccudart cimport cudaMemAllocationHandleType
-from cuda.ccudart cimport CUuuid, cudaUUID_t
-
-cdef extern from *:
-    """
-    #include <cuda_runtime_api.h>
-    dim3 copy_and_append_dim3(dim3 copy) {
-        return dim3(copy.x + 1, copy.y + 1, copy.z + 1);
-    }
-    void foo(cudaMemAllocationHandleType x) {
-        return;
-    }
-    int compareUUID(CUuuid cuType, cudaUUID_t cudaType) {
-        return memcmp(&cuType, &cudaType, sizeof(CUuuid));
-    }
-    """
-    void foo(cudaMemAllocationHandleType x)
-    dim3 copy_and_append_dim3(dim3 copy)
-    int compareUUID(CUuuid cuType, cudaUUID_t cudaType)
-
-def test_ccudart_interoperable():
-    # struct
-    cdef dim3 oldDim, newDim
-    oldDim.x = 1
-    oldDim.y = 2
-    oldDim.z = 3
-    newDim = copy_and_append_dim3(oldDim)
-    assert oldDim.x + 1 == newDim.x
-    assert oldDim.y + 1 == newDim.y
-    assert oldDim.z + 1 == newDim.z
-
-    # Enum
-    foo(cudaMemAllocationHandleType.cudaMemHandleTypeNone)
-
-    # typedef struct
-    cdef CUuuid type_one
-    cdef cudaUUID_t type_two
-    memset(type_one.bytes, 1, sizeof(type_one.bytes))
-    memset(type_two.bytes, 1, sizeof(type_one.bytes))
-    assert compareUUID(type_one, type_two) == 0
-    memset(type_two.bytes, 2, sizeof(type_one.bytes))
-    assert compareUUID(type_one, type_two) != 0
diff --git a/cuda_bindings/tests/test_cuda.py b/cuda_bindings/tests/test_cuda.py
deleted file mode 100644
index d55a4209..00000000
--- a/cuda_bindings/tests/test_cuda.py
+++ /dev/null
@@ -1,875 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import platform
-import pytest
-import cuda.cuda as cuda
-import cuda.cudart as cudart
-import numpy as np
-import textwrap
-import shutil
-from sysconfig import get_paths
-
-def driverVersionLessThan(target):
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, version = cuda.cuDriverGetVersion()
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    return version < target
-
-def supportsMemoryPool():
-    err, isSupported = cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrMemoryPoolsSupported, 0)
-    return err == cudart.cudaError_t.cudaSuccess and isSupported
-
-def supportsManagedMemory():
-    err, isSupported = cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrManagedMemory, 0)
-    return err == cudart.cudaError_t.cudaSuccess and isSupported
-
-def supportsCudaAPI(name):
-    return name in dir(cuda)
-
-def callableBinary(name):
-    return shutil.which(name) != None
-
-def test_cuda_memcpy():
-    # Init CUDA
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Get device
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Construct context
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Allocate dev memory
-    size = int(1024 * np.uint8().itemsize)
-    err, dptr = cuda.cuMemAlloc(size)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Set h1 and h2 memory to be different
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # h1 to D
-    err, = cuda.cuMemcpyHtoD(dptr, h1, size)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # D to h2
-    err, = cuda.cuMemcpyDtoH(h2, dptr, size)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # Cleanup
-    err, = cuda.cuMemFree(dptr)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-def test_cuda_array():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # No context created
-    desc = cuda.CUDA_ARRAY_DESCRIPTOR()
-    err, arr = cuda.cuArrayCreate(desc)
-    assert(err == cuda.CUresult.CUDA_ERROR_INVALID_CONTEXT or err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE)
-
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Desciption not filled
-    err, arr = cuda.cuArrayCreate(desc)
-    assert(err == cuda.CUresult.CUDA_ERROR_INVALID_VALUE)
-
-    # Pass
-    desc.Format = cuda.CUarray_format.CU_AD_FORMAT_SIGNED_INT8
-    desc.NumChannels = 1
-    desc.Width = 1
-    err, arr = cuda.cuArrayCreate(desc)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    err, = cuda.cuArrayDestroy(arr)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-def test_cuda_repr_primitive():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(str(device) == '<CUdevice 0>')
-    assert(int(device) == 0)
-
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(str(ctx).startswith('<CUcontext 0x'))
-    assert(int(ctx) > 0)
-    assert(hex(ctx) == hex(int(ctx)))
-
-    # CUdeviceptr
-    err, dptr = cuda.cuMemAlloc(1024 * np.uint8().itemsize)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(str(dptr).startswith('<CUdeviceptr '))
-    assert(int(dptr) > 0)
-    err, = cuda.cuMemFree(dptr)
-    size = 7
-    dptr = cuda.CUdeviceptr(size)
-    assert(str(dptr) == '<CUdeviceptr {}>'.format(size))
-    assert(int(dptr) == size)
-    size = 4294967295
-    dptr = cuda.CUdeviceptr(size)
-    assert(str(dptr) == '<CUdeviceptr {}>'.format(size))
-    assert(int(dptr) == size)
-    size = 18446744073709551615
-    dptr = cuda.CUdeviceptr(size)
-    assert(str(dptr) == '<CUdeviceptr {}>'.format(size))
-    assert(int(dptr) == size)
-
-    # cuuint32_t
-    size = 7
-    int32 = cuda.cuuint32_t(size)
-    assert(str(int32) == '<cuuint32_t {}>'.format(size))
-    assert(int(int32) == size)
-    size = 4294967295
-    int32 = cuda.cuuint32_t(size)
-    assert(str(int32) == '<cuuint32_t {}>'.format(size))
-    assert(int(int32) == size)
-    size = 18446744073709551615
-    try:
-        int32 = cuda.cuuint32_t(size)
-        raise RuntimeError('int32 = cuda.cuuint32_t(18446744073709551615) did not fail')
-    except OverflowError as err:
-        pass
-
-    # cuuint64_t
-    size = 7
-    int64 = cuda.cuuint64_t(size)
-    assert(str(int64) == '<cuuint64_t {}>'.format(size))
-    assert(int(int64) == size)
-    size = 4294967295
-    int64 = cuda.cuuint64_t(size)
-    assert(str(int64) == '<cuuint64_t {}>'.format(size))
-    assert(int(int64) == size)
-    size = 18446744073709551615
-    int64 = cuda.cuuint64_t(size)
-    assert(str(int64) == '<cuuint64_t {}>'.format(size))
-    assert(int(int64) == size)
-
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-def test_cuda_repr_pointer():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Test 1: Classes representing pointers
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(str(ctx).startswith('<CUcontext 0x'))
-    assert(int(ctx) > 0)
-    assert(hex(ctx) == hex(int(ctx)))
-    randomCtxPointer = 12345
-    randomCtx = cuda.CUcontext(randomCtxPointer)
-    assert(str(randomCtx) == '<CUcontext {}>'.format(hex(randomCtxPointer)))
-    assert(int(randomCtx) == randomCtxPointer)
-    assert(hex(randomCtx) == hex(randomCtxPointer))
-
-    # Test 2: Function pointers
-    func = 12345
-    b2d_cb = cuda.CUoccupancyB2DSize(func)
-    assert(str(b2d_cb) == '<CUoccupancyB2DSize {}>'.format(hex(func)))
-    assert(int(b2d_cb) == func)
-    assert(hex(b2d_cb) == hex(func))
-
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-def test_cuda_uuid_list_access():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    err, uuid = cuda.cuDeviceGetUuid(device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(len(uuid.bytes) <= 16)
-
-    jit_option = cuda.CUjit_option
-    options = {
-        jit_option.CU_JIT_INFO_LOG_BUFFER: 1,
-        jit_option.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES: 2,
-        jit_option.CU_JIT_ERROR_LOG_BUFFER: 3,
-        jit_option.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES: 4,
-        jit_option.CU_JIT_LOG_VERBOSE: 5,
-    }
-
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-def test_cuda_cuModuleLoadDataEx():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, dev = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, dev)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-
-    option_keys = [
-        cuda.CUjit_option.CU_JIT_INFO_LOG_BUFFER,
-        cuda.CUjit_option.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
-        cuda.CUjit_option.CU_JIT_ERROR_LOG_BUFFER,
-        cuda.CUjit_option.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
-        cuda.CUjit_option.CU_JIT_LOG_VERBOSE
-    ]
-    err, mod = cuda.cuModuleLoadDataEx(0, 0, option_keys, [])
-
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-
-def test_cuda_repr():
-    actual = cuda.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS()
-    assert isinstance(actual, cuda.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS)
-
-    actual_repr = actual.__repr__()
-    expected_repr = textwrap.dedent("""
-    params :
-    fence :
-        value : 0
-    nvSciSync :
-        fence : 0x0
-        reserved : 0
-    keyedMutex :
-        key : 0
-    reserved : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-flags : 0
-reserved : [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-""")
-    assert actual_repr.split() == expected_repr.split()
-
-    actual_repr = cuda.CUDA_KERNEL_NODE_PARAMS_st().__repr__()
-    expected_repr = textwrap.dedent("""
-    func : <CUfunction 0x0>
-gridDimX : 0
-gridDimY : 0
-gridDimZ : 0
-blockDimX : 0
-blockDimY : 0
-blockDimZ : 0
-sharedMemBytes : 0
-kernelParams : 0
-extra : 0
-""")
-    assert actual_repr.split() == expected_repr.split()
-
-
-def test_cuda_struct_list_of_enums():
-    desc = cuda.CUDA_TEXTURE_DESC_st()
-    desc.addressMode = [cuda.CUaddress_mode.CU_TR_ADDRESS_MODE_WRAP,
-                        cuda.CUaddress_mode.CU_TR_ADDRESS_MODE_CLAMP,
-                        cuda.CUaddress_mode.CU_TR_ADDRESS_MODE_MIRROR]
-
-    # # Too many args
-    # desc.addressMode = [cuda.CUaddress_mode.CU_TR_ADDRESS_MODE_WRAP,
-    #                     cuda.CUaddress_mode.CU_TR_ADDRESS_MODE_CLAMP,
-    #                     cuda.CUaddress_mode.CU_TR_ADDRESS_MODE_MIRROR,
-    #                     cuda.CUaddress_mode.CU_TR_ADDRESS_MODE_BORDER]
-
-    # # Too little args
-    # desc.addressMode = [cuda.CUaddress_mode.CU_TR_ADDRESS_MODE_WRAP,
-    #                     cuda.CUaddress_mode.CU_TR_ADDRESS_MODE_CLAMP]
-
-def test_cuda_CUstreamBatchMemOpParams():
-    params = cuda.CUstreamBatchMemOpParams()
-    params.operation = cuda.CUstreamBatchMemOpType.CU_STREAM_MEM_OP_WAIT_VALUE_32
-    params.waitValue.operation = cuda.CUstreamBatchMemOpType.CU_STREAM_MEM_OP_WAIT_VALUE_32
-    params.writeValue.operation = cuda.CUstreamBatchMemOpType.CU_STREAM_MEM_OP_WAIT_VALUE_32
-    params.flushRemoteWrites.operation = cuda.CUstreamBatchMemOpType.CU_STREAM_MEM_OP_WAIT_VALUE_32
-    params.waitValue.value64 = 666
-    assert(int(params.waitValue.value64) == 666)
-
-@pytest.mark.skipif(driverVersionLessThan(11030) or not supportsMemoryPool(), reason='When new attributes were introduced')
-def test_cuda_memPool_attr():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    poolProps = cuda.CUmemPoolProps()
-    poolProps.allocType = cuda.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED
-    poolProps.location.id = 0
-    poolProps.location.type = cuda.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
-
-    attr_list = [None] * 8
-    err, pool = cuda.cuMemPoolCreate(poolProps)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    for idx, attr in enumerate([cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES,
-                                cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC,
-                                cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES,
-                                cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
-                                cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT,
-                                cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH,
-                                cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_USED_MEM_CURRENT,
-                                cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_USED_MEM_HIGH]):
-        err, attr_tmp = cuda.cuMemPoolGetAttribute(pool, attr)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-        attr_list[idx] = attr_tmp
-
-    for idxA, attr in enumerate([cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES,
-                                 cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC,
-                                 cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES]):
-        err, = cuda.cuMemPoolSetAttribute(pool, attr, 0)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-    for idx, attr in enumerate([cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD]):
-        err, = cuda.cuMemPoolSetAttribute(pool, attr, cuda.cuuint64_t(9))
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    for idx, attr in enumerate([cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES,
-                                cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC,
-                                cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES,
-                                cuda.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD]):
-        err, attr_tmp = cuda.cuMemPoolGetAttribute(pool, attr)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-        attr_list[idx] = attr_tmp
-    assert(attr_list[0] == 0)
-    assert(attr_list[1] == 0)
-    assert(attr_list[2] == 0)
-    assert(int(attr_list[3]) == 9)
-
-    err, = cuda.cuMemPoolDestroy(pool)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-@pytest.mark.skipif(driverVersionLessThan(11030) or not supportsManagedMemory(), reason='When new attributes were introduced')
-def test_cuda_pointer_attr():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ptr = cuda.cuMemAllocManaged(0x1000, cuda.CUmemAttach_flags.CU_MEM_ATTACH_GLOBAL.value)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Individual version
-    attr_type_list = [cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_CONTEXT,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_POINTER,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_HOST_POINTER,
-                      # cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_P2P_TOKENS, # TODO: Can I somehow test this?
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_BUFFER_ID,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_MANAGED,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_RANGE_START_ADDR,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_RANGE_SIZE,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MAPPED,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_ACCESS_FLAGS,
-                      cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE]
-    attr_value_list = [None] * len(attr_type_list)
-    for idx, attr in enumerate(attr_type_list):
-        err, attr_tmp = cuda.cuPointerGetAttribute(attr, ptr)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-        attr_value_list[idx] = attr_tmp
-
-    # List version
-    err, attr_value_list_v2 = cuda.cuPointerGetAttributes(len(attr_type_list), attr_type_list, ptr)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    for attr1, attr2 in zip(attr_value_list, attr_value_list_v2):
-        assert(str(attr1) == str(attr2))
-
-    # Test setting values
-    for val in (True, False):
-        err, = cuda.cuPointerSetAttribute(val, cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, ptr)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-        err, attr_tmp = cuda.cuPointerGetAttribute(cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, ptr)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-        assert(attr_tmp == val)
-
-    err, = cuda.cuMemFree(ptr)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-@pytest.mark.skipif(not supportsManagedMemory(), reason='When new attributes were introduced')
-def test_cuda_mem_range_attr():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    size = int(0x1000)
-    err, ptr = cuda.cuMemAllocManaged(size, cuda.CUmemAttach_flags.CU_MEM_ATTACH_GLOBAL.value)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuMemAdvise(ptr, size, cuda.CUmem_advise.CU_MEM_ADVISE_SET_READ_MOSTLY, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuMemAdvise(ptr, size, cuda.CUmem_advise.CU_MEM_ADVISE_SET_PREFERRED_LOCATION, cuda.CU_DEVICE_CPU)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuMemAdvise(ptr, size, cuda.CUmem_advise.CU_MEM_ADVISE_SET_ACCESSED_BY, cuda.CU_DEVICE_CPU)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, concurrentSupported = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    if concurrentSupported:
-        err, = cuda.cuMemAdvise(ptr, size, cuda.CUmem_advise.CU_MEM_ADVISE_SET_ACCESSED_BY, device)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-        expected_values_list = ([1, -1, [0, -1, -2], -2],)
-    else:
-        expected_values_list = ([1, -1, [-1, -2, -2], -2], [0, -2, [-2, -2, -2], -2])
-
-    # Individual version
-    attr_type_list = [cuda.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY,
-                      cuda.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION,
-                      cuda.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY,
-                      cuda.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION]
-    attr_type_size_list = [4, 4, 12, 4]
-    attr_value_list = [None] * len(attr_type_list)
-    for idx in range(len(attr_type_list)):
-        err, attr_tmp = cuda.cuMemRangeGetAttribute(attr_type_size_list[idx], attr_type_list[idx], ptr, size)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-        attr_value_list[idx] = attr_tmp
-
-    matched = False
-    for expected_values in expected_values_list:
-        if expected_values == attr_value_list:
-            matched = True
-            break
-    if not matched:
-        raise RuntimeError(f'attr_value_list {attr_value_list} did not match any {expected_values_list}')
-
-    # List version
-    err, attr_value_list_v2 = cuda.cuMemRangeGetAttributes(attr_type_size_list, attr_type_list, len(attr_type_list), ptr, size)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    for attr1, attr2 in zip(attr_value_list, attr_value_list_v2):
-        assert(str(attr1) == str(attr2))
-
-    err, = cuda.cuMemFree(ptr)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-@pytest.mark.skipif(driverVersionLessThan(11040) or not supportsMemoryPool(), reason='Mempool for graphs not supported')
-def test_cuda_graphMem_attr():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    err, stream = cuda.cuStreamCreate(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    err, graph = cuda.cuGraphCreate(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    allocSize = 1
-
-    params = cuda.CUDA_MEM_ALLOC_NODE_PARAMS()
-    params.poolProps.location.type = cuda.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
-    params.poolProps.location.id = device
-    params.poolProps.allocType = cuda.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED
-    params.bytesize = allocSize
-
-    err, allocNode = cuda.cuGraphAddMemAllocNode(graph, None, 0, params)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, freeNode = cuda.cuGraphAddMemFreeNode(graph, [allocNode], 1, params.dptr)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    err, graphExec = cuda.cuGraphInstantiate(graph, 0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    err, = cuda.cuGraphLaunch(graphExec, stream)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    err, used = cuda.cuDeviceGetGraphMemAttribute(device, cuda.CUgraphMem_attribute.CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, usedHigh = cuda.cuDeviceGetGraphMemAttribute(device, cuda.CUgraphMem_attribute.CU_GRAPH_MEM_ATTR_USED_MEM_HIGH)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, reserved = cuda.cuDeviceGetGraphMemAttribute(device, cuda.CUgraphMem_attribute.CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, reservedHigh = cuda.cuDeviceGetGraphMemAttribute(device, cuda.CUgraphMem_attribute.CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    assert int(used) >= allocSize
-    assert int(usedHigh) == int(used)
-    assert int(reserved) == int(usedHigh)
-    assert int(reservedHigh) == int(reserved)
-
-    err, = cuda.cuGraphDestroy(graph)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuStreamDestroy(stream)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-@pytest.mark.skipif(driverVersionLessThan(12010)
-                    or not supportsCudaAPI('cuCoredumpSetAttributeGlobal')
-                    or not supportsCudaAPI('cuCoredumpGetAttributeGlobal'), reason='Coredump API not present')
-def test_cuda_coredump_attr():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    attr_list = [None] * 6
-
-    err, = cuda.cuCoredumpSetAttributeGlobal(cuda.CUcoredumpSettings.CU_COREDUMP_TRIGGER_HOST, False)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuCoredumpSetAttributeGlobal(cuda.CUcoredumpSettings.CU_COREDUMP_FILE, b'corefile')
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuCoredumpSetAttributeGlobal(cuda.CUcoredumpSettings.CU_COREDUMP_PIPE, b'corepipe')
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuCoredumpSetAttributeGlobal(cuda.CUcoredumpSettings.CU_COREDUMP_LIGHTWEIGHT, True)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    for idx, attr in enumerate([cuda.CUcoredumpSettings.CU_COREDUMP_TRIGGER_HOST,
-                                cuda.CUcoredumpSettings.CU_COREDUMP_FILE,
-                                cuda.CUcoredumpSettings.CU_COREDUMP_PIPE,
-                                cuda.CUcoredumpSettings.CU_COREDUMP_LIGHTWEIGHT,
-                                ]):
-        err, attr_tmp = cuda.cuCoredumpGetAttributeGlobal(attr)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-        attr_list[idx] = attr_tmp
-
-    assert(attr_list[0] == False)
-    assert(attr_list[1] == b'corefile')
-    assert(attr_list[2] == b'corepipe')
-    assert(attr_list[3] == True)
-
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-def test_get_error_name_and_string():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    err, device = cuda.cuDeviceGet(0)
-    _, s = cuda.cuGetErrorString(err)
-    assert s == b"no error"
-    _, s = cuda.cuGetErrorName(err)
-    assert s == b"CUDA_SUCCESS"
-
-    err, device = cuda.cuDeviceGet(-1)
-    _, s = cuda.cuGetErrorString(err)
-    assert s == b"invalid device ordinal"
-    _, s = cuda.cuGetErrorName(err)
-    assert s == b"CUDA_ERROR_INVALID_DEVICE"
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-@pytest.mark.skipif(not callableBinary('nvidia-smi'), reason='Binary existance needed')
-def test_device_get_name():
-    import subprocess
-
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    p = subprocess.run(
-        ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
-        stdout=subprocess.PIPE, stderr=subprocess.PIPE
-    )
-
-    delimiter = b'\r\n' if platform.system() == "Windows" else b'\n'
-    expect = p.stdout.split(delimiter)
-    size = 64
-    _, got = cuda.cuDeviceGetName(size, device)
-    got = got.split(b'\x00')[0]
-    if any(b'Unable to determine the device handle for' in result for result in expect):
-        # Undeterministic devices get waived
-        pass
-    else:
-        assert any(got in result for result in expect)
-
-
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-# TODO: cuStreamGetCaptureInfo_v2
-@pytest.mark.skipif(driverVersionLessThan(11030), reason='Driver too old for cuStreamGetCaptureInfo_v2')
-def test_stream_capture():
-    pass
-
-def test_profiler():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuProfilerStart()
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuProfilerStop()
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-def test_eglFrame():
-    val = cuda.CUeglFrame()
-    # [<CUarray 0x0>, <CUarray 0x0>, <CUarray 0x0>]
-    assert(int(val.frame.pArray[0]) == 0)
-    assert(int(val.frame.pArray[1]) == 0)
-    assert(int(val.frame.pArray[2]) == 0)
-    val.frame.pArray = [1,2,3]
-    # [<CUarray 0x1>, <CUarray 0x2>, <CUarray 0x3>]
-    assert(int(val.frame.pArray[0]) == 1)
-    assert(int(val.frame.pArray[1]) == 2)
-    assert(int(val.frame.pArray[2]) == 3)
-    val.frame.pArray = [cuda.CUarray(4),2,3]
-    # [<CUarray 0x4>, <CUarray 0x2>, <CUarray 0x3>]
-    assert(int(val.frame.pArray[0]) == 4)
-    assert(int(val.frame.pArray[1]) == 2)
-    assert(int(val.frame.pArray[2]) == 3)
-    val.frame.pPitch = [4, 2, 3]
-    # [4, 2, 3]
-    assert(int(val.frame.pPitch[0]) == 4)
-    assert(int(val.frame.pPitch[1]) == 2)
-    assert(int(val.frame.pPitch[2]) == 3)
-    val.frame.pPitch = [1,2,3]
-    assert(int(val.frame.pPitch[0]) == 1)
-    assert(int(val.frame.pPitch[1]) == 2)
-    assert(int(val.frame.pPitch[2]) == 3)
-
-def test_char_range():
-    val = cuda.CUipcMemHandle_st()
-    for x in range(-128, 0):
-        val.reserved = [x] * 64
-        assert(val.reserved[0] == 256 + x)
-    for x in range(0, 256):
-        val.reserved = [x] * 64
-        assert(val.reserved[0] == x)
-
-def test_anon_assign():
-    val1 = cuda.CUexecAffinityParam_st()
-    val2 = cuda.CUexecAffinityParam_st()
-
-    assert(val1.param.smCount.val == 0)
-    val1.param.smCount.val = 5
-    assert(val1.param.smCount.val == 5)
-    val2.param.smCount.val = 11
-    assert(val2.param.smCount.val == 11)
-
-    val1.param = val2.param
-    assert(val1.param.smCount.val == 11)
-
-def test_union_assign():
-    val = cuda.CUlaunchAttributeValue()
-    val.clusterDim.x, val.clusterDim.y, val.clusterDim.z = 9,9,9
-    attr = cuda.CUlaunchAttribute()
-    attr.value = val
-
-    assert(val.clusterDim.x == 9)
-    assert(val.clusterDim.y == 9)
-    assert(val.clusterDim.z == 9)
-
-def test_invalid_repr_attribute():
-    val = cuda.CUlaunchAttributeValue()
-    string = str(val)
-
-@pytest.mark.skipif(driverVersionLessThan(12020)
-                    or not supportsCudaAPI('cuGraphAddNode')
-                    or not supportsCudaAPI('cuGraphNodeSetParams')
-                    or not supportsCudaAPI('cuGraphExecNodeSetParams'), reason='Polymorphic graph APIs required')
-def test_graph_poly():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, stream = cuda.cuStreamCreate(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # cuGraphAddNode
-
-    # Create 2 buffers
-    size = int(1024 * np.uint8().itemsize)
-    buffers = []
-    for _ in range(2):
-        err, dptr = cuda.cuMemAlloc(size)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-        buffers += [(np.full(size, 2).astype(np.uint8), dptr)]
-
-    # Update dev buffers
-    for host, device in buffers:
-        err, = cuda.cuMemcpyHtoD(device, host, size)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Create graph
-    nodes = []
-    err, graph = cuda.cuGraphCreate(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Memset
-    host, device = buffers[0]
-    memsetParams = cuda.CUgraphNodeParams()
-    memsetParams.type = cuda.CUgraphNodeType.CU_GRAPH_NODE_TYPE_MEMSET
-    memsetParams.memset.elementSize = np.uint8().itemsize
-    memsetParams.memset.width = size
-    memsetParams.memset.height = 1
-    memsetParams.memset.dst = device
-    memsetParams.memset.value = 1
-    err, node = cuda.cuGraphAddNode(graph, None, 0, memsetParams)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    nodes += [node]
-
-    # Memcpy
-    host, device = buffers[1]
-    memcpyParams = cuda.CUgraphNodeParams()
-    memcpyParams.type = cuda.CUgraphNodeType.CU_GRAPH_NODE_TYPE_MEMCPY
-    memcpyParams.memcpy.copyParams.srcMemoryType = cuda.CUmemorytype.CU_MEMORYTYPE_DEVICE
-    memcpyParams.memcpy.copyParams.srcDevice = device
-    memcpyParams.memcpy.copyParams.dstMemoryType = cuda.CUmemorytype.CU_MEMORYTYPE_HOST
-    memcpyParams.memcpy.copyParams.dstHost = host
-    memcpyParams.memcpy.copyParams.WidthInBytes = size
-    memcpyParams.memcpy.copyParams.Height = 1
-    memcpyParams.memcpy.copyParams.Depth = 1
-    err, node = cuda.cuGraphAddNode(graph, None, 0, memcpyParams)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    nodes += [node]
-
-    # Instantiate, execute, validate
-    err, graphExec = cuda.cuGraphInstantiate(graph, 0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuGraphLaunch(graphExec, stream)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuStreamSynchronize(stream)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Validate
-    for host, device in buffers:
-        err, = cuda.cuMemcpyDtoH(host, device, size)
-        assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(np.array_equal(buffers[0][0], np.full(size, 1).astype(np.uint8)))
-    assert(np.array_equal(buffers[1][0], np.full(size, 2).astype(np.uint8)))
-
-    # cuGraphNodeSetParams
-    host, device = buffers[1]
-    err, memcpyParamsCopy = cuda.cuGraphMemcpyNodeGetParams(nodes[1])
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(int(memcpyParamsCopy.srcDevice) == int(device))
-    host, device = buffers[0]
-    memcpyParams.memcpy.copyParams.srcDevice = device
-    err, = cuda.cuGraphNodeSetParams(nodes[1], memcpyParams)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, memcpyParamsCopy = cuda.cuGraphMemcpyNodeGetParams(nodes[1])
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(int(memcpyParamsCopy.srcDevice) == int(device))
-
-    # cuGraphExecNodeSetParams
-    memsetParams.memset.value = 11
-    err, = cuda.cuGraphExecNodeSetParams(graphExec, nodes[0], memsetParams)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuGraphLaunch(graphExec, stream)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuStreamSynchronize(stream)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuMemcpyDtoH(buffers[0][0], buffers[0][1], size)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(np.array_equal(buffers[0][0], np.full(size, 11).astype(np.uint8)))
-
-    # Cleanup
-    err, = cuda.cuMemFree(buffers[0][1])
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuMemFree(buffers[1][1])
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuGraphExecDestroy(graphExec)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuGraphDestroy(graph)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuStreamDestroy(stream)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-@pytest.mark.skipif(driverVersionLessThan(12040)
-                    or not supportsCudaAPI('cuDeviceGetDevResource'), reason='Polymorphic graph APIs required')
-def test_cuDeviceGetDevResource():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, resource_in = cuda.cuDeviceGetDevResource(device, cuda.CUdevResourceType.CU_DEV_RESOURCE_TYPE_SM)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    err, res, count, rem = cuda.cuDevSmResourceSplitByCount(0, resource_in, 0, 2)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(count != 0)
-    assert(len(res) == 0)
-    err, res, count_same, rem = cuda.cuDevSmResourceSplitByCount(count, resource_in, 0, 2)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(count == count_same)
-    assert(len(res) == count)
-    err, res, count, rem = cuda.cuDevSmResourceSplitByCount(3, resource_in, 0, 2)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    assert(len(res) == 3)
-
-    err, = cuda.cuCtxDestroy(ctx)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-@pytest.mark.skipif(driverVersionLessThan(12030)
-                    or not supportsCudaAPI('cuGraphConditionalHandleCreate'), reason='Conditional graph APIs required')
-def test_conditional():
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    err, graph = cuda.cuGraphCreate(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-    err, handle = cuda.cuGraphConditionalHandleCreate(graph, ctx, 0, 0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    params = cuda.CUgraphNodeParams()
-    params.type = cuda.CUgraphNodeType.CU_GRAPH_NODE_TYPE_CONDITIONAL
-    params.conditional.handle = handle
-    params.conditional.type = cuda.CUgraphConditionalNodeType.CU_GRAPH_COND_TYPE_IF
-    params.conditional.size = 1
-    params.conditional.ctx = ctx
-
-    assert(len(params.conditional.phGraph_out) == 1)
-    assert(int(params.conditional.phGraph_out[0]) == 0)
-    err, node = cuda.cuGraphAddNode(graph, None, 0, params)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    assert(len(params.conditional.phGraph_out) == 1)
-    assert(int(params.conditional.phGraph_out[0]) != 0)
diff --git a/cuda_bindings/tests/test_cudart.py b/cuda_bindings/tests/test_cudart.py
deleted file mode 100644
index 0e2c0af0..00000000
--- a/cuda_bindings/tests/test_cudart.py
+++ /dev/null
@@ -1,1302 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-from _pytest.mark.structures import store_mark
-import ctypes
-import cuda.cuda as cuda
-import cuda.cudart as cudart
-import math
-import numpy as np
-import pytest
-
-def isSuccess(err):
-    return err == cudart.cudaError_t.cudaSuccess
-
-def assertSuccess(err):
-    assert(isSuccess(err))
-
-def driverVersionLessThan(target):
-    err, version = cudart.cudaDriverGetVersion()
-    assertSuccess(err)
-    return version < target
-
-def supportsMemoryPool():
-    err, isSupported = cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrMemoryPoolsSupported, 0)
-    return isSuccess(err) and isSupported
-
-def supportsSparseTexturesDeviceFilter():
-    err, isSupported = cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrSparseCudaArraySupported, 0)
-    return isSuccess(err) and isSupported
-
-def supportsCudaAPI(name):
-    return name in dir(cuda) or dir(cudart)
-
-def test_cudart_memcpy():
-    # Allocate dev memory
-    size = 1024 * np.uint8().itemsize
-    err, dptr = cudart.cudaMalloc(size)
-    assertSuccess(err)
-
-    # Set h1 and h2 memory to be different
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # h1 to D
-    err, = cudart.cudaMemcpy(dptr, h1, size, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
-    assertSuccess(err)
-
-    # D to h2
-    err, = cudart.cudaMemcpy(h2, dptr, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost)
-    assertSuccess(err)
-
-    # Validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # Cleanup
-    err, = cudart.cudaFree(dptr)
-    assertSuccess(err)
-
-def test_cudart_hostRegister():
-    # Use hostRegister API to check for correct enum return values
-    page_size = 80
-    addr_host = np.full(page_size * 3, 1).astype(np.uint8)
-    addr = addr_host.ctypes.data
-
-    size_0 = ((16 * page_size) / 8)
-    addr_0 = addr + int(((0 * page_size) / 8))
-    size_1 = ((16 * page_size) / 8)
-    addr_1 = addr + int(((8 * page_size) / 8))
-
-    err, = cudart.cudaHostRegister(addr_0, size_0, 3)
-    assertSuccess(err)
-    err, = cudart.cudaHostRegister(addr_1, size_1, 3)
-    assert(err == cudart.cudaError_t.cudaErrorHostMemoryAlreadyRegistered)
-
-    err, = cudart.cudaHostUnregister(addr_1)
-    assert(err == cudart.cudaError_t.cudaErrorInvalidValue)
-    err, = cudart.cudaHostUnregister(addr_0)
-    assertSuccess(err)
-
-def test_cudart_class_reference():
-    offset = 1
-    width = 4
-    height = 5
-    depth = 6
-    flags = 0
-    numMipLevels = 1
-
-    extent = cudart.cudaExtent()
-    formatDesc = cudart.cudaChannelFormatDesc()
-    externalMemoryMipmappedArrayDesc = cudart.cudaExternalMemoryMipmappedArrayDesc()
-
-    # Get/set class attributes
-    extent.width  = width
-    extent.height = height
-    extent.depth  = depth
-
-    formatDesc.x = 8
-    formatDesc.y = 0
-    formatDesc.z = 0
-    formatDesc.w = 0
-    formatDesc.f = cudart.cudaChannelFormatKind.cudaChannelFormatKindSigned
-
-    externalMemoryMipmappedArrayDesc.offset     = offset
-    externalMemoryMipmappedArrayDesc.formatDesc = formatDesc
-    externalMemoryMipmappedArrayDesc.extent     = extent
-    externalMemoryMipmappedArrayDesc.flags      = flags
-    externalMemoryMipmappedArrayDesc.numLevels  = numMipLevels
-
-    # Can manipulate child structure values directly
-    externalMemoryMipmappedArrayDesc.extent.width  = width+1
-    externalMemoryMipmappedArrayDesc.extent.height = height+1
-    externalMemoryMipmappedArrayDesc.extent.depth  = depth+1
-    assert(externalMemoryMipmappedArrayDesc.extent.width == width+1)
-    assert(externalMemoryMipmappedArrayDesc.extent.height == height+1)
-    assert(externalMemoryMipmappedArrayDesc.extent.depth == depth+1)
-
-    externalMemoryMipmappedArrayDesc.formatDesc.x = 20
-    externalMemoryMipmappedArrayDesc.formatDesc.y = 21
-    externalMemoryMipmappedArrayDesc.formatDesc.z = 22
-    externalMemoryMipmappedArrayDesc.formatDesc.w = 23
-    externalMemoryMipmappedArrayDesc.formatDesc.f = cudart.cudaChannelFormatKind.cudaChannelFormatKindFloat
-    assert(externalMemoryMipmappedArrayDesc.formatDesc.x == 20)
-    assert(externalMemoryMipmappedArrayDesc.formatDesc.y == 21)
-    assert(externalMemoryMipmappedArrayDesc.formatDesc.z == 22)
-    assert(externalMemoryMipmappedArrayDesc.formatDesc.w == 23)
-    assert(externalMemoryMipmappedArrayDesc.formatDesc.f == cudart.cudaChannelFormatKind.cudaChannelFormatKindFloat)
-
-    # Can copy classes over
-    externalMemoryMipmappedArrayDesc.extent = extent
-    assert(externalMemoryMipmappedArrayDesc.extent.width == width)
-    assert(externalMemoryMipmappedArrayDesc.extent.height == height)
-    assert(externalMemoryMipmappedArrayDesc.extent.depth == depth)
-
-    externalMemoryMipmappedArrayDesc.formatDesc = formatDesc
-    assert(externalMemoryMipmappedArrayDesc.formatDesc.x == 8)
-    assert(externalMemoryMipmappedArrayDesc.formatDesc.y == 0)
-    assert(externalMemoryMipmappedArrayDesc.formatDesc.z == 0)
-    assert(externalMemoryMipmappedArrayDesc.formatDesc.w == 0)
-    assert(externalMemoryMipmappedArrayDesc.formatDesc.f == cudart.cudaChannelFormatKind.cudaChannelFormatKindSigned)
-
-@pytest.mark.skipif(not supportsSparseTexturesDeviceFilter(), reason='Sparse Texture Device Filter')
-def test_cudart_class_inline():
-    extent = cudart.cudaExtent()
-    extent.width  = 1000
-    extent.height = 500
-    extent.depth  = 0
-
-    desc = cudart.cudaChannelFormatDesc()
-    desc.x = 32
-    desc.y = 32
-    desc.z = 32
-    desc.w = 32
-    desc.f = cudart.cudaChannelFormatKind.cudaChannelFormatKindFloat
-
-    numChannels = 4
-    numBytesPerChannel = desc.x/8
-    numBytesPerTexel = numChannels * numBytesPerChannel
-
-    flags = cudart.cudaArraySparse
-    maxDim = max(extent.width, extent.height)
-    numLevels = int(float(1.0) + math.log(maxDim, 2))
-
-    err, mipmap = cudart.cudaMallocMipmappedArray(desc, extent, numLevels, flags)
-    assertSuccess(err)
-
-    err, sparseProp = cudart.cudaMipmappedArrayGetSparseProperties(mipmap)
-    assertSuccess(err)
-
-    # tileExtent
-    # TODO: Will these values always be this same? Maybe need a more stable test?
-    # TODO: Are these values even correct? Need to research the function some more.. Maybe need an easier API test
-    assert(sparseProp.tileExtent.width == 64)
-    assert(sparseProp.tileExtent.height == 64)
-    assert(sparseProp.tileExtent.depth == 1)
-
-    sparsePropNew = cudart.cudaArraySparseProperties()
-    sparsePropNew.tileExtent.width = 15
-    sparsePropNew.tileExtent.height = 16
-    sparsePropNew.tileExtent.depth = 17
-
-    # Check that we can copy inner structs
-    sparseProp.tileExtent = sparsePropNew.tileExtent
-    assert(sparseProp.tileExtent.width == 15)
-    assert(sparseProp.tileExtent.height == 16)
-    assert(sparseProp.tileExtent.depth == 17)
-
-    assert(sparseProp.miptailFirstLevel == 3)
-    assert(sparseProp.miptailSize == 196608)
-    assert(sparseProp.flags == 0)
-
-    err, = cudart.cudaFreeMipmappedArray(mipmap)
-    assertSuccess(err)
-
-    # TODO
-    example = cudart.cudaExternalSemaphoreSignalNodeParams()
-    example.extSemArray = [cudart.cudaExternalSemaphore_t(0), cudart.cudaExternalSemaphore_t(123), cudart.cudaExternalSemaphore_t(999)]
-    a1 = cudart.cudaExternalSemaphoreSignalParams()
-    a1.params.fence.value = 7
-    a1.params.nvSciSync.fence = 999
-    a1.params.keyedMutex.key = 9
-    a1.flags = 1
-    a2 = cudart.cudaExternalSemaphoreSignalParams()
-    a2.params.fence.value = 7
-    a2.params.nvSciSync.fence = 999
-    a2.params.keyedMutex.key = 9
-    a2.flags = 2
-    a3 = cudart.cudaExternalSemaphoreSignalParams()
-    a3.params.fence.value = 7
-    a3.params.nvSciSync.fence = 999
-    a3.params.keyedMutex.key = 9
-    a3.flags = 3
-    example.paramsArray = [a1]
-    # Note: Setting is a pass by value. Changing the object does not reflect internal value
-    a3.params.fence.value = 4
-    a3.params.nvSciSync.fence = 4
-    a3.params.keyedMutex.key = 4
-    a3.flags = 4
-    example.numExtSems = 3
-
-def test_cudart_graphs():
-    err, graph = cudart.cudaGraphCreate(0)
-    assertSuccess(err)
-
-    err, pGraphNode0 = cudart.cudaGraphAddEmptyNode(graph, None, 0)
-    assertSuccess(err)
-    err, pGraphNode1 = cudart.cudaGraphAddEmptyNode(graph, [pGraphNode0], 1)
-    assertSuccess(err)
-    err, pGraphNode2 = cudart.cudaGraphAddEmptyNode(graph, [pGraphNode0, pGraphNode1], 2)
-    assertSuccess(err)
-
-    err, nodes, numNodes = cudart.cudaGraphGetNodes(graph)
-    err, nodes, numNodes = cudart.cudaGraphGetNodes(graph, numNodes)
-
-    stream_legacy = cudart.cudaStream_t(cudart.cudaStreamLegacy)
-    stream_per_thread = cudart.cudaStream_t(cudart.cudaStreamPerThread)
-    err, stream_with_flags = cudart.cudaStreamCreateWithFlags(cudart.cudaStreamNonBlocking)
-    assertSuccess(err)
-
-def test_cudart_list_access():
-    err, prop = cudart.cudaGetDeviceProperties(0)
-    prop.name = prop.name + b' '*(256-len(prop.name))
-
-def test_cudart_class_setters():
-    dim = cudart.dim3()
-
-    dim.x = 1
-    dim.y = 2
-    dim.z = 3
-
-    assert dim.x == 1
-    assert dim.y == 2
-    assert dim.z == 3
-
-def test_cudart_both_type():
-    err, mode = cudart.cudaThreadExchangeStreamCaptureMode(cudart.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal)
-    assertSuccess(err)
-    err, mode = cudart.cudaThreadExchangeStreamCaptureMode(cudart.cudaStreamCaptureMode.cudaStreamCaptureModeRelaxed)
-    assertSuccess(err)
-    assert(mode == cudart.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal)
-    err, mode = cudart.cudaThreadExchangeStreamCaptureMode(cudart.cudaStreamCaptureMode.cudaStreamCaptureModeThreadLocal)
-    assertSuccess(err)
-    assert(mode == cudart.cudaStreamCaptureMode.cudaStreamCaptureModeRelaxed)
-    err, mode = cudart.cudaThreadExchangeStreamCaptureMode(cudart.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal)
-    assertSuccess(err)
-    assert(mode == cudart.cudaStreamCaptureMode.cudaStreamCaptureModeThreadLocal)
-
-def test_cudart_cudaGetDeviceProperties():
-    err, prop = cudart.cudaGetDeviceProperties(0)
-    assertSuccess(err)
-    attrs = ['accessPolicyMaxWindowSize', 'asyncEngineCount', 'canMapHostMemory', 'canUseHostPointerForRegisteredMem', 'clockRate', 'computeMode', 'computePreemptionSupported', 'concurrentKernels', 'concurrentManagedAccess', 'cooperativeLaunch', 'cooperativeMultiDeviceLaunch', 'deviceOverlap', 'directManagedMemAccessFromHost', 'getPtr', 'globalL1CacheSupported', 'hostNativeAtomicSupported', 'integrated', 'isMultiGpuBoard', 'kernelExecTimeoutEnabled', 'l2CacheSize', 'localL1CacheSupported', 'luid', 'luidDeviceNodeMask', 'major', 'managedMemory', 'maxBlocksPerMultiProcessor', 'maxGridSize', 'maxSurface1D', 'maxSurface1DLayered', 'maxSurface2D', 'maxSurface2DLayered', 'maxSurface3D', 'maxSurfaceCubemap', 'maxSurfaceCubemapLayered', 'maxTexture1D', 'maxTexture1DLayered', 'maxTexture1DLinear', 'maxTexture1DMipmap', 'maxTexture2D', 'maxTexture2DGather', 'maxTexture2DLayered', 'maxTexture2DLinear', 'maxTexture2DMipmap', 'maxTexture3D', 'maxTexture3DAlt', 'maxTextureCubemap', 'maxTextureCubemapLayered', 'maxThreadsDim', 'maxThreadsPerBlock', 'maxThreadsPerMultiProcessor', 'memPitch', 'memoryBusWidth', 'memoryClockRate', 'minor', 'multiGpuBoardGroupID', 'multiProcessorCount', 'name', 'pageableMemoryAccess', 'pageableMemoryAccessUsesHostPageTables', 'pciBusID', 'pciDeviceID', 'pciDomainID', 'persistingL2CacheMaxSize', 'regsPerBlock', 'regsPerMultiprocessor', 'reservedSharedMemPerBlock', 'sharedMemPerBlock', 'sharedMemPerBlockOptin', 'sharedMemPerMultiprocessor', 'singleToDoublePrecisionPerfRatio', 'streamPrioritiesSupported', 'surfaceAlignment', 'tccDriver', 'textureAlignment', 'texturePitchAlignment', 'totalConstMem', 'totalGlobalMem', 'unifiedAddressing', 'uuid', 'warpSize']
-    for attr in attrs:
-        assert hasattr(prop, attr)
-    assert len(prop.name.decode("utf-8")) != 0
-    assert len(prop.uuid.bytes.hex()) != 0
-
-    example = cudart.cudaExternalSemaphoreSignalNodeParams()
-    example.extSemArray = [cudart.cudaExternalSemaphore_t(0), cudart.cudaExternalSemaphore_t(123), cudart.cudaExternalSemaphore_t(999)]
-    a1 = cudart.cudaExternalSemaphoreSignalParams()
-    a1.params.fence.value = 7
-    a1.params.nvSciSync.fence = 999
-    a1.params.keyedMutex.key = 9
-    a1.flags = 1
-    a2 = cudart.cudaExternalSemaphoreSignalParams()
-    a2.params.fence.value = 7
-    a2.params.nvSciSync.fence = 999
-    a2.params.keyedMutex.key = 9
-    a2.flags = 2
-    a3 = cudart.cudaExternalSemaphoreSignalParams()
-    a3.params.fence.value = 7
-    a3.params.nvSciSync.fence = 999
-    a3.params.keyedMutex.key = 9
-    a3.flags = 3
-    example.paramsArray = [a1]
-    # Note: Setting is a pass by value. Changing the object does not reflect internal value
-    a3.params.fence.value = 4
-    a3.params.nvSciSync.fence = 4
-    a3.params.keyedMutex.key = 4
-    a3.flags = 4
-    example.numExtSems = 3
-
-@pytest.mark.skipif(driverVersionLessThan(11030) or not supportsMemoryPool(), reason='When new attributes were introduced')
-def test_cudart_MemPool_attr():
-    poolProps = cudart.cudaMemPoolProps()
-    poolProps.allocType = cudart.cudaMemAllocationType.cudaMemAllocationTypePinned
-    poolProps.location.id = 0
-    poolProps.location.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
-
-    attr_list = [None] * 8
-    err, pool = cudart.cudaMemPoolCreate(poolProps)
-    assertSuccess(err)
-
-    for idx, attr in enumerate([cudart.cudaMemPoolAttr.cudaMemPoolReuseFollowEventDependencies,
-                                cudart.cudaMemPoolAttr.cudaMemPoolReuseAllowOpportunistic,
-                                cudart.cudaMemPoolAttr.cudaMemPoolReuseAllowInternalDependencies,
-                                cudart.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold,
-                                cudart.cudaMemPoolAttr.cudaMemPoolAttrReservedMemCurrent,
-                                cudart.cudaMemPoolAttr.cudaMemPoolAttrReservedMemHigh,
-                                cudart.cudaMemPoolAttr.cudaMemPoolAttrUsedMemCurrent,
-                                cudart.cudaMemPoolAttr.cudaMemPoolAttrUsedMemHigh]):
-        err, attr_tmp = cudart.cudaMemPoolGetAttribute(pool, attr)
-        assertSuccess(err)
-        attr_list[idx] = attr_tmp
-
-    for idxA, attr in enumerate([cudart.cudaMemPoolAttr.cudaMemPoolReuseFollowEventDependencies,
-                                 cudart.cudaMemPoolAttr.cudaMemPoolReuseAllowOpportunistic,
-                                 cudart.cudaMemPoolAttr.cudaMemPoolReuseAllowInternalDependencies]):
-        err, = cudart.cudaMemPoolSetAttribute(pool, attr, 0)
-        assertSuccess(err)
-    for idx, attr in enumerate([cudart.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold]):
-        err, = cudart.cudaMemPoolSetAttribute(pool, attr, cuda.cuuint64_t(9))
-        assertSuccess(err)
-
-    for idx, attr in enumerate([cudart.cudaMemPoolAttr.cudaMemPoolReuseFollowEventDependencies,
-                                cudart.cudaMemPoolAttr.cudaMemPoolReuseAllowOpportunistic,
-                                cudart.cudaMemPoolAttr.cudaMemPoolReuseAllowInternalDependencies,
-                                cudart.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold]):
-        err, attr_tmp = cudart.cudaMemPoolGetAttribute(pool, attr)
-        assertSuccess(err)
-        attr_list[idx] = attr_tmp
-    assert(attr_list[0] == 0)
-    assert(attr_list[1] == 0)
-    assert(attr_list[2] == 0)
-    assert(int(attr_list[3]) == 9)
-
-    err, = cudart.cudaMemPoolDestroy(pool)
-    assertSuccess(err)
-
-def test_cudart_make_api():
-    err, channelDesc = cudart.cudaCreateChannelDesc(32,0,0,0,cudart.cudaChannelFormatKind.cudaChannelFormatKindFloat)
-    assertSuccess(err)
-    assert(channelDesc.x == 32)
-    assert(channelDesc.y == 0)
-    assert(channelDesc.z == 0)
-    assert(channelDesc.w == 0)
-    assert(channelDesc.f == cudart.cudaChannelFormatKind.cudaChannelFormatKindFloat)
-
-    # make_cudaPitchedPtr
-    cudaPitchedPtr = cudart.make_cudaPitchedPtr(1,2,3,4)
-    assert(cudaPitchedPtr.ptr == 1)
-    assert(cudaPitchedPtr.pitch == 2)
-    assert(cudaPitchedPtr.xsize == 3)
-    assert(cudaPitchedPtr.ysize == 4)
-
-    # make_cudaPos
-    cudaPos = cudart.make_cudaPos(1,2,3)
-    assert(cudaPos.x == 1)
-    assert(cudaPos.y == 2)
-    assert(cudaPos.z == 3)
-
-    # make_cudaExtent
-    cudaExtent = cudart.make_cudaExtent(1,2,3)
-    assert(cudaExtent.width == 1)
-    assert(cudaExtent.height == 2)
-    assert(cudaExtent.depth == 3)
-
-def test_cudart_cudaStreamGetCaptureInfo():
-    # create stream
-    err, stream = cudart.cudaStreamCreate()
-    assertSuccess(err)
-
-    # validate that stream is not capturing
-    err, status, *info = cudart.cudaStreamGetCaptureInfo(stream)
-    assertSuccess(err)
-    assert(status == cudart.cudaStreamCaptureStatus.cudaStreamCaptureStatusNone)
-
-    # start capture
-    err, = cudart.cudaStreamBeginCapture(
-        stream, cudart.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal
-    )
-    assertSuccess(err)
-
-    # validate that stream is capturing now
-    err, status, *info = cudart.cudaStreamGetCaptureInfo(stream)
-    assertSuccess(err)
-    assert(status == cudart.cudaStreamCaptureStatus.cudaStreamCaptureStatusActive)
-
-    # clean up
-    err, pgraph = cudart.cudaStreamEndCapture(stream)
-    assertSuccess(err)
-
-def test_cudart_cudaArrayGetInfo():
-    # create channel descriptor
-    x, y, z, w = 8, 0, 0, 0
-    f = cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    err, desc = cudart.cudaCreateChannelDesc(
-        x, y, z, w, f
-    )
-    assertSuccess(err)
-
-    # allocate device array
-    width = 10
-    height = 0
-    inFlags = 0
-    err, arr = cudart.cudaMallocArray(desc, width, height, inFlags)
-    assertSuccess(err)
-
-    # get device array info
-    err, desc, extent, outFlags = cudart.cudaArrayGetInfo(arr)
-    assertSuccess(err)
-
-    # validate descriptor, extent, flags
-    assert(desc.x == x)
-    assert(desc.y == y)
-    assert(desc.z == z)
-    assert(desc.w == w)
-    assert(desc.f == f)
-    assert(extent.width == width)
-    assert(extent.height == height)
-    assert(inFlags == outFlags)
-
-    # clean up
-    err, = cudart.cudaFreeArray(arr)
-    assertSuccess(err)
-    
-def test_cudart_cudaMemcpy2DToArray():
-    # create host arrays
-    size = int(1024 * np.uint8().itemsize)
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # create channel descriptor
-    err, desc = cudart.cudaCreateChannelDesc(
-        8, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    )
-    assertSuccess(err)
-
-    # allocate device array
-    err, arr = cudart.cudaMallocArray(desc, size, 0, 0)
-    assertSuccess(err)
-
-    # h1 to arr
-    err, = cudart.cudaMemcpy2DToArray(
-        arr, 0, 0, h1, size, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
-    )
-    assertSuccess(err)
-
-    # arr to h2
-    err, = cudart.cudaMemcpy2DFromArray(
-        h2, size, arr, 0, 0, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    )
-    assertSuccess(err)
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFreeArray(arr)
-    assertSuccess(err)
-
-def test_cudart_cudaMemcpy2DToArray_DtoD():
-    # allocate device memory
-    size = 1024 * np.uint8().itemsize
-    err, d1 = cudart.cudaMalloc(size)
-    assertSuccess(err)
-    err, d2 = cudart.cudaMalloc(size)
-    assertSuccess(err)
-
-    # create host arrays
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # create channel descriptor
-    err, desc = cudart.cudaCreateChannelDesc(
-        8, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    )
-    assertSuccess(err)
-
-    # allocate device array
-    err, arr = cudart.cudaMallocArray(desc, size, 0, 0)
-    assertSuccess(err)
-
-    # h1 to d1
-    err, = cudart.cudaMemcpy(d1, h1, size, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
-    assertSuccess(err)
-
-    # d1 to arr
-    err, = cudart.cudaMemcpy2DToArray(
-        arr, 0, 0, d1, size, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice
-    )
-    assertSuccess(err)
-
-    # arr to d2
-    err, = cudart.cudaMemcpy2DFromArray(
-        d2, size, arr, 0, 0, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice
-    )
-    assertSuccess(err)
-
-    # d2 to h2
-    err, = cudart.cudaMemcpy(h2, d2, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost)
-    assertSuccess(err)
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFreeArray(arr)
-    assertSuccess(err)
-    err, = cudart.cudaFree(d2)
-    assertSuccess(err)
-    err, = cudart.cudaFree(d1)
-    assertSuccess(err)
-
-def test_cudart_cudaMemcpy2DArrayToArray():
-    # create host arrays
-    size = 1024 * np.uint8().itemsize
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # create channel descriptor
-    err, desc = cudart.cudaCreateChannelDesc(
-        8, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    )
-    assertSuccess(err)
-
-    # allocate device arrays
-    err, a1 = cudart.cudaMallocArray(desc, size, 0, 0)
-    assertSuccess(err)
-    err, a2 = cudart.cudaMallocArray(desc, size, 0, 0)
-    assertSuccess(err)
-
-    # h1 to a1
-    err, = cudart.cudaMemcpy2DToArray(
-        a1, 0, 0, h1, size, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
-    )
-    assertSuccess(err)
-
-    # a1 to a2
-    err, = cudart.cudaMemcpy2DArrayToArray(
-        a2, 0, 0, a1, 0, 0, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice    
-    )
-    assertSuccess(err)
-
-    # a2 to h2
-    err, = cudart.cudaMemcpy2DFromArray(
-        h2, size, a2, 0, 0, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    )
-    assertSuccess(err)
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFreeArray(a2)
-    assertSuccess(err)
-    err, = cudart.cudaFreeArray(a1)
-    assertSuccess(err)
-
-def test_cudart_cudaMemcpyArrayToArray():
-    # create host arrays
-    size = 1024 * np.uint8().itemsize
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # create channel descriptor
-    err, desc = cudart.cudaCreateChannelDesc(
-        8, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    )
-    assertSuccess(err)
-
-    # allocate device arrays
-    err, a1 = cudart.cudaMallocArray(desc, size, 0, 0)
-    assertSuccess(err)
-    err, a2 = cudart.cudaMallocArray(desc, size, 0, 0)
-    assertSuccess(err)
-
-    # h1 to a1
-    err, = cudart.cudaMemcpy2DToArray(
-        a1, 0, 0, h1, size, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
-    )
-    assertSuccess(err)
-
-    # a1 to a2
-    err, = cudart.cudaMemcpyArrayToArray(
-        a2, 0, 0, a1, 0, 0, size,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice    
-    )
-    assertSuccess(err)
-
-    # a2 to h2
-    err, = cudart.cudaMemcpy2DFromArray(
-        h2, size, a2, 0, 0, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    )
-    assertSuccess(err)
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFreeArray(a2)
-    assertSuccess(err)
-    err, = cudart.cudaFreeArray(a1)
-    assertSuccess(err)
-
-def test_cudart_cudaGetChannelDesc():
-    # create channel descriptor
-    x, y, z, w = 8, 0, 0, 0
-    f = cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    err, desc = cudart.cudaCreateChannelDesc(
-        x, y, z, w, f
-    )
-    assertSuccess(err)
-
-    # allocate device array
-    width = 10
-    height = 0
-    flags = 0
-    err, arr = cudart.cudaMallocArray(desc, width, height, flags)
-    assertSuccess(err)
-
-    # get channel descriptor from array
-    err, desc = cudart.cudaGetChannelDesc(arr)
-    assertSuccess(err)
-
-    # validate array channel descriptor
-    assert(desc.x == x)
-    assert(desc.y == y)
-    assert(desc.z == z)
-    assert(desc.w == w)
-    assert(desc.f == f)
-
-    # clean up
-    err, = cudart.cudaFreeArray(arr)
-    assertSuccess(err)
-
-def test_cudart_cudaGetTextureObjectTextureDesc():
-    # create channel descriptor
-    err, channelDesc = cudart.cudaCreateChannelDesc(
-        8, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    )
-    assertSuccess(err)
-
-    # allocate device arrays
-    err, arr = cudart.cudaMallocArray(channelDesc, 1024, 0, 0)
-    assertSuccess(err)
-
-    # create descriptors for texture object
-    resDesc = cudart.cudaResourceDesc()
-    resDesc.res.array.array = arr
-    inTexDesc = cudart.cudaTextureDesc()
-
-    # create texture object
-    err, texObject = cudart.cudaCreateTextureObject(resDesc, inTexDesc, None)
-    assertSuccess(err)
-
-    # get texture descriptor
-    err, outTexDesc = cudart.cudaGetTextureObjectTextureDesc(texObject)
-    assertSuccess(err)
-
-    # validate texture descriptor
-    for attr in dir(outTexDesc):
-        if attr in ["borderColor", "getPtr"]:
-            continue
-        if not attr.startswith("_"):
-            assert(getattr(outTexDesc, attr) == getattr(inTexDesc, attr))
-    
-    # clean up
-    err, = cudart.cudaDestroyTextureObject(texObject)
-    assertSuccess(err)
-
-def test_cudart_cudaMemset3D():
-    # create host arrays
-    size = 1024 * np.uint8().itemsize
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # allocate device memory
-    devExtent = cudart.make_cudaExtent(32, 32, 1)
-    err, devPitchedPtr = cudart.cudaMalloc3D(devExtent)
-    assertSuccess(err)
-
-    # set memory
-    memExtent = cudart.make_cudaExtent(devPitchedPtr.pitch, devPitchedPtr.ysize, 1)
-    err, = cudart.cudaMemset3D(devPitchedPtr, 1, memExtent)
-    assertSuccess(err)
-
-    # D to h2
-    err, = cudart.cudaMemcpy(
-        h2, devPitchedPtr.ptr, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    )
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFree(devPitchedPtr.ptr)
-    assertSuccess(err)
-
-def test_cudart_cudaMemset3D_2D():
-    # create host arrays
-    size = 512 * np.uint8().itemsize
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # allocate device memory
-    devExtent = cudart.make_cudaExtent(1024, 1, 1)
-    err, devPitchedPtr = cudart.cudaMalloc3D(devExtent)
-    assertSuccess(err)
-
-    # set memory
-    memExtent = cudart.make_cudaExtent(size, devPitchedPtr.ysize, 1)
-    err, = cudart.cudaMemset3D(devPitchedPtr, 1, memExtent)
-    assertSuccess(err)
-
-    # D to h2
-    err, = cudart.cudaMemcpy(
-        h2, devPitchedPtr.ptr, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    )
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFree(devPitchedPtr.ptr)
-    assertSuccess(err)
-
-def test_cudart_cudaMemcpyToArray():
-    # create host arrays
-    size = 1024 * np.uint8().itemsize
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # create channel descriptor
-    err, desc = cudart.cudaCreateChannelDesc(
-        8, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    )
-    assertSuccess(err)
-    
-    # allocate device array
-    err, arr = cudart.cudaMallocArray(desc, size, 0, 0)
-    assertSuccess(err)
-
-    # h1 to arr
-    err, = cudart.cudaMemcpyToArray(
-        arr, 0, 0, h1, size, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
-    )
-    assertSuccess(err)
-
-    # arr to h2
-    err, = cudart.cudaMemcpyFromArray(
-        h2, arr, 0, 0, size,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    )
-    assertSuccess(err)
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFreeArray(arr)
-    assertSuccess(err)
-
-def test_cudart_cudaMemcpyToArray_DtoD():
-    # allocate device memory
-    size = int(1024 * np.uint8().itemsize)
-    err, d1 = cudart.cudaMalloc(size)
-    assertSuccess(err)
-    err, d2 = cudart.cudaMalloc(size)
-    assertSuccess(err)
-
-    # create host arrays
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # create channel descriptor
-    err, desc = cudart.cudaCreateChannelDesc(
-        8, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    )
-    assertSuccess(err)
-    
-    # allocate device array
-    err, arr = cudart.cudaMallocArray(desc, size, 0, 0)
-    assertSuccess(err)
-
-    # h1 to d1
-    err, = cudart.cudaMemcpy(d1, h1, size, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
-    assertSuccess(err)
-
-    # d1 to arr
-    err, = cudart.cudaMemcpyToArray(
-        arr, 0, 0, d1, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice
-    )
-    assertSuccess(err)
-
-    # arr to d2
-    err, = cudart.cudaMemcpyFromArray(
-        d2, arr, 0, 0, size,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice
-    )
-    assertSuccess(err)
-
-    # d2 to h2
-    err, = cudart.cudaMemcpy(h2, d2, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost)
-    assertSuccess(err)
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFreeArray(arr)
-    assertSuccess(err)
-    err, = cudart.cudaFree(d2)
-    assertSuccess(err)
-    err, = cudart.cudaFree(d1)
-    assertSuccess(err)
-
-def test_cudart_cudaMemcpy3DAsync():
-    # create host arrays
-    size = int(1024 * np.uint8().itemsize)
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # create channel descriptor
-    err, desc = cudart.cudaCreateChannelDesc(
-        8, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    )
-    assertSuccess(err)
-
-    # allocate device array
-    err, arr = cudart.cudaMallocArray(desc, size, 0, 0)
-    assertSuccess(err)
-
-    # create stream
-    err, stream = cudart.cudaStreamCreate()
-    assertSuccess(err)
-
-    # create memcpy params
-    params = cudart.cudaMemcpy3DParms()
-    params.srcPtr = cudart.make_cudaPitchedPtr(h1, size, 1, 1)
-    params.dstArray = arr
-    params.extent = cudart.make_cudaExtent(size, 1, 1)
-    params.kind = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
-
-    # h1 to arr
-    err, = cudart.cudaMemcpy3DAsync(params, stream)
-    assertSuccess(err)
-
-    # await results
-    err, = cudart.cudaStreamSynchronize(stream)
-    assertSuccess(err)
-
-    # arr to h2
-    err, = cudart.cudaMemcpy2DFromArray(
-        h2, size, arr, 0, 0, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    )
-    assertSuccess(err)
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFreeArray(arr)
-    assertSuccess(err)
-
-def test_cudart_cudaGraphAddMemcpyNode1D():
-    # allocate device memory
-    size = 1024 * np.uint8().itemsize
-    err, dptr = cudart.cudaMalloc(size)
-    assertSuccess(err)
-
-    # create host arrays
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # build graph
-    err, graph = cudart.cudaGraphCreate(0)
-    assertSuccess(err)
-
-    # add nodes
-    err, hToDNode = cudart.cudaGraphAddMemcpyNode1D(
-        graph, [], 0, dptr, h1, size, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
-    )
-    assertSuccess(err)
-    err, dToHNode = cudart.cudaGraphAddMemcpyNode1D(
-        graph, [hToDNode], 1, h2, dptr, size,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    )
-    assertSuccess(err)
-
-    # create stream
-    err, stream = cudart.cudaStreamCreate()
-    assertSuccess(err)
-
-    # execute graph
-    err, execGraph = cudart.cudaGraphInstantiate(graph, 0)
-    assertSuccess(err)
-    err, = cudart.cudaGraphLaunch(execGraph, stream)
-
-    # await results
-    err, = cudart.cudaStreamSynchronize(stream)
-    assertSuccess(err)
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFree(dptr)
-    assertSuccess(err)
-
-def test_cudart_cudaGraphAddMemsetNode():
-    # allocate device memory
-    size = 1024 * np.uint8().itemsize
-    err, dptr = cudart.cudaMalloc(size)
-    assertSuccess(err)
-
-    # create host arrays
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # build graph
-    err, graph = cudart.cudaGraphCreate(0)
-    assertSuccess(err)
-
-    # set memset params
-    params = cudart.cudaMemsetParams()
-    params.dst = dptr
-    params.pitch = size
-    params.value = 1
-    params.elementSize = 1
-    params.width = size
-    params.height = 1
-
-    # add nodes
-    err, setNode = cudart.cudaGraphAddMemsetNode(
-        graph, [], 0, params
-    )
-    assertSuccess(err)
-    err, cpyNode = cudart.cudaGraphAddMemcpyNode1D(
-        graph, [setNode], 1, h2, dptr, size,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    )
-    assertSuccess(err)
-
-
-    # create stream
-    err, stream = cudart.cudaStreamCreate()
-    assertSuccess(err)
-
-    # execute graph
-    err, execGraph = cudart.cudaGraphInstantiate(graph, 0)
-    assertSuccess(err)
-    err, = cudart.cudaGraphLaunch(execGraph, stream)
-    assertSuccess(err)
-
-    # await results
-    err, = cudart.cudaStreamSynchronize(stream)
-    assertSuccess(err)
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFree(dptr)
-    assertSuccess(err)
-
-def test_cudart_cudaMemcpy3DPeer():
-    # allocate device memory
-    size = int(1024 * np.uint8().itemsize)
-    err, dptr = cudart.cudaMalloc(size)
-    assertSuccess(err)
-
-    # create host arrays
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # create channel descriptor
-    err, desc = cudart.cudaCreateChannelDesc(
-        8, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    )
-    assertSuccess(err)
-
-    # allocate device array
-    err, arr = cudart.cudaMallocArray(desc, size, 0, 0)
-    assertSuccess(err)
-
-    # create memcpy params
-    params = cudart.cudaMemcpy3DPeerParms()
-    params.srcPtr = cudart.make_cudaPitchedPtr(dptr, size, 1, 1)
-    params.dstArray = arr
-    params.extent = cudart.make_cudaExtent(size, 1, 1)
-
-    # h1 to D
-    err, = cudart.cudaMemcpy(dptr, h1, size, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
-    assertSuccess(err)
-
-    # D to arr
-    err, = cudart.cudaMemcpy3DPeer(params)
-    assertSuccess(err)
-
-    # arr to h2
-    err, = cudart.cudaMemcpy2DFromArray(
-        h2, size, arr, 0, 0, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    )
-    assertSuccess(err)
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFreeArray(arr)
-    assertSuccess(err)
-    err, = cudart.cudaFree(dptr)
-    assertSuccess(err)
-
-def test_cudart_cudaMemcpy3DPeerAsync():
-    # allocate device memory
-    size = 1024 * np.uint8().itemsize
-    err, dptr = cudart.cudaMalloc(size)
-    assertSuccess(err)
-
-    # create host arrays
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # create channel descriptor
-    err, desc = cudart.cudaCreateChannelDesc(
-        8, 0, 0, 0, cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-    )
-    assertSuccess(err)
-
-    # allocate device array
-    err, arr = cudart.cudaMallocArray(desc, size, 0, 0)
-    assertSuccess(err)
-
-    # create stream
-    err, stream = cudart.cudaStreamCreate()
-    assertSuccess(err)
-
-    # create memcpy params
-    params = cudart.cudaMemcpy3DPeerParms()
-    params.srcPtr = cudart.make_cudaPitchedPtr(dptr, size, 1, 1)
-    params.dstArray = arr
-    params.extent = cudart.make_cudaExtent(size, 1, 1)
-
-    # h1 to D
-    err, = cudart.cudaMemcpy(dptr, h1, size, cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
-    assertSuccess(err)
-
-    # D to arr
-    err, = cudart.cudaMemcpy3DPeerAsync(params, stream)
-    assertSuccess(err)
-
-    # await results
-    err, = cudart.cudaStreamSynchronize(stream)
-    assertSuccess(err)
-
-    # arr to h2
-    err, = cudart.cudaMemcpy2DFromArray(
-        h2, size, arr, 0, 0, size, 1,
-        cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
-    )
-    assertSuccess(err)
-
-    # validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # clean up
-    err, = cudart.cudaFreeArray(arr)
-    assertSuccess(err)
-    err, = cudart.cudaFree(dptr)
-    assertSuccess(err)
-
-def test_profiler():
-    err, = cudart.cudaProfilerStart()
-    assertSuccess(err)
-    err, = cudart.cudaProfilerStop()
-    assertSuccess(err)
-
-def test_cudart_eglFrame():
-    frame = cudart.cudaEglFrame()
-    # [<cudaArray_t 0x0>, <cudaArray_t 0x0>, <cudaArray_t 0x0>]
-    assert(int(frame.frame.pArray[0]) == 0)
-    assert(int(frame.frame.pArray[1]) == 0)
-    assert(int(frame.frame.pArray[2]) == 0)
-    frame.frame.pArray = [1,2,3]
-    # [<cudaArray_t 0x1>, <cudaArray_t 0x2>, <cudaArray_t 0x3>]
-    assert(int(frame.frame.pArray[0]) == 1)
-    assert(int(frame.frame.pArray[1]) == 2)
-    assert(int(frame.frame.pArray[2]) == 3)
-    frame.frame.pArray = [1,2,cudart.cudaArray_t(4)]
-    # [<cudaArray_t 0x1>, <cudaArray_t 0x2>, <cudaArray_t 0x4>]
-    assert(int(frame.frame.pArray[0]) == 1)
-    assert(int(frame.frame.pArray[1]) == 2)
-    assert(int(frame.frame.pArray[2]) == 4)
-    # frame.frame.pPitch
-    # [ptr : 0x1
-    # pitch : 2
-    # xsize : 4
-    # ysize : 0, ptr : 0x0
-    # pitch : 0
-    # xsize : 0
-    # ysize : 0, ptr : 0x0
-    # pitch : 0
-    # xsize : 0
-    # ysize : 0]
-    assert(int(frame.frame.pPitch[0].ptr) == 1)
-    assert(int(frame.frame.pPitch[0].pitch) == 2)
-    assert(int(frame.frame.pPitch[0].xsize) == 4)
-    assert(int(frame.frame.pPitch[0].ysize) == 0)
-    assert(int(frame.frame.pPitch[1].ptr) == 0)
-    assert(int(frame.frame.pPitch[1].pitch) == 0)
-    assert(int(frame.frame.pPitch[1].xsize) == 0)
-    assert(int(frame.frame.pPitch[1].ysize) == 0)
-    assert(int(frame.frame.pPitch[2].ptr) == 0)
-    assert(int(frame.frame.pPitch[2].pitch) == 0)
-    assert(int(frame.frame.pPitch[2].xsize) == 0)
-    assert(int(frame.frame.pPitch[2].ysize) == 0)
-    frame.frame.pPitch = [cudart.cudaPitchedPtr(), cudart.cudaPitchedPtr(), cudart.cudaPitchedPtr()]
-    # [ptr : 0x0
-    # pitch : 0
-    # xsize : 0
-    # ysize : 0, ptr : 0x0
-    # pitch : 0
-    # xsize : 0
-    # ysize : 0, ptr : 0x0
-    # pitch : 0
-    # xsize : 0
-    # ysize : 0]
-    assert(int(frame.frame.pPitch[0].ptr) == 0)
-    assert(int(frame.frame.pPitch[0].pitch) == 0)
-    assert(int(frame.frame.pPitch[0].xsize) == 0)
-    assert(int(frame.frame.pPitch[0].ysize) == 0)
-    assert(int(frame.frame.pPitch[1].ptr) == 0)
-    assert(int(frame.frame.pPitch[1].pitch) == 0)
-    assert(int(frame.frame.pPitch[1].xsize) == 0)
-    assert(int(frame.frame.pPitch[1].ysize) == 0)
-    assert(int(frame.frame.pPitch[2].ptr) == 0)
-    assert(int(frame.frame.pPitch[2].pitch) == 0)
-    assert(int(frame.frame.pPitch[2].xsize) == 0)
-    assert(int(frame.frame.pPitch[2].ysize) == 0)
-    x = frame.frame.pPitch[0]
-    x.pitch = 123
-    frame.frame.pPitch = [x,x,x]
-    # [ptr : 0x0
-    # pitch : 123
-    # xsize : 0
-    # ysize : 0, ptr : 0x0
-    # pitch : 123
-    # xsize : 0
-    # ysize : 0, ptr : 0x0
-    # pitch : 123
-    # xsize : 0
-    # ysize : 0]
-    assert(int(frame.frame.pPitch[0].ptr) == 0)
-    assert(int(frame.frame.pPitch[0].pitch) == 123)
-    assert(int(frame.frame.pPitch[0].xsize) == 0)
-    assert(int(frame.frame.pPitch[0].ysize) == 0)
-    assert(int(frame.frame.pPitch[1].ptr) == 0)
-    assert(int(frame.frame.pPitch[1].pitch) == 123)
-    assert(int(frame.frame.pPitch[1].xsize) == 0)
-    assert(int(frame.frame.pPitch[1].ysize) == 0)
-    assert(int(frame.frame.pPitch[2].ptr) == 0)
-    assert(int(frame.frame.pPitch[2].pitch) == 123)
-    assert(int(frame.frame.pPitch[2].xsize) == 0)
-    assert(int(frame.frame.pPitch[2].ysize) == 0)
-    x.pitch = 1234
-    # [ptr : 0x0
-    # pitch : 123
-    # xsize : 0
-    # ysize : 0, ptr : 0x0
-    # pitch : 123
-    # xsize : 0
-    # ysize : 0, ptr : 0x0
-    # pitch : 123
-    # xsize : 0
-    # ysize : 0]
-    assert(int(frame.frame.pPitch[0].ptr) == 0)
-    assert(int(frame.frame.pPitch[0].pitch) == 123)
-    assert(int(frame.frame.pPitch[0].xsize) == 0)
-    assert(int(frame.frame.pPitch[0].ysize) == 0)
-    assert(int(frame.frame.pPitch[1].ptr) == 0)
-    assert(int(frame.frame.pPitch[1].pitch) == 123)
-    assert(int(frame.frame.pPitch[1].xsize) == 0)
-    assert(int(frame.frame.pPitch[1].ysize) == 0)
-    assert(int(frame.frame.pPitch[2].ptr) == 0)
-    assert(int(frame.frame.pPitch[2].pitch) == 123)
-    assert(int(frame.frame.pPitch[2].xsize) == 0)
-    assert(int(frame.frame.pPitch[2].ysize) == 0)
-
-def cudart_func_stream_callback(use_host_api):
-    class testStruct(ctypes.Structure):
-        _fields_ = [('a', ctypes.c_int),
-                    ('b', ctypes.c_int),
-                    ('c', ctypes.c_int),]
-
-    def task_callback_host(userData):
-        data = testStruct.from_address(userData)
-        assert(data.a == 1)
-        assert(data.b == 2)
-        assert(data.c == 3)
-        return 0
-
-    def task_callback_stream(stream, status, userData):
-        data = testStruct.from_address(userData)
-        assert(data.a == 1)
-        assert(data.b == 2)
-        assert(data.c == 3)
-        return 0
-
-    if use_host_api:
-        callback_type = ctypes.PYFUNCTYPE(ctypes.c_int, ctypes.c_void_p)
-        target_task = task_callback_host
-    else:
-        callback_type = ctypes.PYFUNCTYPE(ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p)
-        target_task = task_callback_stream
-
-    # Construct ctype data
-    c_callback = callback_type(target_task)
-    c_data = testStruct(1, 2, 3)
-
-    # ctypes is managing the pointer value for us
-    if use_host_api:
-        callback = cudart.cudaHostFn_t(_ptr=ctypes.addressof(c_callback))
-    else:
-        callback = cudart.cudaStreamCallback_t(_ptr=ctypes.addressof(c_callback))
-
-    # Run
-    err, stream = cudart.cudaStreamCreate()
-    assertSuccess(err)
-    if use_host_api:
-        err, = cudart.cudaLaunchHostFunc(stream, callback, ctypes.addressof(c_data))
-        assertSuccess(err)
-    else:
-        err, = cudart.cudaStreamAddCallback(stream, callback, ctypes.addressof(c_data), 0)
-        assertSuccess(err)
-    err, = cudart.cudaDeviceSynchronize()
-    assertSuccess(err)
-
-
-def test_cudart_func_callback():
-    cudart_func_stream_callback(use_host_api=False)
-    cudart_func_stream_callback(use_host_api=True)
-
-@pytest.mark.skipif(driverVersionLessThan(12030)
-                    or not supportsCudaAPI('cudaGraphConditionalHandleCreate'), reason='Conditional graph APIs required')
-def test_cudart_conditional():
-    err, graph = cudart.cudaGraphCreate(0)
-    assertSuccess(err)
-    err, handle = cudart.cudaGraphConditionalHandleCreate(graph, 0, 0)
-    assertSuccess(err)
-
-    params = cudart.cudaGraphNodeParams()
-    params.type = cudart.cudaGraphNodeType.cudaGraphNodeTypeConditional
-    params.conditional.handle = handle
-    params.conditional.type = cudart.cudaGraphConditionalNodeType.cudaGraphCondTypeIf
-    params.conditional.size = 1
-
-    assert(len(params.conditional.phGraph_out) == 1)
-    assert(int(params.conditional.phGraph_out[0]) == 0)
-    err, node = cudart.cudaGraphAddNode(graph, None, 0, params)
-    assertSuccess(err)
-
-    assert(len(params.conditional.phGraph_out) == 1)
-    assert(int(params.conditional.phGraph_out[0]) != 0)
diff --git a/cuda_bindings/tests/test_cython.py b/cuda_bindings/tests/test_cython.py
deleted file mode 100644
index 439d7aa0..00000000
--- a/cuda_bindings/tests/test_cython.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import functools
-import importlib
-import sys
-
-
-def py_func(func):
-    """
-    Wraps func in a plain Python function.
-    """
-
-    @functools.wraps(func)
-    def wrapped(*args, **kwargs):
-        return func(*args, **kwargs)
-
-    return wrapped
-
-
-cython_test_modules = ["test_ccuda",
-                       "test_ccudart",
-                       "test_interoperability_cython"]
-
-
-for mod in cython_test_modules:
-    try:
-        # For each callable in `mod` with name `test_*`,
-        # wrap the callable in a plain Python function
-        # and set the result as an attribute of this module.
-        mod = importlib.import_module(mod)
-        for name in dir(mod):
-            item = getattr(mod, name)
-            if callable(item) and name.startswith("test_"):
-                item = py_func(item)
-                setattr(sys.modules[__name__], name, item)
-    except ImportError:
-        raise
diff --git a/cuda_bindings/tests/test_interoperability.py b/cuda_bindings/tests/test_interoperability.py
deleted file mode 100644
index aab3b9a9..00000000
--- a/cuda_bindings/tests/test_interoperability.py
+++ /dev/null
@@ -1,233 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import pytest
-import cuda.cuda as cuda
-import cuda.cudart as cudart
-import numpy as np
-
-def supportsMemoryPool():
-    err, isSupported = cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrMemoryPoolsSupported, 0)
-    return err == cudart.cudaError_t.cudaSuccess and isSupported
-
-def test_interop_stream():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # DRV to RT
-    err_dr, stream = cuda.cuStreamCreate(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_rt, = cudart.cudaStreamDestroy(stream)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-
-    # RT to DRV
-    err_rt, stream = cudart.cudaStreamCreate()
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-    err_dr, = cuda.cuStreamDestroy(stream)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-def test_interop_event():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # DRV to RT
-    err_dr, event = cuda.cuEventCreate(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_rt, = cudart.cudaEventDestroy(event)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-
-    # RT to DRV
-    err_rt, event = cudart.cudaEventCreate()
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-    err_dr, = cuda.cuEventDestroy(event)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-def test_interop_graph():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # DRV to RT
-    err_dr, graph = cuda.cuGraphCreate(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_rt, = cudart.cudaGraphDestroy(graph)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-
-    # RT to DRV
-    err_rt, graph = cudart.cudaGraphCreate(0)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-    err_dr, = cuda.cuGraphDestroy(graph)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-def test_interop_graphNode():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    err_dr, graph = cuda.cuGraphCreate(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # DRV to RT
-    err_dr, node = cuda.cuGraphAddEmptyNode(graph, [], 0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_rt, = cudart.cudaGraphDestroyNode(node)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-
-    # RT to DRV
-    err_rt, node = cudart.cudaGraphAddEmptyNode(graph, [], 0)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-    err_dr, = cuda.cuGraphDestroyNode(node)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    err_rt, = cudart.cudaGraphDestroy(graph)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-def test_interop_userObject():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # cudaUserObject_t
-    # TODO
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-def test_interop_function():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # cudaFunction_t
-    # TODO
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-@pytest.mark.skipif(not supportsMemoryPool(), reason='Requires mempool operations')
-def test_interop_memPool():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # DRV to RT
-    err_dr, pool = cuda.cuDeviceGetDefaultMemPool(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_rt, = cudart.cudaDeviceSetMemPool(0, pool)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-
-    # RT to DRV
-    err_rt, pool = cudart.cudaDeviceGetDefaultMemPool(0)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-    err_dr, = cuda.cuDeviceSetMemPool(0, pool)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-def test_interop_graphExec():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, graph = cuda.cuGraphCreate(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, node = cuda.cuGraphAddEmptyNode(graph, [], 0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # DRV to RT
-    err_dr, graphExec = cuda.cuGraphInstantiate(graph, 0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_rt, = cudart.cudaGraphExecDestroy(graphExec)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-
-    # RT to DRV
-    err_rt, graphExec = cudart.cudaGraphInstantiate(graph, 0)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-    err_dr, = cuda.cuGraphExecDestroy(graphExec)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    err_rt, = cudart.cudaGraphDestroy(graph)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-def test_interop_deviceptr():
-    # Init CUDA
-    err, = cuda.cuInit(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Get device
-    err, device = cuda.cuDeviceGet(0)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Construct context
-    err, ctx = cuda.cuCtxCreate(0, device)
-    assert(err == cuda.CUresult.CUDA_SUCCESS)
-
-    # Allocate dev memory
-    size = 1024 * np.uint8().itemsize
-    err_dr, dptr = cuda.cuMemAlloc(size)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # Allocate host memory
-    h1 = np.full(size, 1).astype(np.uint8)
-    h2 = np.full(size, 2).astype(np.uint8)
-    assert(np.array_equal(h1, h2) is False)
-
-    # Initialize device memory
-    err_rt, = cudart.cudaMemset(dptr, 1, size)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-
-    # D to h2
-    err_rt, = cudart.cudaMemcpy(h2, dptr, size, cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost)
-    assert(err_rt == cudart.cudaError_t.cudaSuccess)
-
-    # Validate h1 == h2
-    assert(np.array_equal(h1, h2))
-
-    # Cleanup
-    err_dr, = cuda.cuMemFree(dptr)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
diff --git a/cuda_bindings/tests/test_interoperability_cython.pyx b/cuda_bindings/tests/test_interoperability_cython.pyx
deleted file mode 100644
index 5c58e8ea..00000000
--- a/cuda_bindings/tests/test_interoperability_cython.pyx
+++ /dev/null
@@ -1,217 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-
-# TODO: update to new module once the old ones are removed, we use the
-# tests to cover backward compatibility.
-import pytest
-import cuda.cuda as cuda
-import cuda.cudart as cudart
-import numpy as np
-
-cimport cuda.ccuda as ccuda
-cimport cuda.ccudart as ccudart
-from libc.stdlib cimport calloc, free
-
-
-def supportsMemoryPool():
-    err, isSupported = cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrMemoryPoolsSupported, 0)
-    return err == cudart.cudaError_t.cudaSuccess and isSupported
-
-
-def test_interop_stream():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # DRV to RT
-    cdef ccuda.CUstream* stream_dr = <ccuda.CUstream*>calloc(1, sizeof(ccuda.CUstream))
-    cerr_dr = ccuda.cuStreamCreate(stream_dr, 0)
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    cerr_rt = ccudart.cudaStreamDestroy(stream_dr[0])
-    assert(cerr_rt == ccudart.cudaSuccess)
-    free(stream_dr)
-
-    # RT to DRV
-    cdef ccudart.cudaStream_t* stream_rt = <ccudart.cudaStream_t*>calloc(1, sizeof(ccudart.cudaStream_t))
-    cerr_rt = ccudart.cudaStreamCreate(stream_rt)
-    assert(cerr_rt == ccudart.cudaSuccess)
-    cerr_dr = ccuda.cuStreamDestroy(stream_rt[0])
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    free(stream_rt)
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-
-def test_interop_event():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # DRV to RT
-    cdef ccuda.CUevent* event_dr = <ccuda.CUevent*>calloc(1, sizeof(ccuda.CUevent))
-    cerr_dr = ccuda.cuEventCreate(event_dr, 0)
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    cerr_rt = ccudart.cudaEventDestroy(event_dr[0])
-    assert(cerr_rt == ccudart.cudaSuccess)
-    free(event_dr)
-
-    # RT to DRV
-    cdef ccudart.cudaEvent_t* event_rt = <ccudart.cudaEvent_t*>calloc(1, sizeof(ccudart.cudaEvent_t))
-    cerr_rt = ccudart.cudaEventCreate(event_rt)
-    assert(cerr_rt == ccudart.cudaSuccess)
-    cerr_dr = ccuda.cuEventDestroy(event_rt[0])
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    free(event_rt)
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-
-def test_interop_graph():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # DRV to RT
-    cdef ccuda.CUgraph* graph_dr = <ccuda.CUgraph*>calloc(1, sizeof(ccuda.CUgraph))
-    cerr_dr = ccuda.cuGraphCreate(graph_dr, 0)
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    cerr_rt = ccudart.cudaGraphDestroy(graph_dr[0])
-    assert(cerr_rt == ccudart.cudaSuccess)
-    free(graph_dr)
-
-    # RT to DRV
-    cdef ccudart.cudaGraph_t* graph_rt = <ccudart.cudaGraph_t*>calloc(1, sizeof(ccudart.cudaGraph_t))
-    cerr_rt = ccudart.cudaGraphCreate(graph_rt, 0)
-    assert(cerr_rt == ccudart.cudaSuccess)
-    cerr_dr = ccuda.cuGraphDestroy(graph_rt[0])
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    free(graph_rt)
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-
-def test_interop_graphNode():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # DRV to RT
-    cdef ccuda.CUgraph* graph_dr = <ccuda.CUgraph*>calloc(1, sizeof(ccuda.CUgraph))
-    cdef ccuda.CUgraphNode* graph_node_dr = <ccuda.CUgraphNode*>calloc(1, sizeof(ccuda.CUgraphNode))
-    cdef ccuda.CUgraphNode* dependencies_dr = NULL
-
-    cerr_dr = ccuda.cuGraphCreate(graph_dr, 0)
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    cerr_dr = ccuda.cuGraphAddEmptyNode(graph_node_dr, graph_dr[0], dependencies_dr, 0)
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    cerr_rt = ccudart.cudaGraphDestroyNode(graph_node_dr[0])
-    assert(cerr_rt == ccudart.cudaSuccess)
-
-    # RT to DRV
-    cdef ccudart.cudaGraphNode_t* graph_node_rt = <ccudart.cudaGraphNode_t*>calloc(1, sizeof(ccudart.cudaGraphNode_t))
-    cerr_rt = ccudart.cudaGraphAddEmptyNode(graph_node_rt, graph_dr[0], dependencies_dr, 0)
-    assert(cerr_rt == ccudart.cudaSuccess)
-    cerr_dr = ccuda.cuGraphDestroyNode(graph_node_rt[0])
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    cerr_rt = ccudart.cudaGraphDestroy(graph_dr[0])
-    assert(cerr_rt == ccudart.cudaSuccess)
-
-    free(graph_dr)
-    free(graph_node_dr)
-    free(graph_node_rt)
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-
-@pytest.mark.skipif(not supportsMemoryPool(), reason='Requires mempool operations')
-def test_interop_memPool():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    # DRV to RT
-    cdef ccuda.CUmemoryPool* mempool_dr = <ccuda.CUmemoryPool*>calloc(1, sizeof(ccuda.CUmemoryPool))
-    cerr_dr = ccuda.cuDeviceGetDefaultMemPool(mempool_dr, 0)
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    cerr_rt = ccudart.cudaDeviceSetMemPool(0, mempool_dr[0])
-    assert(cerr_rt == ccudart.cudaSuccess)
-
-    # RT to DRV
-    cdef ccudart.cudaMemPool_t* mempool_rt = <ccudart.cudaMemPool_t*>calloc(1, sizeof(ccudart.cudaMemPool_t))
-    cerr_rt = ccudart.cudaDeviceGetDefaultMemPool(mempool_rt, 0)
-    assert(cerr_rt == ccudart.cudaSuccess)
-    cerr_dr = ccuda.cuDeviceSetMemPool(cuda.CUdevice(0), mempool_rt[0])
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-
-    free(mempool_dr)
-    free(mempool_rt)
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-
-def test_interop_graphExec():
-    err_dr, = cuda.cuInit(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, device = cuda.cuDeviceGet(0)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-    err_dr, ctx = cuda.cuCtxCreate(0, device)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
-
-    cdef ccuda.CUgraph* graph_dr = <ccuda.CUgraph*>calloc(1, sizeof(ccuda.CUgraph))
-    cdef ccuda.CUgraphNode* graph_node_dr = <ccuda.CUgraphNode*>calloc(1, sizeof(ccuda.CUgraphNode))
-    cdef ccuda.CUgraphExec* graph_exec_dr = <ccuda.CUgraphExec*>calloc(1, sizeof(ccuda.CUgraphExec))
-    cdef ccuda.CUgraphNode* dependencies_dr = NULL
-
-    cerr_dr = ccuda.cuGraphCreate(graph_dr, 0)
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    cerr_dr = ccuda.cuGraphAddEmptyNode(graph_node_dr, graph_dr[0], dependencies_dr, 0)
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-
-    # DRV to RT
-    cerr_dr = ccuda.cuGraphInstantiate(graph_exec_dr, graph_dr[0], 0)
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    cerr_rt = ccudart.cudaGraphExecDestroy(graph_exec_dr[0])
-    assert(cerr_rt == ccudart.cudaSuccess)
-
-    # RT to DRV
-    cdef ccudart.cudaGraphExec_t* graph_exec_rt = <ccudart.cudaGraphExec_t*>calloc(1, sizeof(ccudart.cudaGraphExec_t))
-
-    cerr_rt = ccudart.cudaGraphInstantiate(graph_exec_rt, graph_dr[0], 0)
-    assert(cerr_rt == ccudart.cudaSuccess)
-    cerr_dr = ccuda.cuGraphExecDestroy(graph_exec_rt[0])
-    assert(cerr_dr == ccuda.CUDA_SUCCESS)
-    cerr_rt = ccudart.cudaGraphDestroy(graph_dr[0])
-    assert(cerr_rt == ccudart.cudaSuccess)
-
-    free(graph_dr)
-    free(graph_node_dr)
-    free(graph_exec_dr)
-    free(graph_exec_rt)
-
-    err_dr, = cuda.cuCtxDestroy(ctx)
-    assert(err_dr == cuda.CUresult.CUDA_SUCCESS)
diff --git a/cuda_bindings/tests/test_kernelParams.py b/cuda_bindings/tests/test_kernelParams.py
deleted file mode 100644
index 5bf745ef..00000000
--- a/cuda_bindings/tests/test_kernelParams.py
+++ /dev/null
@@ -1,728 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import pytest
-from cuda import cuda, cudart, nvrtc
-import numpy as np
-import ctypes
-
-def ASSERT_DRV(err):
-    if isinstance(err, cuda.CUresult):
-        if err != cuda.CUresult.CUDA_SUCCESS:
-            raise RuntimeError('Cuda Error: {}'.format(err))
-    elif isinstance(err, cudart.cudaError_t):
-        if err != cudart.cudaError_t.cudaSuccess:
-            raise RuntimeError('Cudart Error: {}'.format(err))
-    elif isinstance(err, nvrtc.nvrtcResult):
-        if err != nvrtc.nvrtcResult.NVRTC_SUCCESS:
-            raise RuntimeError('Nvrtc Error: {}'.format(err))
-    else:
-        raise RuntimeError('Unknown error type: {}'.format(err))
-
-def common_nvrtc(allKernelStrings, dev):
-    err, major = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev)
-    ASSERT_DRV(err)
-    err, minor = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev)
-    ASSERT_DRV(err)
-    err, _, nvrtc_minor = nvrtc.nvrtcVersion()
-    ASSERT_DRV(err)
-    use_cubin = (nvrtc_minor >= 1)
-    prefix = 'sm' if use_cubin else 'compute'
-    arch_arg = bytes(f'--gpu-architecture={prefix}_{major}{minor}', 'ascii')
-
-    err, prog = nvrtc.nvrtcCreateProgram(str.encode(allKernelStrings), b'allKernelStrings.cu', 0, None, None)
-    ASSERT_DRV(err)
-    opts = (b'--fmad=false', arch_arg)
-    err, = nvrtc.nvrtcCompileProgram(prog, len(opts), opts)
-
-    err_log, logSize = nvrtc.nvrtcGetProgramLogSize(prog)
-    ASSERT_DRV(err_log)
-    log = b' ' * logSize
-    err_log, = nvrtc.nvrtcGetProgramLog(prog, log)
-    ASSERT_DRV(err_log)
-    result = log.decode()
-    if len(result) > 1:
-        print(result)
-    ASSERT_DRV(err)
-
-    if use_cubin:
-        err, dataSize = nvrtc.nvrtcGetCUBINSize(prog)
-        ASSERT_DRV(err)
-        data = b' ' * dataSize
-        err, = nvrtc.nvrtcGetCUBIN(prog, data)
-        ASSERT_DRV(err)
-    else:
-        err, dataSize = nvrtc.nvrtcGetPTXSize(prog)
-        ASSERT_DRV(err)
-        data = b' ' * dataSize
-        err, = nvrtc.nvrtcGetPTX(prog, data)
-        ASSERT_DRV(err)
-
-    err, module = cuda.cuModuleLoadData(np.char.array(data))
-    ASSERT_DRV(err)
-
-    return module
-
-def test_kernelParams_empty():
-    err, = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(0, cuDevice)
-    ASSERT_DRV(err)
-
-    kernelString = '''\
-    static __device__ bool isDone;
-    extern "C" __global__
-    void empty_kernel()
-    {
-        isDone = true;
-        if (isDone) return;
-    }
-    '''
-
-    module = common_nvrtc(kernelString, cuDevice)
-
-    # cudaStructs kernel
-    err, kernel = cuda.cuModuleGetFunction(module, b'empty_kernel')
-    ASSERT_DRV(err)
-
-    err, stream = cuda.cuStreamCreate(0)
-    ASSERT_DRV(err)
-
-    err, = cuda.cuLaunchKernel(kernel,
-                               1, 1, 1,   # grid dim
-                               1, 1, 1,   # block dim
-                               0, stream, # shared mem and stream
-                               ((), ()), 0) # arguments
-    ASSERT_DRV(err)
-    err, = cuda.cuLaunchKernel(kernel,
-                               1, 1, 1,   # grid dim
-                               1, 1, 1,   # block dim
-                               0, stream, # shared mem and stream
-                               None, 0) # arguments
-    ASSERT_DRV(err)
-
-    # Retrieve global and validate
-    isDone_host = ctypes.c_bool()
-    err, isDonePtr_device, isDonePtr_device_size = cuda.cuModuleGetGlobal(module, b'isDone')
-    ASSERT_DRV(err)
-    assert(isDonePtr_device_size == ctypes.sizeof(ctypes.c_bool))
-    err, = cuda.cuMemcpyDtoHAsync(isDone_host, isDonePtr_device, ctypes.sizeof(ctypes.c_bool), stream)
-    ASSERT_DRV(err)
-    err, = cuda.cuStreamSynchronize(stream)
-    ASSERT_DRV(err)
-    assert(isDone_host.value == True)
-
-    err, = cuda.cuStreamDestroy(stream)
-    ASSERT_DRV(err)
-    err, = cuda.cuModuleUnload(module)
-    ASSERT_DRV(err)
-    err, = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
-
-def kernelParams_basic(use_ctypes_as_values):
-    err, = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(0, cuDevice)
-    ASSERT_DRV(err)
-
-    if use_ctypes_as_values:
-        assertValues_host = (ctypes.c_bool(True),
-                             ctypes.c_char(b'Z'), ctypes.c_wchar('Ā'),
-                             ctypes.c_byte(-127), ctypes.c_ubyte(255),
-                             ctypes.c_short(1), ctypes.c_ushort(1),
-                             ctypes.c_int(2), ctypes.c_uint(2),
-                             ctypes.c_long(3), ctypes.c_ulong(3),
-                             ctypes.c_longlong(4), ctypes.c_ulonglong(4),
-                             ctypes.c_size_t(5),
-                             ctypes.c_float(float(123.456)), ctypes.c_float(float(123.456)),
-                             ctypes.c_void_p(0xdeadbeef))
-    else:
-        assertValues_host = (True,
-                             b'Z', 'Ā',
-                             -127, 255,
-                             90, 72,
-                             85, 82,
-                             66, 65,
-                             86, 90,
-                             33,
-                             float(123.456), float(123.456),
-                             0xdeadbeef)
-    assertTypes_host = (ctypes.c_bool,
-                        ctypes.c_char, ctypes.c_wchar,
-                        ctypes.c_byte, ctypes.c_ubyte,
-                        ctypes.c_short, ctypes.c_ushort,
-                        ctypes.c_int, ctypes.c_uint,
-                        ctypes.c_long, ctypes.c_ulong,
-                        ctypes.c_longlong, ctypes.c_ulonglong,
-                        ctypes.c_size_t,
-                        ctypes.c_float, ctypes.c_double,
-                        ctypes.c_void_p)
-
-    basicKernelString = '''\
-    extern "C" __global__
-    void basic(bool b,
-               char c, wchar_t wc,
-               signed char byte, unsigned char ubyte,
-               short s, unsigned short us,
-               int i, unsigned int ui,
-               long l, unsigned long ul,
-               long long ll, unsigned long long ull,
-               size_t size,
-               float f, double d,
-               void *p,
-               bool *pb,
-               char *pc, wchar_t *pwc,
-               signed char *pbyte, unsigned char *pubyte,
-               short *ps, unsigned short *pus,
-               int *pi, unsigned int *pui,
-               long *pl, unsigned long *pul,
-               long long *pll, unsigned long long *pull,
-               size_t *psize,
-               float *pf, double *pd)
-    {
-        assert(b == {});
-        assert(c == {});
-        assert(wc == {});
-        assert(byte == {});
-        assert(ubyte == {});
-        assert(s == {});
-        assert(us == {});
-        assert(i == {});
-        assert(ui == {});
-        assert(l == {});
-        assert(ul == {});
-        assert(ll == {});
-        assert(ull == {});
-        assert(size == {});
-        assert(f == {});
-        assert(d == {});
-        assert(p == (void*){});
-        *pb = b;
-        *pc = c;
-        *pwc = wc;
-        *pbyte = byte;
-        *pubyte = ubyte;
-        *ps = s;
-        *pus = us;
-        *pi = i;
-        *pui = ui;
-        *pl = l;
-        *pul = ul;
-        *pll = ll;
-        *pull = ull;
-        *psize = size;
-        *pf = f;
-        *pd = d;
-    }
-    '''
-    idx = 0
-    while '{}' in basicKernelString:
-        val = assertValues_host[idx].value if use_ctypes_as_values else assertValues_host[idx]
-        if assertTypes_host[idx] == ctypes.c_float:
-            basicKernelString = basicKernelString.replace('{}', str(float(val)) + 'f', 1)
-        elif assertTypes_host[idx] == ctypes.c_double:
-            basicKernelString = basicKernelString.replace('{}', str(float(val)), 1)
-        elif assertTypes_host[idx] == ctypes.c_char:
-            basicKernelString = basicKernelString.replace('{}', str(val)[1:], 1)
-        elif assertTypes_host[idx] == ctypes.c_wchar:
-            basicKernelString = basicKernelString.replace('{}', str(ord(val)), 1)
-        else:
-            basicKernelString = basicKernelString.replace('{}', str(int(val)), 1)
-        idx += 1
-
-    module = common_nvrtc(basicKernelString, cuDevice)
-
-    err, kernel = cuda.cuModuleGetFunction(module, b'basic')
-    ASSERT_DRV(err)
-
-    err, stream = cuda.cuStreamCreate(0)
-    ASSERT_DRV(err)
-
-    # Prepare kernel
-    err, pb = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_bool))
-    ASSERT_DRV(err)
-    err, pc = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_char))
-    ASSERT_DRV(err)
-    err, pwc = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_wchar))
-    ASSERT_DRV(err)
-    err, pbyte = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_byte))
-    ASSERT_DRV(err)
-    err, pubyte = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_ubyte))
-    ASSERT_DRV(err)
-    err, ps = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_short))
-    ASSERT_DRV(err)
-    err, pus = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_ushort))
-    ASSERT_DRV(err)
-    err, pi = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_int))
-    ASSERT_DRV(err)
-    err, pui = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_uint))
-    ASSERT_DRV(err)
-    err, pl = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_long))
-    ASSERT_DRV(err)
-    err, pul = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_ulong))
-    ASSERT_DRV(err)
-    err, pll = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_longlong))
-    ASSERT_DRV(err)
-    err, pull = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_ulonglong))
-    ASSERT_DRV(err)
-    err, psize = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_size_t))
-    ASSERT_DRV(err)
-    err, pf = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_float))
-    ASSERT_DRV(err)
-    err, pd = cuda.cuMemAlloc(ctypes.sizeof(ctypes.c_double))
-    ASSERT_DRV(err)
-
-    assertValues_device = (pb,
-                           pc, pwc,
-                           pbyte, pubyte,
-                           ps, pus,
-                           pi, pui,
-                           pl, pul,
-                           pll, pull,
-                           psize,
-                           pf, pd)
-    assertTypes_device = (None,
-                          None, None,
-                          None, None,
-                          None, None,
-                          None, None,
-                          None, None,
-                          None, None,
-                          None,
-                          None, None)
-
-    basicKernelValues = assertValues_host + assertValues_device
-    basicKernelTypes = assertTypes_host + assertTypes_device
-    err, = cuda.cuLaunchKernel(kernel,
-                               1, 1, 1,   # grid dim
-                               1, 1, 1,   # block dim
-                               0, stream, # shared mem and stream
-                               (basicKernelValues, basicKernelTypes), 0) # arguments
-    ASSERT_DRV(err)
-
-    # Retrieve each dptr
-    host_params = tuple([valueType() for valueType in assertTypes_host[:-1]])
-    for i in range(len(host_params)):
-        err, = cuda.cuMemcpyDtoHAsync(host_params[i], assertValues_device[i], ctypes.sizeof(assertTypes_host[i]), stream)
-        ASSERT_DRV(err)
-
-    # Validate retrieved values
-    err, = cuda.cuStreamSynchronize(stream)
-    ASSERT_DRV(err)
-    for i in range(len(host_params)):
-        val = basicKernelValues[i].value if use_ctypes_as_values else basicKernelValues[i]
-        if basicKernelTypes[i] == ctypes.c_float:
-            if use_ctypes_as_values:
-                assert(val == host_params[i].value)
-            else:
-                assert(val == (int(host_params[i].value * 1000) / 1000))
-        else:
-            assert(val == host_params[i].value)
-
-    err, = cuda.cuMemFree(pb)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pc)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pwc)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pbyte)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pubyte)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(ps)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pus)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pi)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pui)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pl)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pul)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pll)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pull)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(psize)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pf)
-    ASSERT_DRV(err)
-    err, = cuda.cuMemFree(pd)
-    ASSERT_DRV(err)
-    err, = cuda.cuStreamDestroy(stream)
-    ASSERT_DRV(err)
-    err, = cuda.cuModuleUnload(module)
-    ASSERT_DRV(err)
-    err, = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
-
-def test_kernelParams_basic():
-    # Kernel is given basic Python primative values as value input
-    kernelParams_basic(use_ctypes_as_values = False)
-
-def test_kernelParams_basic_ctypes():
-    # Kernel is given basic c_type instances as primative value input
-    kernelParams_basic(use_ctypes_as_values = True)
-
-def test_kernelParams_types_cuda():
-    err, = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(0, cuDevice)
-    ASSERT_DRV(err)
-    err, uvaSupported = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cuDevice)
-    ASSERT_DRV(err)
-
-    err, perr = cudart.cudaMalloc(ctypes.sizeof(ctypes.c_int))
-    ASSERT_DRV(err)
-    err, pSurface_host = cudart.cudaHostAlloc(cudart.sizeof(cudart.cudaSurfaceObject_t), cudart.cudaHostAllocMapped)
-    ASSERT_DRV(err)
-    err, pDim3_host = cudart.cudaHostAlloc(cudart.sizeof(cudart.dim3), cudart.cudaHostAllocMapped)
-    ASSERT_DRV(err)
-
-    # Get device pointer if UVM is not enabled
-    if uvaSupported:
-        kernelValues = (cudart.cudaError_t.cudaErrorUnknown, perr,                                         # enums
-                        cudart.cudaSurfaceObject_t(248), cudart.cudaSurfaceObject_t(_ptr=pSurface_host),   # typedef of primative
-                        cudart.dim3(), cudart.dim3(_ptr=pDim3_host))                                       # struct
-    else:
-        err, pSurface_device = cudart.cudaHostGetDevicePointer(pSurface_host, 0)
-        ASSERT_DRV(err)
-        err, pDim3_device = cudart.cudaHostGetDevicePointer(pDim3_host, 0)
-        ASSERT_DRV(err)
-        kernelValues = (cudart.cudaError_t.cudaErrorUnknown, perr,                                         # enums
-                        cudart.cudaSurfaceObject_t(248), cudart.cudaSurfaceObject_t(_ptr=pSurface_device), # typedef of primative
-                        cudart.dim3(), cudart.dim3(_ptr=pDim3_device))                                     # struct
-    kernelTypes = (None, ctypes.c_void_p,
-                   None, ctypes.c_void_p,
-                   None, ctypes.c_void_p)
-    kernelValues[4].x = 1
-    kernelValues[4].y = 2
-    kernelValues[4].z = 3
-
-    kernelString = '''\
-    extern "C" __global__
-    void structsCuda(cudaError_t err, cudaError_t *perr,
-                     cudaSurfaceObject_t surface, cudaSurfaceObject_t *pSurface,
-                     dim3 dim, dim3* pdim)
-    {
-        *perr = err;
-        *pSurface = surface;
-        pdim->x = dim.x;
-        pdim->y = dim.y;
-        pdim->z = dim.z;
-    }
-    '''
-
-    module = common_nvrtc(kernelString, cuDevice)
-
-    # cudaStructs kernel
-    err, kernel = cuda.cuModuleGetFunction(module, b'structsCuda')
-    ASSERT_DRV(err)
-
-    err, stream = cuda.cuStreamCreate(0)
-    ASSERT_DRV(err)
-
-    err, = cuda.cuLaunchKernel(kernel,
-                               1, 1, 1,   # grid dim
-                               1, 1, 1,   # block dim
-                               0, stream, # shared mem and stream
-                               (kernelValues, kernelTypes), 0) # arguments
-    ASSERT_DRV(err)
-
-    # Retrieve each dptr
-    host_err = ctypes.c_int()
-    err, = cudart.cudaMemcpyAsync(ctypes.addressof(host_err), perr, ctypes.sizeof(ctypes.c_int()), cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost, stream)
-    ASSERT_DRV(err)
-
-    # Validate kernel values
-    err, = cuda.cuStreamSynchronize(stream)
-    ASSERT_DRV(err)
-    cuda_err = cudart.cudaError_t(host_err.value)
-
-    if uvaSupported:
-        assert(kernelValues[0] == cuda_err)
-        assert(int(kernelValues[2]) == int(kernelValues[3]))
-        assert(kernelValues[4].x == kernelValues[5].x)
-        assert(kernelValues[4].y == kernelValues[5].y)
-        assert(kernelValues[4].z == kernelValues[5].z)
-    else:
-        surface_host = cudart.cudaSurfaceObject_t(_ptr=pSurface_host)
-        dim3_host = cudart.dim3(_ptr=pDim3_host)
-        assert(kernelValues[0] == cuda_err)
-        assert(int(kernelValues[2]) == int(surface_host))
-        assert(kernelValues[4].x == dim3_host.x)
-        assert(kernelValues[4].y == dim3_host.y)
-        assert(kernelValues[4].z == dim3_host.z)
-
-    err, = cudart.cudaFree(perr)
-    ASSERT_DRV(err)
-    err, = cudart.cudaFreeHost(pSurface_host)
-    ASSERT_DRV(err)
-    err, = cudart.cudaFreeHost(pDim3_host)
-    ASSERT_DRV(err)
-    err, = cuda.cuStreamDestroy(stream)
-    ASSERT_DRV(err)
-    err, = cuda.cuModuleUnload(module)
-    ASSERT_DRV(err)
-    err, = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
-
-def test_kernelParams_struct_custom():
-    err, = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(0, cuDevice)
-    ASSERT_DRV(err)
-    err, uvaSupported = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cuDevice)
-    ASSERT_DRV(err)
-
-    kernelString = '''\
-    struct testStruct {
-        int value;
-    };
-
-    extern "C" __global__
-    void structCustom(struct testStruct src, struct testStruct *dst)
-    {
-        dst->value = src.value;
-    }
-    '''
-
-    module = common_nvrtc(kernelString, cuDevice)
-
-    err, kernel = cuda.cuModuleGetFunction(module, b'structCustom')
-    ASSERT_DRV(err)
-
-    err, stream = cuda.cuStreamCreate(0)
-    ASSERT_DRV(err)
-
-    # structCustom kernel
-    class testStruct(ctypes.Structure):
-        _fields_ = [('value',ctypes.c_int)]
-
-    err, pStruct_host = cudart.cudaHostAlloc(ctypes.sizeof(testStruct), cudart.cudaHostAllocMapped)
-    ASSERT_DRV(err)
-
-    # Get device pointer if UVM is not enabled
-    if uvaSupported:
-        kernelValues = (testStruct(5), pStruct_host)
-    else:
-        err, pStruct_device = cudart.cudaHostGetDevicePointer(pStruct_host, 0)
-        ASSERT_DRV(err)
-        kernelValues = (testStruct(5), pStruct_device)
-    kernelTypes = (None, ctypes.c_void_p)
-
-    err, = cuda.cuLaunchKernel(kernel,
-                               1, 1, 1,   # grid dim
-                               1, 1, 1,   # block dim
-                               0, stream, # shared mem and stream
-                               (kernelValues, kernelTypes), 0) # arguments
-    ASSERT_DRV(err)
-
-    # Validate kernel values
-    err, = cuda.cuStreamSynchronize(stream)
-    ASSERT_DRV(err)
-    struct_shared = testStruct.from_address(pStruct_host)
-    assert(kernelValues[0].value == struct_shared.value)
-
-    err, = cudart.cudaFreeHost(pStruct_host)
-    ASSERT_DRV(err)
-    err, = cuda.cuStreamDestroy(stream)
-    ASSERT_DRV(err)
-    err, = cuda.cuModuleUnload(module)
-    ASSERT_DRV(err)
-    err, = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
-
-def kernelParams_buffer_protocol_ctypes_common(pass_by_address):
-    err, = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(0, cuDevice)
-    ASSERT_DRV(err)
-    err, uvaSupported = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cuDevice)
-    ASSERT_DRV(err)
-
-    kernelString = '''\
-    struct testStruct {
-        int value;
-    };
-    extern "C" __global__
-    void testkernel(int i, int *pi,
-                    float f, float *pf,
-                    struct testStruct s, struct testStruct *ps)
-    {
-        *pi = i;
-        *pf = f;
-        ps->value = s.value;
-    }
-    '''
-
-    module = common_nvrtc(kernelString, cuDevice)
-
-    err, kernel = cuda.cuModuleGetFunction(module, b'testkernel')
-    ASSERT_DRV(err)
-
-    err, stream = cuda.cuStreamCreate(0)
-    ASSERT_DRV(err)
-
-    # testkernel kernel
-    class testStruct(ctypes.Structure):
-        _fields_ = [('value',ctypes.c_int)]
-
-    err, pInt_host = cudart.cudaHostAlloc(ctypes.sizeof(ctypes.c_int), cudart.cudaHostAllocMapped)
-    ASSERT_DRV(err)
-    err, pFloat_host = cudart.cudaHostAlloc(ctypes.sizeof(ctypes.c_float), cudart.cudaHostAllocMapped)
-    ASSERT_DRV(err)
-    err, pStruct_host = cudart.cudaHostAlloc(ctypes.sizeof(testStruct), cudart.cudaHostAllocMapped)
-    ASSERT_DRV(err)
-
-    # Get device pointer if UVM is not enabled
-    if uvaSupported:
-        kernelValues = (ctypes.c_int(1), ctypes.c_void_p(pInt_host),
-                        ctypes.c_float(float(123.456)), ctypes.c_void_p(pFloat_host),
-                        testStruct(5), ctypes.c_void_p(pStruct_host))
-    else:
-        err, pInt_device = cudart.cudaHostGetDevicePointer(pInt_host, 0)
-        ASSERT_DRV(err)
-        err, pFloat_device = cudart.cudaHostGetDevicePointer(pFloat_host, 0)
-        ASSERT_DRV(err)
-        err, pStruct_device = cudart.cudaHostGetDevicePointer(pStruct_host, 0)
-        ASSERT_DRV(err)
-        kernelValues = (ctypes.c_int(1), ctypes.c_void_p(pInt_device),
-                        ctypes.c_float(float(123.456)), ctypes.c_void_p(pFloat_device),
-                        testStruct(5), ctypes.c_void_p(pStruct_device))
-
-    packagedParams = (ctypes.c_void_p*len(kernelValues))()
-    for idx in range(len(packagedParams)):
-        packagedParams[idx] = ctypes.addressof(kernelValues[idx])
-    err, = cuda.cuLaunchKernel(kernel,
-                               1, 1, 1,   # grid dim
-                               1, 1, 1,   # block dim
-                               0, stream, # shared mem and stream
-                               ctypes.addressof(packagedParams) if pass_by_address else packagedParams, 0) # arguments
-    ASSERT_DRV(err)
-
-    # Validate kernel values
-    err, = cuda.cuStreamSynchronize(stream)
-    ASSERT_DRV(err)
-    assert(kernelValues[0].value == ctypes.c_int.from_address(pInt_host).value)
-    assert(kernelValues[2].value == ctypes.c_float.from_address(pFloat_host).value)
-    assert(kernelValues[4].value == testStruct.from_address(pStruct_host).value)
-
-    err, = cudart.cudaFreeHost(pStruct_host)
-    ASSERT_DRV(err)
-    err, = cuda.cuStreamDestroy(stream)
-    ASSERT_DRV(err)
-    err, = cuda.cuModuleUnload(module)
-    ASSERT_DRV(err)
-    err, = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
-
-def test_kernelParams_buffer_protocol_ctypes():
-    kernelParams_buffer_protocol_ctypes_common(pass_by_address=True)
-    kernelParams_buffer_protocol_ctypes_common(pass_by_address=False)
-
-def test_kernelParams_buffer_protocol_numpy():
-    err, = cuda.cuInit(0)
-    ASSERT_DRV(err)
-    err, cuDevice = cuda.cuDeviceGet(0)
-    ASSERT_DRV(err)
-    err, context = cuda.cuCtxCreate(0, cuDevice)
-    ASSERT_DRV(err)
-    err, uvaSupported = cuda.cuDeviceGetAttribute(cuda.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cuDevice)
-    ASSERT_DRV(err)
-
-    kernelString = '''\
-    struct testStruct {
-        int value;
-    };
-    extern "C" __global__
-    void testkernel(int i, int *pi,
-                    float f, float *pf,
-                    struct testStruct s, struct testStruct *ps)
-    {
-        *pi = i;
-        *pf = f;
-        ps->value = s.value;
-    }
-    '''
-
-    module = common_nvrtc(kernelString, cuDevice)
-
-    err, kernel = cuda.cuModuleGetFunction(module, b'testkernel')
-    ASSERT_DRV(err)
-
-    err, stream = cuda.cuStreamCreate(0)
-    ASSERT_DRV(err)
-
-    # testkernel kernel
-    testStruct = np.dtype([('value', np.int32)])
-
-    err, pInt_host = cudart.cudaHostAlloc(np.dtype(np.int32).itemsize, cudart.cudaHostAllocMapped)
-    ASSERT_DRV(err)
-    err, pFloat_host = cudart.cudaHostAlloc(np.dtype(np.float32).itemsize, cudart.cudaHostAllocMapped)
-    ASSERT_DRV(err)
-    err, pStruct_host = cudart.cudaHostAlloc(testStruct.itemsize, cudart.cudaHostAllocMapped)
-    ASSERT_DRV(err)
-
-    # Get device pointer if UVM is not enabled
-    if uvaSupported:
-        kernelValues = (np.array(1, dtype=np.uint32), np.array([pInt_host], dtype=np.uint64),
-                        np.array(float(123.456), dtype=np.float32), np.array([pFloat_host], dtype=np.uint64),
-                        np.array([5], testStruct), np.array([pStruct_host], dtype=np.uint64))
-    else:
-        err, pInt_device = cudart.cudaHostGetDevicePointer(pInt_host, 0)
-        ASSERT_DRV(err)
-        err, pFloat_device = cudart.cudaHostGetDevicePointer(pFloat_host, 0)
-        ASSERT_DRV(err)
-        err, pStruct_device = cudart.cudaHostGetDevicePointer(pStruct_host, 0)
-        ASSERT_DRV(err)
-        kernelValues = (np.array(1, dtype=np.int32), np.array([pInt_device], dtype=np.uint64),
-                        np.array(float(123.456), dtype=np.float32), np.array([pFloat_device], dtype=np.uint64),
-                        np.array([5], testStruct), np.array([pStruct_device], dtype=np.uint64))
-
-    packagedParams = np.array([arg.ctypes.data for arg in kernelValues], dtype=np.uint64)
-    err, = cuda.cuLaunchKernel(kernel,
-                               1, 1, 1,   # grid dim
-                               1, 1, 1,   # block dim
-                               0, stream, # shared mem and stream
-                               packagedParams, 0) # arguments
-    ASSERT_DRV(err)
-
-    # Validate kernel values
-    err, = cuda.cuStreamSynchronize(stream)
-    ASSERT_DRV(err)
-
-    class numpy_address_wrapper():
-        def __init__(self, address, typestr):
-            self.__array_interface__ = {'data': (address, False),
-                                        'typestr': typestr,
-                                        'shape': (1,)}
-
-    assert(kernelValues[0] == np.array(numpy_address_wrapper(pInt_host, '<i4')))
-    assert(kernelValues[2] == np.array(numpy_address_wrapper(pFloat_host, '<f4')))
-    assert(kernelValues[4]['value'] == np.array(numpy_address_wrapper(pStruct_host, '<i4'), dtype=testStruct)['value'])
-
-    err, = cudart.cudaFreeHost(pStruct_host)
-    ASSERT_DRV(err)
-    err, = cuda.cuStreamDestroy(stream)
-    ASSERT_DRV(err)
-    err, = cuda.cuModuleUnload(module)
-    ASSERT_DRV(err)
-    err, = cuda.cuCtxDestroy(context)
-    ASSERT_DRV(err)
diff --git a/cuda_bindings/tests/test_nvrtc.py b/cuda_bindings/tests/test_nvrtc.py
deleted file mode 100644
index a4d91150..00000000
--- a/cuda_bindings/tests/test_nvrtc.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
-#
-# Please refer to the NVIDIA end user license agreement (EULA) associated
-# with this source code for terms and conditions that govern your use of
-# this software. Any use, reproduction, disclosure, or distribution of
-# this software and related documentation outside the terms of the EULA
-# is strictly prohibited.
-import pytest
-from cuda import nvrtc
-
-def ASSERT_DRV(err):
-    if isinstance(err, nvrtc.nvrtcResult):
-        if err != nvrtc.nvrtcResult.NVRTC_SUCCESS:
-            raise RuntimeError('Nvrtc Error: {}'.format(err))
-    else:
-        raise RuntimeError('Unknown error type: {}'.format(err))
-
-def nvrtcVersionLessThan(major, minor):
-    err, major_version, minor_version = nvrtc.nvrtcVersion()
-    ASSERT_DRV(err)
-    return major_version < major or (major == major_version and minor_version < minor)
-
-@pytest.mark.skipif(nvrtcVersionLessThan(11, 3), reason='When nvrtcGetSupportedArchs was introduced')
-def test_nvrtcGetSupportedArchs():
-    err, supportedArchs = nvrtc.nvrtcGetSupportedArchs()
-    ASSERT_DRV(err)
-    assert len(supportedArchs) != 0
diff --git a/cuda_core/MANIFEST.in b/cuda_core/MANIFEST.in
deleted file mode 100644
index f0b33548..00000000
--- a/cuda_core/MANIFEST.in
+++ /dev/null
@@ -1 +0,0 @@
-recursive-include cuda/core *.pyx *.pxd
diff --git a/cuda_core/README.md b/cuda_core/README.md
deleted file mode 100644
index e979fb73..00000000
--- a/cuda_core/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# `cuda.core`: (experimental) pythonic CUDA module
-
-Currently under active development. To build from source, just do:
-```shell
-$ git clone https://github.com/NVIDIA/cuda-python
-$ cd cuda-python/cuda_core  # move to the directory where this README locates
-$ pip install .
-```
-For now `cuda-python` is a required dependency.
diff --git a/cuda_core/cuda/core/__init__.pxd b/cuda_core/cuda/core/__init__.pxd
deleted file mode 100644
index e69de29b..00000000
diff --git a/cuda_core/cuda/core/__init__.py b/cuda_core/cuda/core/__init__.py
deleted file mode 100644
index cec6e8d9..00000000
--- a/cuda_core/cuda/core/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from cuda.core._device import Device
-from cuda.core._event import EventOptions
-from cuda.core._launcher import LaunchConfig, launch
-from cuda.core._program import Program
-from cuda.core._stream import Stream, StreamOptions
-from cuda.core._version import __version__
diff --git a/cuda_core/cuda/core/_context.py b/cuda_core/cuda/core/_context.py
deleted file mode 100644
index 5d0f5adf..00000000
--- a/cuda_core/cuda/core/_context.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from dataclasses import dataclass
-
-from cuda import cuda, cudart
-from cuda.core._utils import handle_return
-
-
-@dataclass
-class ContextOptions:
-    pass  # TODO
-
-
-class Context:
-
-    __slots__ = ("_handle", "_id")
-
-    def __init__(self):
-        raise NotImplementedError("TODO")
-
-    @staticmethod
-    def _from_ctx(obj, dev_id):
-        assert isinstance(obj, cuda.CUcontext)
-        ctx = Context.__new__(Context)
-        ctx._handle = obj
-        ctx._id = dev_id
-        return ctx
diff --git a/cuda_core/cuda/core/_device.py b/cuda_core/cuda/core/_device.py
deleted file mode 100644
index 1268da32..00000000
--- a/cuda_core/cuda/core/_device.py
+++ /dev/null
@@ -1,187 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-import threading
-from typing import Optional, Union
-import warnings
-
-from cuda import cuda, cudart
-from cuda.core._utils import handle_return, ComputeCapability, CUDAError, \
-                             precondition
-from cuda.core._context import Context, ContextOptions
-from cuda.core._memory import _DefaultAsyncMempool, Buffer, MemoryResource
-from cuda.core._stream import default_stream, Stream, StreamOptions
-
-
-_tls = threading.local()
-_tls_lock = threading.Lock()
-
-
-class Device:
-
-    __slots__ = ("_id", "_mr", "_has_inited")
-
-    def __new__(cls, device_id=None):
-        # important: creating a Device instance does not initialize the GPU!
-        if device_id is None:
-            device_id = handle_return(cudart.cudaGetDevice())
-            assert isinstance(device_id, int), f"{device_id=}"
-        else:
-            total = handle_return(cudart.cudaGetDeviceCount())
-            if not isinstance(device_id, int) or not (0 <= device_id < total):
-                raise ValueError(
-                    f"device_id must be within [0, {total}), got {device_id}")
-
-        # ensure Device is singleton
-        with _tls_lock:
-            if not hasattr(_tls, "devices"):
-                total = handle_return(cudart.cudaGetDeviceCount())
-                _tls.devices = []
-                for dev_id in range(total):
-                    dev = super().__new__(cls)
-                    dev._id = dev_id
-                    dev._mr = _DefaultAsyncMempool(dev_id)
-                    dev._has_inited = False
-                    _tls.devices.append(dev)
-
-        return _tls.devices[device_id]
-
-    def _check_context_initialized(self, *args, **kwargs):
-        if not self._has_inited:
-            raise CUDAError("the device is not yet initialized, "
-                            "perhaps you forgot to call .set_current() first?")
-
-    @property
-    def device_id(self) -> int:
-        return self._id
-
-    @property
-    def pci_bus_id(self) -> str:
-        bus_id = handle_return(cudart.cudaDeviceGetPCIBusId(13, self._id))
-        return bus_id[:12].decode()
-
-    @property
-    def uuid(self) -> str:
-        driver_ver = handle_return(cuda.cuDriverGetVersion())
-        if driver_ver >= 11040:
-            uuid = handle_return(cuda.cuDeviceGetUuid_v2(self._id))
-        else:
-            uuid = handle_return(cuda.cuDeviceGetUuid(self._id))
-        uuid = uuid.bytes.hex()
-        # 8-4-4-4-12
-        return f"{uuid[:8]}-{uuid[8:12]}-{uuid[12:16]}-{uuid[16:20]}-{uuid[20:]}"
-
-    @property
-    def name(self) -> str:
-        # assuming a GPU name is less than 128 characters...
-        name = handle_return(cuda.cuDeviceGetName(128, self._id))
-        name = name.split(b'\0')[0]
-        return name.decode()
-
-    @property
-    def properties(self) -> dict:
-        # TODO: pythonize the key names
-        return handle_return(cudart.cudaGetDeviceProperties(self._id))
-
-    @property
-    def compute_capability(self) -> ComputeCapability:
-        """Returns a named tuple with 2 fields: major and minor. """
-        major = handle_return(cudart.cudaDeviceGetAttribute(
-            cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, self._id))
-        minor = handle_return(cudart.cudaDeviceGetAttribute(
-            cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, self._id))
-        return ComputeCapability(major, minor)
-
-    @property
-    @precondition(_check_context_initialized)
-    def context(self) -> Context:
-        ctx = handle_return(cuda.cuCtxGetCurrent())
-        assert int(ctx) != 0
-        return Context._from_ctx(ctx, self._id)
-
-    @property
-    def memory_resource(self) -> MemoryResource:
-        return self._mr
-
-    @memory_resource.setter
-    def memory_resource(self, mr):
-        if not isinstance(mr, MemoryResource):
-            raise TypeError
-        self._mr = mr
-
-    @property
-    def default_stream(self) -> Stream:
-        return default_stream()
-
-    def __int__(self):
-        return self._id
-
-    def __repr__(self):
-        return f"<Device {self._id} ({self.name})>"
-
-    def set_current(self, ctx: Context=None) -> Union[Context, None]:
-        """
-        Entry point of this object. Users always start a code by
-        calling this method, e.g.
-        
-        >>> from cuda.core import Device
-        >>> dev0 = Device(0)
-        >>> dev0.set_current()
-        >>> # ... do work on device 0 ...
-        
-        The optional ctx argument is for advanced users to bind a
-        CUDA context with the device. In this case, the previously
-        set context is popped and returned to the user.
-        """
-        if ctx is not None:
-            if not isinstance(ctx, Context):
-                raise TypeError("a Context object is required")
-            if ctx._id != self._id:
-                raise RuntimeError("the provided context was created on a different "
-                                  f"device {ctx._id} other than the target {self._id}")
-            prev_ctx = handle_return(cuda.cuCtxPopCurrent())
-            handle_return(cuda.cuCtxPushCurrent(ctx._handle))
-            self._has_inited = True
-            if int(prev_ctx) != 0:
-                return Context._from_ctx(prev_ctx, self._id)
-        else:
-            ctx = handle_return(cuda.cuCtxGetCurrent())
-            if int(ctx) == 0:
-                # use primary ctx
-                ctx = handle_return(cuda.cuDevicePrimaryCtxRetain(self._id))
-                handle_return(cuda.cuCtxPushCurrent(ctx))
-            else:
-                ctx_id = handle_return(cuda.cuCtxGetDevice())
-                if ctx_id != self._id:
-                    # use primary ctx
-                    ctx = handle_return(cuda.cuDevicePrimaryCtxRetain(self._id))
-                    handle_return(cuda.cuCtxPushCurrent(ctx))
-                else:
-                    # no-op, a valid context already exists and is set current
-                    pass
-            self._has_inited = True
-
-    def create_context(self, options: ContextOptions = None) -> Context:
-        # Create a Context object (but do NOT set it current yet!).
-        # ContextOptions is a dataclass for setting e.g. affinity or CIG
-        # options. 
-        raise NotImplementedError("TODO")
-
-    @precondition(_check_context_initialized)
-    def create_stream(self, obj=None, options: StreamOptions=None) -> Stream:
-        # Create a Stream object by either holding a newly created
-        # CUDA stream or wrapping an existing foreign object supporting
-        # the __cuda_stream__ protocol. In the latter case, a reference
-        # to obj is held internally so that its lifetime is managed.
-        return Stream._init(obj=obj, options=options)
-
-    @precondition(_check_context_initialized)
-    def allocate(self, size, stream=None) -> Buffer:
-        if stream is None:
-            stream = default_stream()
-        return self._mr.allocate(size, stream)
-
-    @precondition(_check_context_initialized)
-    def sync(self):
-        handle_return(cudart.cudaDeviceSynchronize())
diff --git a/cuda_core/cuda/core/_dlpack.pxd b/cuda_core/cuda/core/_dlpack.pxd
deleted file mode 100644
index 1868287a..00000000
--- a/cuda_core/cuda/core/_dlpack.pxd
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-cimport cpython
-
-from libc cimport stdlib
-from libc.stdint cimport uint8_t
-from libc.stdint cimport uint16_t
-from libc.stdint cimport uint32_t
-from libc.stdint cimport int32_t
-from libc.stdint cimport int64_t
-from libc.stdint cimport uint64_t
-from libc.stdint cimport intptr_t
-
-
-cdef extern from "dlpack.h" nogil:
-    """
-    #define DLPACK_TENSOR_UNUSED_NAME "dltensor"
-    #define DLPACK_VERSIONED_TENSOR_UNUSED_NAME "dltensor_versioned"
-    #define DLPACK_TENSOR_USED_NAME "used_dltensor"
-    #define DLPACK_VERSIONED_TENSOR_USED_NAME "used_dltensor_versioned"
-    """
-    ctypedef enum _DLDeviceType "DLDeviceType":
-        _kDLCPU "kDLCPU"
-        _kDLCUDA "kDLCUDA"
-        _kDLCUDAHost "kDLCUDAHost"
-        _kDLCUDAManaged "kDLCUDAManaged"
-
-    ctypedef struct DLDevice:
-        _DLDeviceType device_type
-        int32_t device_id
-
-    cdef enum DLDataTypeCode:
-        kDLInt
-        kDLUInt
-        kDLFloat
-        kDLBfloat
-        kDLComplex
-        kDLBool
-
-    ctypedef struct DLDataType:
-        uint8_t code
-        uint8_t bits
-        uint16_t lanes
-
-    ctypedef struct DLTensor:
-        void* data
-        DLDevice device
-        int32_t ndim
-        DLDataType dtype
-        int64_t* shape
-        int64_t* strides
-        uint64_t byte_offset
-
-    ctypedef struct DLManagedTensor:
-        DLTensor dl_tensor
-        void* manager_ctx
-        void (*deleter)(DLManagedTensor*)
-
-    ctypedef struct DLPackVersion:
-        uint32_t major
-        uint32_t minor
-
-    ctypedef struct DLManagedTensorVersioned:
-        DLPackVersion version
-        void* manager_ctx
-        void (*deleter)(DLManagedTensorVersioned*)
-        uint64_t flags
-        DLTensor dl_tensor
-
-    int DLPACK_MAJOR_VERSION
-    int DLPACK_MINOR_VERSION
-    int DLPACK_FLAG_BITMASK_READ_ONLY
-
-    const char* DLPACK_TENSOR_UNUSED_NAME
-    const char* DLPACK_VERSIONED_TENSOR_UNUSED_NAME
-    const char* DLPACK_TENSOR_USED_NAME
-    const char* DLPACK_VERSIONED_TENSOR_USED_NAME
diff --git a/cuda_core/cuda/core/_dlpack.pyx b/cuda_core/cuda/core/_dlpack.pyx
deleted file mode 100644
index dda08eab..00000000
--- a/cuda_core/cuda/core/_dlpack.pyx
+++ /dev/null
@@ -1,108 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from enum import IntEnum
-
-
-cdef void pycapsule_deleter(object capsule) noexcept:
-    cdef DLManagedTensor* dlm_tensor
-    cdef DLManagedTensorVersioned* dlm_tensor_ver
-    # Do not invoke the deleter on a used capsule.
-    if cpython.PyCapsule_IsValid(
-            capsule, DLPACK_TENSOR_UNUSED_NAME):
-        dlm_tensor = <DLManagedTensor*>(
-            cpython.PyCapsule_GetPointer(
-                capsule, DLPACK_TENSOR_UNUSED_NAME))
-        if dlm_tensor.deleter:
-            dlm_tensor.deleter(dlm_tensor)
-    elif cpython.PyCapsule_IsValid(
-            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
-        dlm_tensor_ver = <DLManagedTensorVersioned*>(
-            cpython.PyCapsule_GetPointer(
-                capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME))
-        if dlm_tensor_ver.deleter:
-            dlm_tensor_ver.deleter(dlm_tensor_ver)
-
-
-cdef void deleter(DLManagedTensor* tensor) noexcept with gil:
-    stdlib.free(tensor.dl_tensor.shape)
-    if tensor.manager_ctx:
-        cpython.Py_DECREF(<object>tensor.manager_ctx)
-        tensor.manager_ctx = NULL
-    stdlib.free(tensor)
-
-
-cdef void versioned_deleter(DLManagedTensorVersioned* tensor) noexcept with gil:
-    stdlib.free(tensor.dl_tensor.shape)
-    if tensor.manager_ctx:
-        cpython.Py_DECREF(<object>tensor.manager_ctx)
-        tensor.manager_ctx = NULL
-    stdlib.free(tensor)
-
-
-cpdef object make_py_capsule(object buf, bint versioned):
-    cdef DLManagedTensor* dlm_tensor
-    cdef DLManagedTensorVersioned* dlm_tensor_ver
-    cdef DLTensor* dl_tensor
-    cdef void* tensor_ptr
-    cdef const char* capsule_name
-
-    if versioned:
-        dlm_tensor_ver = <DLManagedTensorVersioned*>(
-            stdlib.malloc(sizeof(DLManagedTensorVersioned)))
-        dlm_tensor_ver.version.major = DLPACK_MAJOR_VERSION
-        dlm_tensor_ver.version.minor = DLPACK_MINOR_VERSION
-        dlm_tensor_ver.manager_ctx = <void*>buf
-        dlm_tensor_ver.deleter = versioned_deleter
-        dlm_tensor_ver.flags = 0
-        dl_tensor = &dlm_tensor_ver.dl_tensor
-        tensor_ptr = dlm_tensor_ver
-        capsule_name = DLPACK_VERSIONED_TENSOR_UNUSED_NAME
-    else:
-        dlm_tensor = <DLManagedTensor*>(
-            stdlib.malloc(sizeof(DLManagedTensor)))
-        dl_tensor = &dlm_tensor.dl_tensor
-        dlm_tensor.manager_ctx = <void*>buf
-        dlm_tensor.deleter = deleter
-        tensor_ptr = dlm_tensor
-        capsule_name = DLPACK_TENSOR_UNUSED_NAME
-
-    dl_tensor.data = <void*><intptr_t>(int(buf.handle))
-    dl_tensor.ndim = 1
-    cdef int64_t* shape_strides = \
-        <int64_t*>stdlib.malloc(sizeof(int64_t) * 2)
-    shape_strides[0] = <int64_t>buf.size
-    shape_strides[1] = 1  # redundant
-    dl_tensor.shape = shape_strides
-    dl_tensor.strides = NULL
-    dl_tensor.byte_offset = 0
-
-    cdef DLDevice* device = &dl_tensor.device
-    # buf should be a Buffer instance
-    if buf.is_device_accessible and not buf.is_host_accessible:
-        device.device_type = _kDLCUDA
-        device.device_id = buf.device_id
-    elif buf.is_device_accessible and buf.is_host_accessible:
-        device.device_type = _kDLCUDAHost
-        device.device_id = 0
-    elif not buf.is_device_accessible and buf.is_host_accessible:
-        device.device_type = _kDLCPU
-        device.device_id = 0
-    else:  # not buf.is_device_accessible and not buf.is_host_accessible
-        raise BufferError("invalid buffer")
-
-    cdef DLDataType* dtype = &dl_tensor.dtype
-    dtype.code = <uint8_t>kDLInt
-    dtype.lanes = <uint16_t>1
-    dtype.bits = <uint8_t>8
-
-    cpython.Py_INCREF(buf)
-    return cpython.PyCapsule_New(tensor_ptr, capsule_name, pycapsule_deleter)
-
-
-class DLDeviceType(IntEnum):
-    kDLCPU = _kDLCPU
-    kDLCUDA = _kDLCUDA
-    kDLCUDAHost = _kDLCUDAHost
-    kDLCUDAManaged = _kDLCUDAManaged
diff --git a/cuda_core/cuda/core/_event.py b/cuda_core/cuda/core/_event.py
deleted file mode 100644
index 5fbacae1..00000000
--- a/cuda_core/cuda/core/_event.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from dataclasses import dataclass
-from typing import Optional
-
-from cuda import cuda
-from cuda.core._utils import check_or_create_options
-from cuda.core._utils import CUDAError
-from cuda.core._utils import handle_return
-
-
-@dataclass
-class EventOptions:
-    enable_timing: Optional[bool] = False
-    busy_waited_sync: Optional[bool] = False
-    support_ipc: Optional[bool] = False
-
-
-class Event:
-
-    __slots__ = ("_handle", "_timing_disabled", "_busy_waited")
-
-    def __init__(self):
-        # minimal requirements for the destructor
-        self._handle = None
-        raise NotImplementedError(
-            "directly creating an Event object can be ambiguous. Please call "
-            "call Stream.record().")
-
-    @staticmethod
-    def _init(options: Optional[EventOptions]=None):
-        self = Event.__new__(Event)
-        # minimal requirements for the destructor
-        self._handle = None
-
-        options = check_or_create_options(EventOptions, options, "Event options")
-        flags = 0x0
-        self._timing_disabled = False
-        self._busy_waited = False
-        if not options.enable_timing:
-            flags |= cuda.CUevent_flags.CU_EVENT_DISABLE_TIMING
-            self._timing_disabled = True
-        if options.busy_waited_sync:
-            flags |= cuda.CUevent_flags.CU_EVENT_BLOCKING_SYNC
-            self._busy_waited = True
-        if options.support_ipc:
-            raise NotImplementedError("TODO")
-        self._handle = handle_return(cuda.cuEventCreate(flags))
-        return self
-
-    def __del__(self):
-        self.close()
-
-    def close(self):
-        # Destroy the event.
-        if self._handle:
-            handle_return(cuda.cuEventDestroy(self._handle))
-            self._handle = None
-
-    @property
-    def is_timing_disabled(self) -> bool:
-        # Check if this instance can be used for the timing purpose.
-        return self._timing_disabled
-
-    @property
-    def is_sync_busy_waited(self) -> bool:
-        # Check if the event synchronization would keep the CPU busy-waiting.
-        return self._busy_waited
-
-    @property
-    def is_ipc_supported(self) -> bool:
-        # Check if this instance can be used for IPC.
-        raise NotImplementedError("TODO")
-
-    def sync(self):
-        # Sync over the event.
-        handle_return(cuda.cuEventSynchronize(self._handle))
-
-    @property
-    def is_done(self) -> bool:
-        # Return True if all captured works have been completed,
-        # otherwise False.
-        result, = cuda.cuEventQuery(self._handle)
-        if result == cuda.CUresult.CUDA_SUCCESS:
-            return True
-        elif result == cuda.CUresult.CUDA_ERROR_NOT_READY:
-            return False
-        else:
-            raise CUDAError(f"unexpected error: {result}")
-
-    @property
-    def handle(self) -> int:
-        return int(self._handle)
diff --git a/cuda_core/cuda/core/_kernel_arg_handler.pyx b/cuda_core/cuda/core/_kernel_arg_handler.pyx
deleted file mode 100644
index f2d392a8..00000000
--- a/cuda_core/cuda/core/_kernel_arg_handler.pyx
+++ /dev/null
@@ -1,218 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from cpython.mem cimport PyMem_Malloc, PyMem_Free
-from libc.stdint cimport (intptr_t,
-                          int8_t, int16_t, int32_t, int64_t,
-                          uint8_t, uint16_t, uint32_t, uint64_t,)
-from libcpp cimport bool as cpp_bool
-from libcpp.complex cimport complex as cpp_complex
-from libcpp cimport nullptr
-from libcpp cimport vector
-
-import ctypes
-
-import numpy
-
-from cuda.core._memory import Buffer
-
-
-ctypedef cpp_complex.complex[float] cpp_single_complex
-ctypedef cpp_complex.complex[double] cpp_double_complex
-
-
-ctypedef fused supported_type:
-    cpp_bool
-    int8_t
-    int16_t
-    int32_t
-    int64_t
-    uint8_t
-    uint16_t
-    uint32_t
-    uint64_t
-    float
-    double
-    intptr_t
-    cpp_single_complex
-    cpp_double_complex
-
-
-# cache ctypes/numpy type objects to avoid attribute access
-cdef object ctypes_bool = ctypes.c_bool
-cdef object ctypes_int8 = ctypes.c_int8
-cdef object ctypes_int16 = ctypes.c_int16
-cdef object ctypes_int32 = ctypes.c_int32
-cdef object ctypes_int64 = ctypes.c_int64
-cdef object ctypes_uint8 = ctypes.c_uint8
-cdef object ctypes_uint16 = ctypes.c_uint16
-cdef object ctypes_uint32 = ctypes.c_uint32
-cdef object ctypes_uint64 = ctypes.c_uint64
-cdef object ctypes_float = ctypes.c_float
-cdef object ctypes_double = ctypes.c_double
-cdef object numpy_bool = numpy.bool_
-cdef object numpy_int8 = numpy.int8
-cdef object numpy_int16 = numpy.int16
-cdef object numpy_int32 = numpy.int32
-cdef object numpy_int64 = numpy.int64
-cdef object numpy_uint8 = numpy.uint8
-cdef object numpy_uint16 = numpy.uint16
-cdef object numpy_uint32 = numpy.uint32
-cdef object numpy_uint64 = numpy.uint64
-cdef object numpy_float16 = numpy.float16
-cdef object numpy_float32 = numpy.float32
-cdef object numpy_float64 = numpy.float64
-cdef object numpy_complex64 = numpy.complex64
-cdef object numpy_complex128 = numpy.complex128
-
-
-# limitation due to cython/cython#534
-ctypedef void* voidptr
-
-
-# Cython can't infer the overload without at least one input argument with fused type
-cdef inline int prepare_arg(
-        vector.vector[void*]& data,
-        vector.vector[void*]& data_addresses,
-        arg,  # important: keep it a Python object and don't cast
-        const size_t idx,
-        const supported_type* __unused=NULL) except -1:
-    cdef void* ptr = PyMem_Malloc(sizeof(supported_type))
-    # note: this should also work once ctypes has complex support:
-    # python/cpython#121248
-    if supported_type is cpp_single_complex:
-        (<supported_type*>ptr)[0] = cpp_complex.complex[float](arg.real, arg.imag)
-    elif supported_type is cpp_double_complex:
-        (<supported_type*>ptr)[0] = cpp_complex.complex[double](arg.real, arg.imag)
-    else:
-        (<supported_type*>ptr)[0] = <supported_type>(arg)
-    data_addresses[idx] = ptr  # take the address to the scalar
-    data[idx] = ptr  # for later dealloc
-    return 0
-
-
-cdef inline int prepare_ctypes_arg(
-        vector.vector[void*]& data,
-        vector.vector[void*]& data_addresses,
-        arg,
-        const size_t idx) except -1:
-    if isinstance(arg, ctypes_bool):
-        return prepare_arg[cpp_bool](data, data_addresses, arg.value, idx)
-    elif isinstance(arg, ctypes_int8):
-        return prepare_arg[int8_t](data, data_addresses, arg.value, idx)
-    elif isinstance(arg, ctypes_int16):
-        return prepare_arg[int16_t](data, data_addresses, arg.value, idx)
-    elif isinstance(arg, ctypes_int32):
-        return prepare_arg[int32_t](data, data_addresses, arg.value, idx)
-    elif isinstance(arg, ctypes_int64):
-        return prepare_arg[int64_t](data, data_addresses, arg.value, idx)
-    elif isinstance(arg, ctypes_uint8):
-        return prepare_arg[uint8_t](data, data_addresses, arg.value, idx)
-    elif isinstance(arg, ctypes_uint16):
-        return prepare_arg[uint16_t](data, data_addresses, arg.value, idx)
-    elif isinstance(arg, ctypes_uint32):
-        return prepare_arg[uint32_t](data, data_addresses, arg.value, idx)
-    elif isinstance(arg, ctypes_uint64):
-        return prepare_arg[uint64_t](data, data_addresses, arg.value, idx)
-    elif isinstance(arg, ctypes_float):
-        return prepare_arg[float](data, data_addresses, arg.value, idx)
-    elif isinstance(arg, ctypes_double):
-        return prepare_arg[double](data, data_addresses, arg.value, idx)
-    else:
-        return 1
-
-
-cdef inline int prepare_numpy_arg(
-        vector.vector[void*]& data,
-        vector.vector[void*]& data_addresses,
-        arg,
-        const size_t idx) except -1:
-    if isinstance(arg, numpy_bool):
-        return prepare_arg[cpp_bool](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_int8):
-        return prepare_arg[int8_t](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_int16):
-        return prepare_arg[int16_t](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_int32):
-        return prepare_arg[int32_t](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_int64):
-        return prepare_arg[int64_t](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_uint8):
-        return prepare_arg[uint8_t](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_uint16):
-        return prepare_arg[uint16_t](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_uint32):
-        return prepare_arg[uint32_t](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_uint64):
-        return prepare_arg[uint64_t](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_float16):
-        # use int16 as a proxy
-        return prepare_arg[int16_t](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_float32):
-        return prepare_arg[float](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_float64):
-        return prepare_arg[double](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_complex64):
-        return prepare_arg[cpp_single_complex](data, data_addresses, arg, idx)
-    elif isinstance(arg, numpy_complex128):
-        return prepare_arg[cpp_double_complex](data, data_addresses, arg, idx)
-    else:
-        return 1
-
-
-cdef class ParamHolder:
-
-    cdef:
-        vector.vector[void*] data
-        vector.vector[void*] data_addresses
-        object kernel_args
-        readonly intptr_t ptr
-
-    def __init__(self, kernel_args):
-        if len(kernel_args) == 0:
-            self.ptr = 0
-            return
-
-        cdef size_t n_args = len(kernel_args)
-        cdef size_t i
-        cdef int not_prepared
-        self.data = vector.vector[voidptr](n_args, nullptr)
-        self.data_addresses = vector.vector[voidptr](n_args)
-        for i, arg in enumerate(kernel_args):
-            if isinstance(arg, Buffer):
-                # we need the address of where the actual buffer address is stored
-                self.data_addresses[i] = <void*><intptr_t>(arg._ptr.getPtr())
-                continue
-            elif isinstance(arg, int):
-                # Here's the dilemma: We want to have a fast path to pass in Python
-                # integers as pointer addresses, but one could also (mistakenly) pass
-                # it with the intention of passing a scalar integer. It's a mistake
-                # bacause a Python int is ambiguous (arbitrary width). Our judgement
-                # call here is to treat it as a pointer address, without any warning!
-                prepare_arg[intptr_t](self.data, self.data_addresses, arg, i)
-                continue
-            elif isinstance(arg, float):
-                prepare_arg[double](self.data, self.data_addresses, arg, i)
-                continue
-            elif isinstance(arg, complex):
-                prepare_arg[cpp_double_complex](self.data, self.data_addresses, arg, i)
-                continue
-            elif isinstance(arg, bool):
-                prepare_arg[cpp_bool](self.data, self.data_addresses, arg, i)
-                continue
-
-            not_prepared = prepare_numpy_arg(self.data, self.data_addresses, arg, i)
-            if not_prepared:
-                not_prepared = prepare_ctypes_arg(self.data, self.data_addresses, arg, i)
-            if not_prepared:
-                # TODO: support ctypes/numpy struct
-                raise TypeError
-
-        self.kernel_args = kernel_args
-        self.ptr = <intptr_t>self.data_addresses.data()
-
-    def __dealloc__(self):
-        for data in self.data:
-            if data:
-                PyMem_Free(data)
diff --git a/cuda_core/cuda/core/_launcher.py b/cuda_core/cuda/core/_launcher.py
deleted file mode 100644
index 03d7fc08..00000000
--- a/cuda_core/cuda/core/_launcher.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from dataclasses import dataclass
-from typing import Optional, Union
-
-import numpy as np
-
-from cuda import cuda, cudart
-from cuda.core._kernel_arg_handler import ParamHolder
-from cuda.core._memory import Buffer
-from cuda.core._module import Kernel
-from cuda.core._stream import Stream
-from cuda.core._utils import CUDAError, check_or_create_options, handle_return
-
-
-@dataclass
-class LaunchConfig:
-    """
-    """
-    grid: Union[tuple, int] = None
-    block: Union[tuple, int] = None
-    stream: Stream = None
-    shmem_size: Optional[int] = None
-
-    def __post_init__(self):
-        self.grid = self._cast_to_3_tuple(self.grid)
-        self.block = self._cast_to_3_tuple(self.block)
-        # we handle "stream=None" in the launch API
-        if self.stream is not None:
-            if not isinstance(self.stream, Stream):
-                try:
-                    self.stream = Stream._init(self.stream)
-                except Exception as e:
-                    raise ValueError(
-                        "stream must either be a Stream object "
-                        "or support __cuda_stream__") from e
-        if self.shmem_size is None:
-            self.shmem_size = 0
-
-    def _cast_to_3_tuple(self, cfg):
-        if isinstance(cfg, int):
-            if cfg < 1:
-                raise ValueError
-            return (cfg, 1, 1)
-        elif isinstance(cfg, tuple):
-            size = len(cfg)
-            if size == 1:
-                cfg = cfg[0]
-                if cfg < 1:
-                    raise ValueError
-                return (cfg, 1, 1)
-            elif size == 2:
-                if cfg[0] < 1 or cfg[1] < 1:
-                    raise ValueError
-                return (*cfg, 1)
-            elif size == 3:
-                if cfg[0] < 1 or cfg[1] < 1 or cfg[2] < 1:
-                    raise ValueError
-                return cfg
-        else:
-            raise ValueError
-
-
-def launch(kernel, config, *kernel_args):
-    if not isinstance(kernel, Kernel):
-        raise ValueError
-    config = check_or_create_options(LaunchConfig, config, "launch config")
-    # TODO: can we ensure kernel_args is valid/safe to use here?
-
-    driver_ver = handle_return(cuda.cuDriverGetVersion())
-    if driver_ver >= 12000:
-        drv_cfg = cuda.CUlaunchConfig()
-        drv_cfg.gridDimX, drv_cfg.gridDimY, drv_cfg.gridDimZ = config.grid
-        drv_cfg.blockDimX, drv_cfg.blockDimY, drv_cfg.blockDimZ = config.block
-        if config.stream is None:
-            raise CUDAError("stream cannot be None")
-        drv_cfg.hStream = config.stream._handle
-        drv_cfg.sharedMemBytes = config.shmem_size
-        drv_cfg.numAttrs = 0  # FIXME
-
-        # TODO: merge with HelperKernelParams?
-        kernel_args = ParamHolder(kernel_args)
-        args_ptr = kernel_args.ptr
-
-        handle_return(cuda.cuLaunchKernelEx(
-            drv_cfg, int(kernel._handle), args_ptr, 0))
-    else:
-        raise NotImplementedError("TODO")
diff --git a/cuda_core/cuda/core/_memory.py b/cuda_core/cuda/core/_memory.py
deleted file mode 100644
index 0d5dd0d1..00000000
--- a/cuda_core/cuda/core/_memory.py
+++ /dev/null
@@ -1,241 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from __future__ import annotations
-
-import abc
-from typing import Optional, Tuple, TypeVar
-import warnings
-
-from cuda import cuda
-from cuda.core._dlpack import DLDeviceType, make_py_capsule
-from cuda.core._stream import default_stream
-from cuda.core._utils import handle_return
-
-
-PyCapsule = TypeVar("PyCapsule")
-
-
-# TODO: define a memory property mixin class and make Buffer and
-# MemoryResource both inherit from it
-
-
-class Buffer:
-
-    # TODO: handle ownership? (_mr could be None)
-    __slots__ = ("_ptr", "_size", "_mr",)
-
-    def __init__(self, ptr, size, mr: MemoryResource=None):
-        self._ptr = ptr
-        self._size = size
-        self._mr = mr
-
-    def __del__(self):
-        self.close(default_stream())
-
-    def close(self, stream=None):
-        if self._ptr and self._mr is not None:
-            if stream is None:
-                stream = default_stream()
-            self._mr.deallocate(self._ptr, self._size, stream)
-            self._ptr = 0
-            self._mr = None
-
-    @property
-    def handle(self):
-        return self._ptr
-
-    @property
-    def size(self):
-        return self._size
-
-    @property
-    def memory_resource(self) -> MemoryResource:
-        # Return the memory resource from which this buffer was allocated.
-        return self._mr
-
-    @property
-    def is_device_accessible(self) -> bool:
-        # Check if this buffer can be accessed from GPUs.
-        if self._mr is not None:
-            return self._mr.is_device_accessible
-        raise NotImplementedError
-
-    @property
-    def is_host_accessible(self) -> bool:
-        # Check if this buffer can be accessed from CPUs.
-        if self._mr is not None:
-            return self._mr.is_host_accessible
-        raise NotImplementedError
-
-    @property
-    def device_id(self) -> int:
-        if self._mr is not None:
-            return self._mr.device_id
-        raise NotImplementedError
-
-    def copy_to(self, dst: Buffer=None, *, stream) -> Buffer:
-        # Copy from this buffer to the dst buffer asynchronously on the
-        # given stream. The dst buffer is returned. If the dst is not provided,
-        # allocate one from self.memory_resource. Raise an exception if the
-        # stream is not provided.
-        if stream is None:
-            raise ValueError("stream must be provided")
-        if dst is None:
-            if self._mr is None:
-                raise ValueError("a destination buffer must be provided")
-            dst = self._mr.allocate(self._size, stream)
-        if dst._size != self._size:
-            raise ValueError("buffer sizes mismatch between src and dst")
-        handle_return(
-            cuda.cuMemcpyAsync(dst._ptr, self._ptr, self._size, stream._handle))
-        return dst
-
-    def copy_from(self, src: Buffer, *, stream):
-        # Copy from the src buffer to this buffer asynchronously on the
-        # given stream. Raise an exception if the stream is not provided. 
-        if stream is None:
-            raise ValueError("stream must be provided")
-        if src._size != self._size:
-            raise ValueError("buffer sizes mismatch between src and dst")
-        handle_return(
-            cuda.cuMemcpyAsync(self._ptr, src._ptr, self._size, stream._handle))
-
-    def __dlpack__(self, *,
-                   stream: Optional[int] = None,
-                   max_version: Optional[Tuple[int, int]] = None, 
-                   dl_device: Optional[Tuple[int, int]] = None, 
-                   copy: Optional[bool] = None) -> PyCapsule:
-        # Note: we ignore the stream argument entirely (as if it is -1).
-        # It is the user's responsibility to maintain stream order.
-        if dl_device is not None or copy is True:
-            raise BufferError
-        if max_version is None:
-            versioned = False
-        else:
-            assert len(max_version) == 2
-            if max_version >= (1, 0):
-                versioned = True
-            else:
-                versioned = False
-        capsule = make_py_capsule(self, versioned)
-        return capsule
-
-    def __dlpack_device__(self) -> Tuple[int, int]:
-        if self.is_device_accessible and not self.is_host_accessible:
-            return (DLDeviceType.kDLCUDA, self.device_id)
-        elif self.is_device_accessible and self.is_host_accessible:
-            # TODO: this can also be kDLCUDAManaged, we need more fine-grained checks
-            return (DLDeviceType.kDLCUDAHost, 0)
-        elif not self.is_device_accessible and self.is_host_accessible:
-            return (DLDeviceType.kDLCPU, 0)
-        else:  # not self.is_device_accessible and not self.is_host_accessible
-            raise BufferError("invalid buffer")
-
-    def __buffer__(self, flags: int, /) -> memoryview:
-        # Support for Python-level buffer protocol as per PEP 688. 
-        # This raises a BufferError unless: 
-        #   1. Python is 3.12+
-        #   2. This Buffer object is host accessible 
-        raise NotImplementedError("TODO")
-
-    def __release_buffer__(self, buffer: memoryview, /):
-        # Supporting methond paired with __buffer__.
-        raise NotImplementedError("TODO")
-
-
-class MemoryResource(abc.ABC):
-
-    __slots__ = ("_handle",)
-
-    @abc.abstractmethod
-    def __init__(self, *args, **kwargs):
-        ...
-
-    @abc.abstractmethod
-    def allocate(self, size, stream=None) -> Buffer:
-        ...
-
-    @abc.abstractmethod
-    def deallocate(self, ptr, size, stream=None):
-        ...
-
-    @property
-    @abc.abstractmethod
-    def is_device_accessible(self) -> bool:
-        # Check if the buffers allocated from this MR can be accessed from
-        # GPUs.
-        ...
-
-    @property
-    @abc.abstractmethod
-    def is_host_accessible(self) -> bool:
-        # Check if the buffers allocated from this MR can be accessed from
-        # CPUs.
-        ...
-
-    @property
-    @abc.abstractmethod
-    def device_id(self) -> int:
-        # Return the device ID if this MR is for single devices. Raise an
-        # exception if it is not.
-        ...
-
-
-class _DefaultAsyncMempool(MemoryResource):
-
-    __slots__ = ("_dev_id",)
-
-    def __init__(self, dev_id):
-        self._handle = handle_return(cuda.cuDeviceGetMemPool(dev_id))
-        self._dev_id = dev_id
-
-    def allocate(self, size, stream=None) -> Buffer:
-        if stream is None:
-            stream = default_stream()
-        ptr = handle_return(cuda.cuMemAllocFromPoolAsync(size, self._handle, stream._handle))
-        return Buffer(ptr, size, self)
-
-    def deallocate(self, ptr, size, stream=None):
-        if stream is None:
-            stream = default_stream()
-        handle_return(cuda.cuMemFreeAsync(ptr, stream._handle))
-
-    @property
-    def is_device_accessible(self) -> bool:
-        return True
-
-    @property
-    def is_host_accessible(self) -> bool:
-        return False
-
-    @property
-    def device_id(self) -> int:
-        return self._dev_id
-
-
-class _DefaultPinnedMemorySource(MemoryResource):
-
-    def __init__(self):
-        # TODO: support flags from cuMemHostAlloc?
-        self._handle = None
-
-    def allocate(self, size, stream=None) -> Buffer:
-        ptr = handle_return(cuda.cuMemAllocHost(size))
-        return Buffer(ptr, size, self)
-
-    def deallocate(self, ptr, size, stream=None):
-        handle_return(cuda.cuMemFreeHost(ptr))
-
-    @property
-    def is_device_accessible(self) -> bool:
-        return True
-
-    @property
-    def is_host_accessible(self) -> bool:
-        return True
-
-    @property
-    def device_id(self) -> int:
-        raise RuntimeError("the pinned memory resource is not bound to any GPU")
diff --git a/cuda_core/cuda/core/_memoryview.pyx b/cuda_core/cuda/core/_memoryview.pyx
deleted file mode 100644
index 8f7cc948..00000000
--- a/cuda_core/cuda/core/_memoryview.pyx
+++ /dev/null
@@ -1,297 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-cimport cython
-
-from ._dlpack cimport *
-
-import functools
-from typing import Any, Optional
-
-from cuda import cuda
-import numpy
-
-from cuda.core._utils import handle_return
-
-
-# TODO(leofang): support NumPy structured dtypes
-
-
-@cython.dataclasses.dataclass
-cdef class StridedMemoryView:
-
-    # TODO: switch to use Cython's cdef typing?
-    ptr: int = None
-    shape: tuple = None
-    strides: tuple = None  # in counts, not bytes
-    dtype: numpy.dtype = None
-    device_id: int = None  # -1 for CPU
-    device_accessible: bool = None
-    readonly: bool = None
-    obj: Any = None
-
-    def __init__(self, obj=None, stream_ptr=None):
-        if obj is not None:
-            # populate self's attributes
-            if check_has_dlpack(obj):
-                view_as_dlpack(obj, stream_ptr, self)
-            else:
-                view_as_cai(obj, stream_ptr, self)
-        else:
-            # default construct
-            pass
-
-    def __repr__(self):
-        return (f"StridedMemoryView(ptr={self.ptr},\n"
-              + f"                  shape={self.shape},\n"
-              + f"                  strides={self.strides},\n"
-              + f"                  dtype={get_simple_repr(self.dtype)},\n"
-              + f"                  device_id={self.device_id},\n"
-              + f"                  device_accessible={self.device_accessible},\n"
-              + f"                  readonly={self.readonly},\n"
-              + f"                  obj={get_simple_repr(self.obj)})")
-
-
-cdef str get_simple_repr(obj):
-    # TODO: better handling in np.dtype objects
-    cdef object obj_class
-    cdef str obj_repr
-    if isinstance(obj, type):
-        obj_class = obj
-    else:
-        obj_class = obj.__class__
-    if obj_class.__module__ in (None, "builtins"):
-        obj_repr = obj_class.__name__
-    else:
-        obj_repr = f"{obj_class.__module__}.{obj_class.__name__}"
-    return obj_repr
-
-
-cdef bint check_has_dlpack(obj) except*:
-    cdef bint has_dlpack
-    if hasattr(obj, "__dlpack__") and hasattr(obj, "__dlpack_device__"):
-        has_dlpack = True
-    elif hasattr(obj, "__cuda_array_interface__"):
-        has_dlpack = False
-    else:
-        raise RuntimeError(
-            "the input object does not support any data exchange protocol")
-    return has_dlpack
-
-
-cdef class _StridedMemoryViewProxy:
-
-    cdef:
-        object obj
-        bint has_dlpack
-
-    def __init__(self, obj):
-        self.obj = obj
-        self.has_dlpack = check_has_dlpack(obj)
-
-    cpdef StridedMemoryView view(self, stream_ptr=None):
-        if self.has_dlpack:
-            return view_as_dlpack(self.obj, stream_ptr)
-        else:
-            return view_as_cai(self.obj, stream_ptr)
-
-
-cdef StridedMemoryView view_as_dlpack(obj, stream_ptr, view=None):
-    cdef int dldevice, device_id, i
-    cdef bint device_accessible, versioned, is_readonly
-    dldevice, device_id = obj.__dlpack_device__()
-    if dldevice == _kDLCPU:
-        device_accessible = False
-        assert device_id == 0
-        if stream_ptr is None:
-            raise BufferError("stream=None is ambiguous with view()")
-        elif stream_ptr == -1:
-            stream_ptr = None
-    elif dldevice == _kDLCUDA:
-        device_accessible = True
-        # no need to check other stream values, it's a pass-through
-        if stream_ptr is None:
-            raise BufferError("stream=None is ambiguous with view()")
-    elif dldevice == _kDLCUDAHost:
-        device_accessible = True
-        assert device_id == 0
-        # just do a pass-through without any checks, as pinned memory can be
-        # accessed on both host and device
-    elif dldevice == _kDLCUDAManaged:
-        device_accessible = True
-        # just do a pass-through without any checks, as managed memory can be
-        # accessed on both host and device
-    else:
-        raise BufferError("device not supported")
-
-    cdef object capsule
-    try:
-        capsule = obj.__dlpack__(
-            stream=stream_ptr,
-            max_version=(DLPACK_MAJOR_VERSION, DLPACK_MINOR_VERSION))
-        versioned = True
-    except TypeError:
-        capsule = obj.__dlpack__(
-            stream=stream_ptr)
-        versioned = False
-
-    cdef void* data = NULL
-    if versioned and cpython.PyCapsule_IsValid(
-            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
-        data = cpython.PyCapsule_GetPointer(
-            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME)
-    elif not versioned and cpython.PyCapsule_IsValid(
-            capsule, DLPACK_TENSOR_UNUSED_NAME):
-        data = cpython.PyCapsule_GetPointer(
-            capsule, DLPACK_TENSOR_UNUSED_NAME)
-    else:
-        assert False
-
-    cdef DLManagedTensor* dlm_tensor
-    cdef DLManagedTensorVersioned* dlm_tensor_ver
-    cdef DLTensor* dl_tensor
-    if versioned:
-        dlm_tensor_ver = <DLManagedTensorVersioned*>data
-        dl_tensor = &dlm_tensor_ver.dl_tensor
-        is_readonly = bool((dlm_tensor_ver.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0)
-    else:
-        dlm_tensor = <DLManagedTensor*>data
-        dl_tensor = &dlm_tensor.dl_tensor
-        is_readonly = False
-
-    cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
-    buf.ptr = <intptr_t>(dl_tensor.data)
-    buf.shape = tuple(int(dl_tensor.shape[i]) for i in range(dl_tensor.ndim))
-    if dl_tensor.strides:
-        buf.strides = tuple(
-            int(dl_tensor.strides[i]) for i in range(dl_tensor.ndim))
-    else:
-        # C-order
-        buf.strides = None
-    buf.dtype = dtype_dlpack_to_numpy(&dl_tensor.dtype)
-    buf.device_id = device_id
-    buf.device_accessible = device_accessible
-    buf.readonly = is_readonly
-    buf.obj = obj
-
-    cdef const char* used_name = (
-        DLPACK_VERSIONED_TENSOR_USED_NAME if versioned else DLPACK_TENSOR_USED_NAME)
-    cpython.PyCapsule_SetName(capsule, used_name)
-
-    return buf
-
-
-cdef object dtype_dlpack_to_numpy(DLDataType* dtype):
-    cdef int bits = dtype.bits
-    if dtype.lanes != 1:
-        # TODO: return a NumPy structured dtype?
-        raise NotImplementedError(
-            f'vector dtypes (lanes={dtype.lanes}) is not supported')
-    if dtype.code == kDLUInt:
-        if bits == 8:
-            np_dtype = numpy.uint8
-        elif bits == 16:
-            np_dtype = numpy.uint16
-        elif bits == 32:
-            np_dtype = numpy.uint32
-        elif bits == 64:
-            np_dtype = numpy.uint64
-        else:
-            raise TypeError('uint{} is not supported.'.format(bits))
-    elif dtype.code == kDLInt:
-        if bits == 8:
-            np_dtype = numpy.int8
-        elif bits == 16:
-            np_dtype = numpy.int16
-        elif bits == 32:
-            np_dtype = numpy.int32
-        elif bits == 64:
-            np_dtype = numpy.int64
-        else:
-            raise TypeError('int{} is not supported.'.format(bits))
-    elif dtype.code == kDLFloat:
-        if bits == 16:
-            np_dtype = numpy.float16
-        elif bits == 32:
-            np_dtype = numpy.float32
-        elif bits == 64:
-            np_dtype = numpy.float64
-        else:
-            raise TypeError('float{} is not supported.'.format(bits))
-    elif dtype.code == kDLComplex:
-        # TODO(leofang): support complex32
-        if bits == 64:
-            np_dtype = numpy.complex64
-        elif bits == 128:
-            np_dtype = numpy.complex128
-        else:
-            raise TypeError('complex{} is not supported.'.format(bits))
-    elif dtype.code == kDLBool:
-        if bits == 8:
-            np_dtype = numpy.bool_
-        else:
-            raise TypeError(f'{bits}-bit bool is not supported')
-    elif dtype.code == kDLBfloat:
-        # TODO(leofang): use ml_dtype.bfloat16?
-        raise NotImplementedError('bfloat is not supported yet')
-    else:
-        raise TypeError('Unsupported dtype. dtype code: {}'.format(dtype.code))
-
-    # We want the dtype object not just the type object
-    return numpy.dtype(np_dtype)
-
-
-cdef StridedMemoryView view_as_cai(obj, stream_ptr, view=None):
-    cdef dict cai_data = obj.__cuda_array_interface__
-    if cai_data["version"] < 3:
-        raise BufferError("only CUDA Array Interface v3 or above is supported")
-    if cai_data.get("mask") is not None:
-        raise BufferError("mask is not supported")
-    if stream_ptr is None:
-        raise BufferError("stream=None is ambiguous with view()")
-
-    cdef StridedMemoryView buf = StridedMemoryView() if view is None else view
-    buf.obj = obj
-    buf.ptr, buf.readonly = cai_data["data"]
-    buf.shape = cai_data["shape"]
-    # TODO: this only works for built-in numeric types
-    buf.dtype = numpy.dtype(cai_data["typestr"])
-    buf.strides = cai_data.get("strides")
-    if buf.strides is not None:
-        # convert to counts
-        buf.strides = tuple(s // buf.dtype.itemsize for s in buf.strides)
-    buf.device_accessible = True
-    buf.device_id = handle_return(
-        cuda.cuPointerGetAttribute(
-            cuda.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,
-            buf.ptr))
-
-    cdef intptr_t producer_s, consumer_s
-    stream = cai_data.get("stream")
-    if stream is not None:
-        producer_s = <intptr_t>(stream)
-        consumer_s = <intptr_t>(stream_ptr)
-        assert producer_s > 0
-        # establish stream order
-        if producer_s != consumer_s:
-            e = handle_return(cuda.cuEventCreate(
-                cuda.CUevent_flags.CU_EVENT_DISABLE_TIMING))
-            handle_return(cuda.cuEventRecord(e, producer_s))
-            handle_return(cuda.cuStreamWaitEvent(consumer_s, e, 0))
-            handle_return(cuda.cuEventDestroy(e))
-
-    return buf
-
-
-def viewable(tuple arg_indices):
-    def wrapped_func_with_indices(func):
-        @functools.wraps(func)
-        def wrapped_func(*args, **kwargs):
-            args = list(args)
-            cdef int idx
-            for idx in arg_indices:
-                args[idx] = _StridedMemoryViewProxy(args[idx])
-            return func(*args, **kwargs)
-        return wrapped_func
-    return wrapped_func_with_indices
diff --git a/cuda_core/cuda/core/_module.py b/cuda_core/cuda/core/_module.py
deleted file mode 100644
index 98926363..00000000
--- a/cuda_core/cuda/core/_module.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from cuda import cuda, cudart
-from cuda.core._utils import handle_return
-
-
-_backend = {
-    "new": {
-        "file": cuda.cuLibraryLoadFromFile,
-        "data": cuda.cuLibraryLoadData,
-        "kernel": cuda.cuLibraryGetKernel,
-    },
-    "old": {
-        "file": cuda.cuModuleLoad,
-        "data": cuda.cuModuleLoadDataEx,
-        "kernel": cuda.cuModuleGetFunction,
-    },
-}
-
-
-class Kernel:
-
-    __slots__ = ("_handle", "_module",)
-
-    def __init__(self):
-        raise NotImplementedError("directly constructing a Kernel instance is not supported")
-
-    @staticmethod
-    def _from_obj(obj, mod):
-        assert isinstance(obj, (cuda.CUkernel, cuda.CUfunction))
-        assert isinstance(mod, ObjectCode)
-        ker = Kernel.__new__(Kernel)
-        ker._handle = obj
-        ker._module = mod
-        return ker
-
-
-class ObjectCode:
-
-    __slots__ = ("_handle", "_code_type", "_module", "_loader", "_sym_map")
-    _supported_code_type = ("cubin", "ptx", "fatbin")
-
-    def __init__(self, module, code_type, jit_options=None, *,
-                 symbol_mapping=None):
-        if code_type not in self._supported_code_type:
-            raise ValueError
-        self._handle = None
-
-        driver_ver = handle_return(cuda.cuDriverGetVersion())
-        self._loader = _backend["new"] if driver_ver >= 12000 else _backend["old"]
-
-        if isinstance(module, str):
-            if driver_ver < 12000 and jit_options is not None:
-                raise ValueError
-            module = module.encode()
-            self._handle = handle_return(self._loader["file"](module))
-        else:
-            assert isinstance(module, bytes)
-            if jit_options is None:
-                jit_options = {}
-            if driver_ver >= 12000:
-                args = (module, list(jit_options.keys()), list(jit_options.values()), len(jit_options),
-                        # TODO: support library options
-                        [], [], 0)
-            else:
-                args = (module, len(jit_options), jit_options.keys(), jit_options.values())
-            self._handle = handle_return(self._loader["data"](*args))
-
-        self._code_type = code_type
-        self._module = module
-        self._sym_map = {} if symbol_mapping is None else symbol_mapping
-
-    def __del__(self):
-        # TODO: do we want to unload? Probably not..
-        pass
-
-    def get_kernel(self, name):
-        try:
-            name = self._sym_map[name]
-        except KeyError:
-            name = name.encode()
-        data = handle_return(self._loader["kernel"](self._handle, name))
-        return Kernel._from_obj(data, self)
diff --git a/cuda_core/cuda/core/_program.py b/cuda_core/cuda/core/_program.py
deleted file mode 100644
index 0c0f02d7..00000000
--- a/cuda_core/cuda/core/_program.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from cuda import nvrtc
-from cuda.core._utils import handle_return
-from cuda.core._module import ObjectCode
-
-
-class Program:
-
-    __slots__ = ("_handle", "_backend", )
-    _supported_code_type = ("c++", )
-    _supported_target_type = ("ptx", "cubin", "ltoir", )
-
-    def __init__(self, code, code_type):
-        if code_type not in self._supported_code_type:
-            raise NotImplementedError
-        self._handle = None
-
-        if code_type.lower() == "c++":
-            if not isinstance(code, str):
-                raise TypeError
-            # TODO: support pre-loaded headers & include names
-            # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved
-            self._handle = handle_return(
-                nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], []))
-            self._backend = "nvrtc"
-        else:
-            raise NotImplementedError
-
-    def __del__(self):
-        self.close()
-
-    def close(self):
-        if self._handle is not None:
-            handle_return(nvrtc.nvrtcDestroyProgram(self._handle))
-            self._handle = None
-
-    def compile(self, target_type, options=(), name_expressions=(), logs=None):
-        if target_type not in self._supported_target_type:
-            raise NotImplementedError
-
-        if self._backend == "nvrtc":
-            if name_expressions:
-                for n in name_expressions:
-                    handle_return(
-                        nvrtc.nvrtcAddNameExpression(self._handle, n.encode()),
-                        handle=self._handle)
-            # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved
-            options = list(o.encode() for o in options)
-            handle_return(
-                nvrtc.nvrtcCompileProgram(self._handle, len(options), options),
-                handle=self._handle)
-
-            size_func = getattr(nvrtc, f"nvrtcGet{target_type.upper()}Size")
-            comp_func = getattr(nvrtc, f"nvrtcGet{target_type.upper()}")
-            size = handle_return(size_func(self._handle), handle=self._handle)
-            data = b" " * size
-            handle_return(comp_func(self._handle, data), handle=self._handle)
-
-            symbol_mapping = {}
-            if name_expressions:
-                for n in name_expressions:
-                    symbol_mapping[n] = handle_return(nvrtc.nvrtcGetLoweredName(
-                        self._handle, n.encode()))
-
-            if logs is not None:
-                logsize = handle_return(nvrtc.nvrtcGetProgramLogSize(self._handle))
-                if logsize > 1:
-                    log = b" " * logsize
-                    handle_return(nvrtc.nvrtcGetProgramLog(self._handle, log))
-                    logs.write(log.decode())
-
-            # TODO: handle jit_options for ptx?
-
-            return ObjectCode(data, target_type, symbol_mapping=symbol_mapping)
-
-    @property
-    def backend(self):
-        return self._backend
-
-    @property
-    def handle(self):
-        return self._handle
diff --git a/cuda_core/cuda/core/_stream.py b/cuda_core/cuda/core/_stream.py
deleted file mode 100644
index e815f9a8..00000000
--- a/cuda_core/cuda/core/_stream.py
+++ /dev/null
@@ -1,243 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-import os
-from typing import Optional, Tuple, TYPE_CHECKING, Union
-
-if TYPE_CHECKING:
-    from cuda.core._device import Device
-from cuda import cuda, cudart
-from cuda.core._context import Context
-from cuda.core._event import Event, EventOptions
-from cuda.core._utils import check_or_create_options
-from cuda.core._utils import get_device_from_ctx
-from cuda.core._utils import handle_return
-
-
-@dataclass
-class StreamOptions:
-
-    nonblocking: bool = True
-    priority: Optional[int] = None
-
-
-class Stream:
-
-    __slots__ = ("_handle", "_nonblocking", "_priority", "_owner", "_builtin",
-                 "_device_id", "_ctx_handle")
-
-    def __init__(self):
-        # minimal requirements for the destructor
-        self._handle = None
-        self._owner = None
-        self._builtin = False
-        raise NotImplementedError(
-            "directly creating a Stream object can be ambiguous. Please either "
-            "call Device.create_stream() or, if a stream pointer is already "
-            "available from somewhere else, Stream.from_handle()")
-
-    @staticmethod
-    def _init(obj=None, *, options: Optional[StreamOptions]=None):
-        self = Stream.__new__(Stream)
-
-        # minimal requirements for the destructor
-        self._handle = None
-        self._owner = None
-        self._builtin = False
-
-        if obj is not None and options is not None:
-            raise ValueError("obj and options cannot be both specified")
-        if obj is not None:
-            if not hasattr(obj, "__cuda_stream__"):
-                raise ValueError
-            info = obj.__cuda_stream__
-            assert info[0] == 0
-            self._handle = cuda.CUstream(info[1])
-            # TODO: check if obj is created under the current context/device
-            self._owner = obj
-            self._nonblocking = None  # delayed
-            self._priority = None  # delayed
-            self._device_id = None  # delayed
-            self._ctx_handle = None  # delayed
-            return self
-
-        options = check_or_create_options(StreamOptions, options, "Stream options")
-        nonblocking = options.nonblocking
-        priority = options.priority
-
-        if nonblocking:
-            flags = cuda.CUstream_flags.CU_STREAM_NON_BLOCKING
-        else:
-            flags = cuda.CUstream_flags.CU_STREAM_DEFAULT
-
-        if priority is not None:
-            high, low = handle_return(
-                cudart.cudaDeviceGetStreamPriorityRange())
-            if not (low <= priority <= high):
-                raise ValueError(f"{priority=} is out of range {[low, high]}")
-        else:
-            priority = 0
-
-        self._handle = handle_return(
-            cuda.cuStreamCreateWithPriority(flags, priority))
-        self._owner = None
-        self._nonblocking = nonblocking
-        self._priority = priority
-        # don't defer this because we will have to pay a cost for context
-        # switch later
-        self._device_id = int(handle_return(cuda.cuCtxGetDevice()))
-        self._ctx_handle = None  # delayed
-        return self
-
-    def __del__(self):
-        self.close()
-
-    def close(self):
-        if self._owner is None:
-            if self._handle and not self._builtin:
-                handle_return(cuda.cuStreamDestroy(self._handle))
-        else:
-            self._owner = None
-        self._handle = None
-
-    @property
-    def __cuda_stream__(self) -> Tuple[int, int]:
-        return (0, int(self._handle))
-
-    @property
-    def handle(self) -> int:
-        # Return the underlying cudaStream_t pointer address as Python int.
-        return int(self._handle)
-
-    @property
-    def is_nonblocking(self) -> bool:
-        if self._nonblocking is None:
-            flag = handle_return(cuda.cuStreamGetFlags(self._handle))
-            if flag == cuda.CUstream_flags.CU_STREAM_NON_BLOCKING:
-                self._nonblocking = True
-            else:
-                self._nonblocking = False
-        return self._nonblocking
-
-    @property
-    def priority(self) -> int:
-        if self._priority is None:
-            prio = handle_return(cuda.cuStreamGetPriority(self._handle))
-            self._priority = prio
-        return self._priority
-
-    def sync(self):
-        handle_return(cuda.cuStreamSynchronize(self._handle))
-
-    def record(self, event: Event=None, options: EventOptions=None) -> Event:
-        # Create an Event object (or reusing the given one) by recording
-        # on the stream. Event flags such as disabling timing, nonblocking,
-        # and CU_EVENT_RECORD_EXTERNAL, can be set in EventOptions.
-        if event is None:
-            event = Event._init(options)
-        elif not isinstance(event, Event):
-            raise TypeError("record only takes an Event object")
-        handle_return(cuda.cuEventRecord(event.handle, self._handle))
-        return event
-
-    def wait(self, event_or_stream: Union[Event, Stream]):
-        # Wait for a CUDA event or a CUDA stream to establish a stream order.
-        #
-        # If a Stream instance is provided, the effect is as if an event is
-        # recorded on the given stream, and then self waits on the recorded
-        # event.
-        if isinstance(event_or_stream, Event):
-            event = event_or_stream.handle
-            discard_event = False
-        else:
-            if not isinstance(event_or_stream, Stream):
-                try:
-                    stream = Stream._init(event_or_stream)
-                except Exception as e:
-                    raise ValueError(
-                        "only an Event, Stream, or object supporting "
-                        "__cuda_stream__ can be waited") from e
-            else:
-                stream = event_or_stream
-            event = handle_return(
-                cuda.cuEventCreate(cuda.CUevent_flags.CU_EVENT_DISABLE_TIMING))
-            handle_return(cuda.cuEventRecord(event, stream.handle))
-            discard_event = True
-
-        # TODO: support flags other than 0?
-        handle_return(cuda.cuStreamWaitEvent(self._handle, event, 0))
-        if discard_event:
-            handle_return(cuda.cuEventDestroy(event))
-
-    @property
-    def device(self) -> Device:
-        # Inverse look-up to find on which device this stream instance was
-        # created.
-        #
-        # Note that Stream.device.context might not necessarily agree with
-        # Stream.context, in cases where a different CUDA context is set
-        # current after a stream was created.
-        from cuda.core._device import Device  # avoid circular import
-        if self._device_id is None:
-            # Get the stream context first
-            if self._ctx_handle is None:
-                self._ctx_handle = handle_return(
-                    cuda.cuStreamGetCtx(self._handle))
-            self._device_id = get_device_from_ctx(self._ctx_handle)
-        return Device(self._device_id)
-
-    @property
-    def context(self) -> Context:
-        # Inverse look-up to find in which CUDA context this stream instance
-        # was created
-        if self._ctx_handle is None:
-            self._ctx_handle = handle_return(
-                cuda.cuStreamGetCtx(self._handle))
-        if self._device_id is None:
-            self._device_id = get_device_from_ctx(self._ctx_handle)
-        return Context._from_ctx(self._ctx_handle, self._device_id)
-
-    @staticmethod
-    def from_handle(handle: int) -> Stream:
-        class _stream_holder:
-            @property
-            def __cuda_stream__(self):
-                return (0, handle)
-        return Stream._init(obj=_stream_holder())
-
-
-class _LegacyDefaultStream(Stream):
-
-    def __init__(self):
-        self._handle = cuda.CUstream(cuda.CU_STREAM_LEGACY)
-        self._owner = None
-        self._nonblocking = None  # delayed
-        self._priority = None  # delayed
-        self._builtin = True
-
-
-class _PerThreadDefaultStream(Stream):
-
-    def __init__(self):
-        self._handle = cuda.CUstream(cuda.CU_STREAM_PER_THREAD)
-        self._owner = None
-        self._nonblocking = None  # delayed
-        self._priority = None  # delayed
-        self._builtin = True
-
-
-LEGACY_DEFAULT_STREAM = _LegacyDefaultStream()
-PER_THREAD_DEFAULT_STREAM = _PerThreadDefaultStream()
-
-
-def default_stream():
-    # TODO: flip the default
-    use_ptds = int(os.environ.get('CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM', 0))
-    if use_ptds:
-        return PER_THREAD_DEFAULT_STREAM
-    else:
-        return LEGACY_DEFAULT_STREAM
diff --git a/cuda_core/cuda/core/_utils.py b/cuda_core/cuda/core/_utils.py
deleted file mode 100644
index bd3c5cd6..00000000
--- a/cuda_core/cuda/core/_utils.py
+++ /dev/null
@@ -1,131 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from collections import namedtuple
-import functools
-from typing import Callable, Dict
-
-from cuda import cuda, cudart, nvrtc
-
-
-class CUDAError(Exception): pass
-
-
-class NVRTCError(CUDAError): pass
-
-
-ComputeCapability = namedtuple("ComputeCapability", ("major", "minor"))
-
-
-def _check_error(error, handle=None):
-    if isinstance(error, cuda.CUresult):
-        if error == cuda.CUresult.CUDA_SUCCESS:
-            return
-        err, name = cuda.cuGetErrorName(error)
-        if err == cuda.CUresult.CUDA_SUCCESS:
-            err, desc = cuda.cuGetErrorString(error)
-        if err == cuda.CUresult.CUDA_SUCCESS:
-            raise CUDAError(f"{name.decode()}: {desc.decode()}")
-        else:
-            raise CUDAError(f"unknown error: {error}")
-    elif isinstance(error, cudart.cudaError_t):
-        if error == cudart.cudaError_t.cudaSuccess:
-            return
-        err, name = cudart.cudaGetErrorName(error)
-        if err == cudart.cudaError_t.cudaSuccess:
-            err, desc = cudart.cudaGetErrorString(error)
-        if err == cudart.cudaError_t.cudaSuccess:
-            raise CUDAError(f"{name.decode()}: {desc.decode()}")
-        else:
-            raise CUDAError(f"unknown error: {error}")
-    elif isinstance(error, nvrtc.nvrtcResult):
-        if error == nvrtc.nvrtcResult.NVRTC_SUCCESS:
-            return
-        assert handle is not None
-        _, logsize = nvrtc.nvrtcGetProgramLogSize(handle)
-        log = b" " * logsize
-        _ = nvrtc.nvrtcGetProgramLog(handle, log)
-        err = f"{error}: {nvrtc.nvrtcGetErrorString(error)[1].decode()}, " \
-              f"compilation log:\n\n{log.decode()}"
-        raise NVRTCError(err)
-    else:
-        raise RuntimeError('Unknown error type: {}'.format(error))
-
-
-def handle_return(result, handle=None):
-    _check_error(result[0], handle=handle)
-    if len(result) == 1:
-        return
-    elif len(result) == 2:
-        return result[1]
-    else:
-        return result[1:]
-
-
-def check_or_create_options(cls, options, options_description, *, keep_none=False):
-    """
-    Create the specified options dataclass from a dictionary of options or None.
-    """
-
-    if options is None:
-        if keep_none:
-            return options
-        options = cls()
-    elif isinstance(options, Dict):
-        options = cls(**options)
-
-    if not isinstance(options, cls):
-        raise TypeError(f"The {options_description} must be provided as an object "
-                        f"of type {cls.__name__} or as a dict with valid {options_description}. "
-                        f"The provided object is '{options}'.")
-
-    return options
-
-
-def precondition(checker: Callable[..., None], what: str = "") -> Callable:
-    """
-    A decorator that adds checks to ensure any preconditions are met.
-
-    Args:
-        checker: The function to call to check whether the preconditions are met. It has the same signature as the wrapped
-            function with the addition of the keyword argument `what`.
-        what: A string that is passed in to `checker` to provide context information.
-
-    Returns:
-        Callable: A decorator that creates the wrapping.
-    """
-    def outer(wrapped_function):
-        """
-        A decorator that actually wraps the function for checking preconditions.
-        """
-        @functools.wraps(wrapped_function)
-        def inner(*args, **kwargs):
-            """
-            Check preconditions and if they are met, call the wrapped function.
-            """
-            checker(*args, **kwargs, what=what)
-            result = wrapped_function(*args, **kwargs)
-
-            return result
-
-        return inner
-
-    return outer
-
-
-def get_device_from_ctx(ctx_handle) -> int:
-    """Get device ID from the given ctx."""
-    prev_ctx = Device().context.handle
-    if ctx_handle != prev_ctx:
-        switch_context = True
-    else:
-        switch_context = False
-    if switch_context:
-        assert prev_ctx == handle_return(cuda.cuCtxPopCurrent())
-        handle_return(cuda.cuCtxPushCurrent(ctx_handle))
-    device_id = int(handle_return(cuda.cuCtxGetDevice()))
-    if switch_context:
-        assert ctx_handle ==  handle_return(cuda.cuCtxPopCurrent())
-        handle_return(cuda.cuCtxPushCurrent(prev_ctx))
-    return device_id
diff --git a/cuda_core/cuda/core/_version.py b/cuda_core/cuda/core/_version.py
deleted file mode 100644
index cc83b468..00000000
--- a/cuda_core/cuda/core/_version.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-__version__ = "0.0.1"
diff --git a/cuda_core/cuda/core/dlpack.h b/cuda_core/cuda/core/dlpack.h
deleted file mode 100644
index bcb77949..00000000
--- a/cuda_core/cuda/core/dlpack.h
+++ /dev/null
@@ -1,332 +0,0 @@
-/*!
- *  Copyright (c) 2017 by Contributors
- * \file dlpack.h
- * \brief The common header of DLPack.
- */
-#ifndef DLPACK_DLPACK_H_
-#define DLPACK_DLPACK_H_
-
-/**
- * \brief Compatibility with C++
- */
-#ifdef __cplusplus
-#define DLPACK_EXTERN_C extern "C"
-#else
-#define DLPACK_EXTERN_C
-#endif
-
-/*! \brief The current major version of dlpack */
-#define DLPACK_MAJOR_VERSION 1
-
-/*! \brief The current minor version of dlpack */
-#define DLPACK_MINOR_VERSION 0
-
-/*! \brief DLPACK_DLL prefix for windows */
-#ifdef _WIN32
-#ifdef DLPACK_EXPORTS
-#define DLPACK_DLL __declspec(dllexport)
-#else
-#define DLPACK_DLL __declspec(dllimport)
-#endif
-#else
-#define DLPACK_DLL
-#endif
-
-#include <stdint.h>
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*!
- * \brief The DLPack version.
- *
- * A change in major version indicates that we have changed the
- * data layout of the ABI - DLManagedTensorVersioned.
- *
- * A change in minor version indicates that we have added new
- * code, such as a new device type, but the ABI is kept the same.
- *
- * If an obtained DLPack tensor has a major version that disagrees
- * with the version number specified in this header file
- * (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter
- * (and it is safe to do so). It is not safe to access any other fields
- * as the memory layout will have changed.
- *
- * In the case of a minor version mismatch, the tensor can be safely used as
- * long as the consumer knows how to interpret all fields. Minor version
- * updates indicate the addition of enumeration values.
- */
-typedef struct {
-  /*! \brief DLPack major version. */
-  uint32_t major;
-  /*! \brief DLPack minor version. */
-  uint32_t minor;
-} DLPackVersion;
-
-/*!
- * \brief The device type in DLDevice.
- */
-#ifdef __cplusplus
-typedef enum : int32_t {
-#else
-typedef enum {
-#endif
-  /*! \brief CPU device */
-  kDLCPU = 1,
-  /*! \brief CUDA GPU device */
-  kDLCUDA = 2,
-  /*!
-   * \brief Pinned CUDA CPU memory by cudaMallocHost
-   */
-  kDLCUDAHost = 3,
-  /*! \brief OpenCL devices. */
-  kDLOpenCL = 4,
-  /*! \brief Vulkan buffer for next generation graphics. */
-  kDLVulkan = 7,
-  /*! \brief Metal for Apple GPU. */
-  kDLMetal = 8,
-  /*! \brief Verilog simulator buffer */
-  kDLVPI = 9,
-  /*! \brief ROCm GPUs for AMD GPUs */
-  kDLROCM = 10,
-  /*!
-   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
-   */
-  kDLROCMHost = 11,
-  /*!
-   * \brief Reserved extension device type,
-   * used for quickly test extension device
-   * The semantics can differ depending on the implementation.
-   */
-  kDLExtDev = 12,
-  /*!
-   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
-   */
-  kDLCUDAManaged = 13,
-  /*!
-   * \brief Unified shared memory allocated on a oneAPI non-partititioned
-   * device. Call to oneAPI runtime is required to determine the device
-   * type, the USM allocation type and the sycl context it is bound to.
-   *
-   */
-  kDLOneAPI = 14,
-  /*! \brief GPU support for next generation WebGPU standard. */
-  kDLWebGPU = 15,
-  /*! \brief Qualcomm Hexagon DSP */
-  kDLHexagon = 16,
-  /*! \brief Microsoft MAIA devices */
-  kDLMAIA = 17,
-} DLDeviceType;
-
-/*!
- * \brief A Device for Tensor and operator.
- */
-typedef struct {
-  /*! \brief The device type used in the device. */
-  DLDeviceType device_type;
-  /*!
-   * \brief The device index.
-   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
-   */
-  int32_t device_id;
-} DLDevice;
-
-/*!
- * \brief The type code options DLDataType.
- */
-typedef enum {
-  /*! \brief signed integer */
-  kDLInt = 0U,
-  /*! \brief unsigned integer */
-  kDLUInt = 1U,
-  /*! \brief IEEE floating point */
-  kDLFloat = 2U,
-  /*!
-   * \brief Opaque handle type, reserved for testing purposes.
-   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
-   */
-  kDLOpaqueHandle = 3U,
-  /*! \brief bfloat16 */
-  kDLBfloat = 4U,
-  /*!
-   * \brief complex number
-   * (C/C++/Python layout: compact struct per complex number)
-   */
-  kDLComplex = 5U,
-  /*! \brief boolean */
-  kDLBool = 6U,
-} DLDataTypeCode;
-
-/*!
- * \brief The data type the tensor can hold. The data type is assumed to follow the
- * native endian-ness. An explicit error message should be raised when attempting to
- * export an array with non-native endianness
- *
- *  Examples
- *   - float: type_code = 2, bits = 32, lanes = 1
- *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4
- *   - int8: type_code = 0, bits = 8, lanes = 1
- *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
- *   - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, the underlying storage size of bool is 8 bits)
- */
-typedef struct {
-  /*!
-   * \brief Type code of base types.
-   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
-   * footprint, but the value should be one of DLDataTypeCode enum values.
-   * */
-  uint8_t code;
-  /*!
-   * \brief Number of bits, common choices are 8, 16, 32.
-   */
-  uint8_t bits;
-  /*! \brief Number of lanes in the type, used for vector types. */
-  uint16_t lanes;
-} DLDataType;
-
-/*!
- * \brief Plain C Tensor object, does not manage memory.
- */
-typedef struct {
-  /*!
-   * \brief The data pointer points to the allocated data. This will be CUDA
-   * device pointer or cl_mem handle in OpenCL. It may be opaque on some device
-   * types. This pointer is always aligned to 256 bytes as in CUDA. The
-   * `byte_offset` field should be used to point to the beginning of the data.
-   *
-   * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
-   * TVM, perhaps others) do not adhere to this 256 byte aligment requirement
-   * on CPU/CUDA/ROCm, and always use `byte_offset=0`.  This must be fixed
-   * (after which this note will be updated); at the moment it is recommended
-   * to not rely on the data pointer being correctly aligned.
-   *
-   * For given DLTensor, the size of memory required to store the contents of
-   * data is calculated as follows:
-   *
-   * \code{.c}
-   * static inline size_t GetDataSize(const DLTensor* t) {
-   *   size_t size = 1;
-   *   for (tvm_index_t i = 0; i < t->ndim; ++i) {
-   *     size *= t->shape[i];
-   *   }
-   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
-   *   return size;
-   * }
-   * \endcode
-   *
-   * Note that if the tensor is of size zero, then the data pointer should be
-   * set to `NULL`.
-   */
-  void* data;
-  /*! \brief The device of the tensor */
-  DLDevice device;
-  /*! \brief Number of dimensions */
-  int32_t ndim;
-  /*! \brief The data type of the pointer*/
-  DLDataType dtype;
-  /*! \brief The shape of the tensor */
-  int64_t* shape;
-  /*!
-   * \brief strides of the tensor (in number of elements, not bytes)
-   *  can be NULL, indicating tensor is compact and row-majored.
-   */
-  int64_t* strides;
-  /*! \brief The offset in bytes to the beginning pointer to data */
-  uint64_t byte_offset;
-} DLTensor;
-
-/*!
- * \brief C Tensor object, manage memory of DLTensor. This data structure is
- *  intended to facilitate the borrowing of DLTensor by another framework. It is
- *  not meant to transfer the tensor. When the borrowing framework doesn't need
- *  the tensor, it should call the deleter to notify the host that the resource
- *  is no longer needed.
- *
- * \note This data structure is used as Legacy DLManagedTensor
- *       in DLPack exchange and is deprecated after DLPack v0.8
- *       Use DLManagedTensorVersioned instead.
- *       This data structure may get renamed or deleted in future versions.
- *
- * \sa DLManagedTensorVersioned
- */
-typedef struct DLManagedTensor {
-  /*! \brief DLTensor which is being memory managed */
-  DLTensor dl_tensor;
-  /*! \brief the context of the original host framework of DLManagedTensor in
-   *   which DLManagedTensor is used in the framework. It can also be NULL.
-   */
-  void * manager_ctx;
-  /*!
-   * \brief Destructor - this should be called
-   * to destruct the manager_ctx  which backs the DLManagedTensor. It can be
-   * NULL if there is no way for the caller to provide a reasonable destructor.
-   * The destructor deletes the argument self as well.
-   */
-  void (*deleter)(struct DLManagedTensor * self);
-} DLManagedTensor;
-
-// bit masks used in in the DLManagedTensorVersioned
-
-/*! \brief bit mask to indicate that the tensor is read only. */
-#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)
-
-/*!
- * \brief bit mask to indicate that the tensor is a copy made by the producer.
- *
- * If set, the tensor is considered solely owned throughout its lifetime by the
- * consumer, until the producer-provided deleter is invoked.
- */
-#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)
-
-/*!
- * \brief A versioned and managed C Tensor object, manage memory of DLTensor.
- *
- * This data structure is intended to facilitate the borrowing of DLTensor by
- * another framework. It is not meant to transfer the tensor. When the borrowing
- * framework doesn't need the tensor, it should call the deleter to notify the
- * host that the resource is no longer needed.
- *
- * \note This is the current standard DLPack exchange data structure.
- */
-struct DLManagedTensorVersioned {
-  /*!
-   * \brief The API and ABI version of the current managed Tensor
-   */
-  DLPackVersion version;
-  /*!
-   * \brief the context of the original host framework.
-   *
-   * Stores DLManagedTensorVersioned is used in the
-   * framework. It can also be NULL.
-   */
-  void *manager_ctx;
-  /*!
-   * \brief Destructor.
-   *
-   * This should be called to destruct manager_ctx which holds the DLManagedTensorVersioned.
-   * It can be NULL if there is no way for the caller to provide a reasonable
-   * destructor. The destructor deletes the argument self as well.
-   */
-  void (*deleter)(struct DLManagedTensorVersioned *self);
-  /*!
-   * \brief Additional bitmask flags information about the tensor.
-   *
-   * By default the flags should be set to 0.
-   *
-   * \note Future ABI changes should keep everything until this field
-   *       stable, to ensure that deleter can be correctly called.
-   *
-   * \sa DLPACK_FLAG_BITMASK_READ_ONLY
-   * \sa DLPACK_FLAG_BITMASK_IS_COPIED
-   */
-  uint64_t flags;
-  /*! \brief DLTensor which is being memory managed */
-  DLTensor dl_tensor;
-};
-
-#ifdef __cplusplus
-}  // DLPACK_EXTERN_C
-#endif
-#endif  // DLPACK_DLPACK_H_
diff --git a/cuda_core/cuda/core/utils.py b/cuda_core/cuda/core/utils.py
deleted file mode 100644
index 3debe1df..00000000
--- a/cuda_core/cuda/core/utils.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from cuda.core._memoryview import StridedMemoryView, viewable
diff --git a/cuda_core/examples/saxpy.py b/cuda_core/examples/saxpy.py
deleted file mode 100644
index 7d296deb..00000000
--- a/cuda_core/examples/saxpy.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-import sys
-
-from cuda.core import Device
-from cuda.core import LaunchConfig, launch
-from cuda.core import Program
-
-import cupy as cp
-
-
-# compute out = a * x + y
-code = """
-template<typename T>
-__global__ void saxpy(const T a,
-                      const T* x,
-                      const T* y,
-                      T* out,
-                      size_t N) {
-    const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x;
-    for (size_t i=tid; i<N; i+=gridDim.x*blockDim.x) {
-        out[tid] = a * x[tid] + y[tid];
-    }
-}
-"""
-
-
-dev = Device()
-dev.set_current()
-s = dev.create_stream()
-
-# prepare program
-prog = Program(code, code_type="c++")
-mod = prog.compile(
-    "cubin",
-    options=("-std=c++11", "-arch=sm_" + "".join(f"{i}" for i in dev.compute_capability),),
-    logs=sys.stdout,
-    name_expressions=("saxpy<float>", "saxpy<double>"))
-
-# run in single precision
-ker = mod.get_kernel("saxpy<float>")
-dtype = cp.float32
-
-# prepare input/output
-size = cp.uint64(64)
-a = dtype(10)
-x = cp.random.random(size, dtype=dtype)
-y = cp.random.random(size, dtype=dtype)
-out = cp.empty_like(x)
-dev.sync()  # cupy runs on a different stream from s, so sync before accessing
-
-# prepare launch
-block = 32
-grid = int((size + block - 1) // block)
-config = LaunchConfig(grid=grid, block=block, stream=s)
-ker_args = (a, x.data.ptr, y.data.ptr, out.data.ptr, size)
-
-# launch kernel on stream s
-launch(ker, config, *ker_args)
-s.sync()
-
-# check result
-assert cp.allclose(out, a*x+y)
-
-# let's repeat again, this time allocates our own out buffer instead of cupy's
-# run in double precision
-ker = mod.get_kernel("saxpy<double>")
-dtype = cp.float64
-
-# prepare input
-size = cp.uint64(128)
-a = dtype(42)
-x = cp.random.random(size, dtype=dtype)
-y = cp.random.random(size, dtype=dtype)
-dev.sync()
-
-# prepare output
-buf = dev.allocate(size * 8,  # = dtype.itemsize
-                   stream=s)
-
-# prepare launch
-block = 64
-grid = int((size + block - 1) // block)
-config = LaunchConfig(grid=grid, block=block, stream=s)
-ker_args = (a, x.data.ptr, y.data.ptr, buf, size)
-
-# launch kernel on stream s
-launch(ker, config, *ker_args)
-s.sync()
-
-# check result
-# we wrap output buffer as a cupy array for simplicity
-out = cp.ndarray(size, dtype=dtype,
-                 memptr=cp.cuda.MemoryPointer(cp.cuda.UnownedMemory(int(buf.handle), buf.size, buf), 0))
-assert cp.allclose(out, a*x+y)
-
-# clean up resources that we allocate
-# cupy cleans up automatically the rest
-buf.close(s)
-s.close()
-
-print("done!")
diff --git a/cuda_core/examples/vector_add.py b/cuda_core/examples/vector_add.py
deleted file mode 100644
index 8248ad3b..00000000
--- a/cuda_core/examples/vector_add.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-from cuda.core import Device
-from cuda.core import LaunchConfig, launch
-from cuda.core import Program
-
-import cupy as cp
-
-
-# compute c = a + b
-code = """
-template<typename T>
-__global__ void vector_add(const T* A,
-                           const T* B,
-                           T* C,
-                           size_t N) {
-    const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x;
-    for (size_t i=tid; i<N; i+=gridDim.x*blockDim.x) {
-        C[tid] = A[tid] + B[tid];
-    }
-}
-"""
-
-
-dev = Device()
-dev.set_current()
-s = dev.create_stream()
-
-# prepare program
-prog = Program(code, code_type="c++")
-mod = prog.compile(
-    "cubin",
-    options=("-std=c++17", "-arch=sm_" + "".join(f"{i}" for i in dev.compute_capability),),
-    name_expressions=("vector_add<float>",))
-
-# run in single precision
-ker = mod.get_kernel("vector_add<float>")
-dtype = cp.float32
-
-# prepare input/output
-size = 50000
-a = cp.random.random(size, dtype=dtype)
-b = cp.random.random(size, dtype=dtype)
-c = cp.empty_like(a)
-
-# cupy runs on a different stream from s, so sync before accessing
-dev.sync()
-
-# prepare launch
-block = 256
-grid = (size + block - 1) // block
-config = LaunchConfig(grid=grid, block=block, stream=s)
-
-# launch kernel on stream s
-launch(ker, config, a.data.ptr, b.data.ptr, c.data.ptr, cp.uint64(size))
-s.sync()
-
-# check result
-assert cp.allclose(c, a+b)
-print("done!")
diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml
deleted file mode 100644
index cf1e5b4e..00000000
--- a/cuda_core/pyproject.toml
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-[build-system]
-requires = ["setuptools", "Cython>=3.0"]
-build-backend = "setuptools.build_meta"
-
-
-[project]
-name = "cuda-core"
-dynamic = [
-    "version",
-    "readme",
-]
-requires-python = '>=3.9'
-description = "cuda.core: (experimental) pythonic CUDA module"
-authors = [
-    { name = "NVIDIA Corporation" }
-]
-license = {text = "NVIDIA Software License"}
-classifiers = [
-    "Development Status :: 3 - Alpha",
-    "Intended Audience :: Developers",
-    "Intended Audience :: Science/Research",
-    "Intended Audience :: End Users/Desktop",
-    "Natural Language :: English",
-    "License :: Other/Proprietary License",
-    "Operating System :: POSIX :: Linux",
-    "Operating System :: Microsoft :: Windows",
-    "Topic :: Education",
-    "Topic :: Scientific/Engineering",
-    "Topic :: Software Development :: Libraries",
-    "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.9",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
-    "Programming Language :: Python :: Implementation :: CPython",
-    "Environment :: GPU :: NVIDIA CUDA",
-    "Environment :: GPU :: NVIDIA CUDA :: 11",
-    "Environment :: GPU :: NVIDIA CUDA :: 12",
-]
-dependencies = [
-    "numpy",
-]
-
-
-[tool.setuptools]
-packages = ["cuda", "cuda.core"]
-
-
-[tool.setuptools.dynamic]
-version = { attr = "cuda.core._version.__version__" }
-readme = { file = ["README.md"], content-type = "text/markdown" }
diff --git a/cuda_core/setup.py b/cuda_core/setup.py
deleted file mode 100644
index 862d38d3..00000000
--- a/cuda_core/setup.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
-#
-# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
-
-import os
-
-from Cython.Build import cythonize
-from setuptools import setup, Extension, find_packages
-from setuptools.command.build_ext import build_ext as _build_ext
-
-
-ext_modules = (
-    Extension(
-        "cuda.core._dlpack",
-        sources=["cuda/core/_dlpack.pyx"],
-        language="c++",
-    ),
-    Extension(
-        "cuda.core._memoryview",
-        sources=["cuda/core/_memoryview.pyx"],
-        language="c++",
-    ),
-    Extension(
-        "cuda.core._kernel_arg_handler",
-        sources=["cuda/core/_kernel_arg_handler.pyx"],
-        language="c++",
-    ),
-)
-
-
-class build_ext(_build_ext):
-
-    def build_extensions(self):
-        self.parallel = os.cpu_count() // 2
-        super().build_extensions()
-
-
-setup(
-    ext_modules=cythonize(ext_modules,
-        verbose=True, language_level=3,
-        compiler_directives={'embedsignature': True}),
-    packages=find_packages(include=['cuda.core', 'cuda.core.*']),
-    package_data=dict.fromkeys(
-        find_packages(include=["cuda.core.*"]),
-        ["*.pxd", "*.pyx", "*.py"],
-    ),
-    cmdclass = {'build_ext': build_ext,},
-    zip_safe=False,
-)
diff --git a/docs_src/Makefile b/docs_src/Makefile
deleted file mode 100644
index d0c3cbf1..00000000
--- a/docs_src/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line, and also
-# from the environment for the first two.
-SPHINXOPTS    ?=
-SPHINXBUILD   ?= sphinx-build
-SOURCEDIR     = source
-BUILDDIR      = build
-
-# Put it first so that "make" without argument is like "make help".
-help:
-	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs_src/environment-docs.yml b/docs_src/environment-docs.yml
deleted file mode 100644
index 2a3a8ad3..00000000
--- a/docs_src/environment-docs.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-name: cuda-python-docs
-channels:
-  - conda-forge
-dependencies:
-  - cython
-  - myst-parser
-  - numpy
-  - numpydoc
-  - pip
-  - pydata-sphinx-theme
-  - pytest
-  - scipy
-  - sphinx
-  - pip:
-    - furo
-    - myst-nb
-    - enum-tools
-    - sphinx_toolbox
-    - pyclibrary
diff --git a/docs_src/make.bat b/docs_src/make.bat
deleted file mode 100644
index 9534b018..00000000
--- a/docs_src/make.bat
+++ /dev/null
@@ -1,35 +0,0 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=sphinx-build
-)
-set SOURCEDIR=source
-set BUILDDIR=build
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
-	echo.
-	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
-	echo.installed, then set the SPHINXBUILD environment variable to point
-	echo.to the full path of the 'sphinx-build' executable. Alternatively you
-	echo.may add the Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.http://sphinx-doc.org/
-	exit /b 1
-)
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-
-:end
-popd
diff --git a/docs_src/source/_static/images/Nsigth-Compute-CLI-625x473.png b/docs_src/source/_static/images/Nsigth-Compute-CLI-625x473.png
deleted file mode 100644
index 9895798f7ccaa059cc47a21364630be10c1d95e1..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 339615
zcmV)LK)Jt(P)<h;3K|Lk000e1NJLTq00MCU00G$u1^@s6=_+j|00009a7bBm000ie
z000ie0hKEb8vp<x07*naRCt`+y?K}&MZNg_sXk}+J(-zIGMVgqAS($8kcE8{WM5<x
z5m1(kpkDVYii-PnyLzuz{3!^62qK6=AVPux0$Cv>WS?Xv$t0O1GnxG?UGE>=RbAEH
zXC^^^@AJHG=gFKtT~*(GtLo}LV(j>dQVI}K0s<)j|6B-=L=Ufv-y#G80O=3&zL_w*
zR{xJMU?4F@gH*%)RvB~_1`Hu&?mjXgLD3Yz3`i5g=fNL`^mq??83YFd0v3ebGmw<%
z=X)RwjmsuD=;g~-YTgCHZ<P_Kfjn#uEJQ(1#fJ!FFAUmZNtQ0P<7Y9Uq=VPjZbdr6
zhbIlpVH%{#00@)XFwHjYTI8BGnFF8KSqdekK<2&#EgLiWltM{WL6Z>(0Ug*a(ud{S
zk?`?j`ha<a^o)foq&MBS9~%mNH|1pWU`kjbB1SF^EW)@Zd^-`ozu6ECNye}$wQa@G
z8t~}Dl+uek76lmbK<UBs6Pp4Vidn+9344TXGa7#cW=M~gKH|AnJZl)zhiBl3*q_xN
zvVVMQH~BWykkbz^<z)JQgkq%hV~Wu~B-I!_+qPiR5m2Jyrb%lkE*c^Z>o%vITlSdK
z`h1?v*sa=>7Z|?m+n(@=v3xmXUqZF3NFhQzS+N1jhfN;sychNV09r0SeccH(f3ZI7
z<J33|TL^n}PFY&9HxvQP+!7>4vqwMh`_tiZbTML0RIXug(fAkP39xlx+io}y#Mz08
zL*aJm*Dhfni)M3!4?hrEu_dGrF?Pa4DWyv`>jL0734{|0biRSBz=#f5ULPyn2$f2G
zW_;UGV*;lUI*EplppXepq$|m~K8=ZlM|>y<TQGvU%Zk4qOVI!xZ!ozEjZi_M9n@EH
zsBp@ilye<XW!kQ77@Lx(PCi~l3d>N@70BGjq2vJuoH%jhDj=dpI53~_P9H>=aIqXt
zMj_nBgu<pT7Y%W=E4=YZ!c5Kqi^|9_gslz{0-wv4!tL?&52%l@!JFa?IlrZQkqf4Y
zKXg5(edppe?&!x>B*Ar!R#@F5vBRQaV7%c8WZVQKSFk<)v0;Auqq9Fd_#nO(2lrU?
z6YUqo9Ua{<6I(FR><_X{b5v4sICJ?)nD^W~Ms?=nw~97{J{iIhQ?N;cW7|vn`^2zi
z!&j9YMStT%_+txBn<}r5fio;8D{g?5aSV|tO{-#8(W^S-j#APH6+v851(h^t3gW9L
ze{+&;f0~|1OaV@a2LG==P^cW~BUckeO=Coo($<2i`eS4oEJlU$iU>!EuwCZ%$!KWv
zY4_!ms2W^-fAQq=dFrR1<HQ+raQCfeS8ERfD5)LKIj2mbsc|<MDUc$`)VZf~*8Hg)
z-n*NQ-VB9R!#U@ivza?%GMTm(4tDf{6pWd^fD6t%l`=ZmbD#|=CH0f$a^bm)D0kc0
zbD&+5n=Gl}oO93MX!902GYKZnJdLv!pUUXkBAS}^l6D0{rk=(<x8KAqcYlK3b+5AH
zNRK6i@C->MVr8G`>p=iY7FRK9Xc=usyFq|cI)qcEk7WP8gJcZT#QZcA?X?SEVS<K<
zCvo<f3z;-xF#DVL(3AEYNEb&^cw-Yq%vp2}i)K&Y=z+bob*B)3D_t^~tS$<L0GuM`
zop~k&M_V}Dl_rs2%=D8Na{8%rD3P7)Yi*~fY9tq3dJ*$x&t%4o>C_gc*}eB5u6CS&
zZzsQud8ePloQZX`H1DA|8_XK5B83DKXPw5G^G~F!bstB%QmB<Dv?2ROl`Lk?;?pVZ
zIY?_~O3NdcU3#;~7}=^r(IO0aTSlw`pSmt|V9`+_5WIodq$fn9q0w*=1VmS`G+qVM
z2=>yv9`v<ljlPb9g#uxTHEe4bj;tpQCXEV~Hu!*EVeu=Cz5#t69mWMqeoEdxv8G<6
zAAUpnu~(3cWM#=;F&xkpT=K4aUM*|^XnzXe=>%xNjs*3H?;2VU$r^JN&y1oLUk1(~
z(}l(c`ia94joBf3iOR<w8)wI%1Y_Yf3kPAEM9@wct}ipKD^mtR{6nu!i;zr}Ek$V#
z?A5q#$U%hp&~jFMn0^@}AHTl|$Ku1nYtW}J2}3yOex`i~Vpp_<{Km9(O$5Gut0XQW
z_8WymAV&tUJ%6k)Wn{#nV8SW_krD8u#G*<jp?uDW+bX9C5iNkwDiRjW0sD#vX@1l6
zX@9m_L-O|d)GMSRUTlqd{j_Il85+H`JdG}feS7tVswqT>-N5#O7%z1kQzmPLGBXbv
zxth>wo!P|*>wcTSL4t7dXj=0EU%&rrEZ@=w0urSSeEyr?<*tuhK(XV3%rfTGtGWBb
zC(ztc##g_34`srosA>pBnKqhQ(|qQ^Z!)elLFJTl`Pyw~ux-;8F1-6|oHf3RlF_H}
z)w?cX-}Xk%zxA^$not8k)^#}b@{e=x$FE~Vg+L_osH>@>rD+#~W?jbTZaAHUkQ~^w
zg6}-|EnZyTLedG?5C$^{aI;y`=`^WSnoQP3N{O4zcm)mMW;0}6iR)&lnzWE>&NzWg
zDn-_nB&$Yo$$6)c%%n-D(@1|sNV-{4sT8Sn#;>#`nM|5Y#>3;f5`;r_Z8hyJ%}_s|
zFW-G7B}rdlDpCS&)@AXvcX83Q!R+g;;GTOvPF-<=M9C1Yz3o08xc`2>{*})#d(<E#
z`J8_3T|DrGdzdl20)X<m(M%g%!@ia_F2D2hoHu<0nckyp*|d>&-`PO2{sd->u0^_D
z^)I!M1c)SOT=PlJom7HbH;4OfxrjnxMsVL)q|2E3mvY<1vpA9}=k9xMV|ZDfKkxDQ
z&@(6}pM_W6&I4cg6thQG`917D`qRH|IZm!a<mh5vxB95!!^iDkm*KH-9&ViL{wADU
z^w{1M2~@Ft^r=ToxRw>FBGLV&)gA-_8NfDkpr$1rZzOn`F-?TVMr0$j1xOKmdNV_x
z6q661Oc|Do1vgB54)OITTa~Or?U7lz{wj$axpZb0kB0pRcBBehb8M)OkoH*BFRVi;
zgMvo&2}g31zMM1*jY<1~wEJ29;p<N-JVp|dMt|x5Q$;!k44rgy+MN%@)PZHMp%`R*
zs<<NsND<IrGf6<98F#`lz?6Zq_n5YU4q@t9NkH>s`Y^um4L=sa7(Fo##mFFT=b%f5
z;q}u$w%yj1h|z)S7kAkN1My?SF$QAdaeQgkFHRGHcKaOhBYFgSip3$wEmMM(=v?$!
zg`|k`6qsBNU$JBoPnyD`;3tC#n#7es1z1&I&+?}pX6Oae0C2Mo3r?TIn=Agp$dUrK
zWXCYAp^z0jR`cXGmt;u|=U;mz6$OH}{6abpG_rQx8m1rYqc9&vj-E-&x+T22Vi}zc
zr*q-S<9KEBeyS&)#>|p^tlU1r3l8ZXUVY&iTq&7cJe8BCRr$GBTE3vgs#ck#M7kLk
z-tc*@npVbvE=f=G+x*vWUf{HwKSOQfGd#L%JM*siH>UT##;b=Kxb^1CnOc?N+hb1R
z_dovyTY!rC6ZrIhe49~2;RoORF7NK`;p}Vg;Np|VLh1;=`T5USyI=5`FMf{vLoHO*
z59N)ge!;UVcd};rU&*?XVMnVt`?3+_IUyqu;K~xFj~~P<e|VTBoBB9u-dC73qMG7)
zSCiVbgeO*4F?IYz7N4_#O}~AGWZOpm{EuN8L|fB)JpAYeq?@JW#KnxPt0J>*HyhTj
zM&=J<(TCHlczZo*1dg^Q?<84J$=nlaSi5`+BD0CYvrcA6`E%@U&ssFfY=SxSPGr-@
zwUo&;d(+dHI;M)Ibq7MTPl=OPOrm`&&nz1fGjk&J8DpB^qUr-5A^;|G_#i;ctx~Hw
zs2cpgG5tRvy)j|V`p_%30|w1tP|O7QUw4F9*TE;Qlg)}I%39&E2-OwN2cqSlJB|lX
z$bln<r30v)wPZ}6Fj{kkEmpQ4{D{=eVY{~4<83aI7(=Tx!I5KIOk{r8UsV#qZQ(KF
ziRj04L@b6}qW$Nkoh!=pm!SK{9$DSXiSh$bA5zHwt+0^;T?A)b`@a%bqz}2uBeUH+
zV)WV{exr+sp((PMehe`!zKgIrI5J!n`NzS{Z}QTbof@m8fZFl|oo*%bXO5(;vyaM(
zQb2IRS=aK@|NI_hjW4nBaF#>wub`=HKL7cnpE9>#3vce|;K-r<)HIBtFj>I338NV@
zs1i3(&J~v|WXba{lQIgl{Nh^v?fxI~wVO`n&wu>~S${PvwmKDFfv1+1^UJDeeP=1(
z{Kog`ZCJp(@ujSJV-2$wE~2Ekg2i)3^2VyIY+3nNe)8~Ncx&mCeD(hCv0;BFAQ8QX
z`R&gh;IVZnPMz97MZ+{MoLR>AzxoCKXUQI}{O}_36M~Y`65e>`A-?~+Wt_5LJ_U}%
zNtSW_y<g+|pT2^BEd4tjX+ze&Rt3Er?39ccT0>b?4PzSWsIDKyn4(U0Fpeu0j;Ax7
zpuV;SC)>`FzrM)f9;3+e_aK%Gox#F}6tAw|fs0pc)J>Snkn~PAG_@P~LwW_S1SeTa
zc``*_)d(gJub?lHPf=mMG5b@W1q?1p&?m~7dBO;K(;3RjN<+rw75v%0HkQ7)gx2n?
z-7XE7GJvhx6itZWdt_|rGulq5T?9NBV{_TKHC7sQnX5>CTZAVAzs*x*mJ#U-(SFij
zh0&`HQm<Ra%R}Y=2ymNUsc`8&((0qH%Amg<Cy14uO^+}Y5ibK{_8Z91)@xLkhFvH|
zEgGdwtDfP7@XJs?OTPX5GxQN>n;8A9)gNQUCE`~yM7O@RZOhHV7m|O(24iuA`dYdl
z7)IRK{yv4_cQ%|bd=0!GpAYn_$A;t)=rNFK$ie(YO+!Bs-<5<Z)7FubS36e4^PEF|
zVcCb`PPng4hXvZ^C&os@cr?vI?b!<HBSc3WKXLNSdBHy)sHn0gc;O@Nnj$n7R<8a=
zpPZ160ly{hm|-G_ae8dHfHh)-HL=jK%Y-XKAY|xM0mWOa318Ir)vY5#R|@EBVdc7A
z6c*&s+t&w3-e2}d?z{I>Y$!ODGbfjG!dcf+wE0Os{h53D)AnkvSTvTsYo6zY-IaX(
zTlX_Id6a!^?Tngp5%WgmF=p0jjIOWaq`5OG5RmC@<$u2Q1wQqSC%N**%g9fP9IEr<
zd$2nahhFyVJ480!&5_OwgR06o)VPY>d1G04%3`Vxtz%1Tk0%U?@=TqO+79id?Pw2e
zN4m)`Oi)r*gWJBB16{owXxvS~plTA1Al-L_)+4?2b@w3h@^J*nY!`p{uWxbZ=YPt{
z7hOtCX#xqjZWcG|sm@8bJomfbQ8o7pzHsYh^tK<Q&rOg`bu)Na9h+9Z!qO#wXUCBq
zf3g{#)j_hnfxGXzhNX}Gn)h2e072GGFn`fpHm_Pmdv6wX3)juMxXJ+CKKk4Q?JYZa
zf7fA>GD|w+?f#Q)mTcB9tjN%lmbg6!Si4~tj^mI@_4)F4$z-!CUlLbdd-1})r6Z(y
z3r2J1U9)cCcrd-op_L5<%P-i4iOa9J6drS*glQjoLEP89zSS)PUeq2ebAiN<fv8^k
z2{g@;vFF0{M-l9M3*v&>$6<8U%i}`Jz_fZVMhM+EkW%nnv4tUPp__etGLXI2p$tT9
z(Jy^iigt;u7on1_L6^EOYs4k#Te1pEL3lE7`IC*{fhj{y#;r3oDNwqS#<>SOfU9jw
z$wT_*cytOG`mg@U0A9c-zRd;Kw5`a%#!UI?6=^Agu^P5*$L4+`(|0sJKTbvV2Pu13
z_qUCyei)C(zx_0+l8co`G_5M}8!`wC#nXid!9bl$Tj$Dt6b#>9ls*I7R^ydV3;pD|
zf~|C<4}Z^<W<eDZTC%}@GfNjb1}PskuIktj*t-`Ggx#*>WyTmSKhuH&y-5SMen3a}
z^>=}qy0Al5#D;|NT^%aEXzQ}#aVUQH-P9A13Yk^n`fWiBGbc{i@mmBj!>CmwpSobz
z{f!BzaZ6wKM<S)or-tvXl0@-XlLIUsv{e~{^ml_FMWY_1<f8RrKQyCV(V{3g%)9LY
zuWGy+BSJg`mK28R#IJ8joMZtNWhEq&d6bovk)KSnYW;5JOse9gb^AGCa4(y8by7QG
z9HZ*0DX*$#XibGztO$pyL4zqSDq_&!N*oD-EN}nqF~0xfU(sD4Si5Eihqk@WZ=PC4
zM`stAOoraxKAgN#CQO<@RYf^<!|N$B{5OtHRRJr;va*EPbEi==bR^@4m9V$v2=uk{
z@`|0@anE(EdSw}Xt|zxtDn)5UB?Wm&9OGWLAzj)JHIb;D#KfW1OrA5Hu6=t*XJw?X
zNE9$;;zVkyDySVgjI#WsuMwAV^DgG&S1zU~;o(TN@8$7_{)>lSTt#2UUN&srL$bJ>
z{oA%O?vx9;`kG6bP~)v~7L}G!n3qRUNhw7INfJf1+<wm;G%b6Iw;S6j$V(!GB!AE-
z=8P)jt#w;){h;SM1uVYwCN5bp1xG@r?+EYjlZ?)5VN0r<g7#)w+fxWB88hbsZolSC
ziWA_76z{yhkBP%eSiP;4`U;mFyAFepl+~Ze9k*XkeObP5e}dwYQi=-lDJm|dxFBg6
zs~126^TRS%5W-KphVRC1t?})^vR2{iA9HVsL~XQ<yaK=+uc{kkPoNMIsrD~Pd*UFB
zzeu%V)##xkgB?bM?SDoQA~kxISao<6=0M+})iopP=)ZoTkp%1nMVb=6vHvb~NP(pT
z6Svlnk}V2X@W??}%;CK-rkIQ}OY^E>nRXJ?G=WS|_7o&+MPKBIj8{du3?|J&lkDkT
z2C_8q1&sOEL;Z}Mf_6}_X5{9%Nv90G9tZ1O=Sic6vfv9(21=l|ed`S8@u&4_mZH8~
zM9h<YBA7_nI*~@vs>T}f_6q`KhuYOtAJwiXF=*BH>kqQ^D}xlKWg7UQuw>%fl|WDM
z5P>_WC-}7=S^-B9Y{=W16=4t1zUeE=!XYp!A1$AF`|=wUTdH_f@xsto$f=@sWy#!#
zMTVbwYz46~JUEyrD0a-v_R0_K34rPP7N0TUQ5D7Nx|#%AtGu29wYs%Fb-Vd#h(S&2
ztpePzd|zF`Ltg>iQUo&bVzenRW5TA@9nKYo&O_o=MQ-4lK?favb-}IdS_Ff`dIEID
zb+c*u`uY%g`6L{{;4vq0*DaTm5e3|R*RA~Mr$1-SbH8T7$8O>7n`ZFLfB%@KUcvZb
z6S)5Tc@*Udw!H9PEZx#Urtx7`)PIcoKJyvcn_lLjW!rH@35zeifoa3a*|7XEmb|y0
zlq*^O$}Y0WQbtS|&s%SAAnjCf(ixX9wZ4K(M+@Km>BFS5(tEfdohI$S3dInYbqKeU
zuDns)b>~F3FMW*FyE_pM>{z#kgZoGE+IuZ}Z0p8<@ZM>+@VPI3ia-3~e|W#|C_5VW
zkwwta+Qgnz7rpy;@aw1F;?i61A$z2e|MSQ)ve`VEcJ8D*E6Mh>v14~WE=eZMJ&XAh
zhayt#JoJl)IMC(YVk{^wr?S%Zo+3d~Ts?;CZ@QTBY&(DY?Qhw!=OF9$!Kq{5(I=iG
zFCqAQPba-j6<6K-NoLd*z@;CfS3Jb4`)e36sEko(U&+jK1h4<;SN!?SEmYT!X3zRp
z*><qo2#kW_vMS1Z3IU1C_VN6qzu|@(uH>p@2fzL8BebWzu6adeR95!k_z`gZi;pw<
zBiD25wUb!>$4A(7z*`wi<P}m`Sw)`X1sEr<l8bM=l{v#pp!x<n<!^cXm5mtDk084Q
z*Mj#KnVHrw<fyIM7lp0=U^XOT`g&hvOcR(-HW~KhldM<Z;^0}N2z#JGhcG|=6J6Vx
z=Y|l>RY(yc%35L3mqQW@SC2|YAv!JjX}&~=CoDJ?t{`gq!O5&5{TBT=zO-Q)e3`Z&
zWejSozj-R6A3Ctd$VV8mKoC3~Ckz<EDCC4KGarVz(j-DWX<M|es};s@u@uN!1@!O}
zf*!p#|5l?)d@5muWE1CSfvjR)<Dqkgq6htOie(qNxD4YpZ94i*!4Av8>uD!0bL|oE
zSFV)om2X7lEDU=#H=FCstVh}Q5J#)YfwZM*z%?f!s<UVFmUKh0!KT>~#Q;7T_j-yj
zy`k$O;K5vpG;x~J4(!<OqjBiTtZ^|MhV+Rlq}9WAVcMcp1^#QVK85ufJR}zNo47+y
zL`(}c>@m()V&MkC)6jho6J$(Rg&RF_Gh>vUQ)F1JBIL`y&x^4WCd$Gw3;F4%FJbS2
zHeP=0CoJD`1dwDiSzq9SWFi6hw+1C7{CBk>z4y0C2~NV%D_X9bMY<B<Byb#$LN}Ae
z#d{Z>|E^d~tm|fR9Pe>X*UkC?#37OJ?)cVCS-{6GUCgkCTAq0DE4;S54H0mKL?Sa8
z&b{jahHrd=|6RHPkx1YOL1n{KuD{_1^0xe$AOG<U{bErmJzTB}iDUwS#C0W(<A8K=
zaXdPudPkigk#G<|)^)vp#u(|kxYFyV-(`pQ>JTXfj-y;o`tRivI6gclzl;UvUc{->
z$C5}L;CH|LEj!zKakE){JJd-eaDZ$!tCj8eJR&8o#KU34SU2k}J31n8cQ>0w`n;&w
zwnw|baU4Cn@$}%oJJ1LK*{t-(dH^jNeSO=jVEqJ2>(pqsW=OsZTg6RiaNITHHmThn
zz7{SV`Yk1+nBwEUWt(QXc`UGx1&K;ZB1V=m0CIyqb|S1x;kT|FJB(w;Z1)e13xgR@
zAwf4Sx41FB-2^sa+hH_?52V`xOPU;v*zlw;^xO$b2IPy-xCq_3&>ydEqWC}Pc(tNp
z$*Nz!jN*&%YORLmjtlWJ8b}to;!l{yV~9V5i>`rW>5qs^c;d$8bQEiZ8fDU=_iw4z
zoHMLA63xC+Z*%d#p`<NCW30k4mi`c$P(c<qLf^KJ$xCLthuVxWHop-Yjm<F%ysb|=
z3xw$j;H#V*wuqqJrcsQYFj1=F1kgo|rYlGM%n{d#VJCXTm8b@hY9?&Ya0Lm+r!kww
zHH4t|(nv|=&=C}NAEKqbOAjrr8Nv9vVz#`$g|4(~!8I#z`y6AbHrlQebX<FEVUo+(
ze14@wN*9DfLg2p=CA9R7A)6Q}%aF1Z)@Z<pr>Q^savc``!Nqu)#f|wV+YOXK@fy=U
z@c0475Ov3aWM_}>U$z6M3xzX2EX2Z(iMQ5x9kg=bjFVFinq$*hoJ<Eu0t4%Q0Gf}-
zUUk6ipJv;}j~#v-O#|EBvHNR(?f<Ng4@mQXws-74_<{V<^7jE^^)cei#osa0{DFM@
zc&1VV7c2dZ2@0}-@clD|JOdYc9|Zh>j2=gE`GF7&)VqJb{l9sLvEwI%?%4b3KU}bR
z4gR@NrTeQudVmOx@vr47SOXlUSNdc8bLF5GjY8nMS-ql_NO~($QcArtlt?7KU!QQj
z&G-^;1<_j(^N7l1GB}B(w^|rS79jk7B9ZV{-;Pb)Ih80SS%XH$5lGi1YpilQ!pWJe
zq?;v^brFt3!e1rxWRV5okW3_U1}HTH$YwJLCrQGIkz;>Z60eBPN<2J1Oa;#oUf~($
zJ+44j*FAVHj+4;*#L^yDl(;Su5wAbXj-*eQF&mQJYjqsQ$<3~mxUTE1#QH1JY6EaI
z$;CLbvPV~X`bs1a<`ct?{cKU(46^^Cp`YS4I*r47FvZ)FkVq!=lTEIhCF@F@gl7|R
z6A}w9G1qh9?e{R0^g#kU9OsJQ|2L1|p$~gCJF4@jlf<vq#QWy|?~V^zj~wqJoHpb5
ziMF}W5Nsv+7trzl*72{R<5=X1-A6wNqRRtm?Pb#-V`_-7Ui9~nK0X-n@`DARe^tTh
ze<TJ~SKqIntn>eCCIN)7Ubu|#uBlJe$NO&J@xRNUYyZrKZk+`iU-63>n?~tMPF!>e
zw_JY}=Pa1X(LLK~Y3s%*sN|9xZsj8vpMhv^W>-rawd3b-%g1iw;&T>Lo@r%!^C2XX
zWa&`u{QP~;xSqX7`-rnJ0k{Q&`1n1aWrS>D%br91+|skhhi@IK(pMBza`APy@R1Ln
zfzz><#+D=0j+@QRAH9i-&s{`?+s5|hgGL=3ShkdciSy3q_K$sp6NVPBW&1ArGJ+Eq
ze29<VbS<Y&AItt-+d15o3g|S9Mo35$4(7&tKEsIA4z}#;WXdTQaLWx>bI!uq^c-kp
z?~%@s90DF(DVTNk)!cZ-INo{peNrwIln>)$_k5D_L)+MOsM|LOn|HIwF^i6ZvRW>_
z_M=>X*@qZaA=$EH7a18ZPp}<MO3Cmka~M}8X*twk{cM<49?JfNuMzFV<d@a3X#R=p
z+to}qY@BM@I<JTYAG(&0UVAC0OdHL<#%<oOSA?7}y2S4BTTnKH%dfwMt3P}$Q%9Dw
zb<+-dvy!UelbJn!5Y2mAt=Uc3Mf%&P6W~?nR@4e+-YVD$htadn<o27d<D5k&BaiH2
z*TD{)yb{j2>SNq+`MH#(_Oorz;Q(-4@;7}sj<kLXhUj$zP(Kq7djqmfM|70(v>=YF
zzal5$;p`Df!!i9$SXk}Hk{oi=Wzzv@SixWAS2kY4-wD$Z6XSBOG9C}lxvKsC<ef`%
zW--)XuVeTwa?q`lZw|VTaTPG!KiE?i7yrU~9Dt7qqJi4`J3>`WEIi_bjX`iM#K7MM
zUgZtnVT;_Z71%$i=li|QrijL0<Bz^IF0kaIVxiQi;<x>%Zy2<Py%fOw)dqbRETj@6
z73e<ZovS#FV2%+4`;0A^LEv-Dff3AV#^A4lNeQKu)zl9gN@Zz*cfrYHP<1WCh7O^$
zAc>TcyrMEHO7cNU3QH>~D@;NnpUTP#YK9D@y0R3IB=U=?EGzO>*_;GbRYb-q7XSbt
z07*naR26}srS!S+pS#of(G!J4AW}zo?hlXf%&NoOapy;=O1jLv@FwO~w(^G;R&(hs
zw=$wMNivaT<&r<~n<ro9+>hVO3Dt$*6m#uepW>WT=1}K9X(^>G2Jf27py5-PKVu3D
z7tf|30SJfkiVCU*)lgSkP136gNaPn&URuJSnjzFxSCR0q2TBI%y39ELdgcyl<Ea<d
z@}cW*W^7df$wZd7mp;qGPyL-m*L{*%Lre8o&oDjX%&(oqZPzT|wZA+~V#;}3Ja-(0
zwd1+@szto=_(QziK7@~6x|n?b4S+?Z<&+f{XhNiLIC0^5ESNEkk+t3vu%z1f+tZKn
z%*qySzU^j)6eW;S;^Y-lRZ)h6dOd|p#jweocg|T%8QVb8kvMs!T>R17xp2We>Z^)8
z-Dtwqb9;eHnDUTLAs@Q&lT36P`PD<eW%bs5{!^F<DhJn6S6fY<<H0B>DWjsIlA%NE
zC@oAPrJ$^;hEvYIh()ufQeRg?fxm)OP+U&k(4kb66nOSnSWJa)XZc0tRFo8uEGT8{
z)Y)A6p|cq=yq=0;&t9dX*0+7fap*p{lgA(a4eO6q^NE`-rVv*<oSJZ0d}-TBaLOe&
zF^;YL>es(z#fDwD5{k;Im~q-UT(Dp^_4RcW<$157DJd(bq{O4W(x=@?7EoI^jGC%)
z{|D)z(C<?-s2m50aPk>cUCof%YAUO0w13&b(hETex0{##^n3oe^gV96_de?KBn>BB
z#3fS%fB4f<&iv@zOsGtv-!Byk3#JeHD)#**>^Y$^$_9*B64;6Ke_ce{?RuQ~dIT&R
zGx4hHS}*=ZK@fg}amG_{n$Dn)x^ti`LcQ8R(P+y;MEEpsy9M@O(kfARFoZ!12+Y?g
zNQ3vFJxq6qcSANRj0X5kx}<e3EdGP^8uT=EB(w~5oAMt+2bwlZH>Nyo`w_}6La*#F
z<z~v$v=2o$x#;%K%|vhZ%}uK*EA*WqsS~HrN!r#&ST_>eZdF_j_%Qnic~BkpcTJ!1
z-I9}Kd&0u0t|1Nj%%_<R8JPBC#`8ci!tQ72M?J|IXJdfhE-ZapKNBK+Z7JPK=|3Z3
zYf$=(D34O(rKtphy27MO_f>Q%+I+pL7@|4Xh!stKZyEGaU^Q2ML22DJQJ^dx8wH(y
zGTY9hLX+iPNt%PTkb@#mSGZETR8KgauiSAi+qSpB!FTxSV}GZp;RJ5JY9T#6Sw;>k
z;0F)<kiH=&@U`3K@xZr#$0xr0IhOw78?5C7e*K+WS@PVA)J&Mns}KK}wW;xZ_4c{^
z$CrLc-k61a^`;5@`&a&(Ua%`|-=~FnUBJ6<E(atfZUZTXgUo04i4DB>PBUkoSVLn+
zDHBFjuw=tp_6l$c2hrN?>L)8#|K&FnmfjTzPdSZ=+jW^RdoJ%U`!iJ&r_fNI&$d(r
zpZ)faaW^lc#i=2)bqNnW`xf;lUCcLbI+>NLHzLwU`TdiBqO~&>7zB`Y^O-fZo-JDs
zvS3Ozd%6l4KeCFyu3gVw0g2)oTDvmjB|<Yf|L?F-Q%E&$X3D~I*tcajGfx<eeCJ*E
zAMT>8vWk*I*tfrxtOS`T<|Frhi_!a^=l=hBp0tFLA(L1%sffR?+`=FM;Ie7;>j0Fd
zYUp#vkq`o;%cyx*@{Ny9=1ZUYDm#v*k%<x(pM4rzRxW4olrkkC-dXxb3Ja2vRWh}j
zYR+)X1)hS6q0AUt#2+49#_ogNzO5yhbN-EdWbr7D^yJaAV;MhwWC;te`wW*h9A(uW
z7+ZRP2Y>iDqo>Yd`h*cw4VlBDOp+zfzeMk#soZz>4P@I|DK3&c_KTmhw`dY~-*`H|
z|JCnVblt~kS^5ymn~Rx0|0IUhkK&9o7qj}cms!2zfFGp;xo7&iSpDV-kDpO{Ib&#n
z?Hd>YSLs;<K{~&nkmCFz+`djWy}O5WRxo<>6y{7D&xrCm&RpEbtABfe#ty-C_kNoR
zVmn**bZ~h4>-=#=GuPksX(pF+(3@Yzy5}C^nKk=aeBqT$sL7|WY6wlQJ<fAmEBWS^
zFQesX0mYrW$RzXl#Sec#)6t9}dI>Viwsmg+P+Xg&Gm|7KlAJJODjU}?=aTbgvUZot
zwDE&@|E>KYeWGvY=@ocOr#WsFX?@2(VHULdF#JiI_4Kl?sHC1jn{b1}yhRdHl7&~r
zIm0^D44vS%y9HmEK{Vz`Qfq}Jj4w=!eS1j51f2-Ne(zmXnZc%8ovR0ctyLj2X&8m2
z{Ul#dfQ46#6dbca5wUW!W3zc;M>7A4+12O}Ua3)hnZ=7qqn*g3bc0HiSbiPdtErm8
zy7B#4*`WSSZdEu6gfI#Ui!Wm}SeuD$SLV$UB4Y!4CNy3Tj#LW<@0xC97y(O$nOJrN
z_&?SzLmf=qzK_`&HS8{sQ9x=aH)tYE06`66(&xca?OILtT?o_Gjqe(~(nT(LhV4en
z&BkHtDP+f1{0aMlaTx7k*<;in!Uk^9Wky*i0HlbN{_Ft1l2x%&Qd-kMe%Bsee*TaA
z?^Dac5u{ri*|6&n$^0aP$DhiH!}4g``8p4+=;cTM^&Jkp^nX~n;|NG-ZCuA=k3P)9
zf8ETvXU*owjx}uV8On??)yz5LG*-RzGQCFC>+$3VWUE@0YM~I46<mMa#Vr5x(;Vze
zP+9<K3aKAjNh*_}u&BUyz9jQ6ybkxB=XmE}4>H@sd+%)~W4^J($)l!r2t(`Y7+P0L
zaek7lC}ZA<Ls|RQo9sMVzzGwEf`nAh0sj2xZ+Ym~Pcz}v#grF%PpTeiT+c)Q`%8ZE
z&=a(Fq$nz>qOPuvp>=iM%8AIMq(G3SfMG)_NoQONiVA%#C7E~L6(qO*oz;6fVlAMc
zu$XKrMfK2H5^kFO;$mc9CwmTMIrHKxSU9DcroF9RaLIJ@^e?~7&z@XP8i7a@a>fN`
zvuW894)$b?zzt4*IhSAYAyzMWmX>aBZ{6-y&+^6lf5Pt0v{%&4Sjb3sH?MEnr*-6}
zyV?By7JAKJ?h?`EZf$3V8k2dY6p21M(pg>bC5r~J_|$Pc`jhYR;17RA`IJ)`S(OBt
zV$IU$dFbKa(_Jx^q2*cDFMolT-`v2;SDxe1C;!HQF3G$z&mpzt@BHk){)c5-Qk;J3
z6b^1*%^zNE;`0xDjpALe@YmJ5Y2Uw@r=NbFEt}W#@Na&{s_pwVjp|m2AaL^H>akpQ
z#tFRe+~4SyPKYnztyY_}I0=wp*^|FwV|FZG`R4a|@bkA)SK_c^{Xcka=}O*x>+k&b
zk*8=p(&ME`C(X)dALr-4{BQoS^j(IIoz47)PX6=9zu=K4m$Uf%vv4wfY})ibJz0U1
zSIL4?XOZW)?A^MSmsY&Xwso(v;jmz6S&=`36ah~@FHT+w=U#RV+h2Hu=5&&h;(U5i
z4h_Sr>Fw#GxVR{)JKqOAUJXV1fkMwLr4cmra#}F6jogN^J{7%Y>7tN(^jq(2ir80<
z1RZL(@dq>nx>rHcrqiNL*loeNovch*sCNRYm+nUP`J&hqA(%FO!fG?y1>_p5?$3mZ
zB=cI3&*FSn^zYXL9J*Mu=rYcuxip~NpwGVj<rhey;?hE)YA+#T^l#8-Rtc(LmHujn
zZ`-Cld#g;+l9`#e{K0x$;5TtH=wGfvn6_uzgRUcddNAXV_DS?BH8tHnjJS2YkoV=5
zSI#bK!e-dGiW7Q*CUuc%x`AiIO5O%(igpvfo_5(Xvu!OvFTlocjW*(aWUM)X$i=g2
zjPtJmzVxfpU;!{(*TmQ`hW)8!ID^R0N?g=|bvly{jc-1|FWiNkam6Q?HfTTh{`*gv
zbJd;9%D%?`Jp5O#9x|Q0q&KSlU@H}sCA1zss%IgoR4=XznN%P7g+(Ok;-y#L<@yg@
zf>Y=4zmIG;FgvyYLNj*&DGIs#_Iq&Ndxoc1Y(*eA+%0jscJbWO4D&CUOjk!Y;BeyU
zS8>7EPQLe}7f89mOkd|1DWSNAi?6tn+Om8cC&iM-ALX5{8m3JcPyOXrQ#@`QU5BRd
z{DxJy*%Ya?L}pWnWP$hOtdtyTJxJR1H`+_d$mxst@Tn6JLeSQ{o<|;giNoCz(Yc$y
zzS_$v7mcN>qYH_{w8fWm?$|zl^3&((%_^(2jjF4wjr`(D{`$8i6xB_qr@f8p(Q}zq
zeSrHP_-~5FU&!aLUd*fOw$qVzD66Wbw5N}j!yOb3p1?V$P2=#eVrnPNpyF^hOIH7a
z?T0g5aQ$tRH^0I!Uwx0P?<{$RWelk;r)BScQnG{vix)Gwu9w@U)-ZKqIcJ~pDo?-i
z9zp%4Br23KVY`@Rd!+l?=*SjRU7XL(Ht!c9gvh5rWa&z0a8rGxq#!RZiIf?-ds2WT
z>%Xog$XQ;1b4gHCT*{~kbGg1~G>+_H!^VR+NV?kF$S*0Rqpgjs494oj7@3jzHYO$c
zrFGnN>y2!G=@DMtxF6v-noR+?2AqIT?_1x|y*&2c-{Y}FF*pDF11y+U#}g}>EPU!b
z+uOnZ_Ac+i8!5;yD5Y}92(Gy9dK>|(SFNLT@ObXI<wAb>gCEf|<TO5gUWq4e*Y$p#
z1Bn!Zgb<NEw^B+(qL77`-OQBiTE6?tDul@6NP9PRiFTg)^9n|ub_H$i9mF2?)ZMz4
zYvS2|%8!qTIro!=SIIx{5$M>z85*J`{9VLc7diR`*9O`X*I!YQn`V=afn{m84TNu?
z_AtN4+b>RbA%BZVWgtC(2)}boceU+B#z2oysO?#;=h#jZrZ(=XJ`Z1x$zk@P_~;L=
zJwBjsAgRTbW|6ftgN|Hy;|@dq=4v@+I1R;#m=)hJD7|v3#>lYGnLNeVfN?2^Plnti
zgyX$FBCCkUYWuk&V3>bzG8bO;wQN*`VB{dxTT+nHA~9SYdjDMpkDSV2_W-Xh`v;U&
zlaKdwUqN9p_2Xu7@(B$T73Nb=J%LYsWIFeJ^2^M>;SMHN7Xy-!Q%+&QyxE+0?#aBr
z{(YpAVEdX^s5<E~lG|RVx!ZF(De^e?Bmc%Vr%&~M&Px4`1-hU|x{@<)`Ya2p4)FGl
zPR5TLO;LW9H(uYs+>=j0R5UOuZy)b99%a;=5AmrhX7KVWYZyL$B9%o6Ajzl+lNnZ3
z#)$Eg7&f>BI`{C<&ws>&-}x@zeDFuSy<<Nkrp)K)x~KWcFMq?2e)vnuCeLC}L4u-+
z(VTn!LQXyVY|?wS(%GA_1?&s-{T0vfo$r2^Z-3`I{P&|T(&eUkYvo2}&YDdApazEL
zx3Y2P5k}9wkUK7$$!o8zrEcs5s*3ZlHsAXE@7wwg=^@h@?4&vQq*1J0vyG$e2S^N>
zz-e=4aoU1e^t2u#ot4N$A&WnJ6IYx$lO)i0=zYHS`7iN{Uq8YttGBZLjb$_)>E*O5
zKgl^GQmov3h_PcwQIzK(rAyu91$^S>s~J+1M9|AKzxpm;|H04s!(W!MWAnSLd}li{
zQ9#3(2@J2PqJH!Q8fwbDBwSq0$L{$Qr%kLQQpf>FL0@|dE7rAe`Sn*ZZ|;1~J$*if
z?jg3cNG?3*bWS<*Oa`U)v+r<kC=dnT+B<s~F?uZZLkClkO!MxVH5_O?#2d?AVdeVG
z>^pdflDY|8cj?Le{F@I_HUA1u9XAMoR9_#Ww3Z2@>nSVBw{6Tnaq`Nz_RjmLJGzmr
zZF!6vHH@T?Na1k8qN}*``tvE2=5ns-ISx}!JD-crJeApV<}qnl0s9YjA_R1Hc2QkF
zno+}RDNH&BSBf*qf#wZtYHVZk+sj%0=2~_%wU9-Elcb_@2xpvr3YEo0-g^Rth&sI`
zd5I7SW}bH=*Uv3y$;zEfm^6W+WR`c{T*=f$r_-G-V9u~kR&8m=jubNH%;q#YpSLfB
zdzky-Y<hzURupGAK!y}%?SBao6Hj=sC&#`T9p}$}!+UI^xNKN5+^#=Gj_VN$O=z(=
zB1a4@7I+Is9RDHF<=`#%VI*|pnkfru$}TiAqPCneRG+!^WnLFKaGK*}jDBQnyU$Y;
zL_f#CJ-j2(NiKh~Xdx<+oGtz!Z`~pn03W}U15#{P4xm@f2cSI)!#ooa{Ymf_ux-By
z+h&~lbMj!d2RSy}Kf<FSLz{;9@8Ltkvs^EVHICr8tBFB_tM6A!bh_RY>Lr4Hwde=c
zM1CpLX3t?<Z9Y#u@+eImeeB)x9)%;PF`}f4XP$eRj`mja>n70n`g3g9v77gH_b{R|
z!`@UCb4KN{ud|Tu?aTSovUkY10yiz0I%fteo_&m+hkA8oUR8f0XG|;Ol{enQjI*AN
z2&7CfykQuLL;=HwH&9nu!1irBXx_ewR;rmiERR1v_5{0+^f7p7Jw*vY_0W16>gzbL
zb0dd)^O<+XSrm5cgR&YjM|QFMV5h!ipiE*=Z57*Azs2U=`#F4gKb@&0ZLJ5HGIJ75
zTU)8fKg{p{@H}n3SwPa$c932B4r^mJZv&`h@z$ozw8|<b4bA77C;rIJL){D>GK`YE
zB!lXP(J-u**4>+F?ab(vCdI$o*Ty^V?_=_`DQx@4b1YrEgPx=Nd3Q%Elc!I|Ik<t}
zKeLpheOZLe@XqT?SiW|<w^}B%ba!{r+0{i)Zyzm<@6&XskD<edk(bD4=<wmxRTZ;y
z`!>4M(AK<xXaBa0wv@+Ps<(&E&Q7|zd+Ba#Vbj*#xQP;GpSF<7%n|wuDi9s}*xAws
zBFXrfix}zbW%bs5SWbZYnx%2mdJZv&X_H2BxM?dpnp@bq@jZ&_Co#OJgFig}G%cMO
zq?_e%OA`k=dT=wnG&MHToq<EmjT8)<#I*54*t2aX&AYa;t6eaA)^ti_C!4o6QaWTL
zN4KqH#k$RG-+h?+A;s)!I!L;^jXmv>ljqGu9^T8IBb|Yb`7|c-iWyp0O<q9>4I@WT
zo=mZQ=Wa3*N~#(-Yw<MJEMHESn~3pNNhS-UCrn{XeFZC?dz?2mA3y?~2b$?Asb|jg
zv9#{o&XJxpuA8A{cO&h+83d5-Il{X4b~APMT!s%SVCUA&>}lD{*1g@#o-u*VEB?mD
zeeLXOY@)a4D7%{X(|%|l`wn$*pm{ePec8~$Rgxj~_2dbMy5S8BtF2_$=Jz<TZx`E+
z<TG=8HP8O;5w^8<g>GBL%-5~UK>->`^CH{>?H6$)tXru@tg|M~s=<2n-TaIJ9b11$
zYyrt=#}F(%EAJ1NIXv91KMm&kgA=>@I6yHK(`<q72Zxe?`R(`?KMfg%@9HR}s`#2q
zX5n*VoCUW(J;#bHMwT|i0gdX5z<N3`tWR?lH^Po?5@7=-*4Dx+03rA`0XDeYW*9MG
zyb?Ne3)k$X_k-nFx0Go6wQlZ*_gb0*LbnC%RZ(G}4RjmqkCVDjME?GMMB@h`yuJpF
z3Zwqvm{^*HevrVvZ5`ggU_3V<y}KUfN^m$%1(Yj%*mBplWyrz@BJm!;2=5cs?5q88
zW5-XF!BuSAiWH!sI#7uI%TA-caP$HmykR_F`ueZw_L~!BL%8j(Pct&LhOhtU<8=GK
zX(|#$ocEDiVZ)#J+nOeWY-z!atyM!LnVXOYBwZ-=1@B_geT2|Y2RbFSeD2G4@$|R8
z!RC%s6pu3gu%BvJ@)&81k8<HBmONhU@#@yO<%a&0(s(Y$GS?_>!wAhlKfqK&2tj^X
z9oJrW8EgOYDDO12#Uvu1j;InM6q&j*DYv)ENBc(UGT``tY)N9!>2aQmSK;I_>#WO|
zUbvgz{P}gH`Uw&lj3rJl1ai%F2(3IB;bkC~%L%I=oMDOUi364e_=g#g)wv9ym@{K@
z!?(ubLz2drn1V5Qa3G*1a>zi=fqe$CA)s2>d`wcb48rz(MW34e$KJNncH*6f_rusP
zDG(-D^lxj%(Pc{`gwrzuNuUQi-G<~Gk#7jE^?P(V5`YoAKv)p0zRLbJIbm6P*Q|~6
zae>nK&0i&*K&uLut-B~JsZBEQ?Ogo%G{?MpBw)$tAF+iP+ZHm!S=jbNAPjHO_7v$C
zXM=Xzz_POa%-W1^xw73B>8tU@+@|*JP8VW1U_|g@#2f{~=&J&t4I@UiA!!YSH~8-J
zBm(URu_VX|CSJREwTTEG_L3nOabrWZhp%{H@NG`ElzsTW*bpYx(nHQ8x_2d#5{<78
zc?m%}oiTogPmo`bkDE@B&Kkd@ARH>nOF7zp6xYCEyH!7gh3UkKn}#phPtyf16doTn
z(qvv9*>oBgd?80eX#{sp%Hr|#Kh~ztqdPtc#H5m3B*emwOEZ?tLUII1<QGzsCpg;G
zqx&lf#nGjLhF1WG3>~c5aO|F`kSE);dNUgH_xViQ)_ry0vB#Q)N+3mt(u#8WI@(EP
z?F4ViBvy$rHW(7RmG9#MNkj=6b|42e>$4~`XT)k7MWlc+Vf(X9;*UU%0c^9NIhK7$
zCZK3>kaK`fW7sAF;aRNup8@MZp<zD8pzJ`+*u!bcwqsK(aXw~Py4Hp5115F-#Sir@
z+Q*LD(WDUg8nHZt{3H^8j*ZfIzKr%rG7H<cS#o=a@>4>33}>8hJ!RGQ5$bcu$*^Oy
z{OM0lQWG2X;czxRmcmEH8ZT@N;5XXL{eGda03rU3i_#aG2s=p3B*nTCqugBbJ+>oP
z##PM&6d*Za#*Z0Drg0-J`5h-IVcU(z7m}crH)8Z*##;dVB2ERg$g-iel&2D_)O{j`
zYdD7My1H`q-r{c?S|pKS;F400)f@jLNa-Sdcp*tdKI3K<VcfAwlQQxv0IusQhBfZE
zNA}}Z&b+5@TwnMO=2Kt;5);A`ZS(1jRwI5p9xtX2e3-7QepA)4mVbjF=l$NRa6)r@
zUpK-UXL2KbT}edvmk^|}GBcoa9>3Q_oiv(GW92<Sp*1j8hz|}Q!1KefRjm>BpIjJn
z2EN$z*U$Bw7(U>bevW>E^#EZ$2RviI2TBBSfR3l%amXx3mgvuxKM>sm4(~q>d0oP?
z%#nbO8R<adkBRW(==ab1V;smh`#KJP7|=ckpgFdA0Q|9cRYG?*1iGLM`h~wMF#00K
zMFY3d$1hZaf-`Q?7EF3nAI$Fxw_h6sPp$eeRPkf3_8WDCSscZQU(=)}r6FGo76l9*
z)k7DdyC~)goec4yTZFJlHbkgj#}ZN>B48siXt4i_4dI$lZx&U|`(iSR0edDBIG|H9
zjQhU)6|TPcJT5wC4x3l5;z(~AWdvctsmFR%u_y{kx#`xM*u8!oJvxcO!jF8O(<*ke
zq3LLlf;4$DNOt<eVBUyNQm?Am#_h@k3og5bdvCje3(h+eabOF3oqB%y^Ph3S!c+M0
z#pfaqY-Mk%j<0>?W2|4X5;<rl4}S3~-g^CQ`m%;Xg(vNiv(Dp7pS^<%&O4u3qY7BJ
zek&<y8ihZj6NX_LN94&E%|w=2m)yzu^?j`0xnJ|BeqO_s4%1J+k}rJv6I^`mLi+Y?
zX2<@cfWzd4SMcR~Ze~JF9`9}3O4@qrM}q4z>D=46b$&T3H|)gKxLqn5=JDxU&*t5=
z>jQo@a*zI^`V;y59hdO#>ucyWl$|Ue%5_(r&z>z?=*~*R*3|HD@G*je5iIQ4d~j~f
z<*lC?2}bH~!uYONfTO5`BUdoB_e|K`Ok?jSu=c~h+OOfgBR~;m_cOvoVE3`N(NxJ9
zwlfoEoKJ@fzVJ#y9Di}L3LnOp$iytb8Q!B3hJX=NBl|~kCRk})3&RheN3R8xo2z)a
zWZIAJMLZoMR>uAD(I0Nr9M?azMi(nDzrQfYMe#(R;<J4;YI_5XH*gy(r%{>aC_rOf
zHct29BTlBa4+Xb572Y_1_HjvUUo^ifo{ihMfj&%dIaakrSVo3yEjkEfXvW1*J{Nw>
z@jeagp~Z(_T*%jAi+B^iu!n?Vj=pM4S`|$Z+#>j{;ff$)STeRu(u76i5K>rUgjIC<
zG+W~oZlS*tqB;CMi0i^4$r7O|`oby?xnj~zAYr-!qct5!8UR!fHNQ_e{R&EUJjWNG
zc$;Kl0jbVD1d`GrV_9(WiF7n=<c;-P$heRwtmNcV=TlplV%6)fv%j~Lvp;+x=bd>b
z$@Zgce(!BoyxU0i=xGcSd)U-`)JG(d$s%UYKaJs~8Qxs+2K(E47&2}Ob;Sh?A2poa
z@4m^qjRz4xvZR)|Ga6X8axGn1-@POVy(1C^Ph7~g^D6nyH~xczsXU6DUPSpIdJn$G
z55M|dl2b0`^H-eDTR(Y?lHy_<0m4a8TwG)rgW8Y?t}NoJE6?KD|NQ~08r#Uvcj(Dv
z89Md^hE<j^bm$<quV2agO^1-ep?>^yW>2bT*L$mYXL}107~C+K`7_6JaQi!~+SEu^
z!jMtZm@~PSqG8n(?o4RB`a?<zDjK--g4z832lvxbvw(Xqy@=IYf5qsz=W*3FA0peh
zlI_j=$x5(nz%SMld4&`e<QWDbNfr%e_RJA%TECILjJhhQ9XW+LQ--szc|Tq4O*CZ_
z6qi*pWBwWBIc>bYaxHH0FwXw)#hiEM3<{-S-Ky7F{eH7|Yh4?yu<XpTTFW#=?0Lv|
z?ZXPBF*pW1j<Ih9g~-+5k!9u0=-!E-tB8z^Ld_+!T=I%0T$}W8TYqwo$JhU1CbIBl
z5yBG>Q}qf&H4yIp^^yBL6p(X%Gy3Yu9QJ3wvKq*?`=jev+G7%CTw7$84Au-hI*X3s
z&Bl<^S98*vI|dA7Z(hCP=d?H)rO#ySKF9v(jNv29ci8s?{d^B!i%jDU$@EWGB-Z4N
zMKMrvn$z*b$mm$h^!vddQ<8sR)K-Lx6ZZf#=m3d8cE2f_n#8=M6D|M%AOJ~3K~!4%
zGJ}rxtxZ3k>j{ewE4+=soNss?a_Z$+X?JYn@%AC<YEzmm!HR=W?MQuYwjJKr%+%Ab
z;KI}AQJKurmvu=Dn#|X~awqv+txP@lCayYlJXxoNkAL<{oHWGYP`6;@uv*-7CtEgd
z<Z$~DHop5VJ6aASk#rv3hg&_3lP6UB9J-u!<EQ!134`b<9?uu<yP2vym+=d(=8nte
zuzPPOH{Nj@!;2C~BqjAH@bR0jVQ6s@wPvYj0lqNDPCbc(o8My3kxsgf9^z1YHxiI;
zmcHH;g{5UUZbt9<$$1D*LqeozJ|ej6ipx1=#yDhO4;j~G;^J$#^RkoJ)zronx7^0W
z>H>z(yns)C>_U1wdbr@mPjbqbDk?_J<%{>;fap5Pyi0H4yy?R!sh`28@3<1T4`$Dv
z>i_zL(t2=GTt1w^qLm#7+iBgo9WkhmvVsJAc5bHmz#+D8e20zOn#sh(uw0yF)7|Xd
z*T*S~=24R88EEOSY25S4Ye}ZFT>SCRaPjO0Brv355<@E;PCV};Tr_tancjA`yuX!0
zhYqsw-3>GyXpiDiMs2l7{V7A=V-It<H2P|1>*tUL{(is*=h8<P(^-a3;sM5|)B)&>
zpz){>*3~$w!dH$byv1EpatiA#Zk(l<<G?deTb!*NOFx<8e4zejTNqz-BJ6#tq<XBh
zhL8T_I}k0$BNO9@)K)&O@T#@q>6jDl@$h||V+|h{LeVM=+6@>&^|w-f%z1x09*CD?
zv~v}ARPwdD9gE-eL!OKqbL@T=4A12+xop!LtVA9ZXamQ=XkTND8FB`sKDN9Mol2t~
z@W%+9!YwGC$w+PU<}Ri%czdW3rA3Is&>aYAN+?!$5sWyIHW3n{){)fvq0H{q`=0#M
zy=$K3bKiM{qWTm0>i2%Y!m*W%ow<m??jh27CA7EqG5^#Vl-7=6>Y$@M`o#0R_R7n=
zzHv9%Y!8jww$t9x!L}`%*>k{qXH|FmL7Mj;LA{a7bxT+<y_%<<{TqM%<C7e%7{l=D
zB82E;&5Ac!w|*T5Qf1VXCjdeFw&mRUssCU{M;b)%Mrl*zdHDroQYrGP$MD7P{E~0q
zc_B$588m7xU;fsE+_12kU;pOM^km|N8$eS^&+^pI9^mnRZ0E$&ujauo-$7kT5=Zo~
za`_70S^X9}+VUAcx`8>ToQ&Jiij$Y5SC%tp#&{;rI)#$-VY10WI(r23=S*Sb_!)F<
z`v)&BeTl!m_6}KBM(_g&CyBttl@jS@k;1_dk^{|+>^;=RzFj+L+;hPA;dgDKIpVQE
zW_sAOyP58^s}VMgpG@DbRs3biQeJ*-9ckYvTbnoX(u*(g((Bt8S~rMnUk8mlcXOoe
zFx$3lq2+LAJSJm_UhTs$b!2^--t60<_1Z6?p7K(&DAQqLbm)7o)Lj*9yNf=TY8oJH
z;afrsd%7K%Wau;CEk{<g;=F0U;o?u|{AxZU8V-vV*)smR$+ZmQ)GHYSWN`^nhIOd$
z%Gm36dw5KUHVHPJ5%dQAVL9lv0%L>-Y$`-sR7U+Jdh>`&RXHGN++liUR0m;rA-}PC
z53t4RI7i}8)AeYpT7d|2673n>gbv%h|6TDMbvv8J5Y&F@j@V9a9oRk;*hKW5hH>(X
zk`!A)C_B|;%fQ|Bs*m3>977_pBnGins7m5jc@z&JK++bYN1LDXLt@r)RZY_9pyIL+
znm_GDQbgwIGNN#g5cLZuYSBdc1(Z;{S+x5=P5NX|!{}d6v3OJB5Jp=-e?XQAgMc0%
zr+Nvoxu~Z1jw@Aw|BD(*M$+d^(e3f3+fjKXsJQ~_{lzvPUQ!Wh-{%ch*{o~A4hi<Y
zB>a780*4IS-(AU$4R5lo`vFd!KAb-v$<p4spZy08(sJ-GtUlO;D{Bx=9u8StSAY;W
zjuW`JNxTe#m-nE7OI9RE0$Bt}A^ay<T{4-huX2G}s45;Zky|c5i6<U<l>I%KV2M-v
zxM0t&ZB(B=7IF9u9((F#?p#=mKyajK4L|zE_u13cOD5}5G`O6!n?U%*kxZwNt~8{9
zeu-$h=Ll~seU25ctl)v~eTm_<B}ft!79?>cB$G+pY?^dB#i0W&9Be(v!9V|omZnDP
zPn*k;)<YaPc!2${JjE+5yGV|`04I?^X0s%cNh?m6Gl#zJR(cYZR3ruI@<AkeI_U08
zgGd@7!JY*m2)1v5_bULdOV;%sQgB2NcwJu~SvN}}kwi)tkx1b9^<5^NBJ1K^cT6O{
zffL;XZEpm{;w|#k(3|N!!fuRicay%I#_m(Yk3FHF850LpEDr9*OJX~F#t>Y#ZaW3R
zOFz&f;%}OG{X9PPYay(@0WYR#Y@9*O6<1E>zT08QGdA&I=^*^nx9VmuM5+EclCM8j
z*+h7MUU=(`iamjh{J}bI8T1?bcSYQ7K!rTg2LncootTLqC2Qk*PA+5031QNUeM=?+
zCX}p0vwL;hM$;Rz0rQ$PWoOFUgcU()+(L>NpXy=WPKvjoU~g6cGuB6~Ys#==bYbc!
z=G7sJ3Hu>9f#pD!+<X}77hg2breM&9E0vwbkQ<1?urXEm7}upC`&g&7nt~1k+KRD<
z)UvP8zAd$tDsTxquBvzx`Mp&`r>2L;Z1D>0%p7Z)v(y!a!If#_%Ey8@Yxz$A3H39c
z5&3Fa1hFN^9RWRtH$?AV>pt-h=*X}?8C-bk2yVRwjY7x4fE5MbyO7k~R9=V#B*RET
z8z+^`3(O!3^|LSF<})VJ($>qg*+Y2ZJA2r^|3#WlznyurGo<p$+57ghy#MxgUfq)6
z-p}2~wgX-4UH2+4tlEZ~?xnf2i0iMrij}M1<joDckV4S7c>~v9cOBOkEaRCMm+`m1
zZ|26^Kf!{|5=7G)b{*`YCXpoJI0yvEWL^*u5(QLMl#?(5jP>*juzU3jY(C@P`RslF
zMyhla9Xp>#3WsDukm~CrlXXEr&(VFnyEn;QpSp*h(&6k_`xm-W%D{wvsUU8Gi*C7(
z3B}!H3TvtA-c9p?E+%CY%)8(!?rvDXn2I)jx_vL*9ZNashO?NRsUT5M%-d~^Y<OcS
zXU)8Zxw8^v^GezB+CE<2_$FNR3GTXMIO9&4%JwJ!LgWz8eRwBtyx+qo?)wZqg*9w^
z{YegYXCaZqNhbaGnguCT2&HR(bx9<ce9C!zqF@Y?3~QFZ%*su>>FwOhVW)wQ-FO{K
zmb}20_tufV;CAl*L<PfVo=D3}FM@Eq0uvBUlBDBRaeZCwbR>sx&9zssV#PmLyJZj3
zOirOV7}%_xq;ww8i6<1F;{vw-Rq8AwHEbEOVHKEzLQPn&tx^sr@spkmU|Mq~O?F7u
zdKM@Q$H#JfU6c@Rk1q#wq%{JF-bbv58R1m%q!PMHXxPP&^kHjHQLjKyvWh9#%&it?
zf;3+#7Zj@l?34-x<yhX$glYxPaK?zKUPO=@BIXl_3PV&8GVI43)T<{{VzXDm0!b<$
zc2L*wd?Q!%gkdYC3WS!W8LjQ)fYm3qAQaYz7JN`jh4%JXay8+ESL;G{Y`v;S*@bDB
z3b(RB)3XA*v_DM?H7JVc*{~BsHo>c{R9{2WmQUJtpq^5+Woz`60o~R(b(>MB<SYV$
z%m^Z*^0a!$Q2bDGQsF18Q;pM@!~_BkSk=WX_5>;%PL&GOW#fT9vpClAQO$VG_^ZY$
zx^zE9l+h<H^ce+Q+@gfWVZ?8<-Ao7O{<k<}@l$3~7KToAyp#IrQ=th7{M+oOWv2O2
zeY`ed<|+dF3f>bJBcSjEFP9v{ZHfG1hBpkSyeN;Ot$W$sas*dO3d^e*-B5><?PX8X
zZrZw1B=SobIeH|;i41!h8#&UQ0t6*hLm1OgOIK?%JNF&-6eW_37&DH_WFPNuYa}i6
z7%_S@<@qkVckZOUCq;Q}J%zn(?C(f1Z1^ya?r)|m75Z7E*wqeiB`B|`oKYhi$d|qB
z-nE-9nMXri75kd@l9GWF<`-8oYUFT|nND^#?xrUlyywn*8DMEuE%kNP<ckb@n|9IK
z-o=@>evyfX-e&oheH?0OV*lYzq!3hA52Joa73uDyG&k>|JMB_ZS<A>_HON#K%}vd8
z^k%52sb_diDMycX)7RO`!S-&g6*EvI3(FWavVml#lg3@m^k!TT4uggagQEvH+?|GS
zvET=o!pb_vjTl105eTyEZ)#-Up$;U1^1*eC95$HN=AE>(rx-qFJhc@?oVf5q^sV|G
zPpoXAzP5~gds@h^7(zv&o0h{JNGYhO8ODgAgJ^GQqUk_e<PF;Ys-r(U%{?YMd(Anc
zwQ!i&atDEb#qqC)8im*&&&Ln_SYZ9%k=3yr9~5)Et;FXTdtvN@$u{n(;(rF#G1312
zu=n2aa#i=C?{`f(y?1rgg$e>hQvp#WA*v}7V1WVSlEf`eZtUwD-|P3%%e`^pB#up7
z00W{UOcB-eCV_hI(mB%Uz0Nr^>;AFJDtphIk&vC_ectAS&dl0t{d!wv@3GoK$7PWt
z!k>uFme$c>WhZwKR?jfn(__x|US`?@GRO!j{mpDB2)B<@v-r~)6SBjXEtRA9%=DVB
z(IACI$5HW(+T&C^hY!|X_N$ulszALW3<M9o&2|oRE(WTiHyeF~Mw_=PqBnY50^zd~
z9|>V+BkVUKyAX{&{-}`gI(RP>*!|MK%gut9N^#zecT?N&9xra(?>?RB*50;wLS*Ju
z=^=y#3X0&bIJk2)Y4T%H?C=;ShY&iW@glCe<qpoDQb~LLE*^X0d0P8N{Qe4=nxuu(
zeF~S$s&9|l%rz9InlchvXq}`(jn>_`KPoCSOa>Fc$$by_B#QM+Pd%lBo2_Uv_e)Qc
z(qo3vX_M~g$<8yoQ<bSrTNu2PxlVW4dNQdcTo(Fjq}e097{Tw8P8Le`nYKn%ft6ip
zndyES`wF2bbH9_v7@W{$M|-;7qtf&{nfHWEPQ%gLtIWb`GuusB**;1J*kN$e-A^|@
ztWU6SxPC^r^^=d&r#8*i=nE8aZlIK4{2rNin9o~b^7?oT`9VP2W--}S-}TjSl~saC
z%v&l|%af+44B<!+LT&fx7@ffwS}=a8!CS%<8g%y`B|JTxt71yqXi{x~{TPjQX;P+E
z1#Kshp0qSxJlOs>jZU>}t^Z_*&a`>v%#piyjv(}WM+SHCIq>C)(zysqM`PG<4_m_O
z^n*r|g*nt^HSyV&q$H{2WU9!d23*^>isNva^zmT{%cD=afP@AsY^>5#DDRpM!vkkX
zLR1Egd1(%=wMm~|VeyHj1tmWNCPMk6!Kc0D;oul8$p=EGCd)9d3G+!$I(C^H*h;m&
z6VhgTfxZEfAu3pZ&A5_2(MaoidOFmV$sRHKr{@HNRu8V;*M?`0wJ8d_BP}Te<BcA{
z!usm4%-EsdF?Fp`VjTwv_6f%t)K}2T(&-?~#z4hSC%2}v8`A=SK%35&Pni_HKYF^d
z{UzOHMru1ZKwx%d#5${Yx|x_CCRN-C?U44n-e$!M%O}GlrEPmU)Ub?`K2|9f!|>ZJ
zHm#%a9gHdwubcmrgpXHhKrrti)xJLNIEjB~P$hHczOFNMU%Cg}!7rAiXK)@Zx~X{}
zrB9!9VHjaWc>SIK)o}~q2u<h(i`!S=wrheD!O$2ka2JnGr?C3#-@QZiLdnPe*9=+e
zvq!(rvF|J%Y@xxFK1@|xSq|vapvg@0OAm_lTv(cX3s7gSJov&6I?4!5O=)&w=vQce
zH43CDX<f_Sxi*gvVZaog<`owAi*)V4c6~q{G}XAZ3qw!To-Ns0bdB2+j{5AwGSh<q
zt$>qQ6e%zo8)##lHq$>GwMp%cRQGIvK5Y&S!fe4W?o3R0I`X6<T|4*DX%h6M=LK!d
zAZmJpt{pRo_;%p)qSK{fk}f~bN2LMZiWinnbRO*%)oN0&$2MV%)Qcj|_G~FCdc1-_
zw`Apw(io#?QT{=ok10<@vt4p2ew6nrNW#GAd4_F)R({ZIM~9%pHKC>}Yn5Ju^75mk
z!Mplqe)r;+2~Ce7IzM*12~DEq6+`}Jtkby?e!G{)gt0h+)UhgSk{U)mog20)EI;rI
zI5P)mZt8ys?*4dvg-?^>+U%>3GA&=>3|g&{fuF*>yT07?Mw<^Cr*KOM-E%Z<w5J4g
zyVVTK_-j7pFWtowjTl2=!pgHQ7GsE153<;?%|@>C>D4+0T@o>M#ta#(0`>4?nXP~L
z&><$np@QtLAd2vA*`OK)R+gx;x<;iZTV&ZV76vQLxDbt*EupZor4_T<$yS)BAoy(0
zF&N-ar-D!CbrOD(k%hp@Rd(>~)rRb@@ZSFsB0d>@PXcIWMkFWoqR`*@b*hROEk>S%
z-+d-p(#4S9VGPd*^S4FEXNkYRh3jO~k4b9q8<?yZW4+?zB=PAjVQJ8Y>D8I~Ll<`H
z@{9fu+1pKlE@5NB{N^+0b+i<#?~J-Dan^rcHLexfYN=O6HYrR6dII%l+FI4_b&}>+
z1<R|rt-s#qP{mtCt3NOD2{Hes7l^9BZM??2@$3pbgiaf-CrcS%T)_*2q2mtB{@q!A
zC|oJk{~=rl^y_v}B-yxhoKou_Ml;qGE}PM0=AaPi`)KX@Z_pXFs>@$c?&)#c6EXX<
zVafrTOuZRbz_|m0QeACYblLiFM34rXSNFcKKTB<Z4PWaUTyww@hDoPBHsd8T$SKYE
z%mtj~yYBz`3Vo<wYkxhm4AcTccD6oz)S3J{%x_2IS=Wcgf_~2luQC{3L*j|w-_LgG
zW31mH1H1tF9xWDcMtlir^WW#F3tnpU*|A;ob)RpIFJ0te@giM}H95{+PGLo#?L#JE
zURy}43@He#p#~2!q;J^x(6&)RlY!9dqv=+y9tH{5-=w4=_L=pLPlejH_SK71UO%CV
zgg`zP1ZxA~;nL}G>F~_XhZ(|upkNCGhoB#aPJD#s^In|5iUT2X@OcR9;F+gDJvr>1
zi3`qlO+~4&9$u5H?^=<dF(`1}EoL)q_j5Zc?B0Hz&bV-wERDdSorTNTmY$wdr7@JA
zv^07$8lA?Ip<7Q#>b*u`G^Uh2W=&%TS$tNyJ*U=XE2TvvRw4?c`{e1dWtNp3!}OI-
zj*-(ZTU%zhGcmi(C_vK5F&&M8w$CQpP#O{Ldk1ZLY#MRtvD+dcG`d)_@?n8YX-}>D
z@-b<2#;2?{OSYrP;&jCs^=_zBkA?QT;@nPI(HKhiX^n-aDW+r<3mHAmWT4UOt1tK1
zh3v`5G0d<1%&XhpZ^>VEdeX&|?BmS;9}5i{PirjHeq_tNVx9$#UclKymGOEbydJUY
znS5={HRMDiL?V4W`O{yrZ=`}R-*pQW1#u$L1g|{$FdLeZ+;+$3nOvEVh$Q)V{ZqWL
zqlsm=+{HPQN<m2We7K(H-rdFdSAU+RbLs#%zUy5cfAIsRF1d;8E}9AmIvcj|@MAAi
zHunndSavP~80b30Bfor_k?{-o;!R7P7Xl|*So^a_XfBw{m+rWR!gv%xFVFt+SL_`s
z<%@UROm%S%k!XTfAA6JyCq}q+^=hWp79m8E4X-@Q>)RT+@}}EZFr^ZZ?EUyvo_l*c
z=PtXA%jb_nN;tmfeV%y!eI_rup6eFPKp^OB*vg}izf8&O%UFHY0%8%tK<7c${_0tV
z>*jOUjhB%Z6O0VC@YDlqY0RC-J*%&!BrgiGhv$FwD7*WM_`>ZsQ&*BpBr?PsPd(1Z
z^+SB_*44}yQ;ZNvHoo>8uWvcdW!J6Z{OQ#|ihUd3;JG)qan=>LaMih!kVqQ$e#{fU
zewXnVT*HkQpNRlEkMH2I$JbLh>k@8Tc0RF4gn`aOJpS;r4A#!$&KoYHARb|4u!UzI
zew^d6vD|s<b(9yxkjWm_KlT_qd-Aw#^(~Am%R`7E-d^`4A0F-J#+z0%Yg`FJB>D7>
z=XrJ0F_vC)D+_1VB1MY*pS;DoH#Rf(vKv`({$!+-G#>bb$DetNG3PDkmdoaXgzl!@
zJo@Mh<V|0~=T}}xOb7<MkMa0JPtjjJhr2#^g-c^A&pq-u$0BvCS#>>?1u<l@msg*B
zjBOopZe6vSi50oP5bwY6H18ek=GvQXXU@115J|Sa^&&5Sa+t*{SFw0jEfUG0EpPMm
ztDi9Yk{elh;Z&qd(R^q#YoC0bnsct=wyWkNCG@uL<KYL_5udt{yRTnF1VN&&fwjMU
zf}V<5+<VJ1^5YSb!|kklXe~#m=C0MBqp~oDO!e{llaH~vEyAs<Z)Z|PE|B1(m!9VB
zeI2a4X$^D77b8T99q+uziyt3g(aPIca%L?cIkN2?o?O3ynU~zibr(%TN=fU{Ej+UJ
z6)MlZk~PcEMM_Cu$3Y(Y`O}EW7x1N<FF{Bc8EWE@pFToo`Aoii+tuX71f;rn`WL_A
zkgVdKJ8q_;AO;NZ`s0uCX^Ujl?Q1xrA{W6hAHV!GzuDcwikrW{S>uZlq}cWTi~Rb7
zy)3+D4VRo*13>+bcX;fD514+*E!?nhDpE>18n*J_uU?{j{xa@fc`m7>WU%K5zxc^x
zq$XX!AFRF%nUct48xQ{TFKI2E$v5u0mUu)sapot#U|+I=uY6%OW%*H}(P3VB<Uuwx
zrMUghFEFV*he#yFhL@h;wVh3@SalEcClrH_?ECN~o_%L07hZEWmz-G*2#)W1o3+os
z&$Nqg<%We*kW$cfd<zdf@)D)<mT}jLbCD8;dJglz{f{#;;XJ;2>t#fM)JPi-|J#GK
z6iwypYpx|P=ETCM?tg%N!)4re=dDx~I<fH8wU6-ei4j)ceg{)(3Y<9e^3%M&y^*VK
zzLRq&mpO6fqnCN^ot-RLeg~J%9pl8p-S6@E3-2>!@r_)!aJq^!kFBS8&gI;(d;t+)
zpzAP?KKKl+Ln-`P(8)T)tEY3{?blM2<HW*s4?N7C!D8;Y<7R4$a}m)5Z#=P<j~f!)
zylOSm>)bf=>N;NEdYsE|yp8jwRRNNH8(!nNx3+WkRkv`(*%OcwPVD`VCw~1d6Bk~`
z4U1<wG#=l^+Q-*ZH0x4sTeiT7GhK&x>{rh)RC^Y8-FP|qF+p;ug{K~Tj1xKIxqJ1s
zl;*`i_VVJxkFv9`fV*zHnK7k#L?T1H`ScTfcx-?hZ&}UEv2L7s{nxy@xt^uhuHvE@
zHAqtI|Kv^9{bmdEF8>_M7My`Z(zJggkFR^1apzyn=PsFz0J<7?vUcrD6wJ7o+g4ma
zJR%tEI?Ce@KSN{xh+ims6VJTrDXh8mdMeyF^U4#CvAr{g+gIPp`0_l2Nbv4+PxAiZ
zUar6CHfE1^Xx#MX3%vTtQ7&G23yWseAxUv?)7w1z`X**Cy@9JQaN<nU!B2VY={Koc
zu$)^jp9d0pTK4eh!_Sj5Z4qm(Sx8I>hI;CG{Gli5t(?tWH(g0#JVJ7)jprVDjHA@@
z`BgViRp`+8+Eb6SwJpl3)vK9Qkq0FB@WrQjcYh}<Z@PoI<DFQz?d=y?zu^#zSFU2o
znRQ4chqu1NQ!j7i%u8<I8aK|g9NEmHkH1Fs*;jJMva?A^=xaN`!w)<|Wb%dFd*c!!
z2!{Jlu=W>^(N#W^dv9G%K|DfgxPx`SdX&SdD(<}PCaMZ!km~34rygTd3*55$4$i1_
zV&R7M&+z7+Hm<&D4f7_HAVi9t@4d(iAMR)2)wgr;>{<YhZhwa-Uiyd`7k`fH7dkYy
z)o<nDN7hp@|0?df`dm^eNq^^I9{TquNKL+guimm0BqS5fJp7Y~X)m45SMOL!UMvE#
zhi88NAP16_+_UBuDhpx=26_F7N7&euV)Y$&y0LJCjjue#>pPoSe)HYTpHPfIvggAW
zS@+&<E?RjPOJ~;rf`*-Mv-bH9n0E2a+_-2OQbOnPtvvA13zVI;jC-zGKr$&w^d03F
zKVD0>@P04dH9!AMoCN15He9Q8Cs|fKr`359)yb;=;`xlLt#B*RBrPWz8IW<t)K(LV
z3Ir)STAJxeNUCdU$csl2NV?lw>FiHXUR6s`p7XNUf$nzNy89_Euc5Rs2Y^I>7cFhw
z<ds%YS&|RH$WSlMEgi%P%c&_Z0w6UqNONNgGPjtzsxrsvQVE(HTNsY!QCD5zzXz?g
zv5A3HjM~~N;?amnV|5LAIS!3I9j$cqB`B|~p*TN|L^9CRL0eZpMdj6$73R7$cGJ?{
zMP5lIl_do(jeWGVv=c2Tqq@8pAsiZ8np#Qa6jEDNMnni?DnV;g3ki`+T}>s?hya<S
zy}6nGRFs<PDso~Djh(G6^bDn_tgfLTCkjY<I@;*$8=|zbhLZd^AnEVvq^+xuqOvN=
zi}H{_VxXIrwk~puE2%2gY3v|UP)bck8i#3ZY9=9asjaRg7IkTCX{LW9LQPGzqOq-&
zp1~v)Rn-*a#t=w)JKN~!9ip_Nnv#MXKr+zVMQcYdg=JM#6gxBy5A@L7)=6$r1yyB*
z03;LrG&i>s$uFU%qQs;z?$Q`>Xl!e4VTd@j)m1)?9W5>NjR>l%tI3T=fE3+rt#sQo
zcD2*dGe}8AHKi7f1!a{~7Uw%O4))O8(n(HHIn}Q1C5H!SZfZs3mrz?->csiv5Y3G(
zB;y6tRhJVH0#d`YHnlJ$;?z`E5%*|pZKiKnP+eW)&?r;%bhOenFhY4%Erq#`uKT;%
zX?JOK<OoB3owT&~Qczk+Wr<7UP%q8R9mI>usVy%;0;!P!ni^XnuY@sGrH=ci5;UJ^
zW+ax+n3@WQMma)jV-thKsjG2(7*cflK3r>xkM{roAOJ~3K~zg#+?h7^wztqRFib^N
zolm3V!vhpo)KXdycl@KTljimw@=L3za%mhH>ZPf<gIG~HbuNuEIY{I27Gz#AW2?*D
z7&A=M@g|0&`HZWrbXJ)BI1^(`T@A5_8)urE=oyBZ+FEjB5yywyTj=N?ro5_-qCAhr
z4%)i<D6XtiG-`21r?I)YgQzP<FU~Z%aYm=H`FInFXg*_W9XUEQHZdS$)P=<vEf)H5
zCL|UXmDf<F$HMk*@=8KtVL=%+<;CFoM{`pv&M)RkW91+cFQBf*iG`4)&5MOKRm43X
zZfT)Ak)q0qGaY?Hek?TOOf_XiZY*@;OkQy%m8B;e3)94z=4Sd+5o)TdgR#){VUYB6
zw$a&}ptQ1zl6=pHJ8A9gbK*>qiZd;3o#YsCrjO?4j$oW=ZX=P@-wO%?iHPS}ai*ER
zkqFf_)voQO=n9K7o$Yk=hQyi9UJ6SqsnFs~OKTVLqH?M;#lqweEln*XV+GVzl?P&>
z8E0DP8%D($QgpYs(mgOjMO974SUA|%6^t{3j_v7j#*c-0CDeIwCOJe)V+$j3MI&;U
zwx$*a+<5KL*xA}b?=VzV*Z6U!r@h6Ag_SiF=EbyFI6!eljZ35Rg87#A?$9_BFD#?B
zqS%dvjt`St%oxvyQ^Pc!XbQx_w#Fuh#+qv4fqKS@h5f@+RO_*@!>MO98Wa6pG`E>F
zCKG)$HnkHgEThh)(TjyiVHRCt>U8fFBNA<m*lz;NH$VHuo4!6LG2W6dRqf{^BBV{A
zEh%Q`7}gj%IQuJ<o-Et4W+r<EKhu@HY=1X2W-Z680cK10Nq`KY=f8sfv!**+)Spp~
zVOO>sem2==JNP8}{zvL7OA<e;FZd6od6#tl`R(iLKbbFlM*TZ^RK0G?$$26DvZXN;
zOUU(Gh=rkOLg+dDH2#iqVHlRrW<0-}D-AsYiFALrTM9|c>#RyQ?ddnSDDxhjmY^L0
z@PK*y!O|To5b6gd-PcoT@8R-+xKM;M8}tw9dHB8W`fSG==T)LwAFo-e4@Q5d9aXEk
zObh?nU!nc0E_K(Fko0i}g>JqBO#!s~T6~;_%YdZG*88fK{=?|bK%>D&$fX;Z2!8Yr
z{1e=ew?X(50;{K^I$_x<;k>v>DO|Rhbv<gEH{3nrU>;`GqPZamix&fbkXIkRtCP{_
z(SC9<fJX@Zi#=Eem_{?cxYE#xryG=OSmt6%JOni%CX`OCt_=vIvq?x=JIl5)R2C=a
zr&gpGb#x}xg#UC>wsdE4Ww5if-7_^xt<j4=xaX-e$<`H`^s`IICAH|9AuI`7@&W(w
zvzd-M$V`Z#8GW_<<glyY>7#7aW*!YehbuFwabk4T2WQAf87xrGt-(>A4uXC!{2rq{
zZWOE^%$M%#A>$Nn*NuN6nM>>AW*`xQ#g|{0&dq)q>2wQt5f=Ydm&~ft`kB5S-mc6}
z5W{~#uJ8><|D7~1NPE8v<_W`JkaPP=^jNPI5U!AgCULK@b2=zv(hE2HZ-8$@a9;kj
zia+Uw53A5K$GT=|jMb&*y`DiB6}hem9iQ=nMRdhW|6ivdv0T<KhC?pxV5F}nt4wmG
z5F}2M4S}(Kk039EIEW`t{inKAL^F6$dQ<WUCO1X3)a^}!>5p*rrzD$3Ub^lu;Ysr!
zO+LCT)nQa+2a0b6-8ZD5*V#p$RJ{BAr;i7QYz#T*c7XO4FCUGXd<97I;P`VHOOG}k
z2LGN;bqZ1buYFfzJt^tQQre<dL(?S`BR)t!{rY0jEh><h+@x={+dl3;dFQs-hm-vx
z#81E?BW$OuFzi@LEl#0;8MfyKc@?XK%a=9S^ixnj60XHTPbMnd3XN9_mIdF$Aw5u9
zKbwEc7B$-vkVa0LjI^X=J5`_vlv49+v!4UOvL#P>zRy?*QDaWUys($?x^V0Y(v!VM
zr+?4p!SYpomL)Yi(?*5)g=fFguP#)Hq5GJoxqyDd3Mxgq9&1!y)W@X>Up{>*Y@7b%
z`Hxb2(B^a+by$9zFU#=w<=BQ)_Ug;R_Bq8#sC{ZYcr}BgLY0ez5P{#Eml{5S#sii-
zbokb21BUOw%I@`8Z2V$FAi?WdS2Nh~X@5YLr+n$fmdn}bebaVGKR(lJ<dm<Ik;SRX
zE#0`2g?E})15djleAX*K&MD9rG9*-NnPi#nUKnh<>6CQjEw_&{b~M=8itBW!T}uCl
z#6K&lwCe=f4bFm>(`4sa(Q~Tt*6MVse4RFz{*R0er%my{v+J~n=2NAE-@a7-vuNj}
z=*Xh;X$vKz7(=w8vg!w3$MEeTFkUwtvYO*>f4c3p)6Qk(6-$^tv6k-EMtTN@DIGI|
zl`F1f{_GhPGC*TXCn*Usn$P0PuAn&4PIGrZ`8CtI_R_gD95{k3o5=Mm&ZF_b5e8D$
zd#h!N+NtNVa@i%EHD?a<W=^1`p`PBs;n3BAFqwosZm*WHLtc>TZ!d<;w$%(EbcU*l
z)n5e^24dU^kT*9-6TJ0Vxf<`U!i4bb)8$q4-Z1@hNWrc_cZt)0hOl;0Ze|&#J@*Ff
zcOUew0WWfAvD&Q6ki8)%ZHD8op85^o&6IUwY+2Z9QA^c9zH~W<U4cT-?CQ;0tuAS$
z?3jlGM5MuMj1LMml)m6yQ$GPY`>Vd@xU{s}M<A?rJ>G=DQ@U|^_wC#gXgV9R^=&m9
zTgD-R3Aaz1#}N2JJ3e9{)aeP4hZrT1xcu8kO#*^5!76Q9+k^oCf$t%5u=+^}GtRr5
zWtS`<)!9f(_duw9n|wx@K5qd<BOP@1kDzvg!UPq<kB{E=O%slzUxB680Dq}SE_2VG
z4?QjPB~lq>8g?0r6Fx7t03CYu&=f*S2s|B6c<u@KV?a*fmtopE*$}J+^zi4~mD*Bu
z;ORNiYZFF4Pb{8~c--jpgfm582gyj=MxF@gJM-(*trgWkEo9Ot01+$T{EIJV;n}ko
zY(Gv%|8Qvdkw|1J%DjuOVq&a=W35BXU2-{BE<T^RbLKF6<`mkF?V~4Q%0D7qSkhbO
za~q^<+|II{@c;J>UL#n2aqdsK;2IWAiZhTihX3_v|ARA1bEulWn3d;^;n;~TZu!Qa
zvf`|<02J0t<4a%t25YXr5FsTclh5btU;7ppPAy^H<u~w+-@B7Bxlvc>YFk4fnR?-M
zEIK2HWA*hMYiMF{Bndz&l_Hf&l1wIX-q9lc@2QlcI+aQ}Z8AwpI`4v!DR<*fN~f=s
zNa^%TdiTKXuW)$E_h34rl%$gC9W$v^iWE|_u#g1fi(32f7MEo2oRk!vLwLrh1<Np_
z^aGg|n3c+`HYFcne-fUp1j+Z{*-Pj8-w>kA+$QyJ%FYZ3hU{GP7kXbUG1z-G;0x2=
zj`aBU_pVGCdhdP;<Zrj#%I+8&B6e`mWh15WJ<#7CE|tb@I7a`loER8y-5e$R1n=qw
zL{QElTP}tS1K@(&k_OyR0}7ZfUV1>&zwbNV|5DelumKobAKsRd5Cnu^ZjqQo=|1Zr
z*KJLqRp)?hhIBvl^e1h(hGlqfyf#U(>o0>}@CydtH!R;YCR<XH^kba#zdL+LQpuE?
zM<gjXZhA4d`RHC!W6ojG%rR~>5^kJTv03^AN%AYFar;e|L2?9e;&C#SB$-riQIIYz
zejJtn9^RC1_fo3ZoJu)1ASI-dNm8k#FK?Mj`7phAr#W#v$%x43va7FUa(SMTjfXPK
zxNY|~;)@8_q10k@NNu9^RIOpSMOfS;7&|<uLa5J<V718Y>e7>hSHGyZsFzoEQKa>e
z&bFZc``-G*#dR3<-Nh!P0S7mR7_rlTqee)KG=wHq%8;IaVAzMyVw6zDkLk=VeV)H~
zKiw&{dqRVUh_mv(KjG@xWpwl;C@m>)W#EjGT7M)|PQQ?EeeDmqdck;-QqtYl!m(rZ
zoajh!!>!j69a06LC)=2o3eji1Dj^v2ZS)_t7E*Y}<nT`gxLQ1Ma67N8+d{##AF*t~
zSYA&GI*uRUgSXa`Gj}D6r`Pc7O~;sY)*|XZe3rZ^=TjAb0U=WC-?E)q7cC)GlgH-6
zy?*8~&&oOU9zU>+_ut>*>npEzCg1t$Dq7lmsV<7J?va0IM_VEH{@$0ETo$LRVH-dG
z*%Ne&GVc4@H<((SB$Z#m#>f7a7f;mj$G6PnNB{gNlUID1bB5mIf!Fu4=(;srzi0-j
zp)Q{N)z8^bKf>4k=nsjvH&H&ej*r*<j2AZ^rgXwteC6I%lte~2yzNaMd}agp{K?mO
z?^i!&OZ^~s|KT^;`}{BXaDPi6aj3IQ%6)xfu!6Vk+{Ucga(^AReL&rMro;UhQaE|q
z_Y*>RI`(YH_@1t}s=ri!d)3M1J?xxQAk5iOFU=Kp5ZR_NNRydRh1(upoQ9L6>&YuL
zGLdVuqv{!RmK|KluypU?@m9t>|3R~hjC7<scamnMIlu)boo*4O<Pu&u1mqU7`eVW&
zU{8Jlrc*cyzsC8L49gRl&lCk4(rG(WyKid_!pwwQI7NVQJz?AUO&xf>ZH%cD%vyXc
zpI^C<Xlj7>e!Z4A4@UU<7cb}MKf0gJ*ciU~wOe`gpZ|rvvYCAO-qn;wM>w+WEgpL6
zO}d(oQQzFfY^_KrsF}fi_uavm;yA~5zQ->gTTgdFB8hPB(#vVv`ZmWphA5wP3IA=)
zQcg7WQ&%?3PyhLTj&#HFHDBbCSv4eknt1TR2iey+$ikJM=Z1@CBNN>`|Hv<Se|IBe
zXD{ZS+g4H_hS>4JdY*XcW5&+Ef-kPVf?Se(wEj_^eR~&|uK5;AY9)#M3i|fH#r;qG
zhJuOb@O$^(LTqH1iQ`N8NSZ>*6yL&{LcK<MwX5=3eU2E0L*+pYp#GO24f^gw*x9ha
zj6^ddeHv!QKjpgyI%5^w&IZC~&K#rBE)^aVlu4rwTCZhHklxdrUM>|OWO;lDOANu7
zpyM!Oi;|<1kW)~}6>|!CYHbUJu|5tQZBgSi-xNS9Qp8QyUc_7Pe8S{t1R*5#d$v+9
zQ%qR6hSpEkb969DB<jUN2S6+=NSlZ9^&@+Re6;{72o3i_(s;a;d1I>)M^n^JJC_?*
z6>;J8Vb;BRjFik_;RVzA_@#em>P^4L+{vZvB0~3xo#anjL2lFAv?a!)i^_0-REkBP
z`wErkcF=QTC;xW;!}LeujGH)y=YIA*cDE%MNepqptzV?P@g08fj~{U5-QQ-}Ipf$6
zyO`-CJNWCre}&I~|3{SOMiH@G%FBw0Ajv5xr8pX=bmBSOw78ZZ{mq{-GX4_2bnTUF
z{r6WWud3j^wLfH6=@Pzm=|#N0xq<8Nyo;l+-OqEI>dDE8GMwsT)BXW2y<i51NHVK*
zfR~SVI5Lx761I%eS_?L|boCWCw1A-HH~nC+)W&=FoJ6h^D!ti5AX=D*E0Li`?Nw2&
z*@$f)-WMazxF5onzSTlX7+lQ>r7CLdgdRq#eFfs4k6Vq9zD!Lz?9ZNH?Y<sNBM61i
zViOt+^k(qM%#qu}+LJK@k9|}igdrI$67?e1hiVmADiR3oyUv5MP<4n}D>ywkI#_#u
z$<kA}Om#h)H0WDBDp_dTJ_bu_g~+!tsV1$$!l&D;&V#;vWcPD$nA>Q6RF%rUjY%2d
zYO|H<@FSFM2cB3G_WUlS2#`ceC-M0k7V*MQ{(<c+L&W4T{r!UDBV)K=W;Htt7g5-G
zfYt%Yt-pUK4ZryX|FZEY@ti0L>BT&CrBZ@rt8Sxr%M1L^S9WvzAAExgXKmt@EhosU
zID@5g%XsRaw~!KoXl?-|kv@L-i+^L`-G9PGGiy1zLvZ=|lX?E9KV-|%P6h}2DH(q@
zH!hjL{r~$f7^q#y7q7m8ZO32Yrd8Lm^|_z$`tDY8ViK8K%Jny0#d~Z2l}~!c@SU%G
zj*m9)CBM9y?tRbllMlN2-nVXL%!?mz-u2f}|K5{4w_}L!{r9gb%~?ig=-S&Vm&$ZA
z*{lB)vM~KJ?CDGCA`>FyrtOw|wo8vgqrtLcNv8gp@3}J4iX=a8jS@Wd@L+@sR}n#3
zhCMWCO&HT*4C!X#m&D?gjGH-|Sv4=yQE?gHJ+quY{kQc<Ym}5p=3a3#u>-$hQ|>iP
ziMu>WNQr!wEx(A@p88)zBEk=S-o!O#By>#-{xYMk8P9gz6)`=q_<gLk`+~wk2HN@n
z!SGNYy+eZ1+!2~OdWn^d=e(Jd*t&Nr1w~~nI{Qp^9g`%7x_SAv_2jo~;qsXmThh^l
z9ua)}!UOzh-BwbmB!fecD+FDQ2RYo>#lVOpm5MWQ;sj<+sN~z@FQllnlr6jRsjjJ|
zx&Cdsdb&7t_&61|hFnRNl#gTl88i68?|%y!D<ImngNTSQJlH`~OD}z*mq=~}u~?k(
z)p1_f*Fax?AAS7}jLq-7!41E6DOZ-}aAf0~v<{4f8dJauP|oV{9hhCXNg=o*Vkb-O
z@-0u(w!TV2*venfBfZ3v?r9|v@<q|C8Uo{j9)(_*+OKa3djc*aEe2fou9Vj7%an6q
zq_OSfMXc}$W7HmIM)#_!j8W2?X=fFTE&Zisz`rF`MIDHgL8LMrF_zNoCk(wpP3O^(
zhYKsrAB-YP-x;v+Wa3W*J_nz&6B<pLE&FOR`zw~i>^|5gG`k4EFgoipQ%`!U6?y_N
z>?inx1bOrr)o{2?*s?XkxD#jcODf1uc5twvoxY)@cawMC+rphoE~7Sg1|P2fF$s~w
zxav5s?yaY<zYqE`bnIn|lq_K4gt5$=d_A>iT}fG40mq|pq=b2kub_X=`y6QP_hr)A
z-b_#5AU(aql;_8heMflonYX#>hI_cNc!Xd4%fHZGHkJvKr?KWM-yjvsC)Tx_-27rH
z^M`r$P$T_)1M~y&!tqq)C3xw0GaWr6^hWY2&d-6=04GkgGuS^wLdJ;4@~AG&WpCqg
zT3eGewsdAl@S3jmr;)Y1sd~Bp>M_eIH1tVCFeX{HrrDXAXraXfxA*APP#TWh?;$%w
zfh+9|3z{ASM;WiWc`E|e#8G;_rU00^BAh5aZ#d-WJ{2U!$g(1&5mQD|CK&2C#M95b
zN>9;oZup0riAB~koN~7JAwonzDrYQTSamUbUTI~~yfc_27BIeSH;vr`6xK}V{4qWJ
z$+i}7-wLNM7{pBG^nl|rX$Ey)OvVypP&hexh15=%!_^m6^7Q|E3_1BC+Us}o>T}Q0
zU-DgUyY@mJ*jhz?$2JbM^mF|E4cvI)1?1JYAxN?PotGe%&*h<_S1K4u4AR%vZ&bE2
zl~gllG0gtG`<T|<z%QTvfZWnb2AYo$n^n&l%jQ#4)xg<vr_it&68-(;m(@{QTgSZF
z)5zKM8XYZ%IDF_J&#irkrbL{g$N+;$HLvonR5HQ-`au?5Fq^|~@1>+PpT>qp2Ag)W
zwd?bI{;~r8?0b7jW>@69+@%q&FLxtdq=ykYCM4*K(Gz24=vj=&mU$n8MQ;Y2CkI=%
z-_gc}tZ-@V<{U61CIv|k5NU?0xT}(TMww@t9Qc<8!z<uyglWNl2HOZ5n+cp=Se?9?
zr3ZVm_IHx~Lm-?qu55Ys`;SWFXXC+x-`CMdZ@i9k&z!(V_5Box1SeWLso(Jdto{oY
zCARS=2U;N+<v_zA3oo3_fj9S1QsTtI5g{1rPf%W4ML~WJz5V^{+k1#)-zPlv##Rc;
z%IQ4bNKWZ^E?Y2$SMFcSh>;yFy5ey<4t~P@Ti)lEZ+wTdCe^X-;9(9OJje@AJWNA>
zl%nV$9bJ7i_U3TGIn!zUq=Djs2u-bBH1-#8&df6y>K;cqeRTE?1$d8$0gg5IGjrNB
zb`1<OWn2~SSu2XRoB|>jfv@UcV@#T=AZ=9>-eA}9MpPKZ$45gG`pT2QdRDqPW%W5F
z-wH3v!hIm@O?N?hl^-ei>3k}*H4y4lLt>zbJxy`uOs?iw`7BcH&A3PHId}P;Odr|8
zlW%M%B71rA!G|f$5ll@cNr@3sIA`YOEWMi657yHzqr@UXq|u6ssvme2t}veRb1O-d
z5$MdDpH0Iot)0l?C5xFeV-g>&dyuzw)RS8_hU!=cpYE!s;n)clT(|&{SICKvUSi#w
z8`*d85MyV~r{%~I3hCsN-3<_lGGXcz4t}zM&XH80CN8a;OkVGPb{uZUJ3AH0Dddd0
zA~tW{Ny07kn~xr#aMA)+T)u>9V=FkkXFEp^A0}tg`K-A3EOPTpY2EoITlXF%SviZV
zm(HiFxtYczJK45t4-GweT(x2u=gpl#qV)&|8@rh>X)Fh~Y@s(+%EWTXrfqvUu=gM{
z7cb+ACFhYp+{&IqCrHT=MvA5|J?9`#y}C*L>`fqn+sV~3l(4nibiSg;m~{8;*;u%>
zr}uYnLZo%cxC(tT@cjiMtw;*(J4#WL{e*LrD-96uy@%Q2H2P*D%$|9Bc9FU0%Z}f$
zq@j)1yI3Z9%5j&#0`Twd`fcO;Ex|XQp5`QQr1P_MFtS`60U*)Kf#W?~vf>&RpF5MD
z;|Dp~(#`PDAk{O^V`%TIyt%a=DJ4e_9Ax^!Wn8)VJPJnI*n98<NeP{8t;}AsoJD7j
zW&iG-?Am>Rv2!ls%B72#P?^vE-Mg5y_(o==_VVj@c96u$TJgdP#ukmRWyc{(>Lx+k
zKK6G+xa!)QxcZ7Eh{o-_@aD&~w>Qzy9p{SWS8@KlnT&MQv;Rl~2ahyz)+Nii?4tRI
zo<{Z^uIETY2Md>7!-cb|c=eel*je96`M60W8uoIybC^jJYS_PhGrJBPXU^hfoI9n8
z_SO?@-@JuZ_qS6sJ;#_<C!^^!aK>a5-G#x=(q7bY7>kdP_#TQICq9L(6-upDC(~PH
zR-hCzA<UI5@4o)@w#{~W8bcnIf`Gp4*ksTN!Lzs8j)XT%ac!=I%H%Nn4|j0gs#`c`
zN<NSL@(~(32LYIN?&VC%Y2uUJ^~lsP4Mz`i@ZbS@B9)|$z0cdbni0`LmM%S?cVAjZ
zW6y{Ye|<btr_b~jt+Kl8LM}ukK*aeiG~B0^03n>;EljxraPFg?G!er2tvvUvNy^_U
zjHoAJtgTu!nC4+`oTp43i*N)i9GW~>4!j7F2rT>ZUr@dEUwP>LV=nCuex!p(B8Ws?
zTq%cZ=Xd>5IIDdgA0Q$T|6WQ;CeL5W-JidX4<G&^>$lYV5mG-G?ih=}d(tMb(xc7%
zd_JXXHX%<Z=vzSg8Jgg^pfDrXTtjMBI%@rxMa37=l&SCwpD1Vm!VZk!t&)Zw$9Ml`
zpAkI^mtGZv<+!25Yj|7+Rm(}rNxv6zfY3RUmAOJT?q<x=R$%l8s$x2wWV5T%ZJJJ2
z9~vZmE0Lf+Y#+#Uo%VM{l6@W>y<fJopYT*-_Eb;ug<XMHDh2R4=k_8I=g@U3B_W#2
z`IlbLs%5kJ`QQI7JDdCQp8VEhq31pl>5j*J8dd8jC8-otkDEZGuZ5=0e)NZ0{BUl>
zb>Zqbg>;{SmBOVF=Zyf~xkm@DP*1J8IY;#qMof0{Maumg@g#thqq@<kx(d9Q!u4%c
zP)I)WdlUVlB={N(GahIKlD-v~E{a;`GBYQLG=biWiy4bZ4X*oLy0Ai51hwsGH3{<k
zKpM7W|JBFZp*EN&t!gjy(sQ?GkM0(#&$rVibsDg0C9f_@$$-55hfvkae(g;gn`rM!
zpN4`V+e>=3@(4i4@+@Imq+z&C!}Sg=tE%36gt=<`baHx*aiVKbb0%vx5L!fLxbHgq
zn5lEA%jsw5zQYWSq?DngNpPd$O<TNV=s!xr9`#CRvsQa}Q5YPMZq}DJ=mc!Tla=X@
z>aIO=v+<<~C?*-BiR1KKSpYk&_za+F3OxcU3EmILJUf{wosDKI`dDX|;Is(Awl$v*
zivzFf@Muh5kA{Eu+rtHA<T%od4WoNBeAdlS{xZ<ylagI^0I@jJXU!$j)4<-NO{Buo
zzAf4`{;FR4;L;ITDNb*6VTDPcj>uHZgz{$=IKe;^n&Hxt8;VKqb5c4UJ?Dkt(H}(f
z+(5s)IIfBzr%~@~#sNEDX(E#{@biITm4wu|5vWSU=EUo#Ph0IG(9bZ^#VeF#(Ebg^
zHaq|7t!C!JkcoPLC6FAo2bB!@X0SsqEUR}&szzOkQu}NaaUtXy;znAB{Y=l%NOa!6
zlT4-%B0?+{L#9SZN<l0dbv~yc7LTGAR-O|lM@C3`?{gCo;xXqvae+g;y2#QPlYQR~
zYThi(gP+xvHembWGK|c-)YF2Q3o(naU?d33ta`+fghY=~l9blPn8lb;H6zGuFFUx7
z)^0Z$pVJisnUX4=_|qil(O`sKc0vi!Q6~O^_(BGzrAJFTpy^Bmg_aFZ>H3C^4eR4w
znh9lst@rWgc%;XzRx8NNUnU#yR$Rb{LHfY3FdhaYtw1yO(fu??v}#Gmn4b1Rk!R5-
z<Is#Ysuu-{HtpdcE5u14H8M;xC5gsj&dNlXZ|Gm7^IkY1qW;c}4_9jZ3Gklmwdr&E
zNPok^cvo-;gZ}#rLuZGQWHO0}MxE6W-Dc95L1=tR8}|&N!)+Ua=AaM#EV1k)$EFKc
zr+S5@q_jBgTovdz3Lpd6Dc;xVAdM_src$XSDV+Kz6b9~l3{y!Yf><o(uTINUl96Nz
z5sCV>kbY^u*q|>tx&;#gz3NqM?qx8jJ!umyWZ6w?Sarh+E|@owLpydc<EnePW63x+
zZ`sAHEAQr><<r>w>28J*_&+89q(lxYZvO-By>&U4TyiN(X4kT5>mEi@9u^fDoPh5J
z;<N>vxkagbL}+ifH<xN;bLRj6AOJ~3K~&N8r&AmpHckRL)5WClcvA6T3rEIyee6JE
z7^Aaop<wGvt--OL9r%59t@umhZnAQdyAX<SHG5Jx^nTKB)9V9O6#ELqj5YpvPnO_E
z(ezKe68!ZAudg4dq(-9_(=2`fazKs0lx);0tV@+esPN(XaNN&MS5@rm+jT-IUIXUw
zr4b=qtyl$)zFMdtXhvm4ER{6XOj2XswGGXWHJf$1YNu2*SrH~v-)0pbe!|p%+S`@h
ztk@&XmoL(!SCg|aC^L1W`?x)!uzf$YCJG7kYK$A-e4qDFxTk`JQmwIiq1l1XrJZ2V
zcu=zPyZf=#<J+Ujv|pVO-4|W@g=XJ|PJ}-;tccKKmDDO{MH9Zg>As`<P{H<PsiQG$
z%0YVWvfKFb7j7lL;}D0MyNm)%;Zt~t=5fW1w^H15l%}2pzHNwLj8mrJ*}jS)jxD*l
z%kvSd=yOK8^a@Q5THH>>in->d8=(0h?foN`T&3@OwxJugWzenpysvW=GmKR&Yiw9d
zFz;D--5BKajpo|A;R@USLO=_%S%;(Y&`Dk{jM2sh-NIZ|QysG)BBYBS=RKGiD`Nt(
z@L(9v4v08QZvG-Grp4K`?|3K-DJ8QOU(XlsxQWXaokz0$DD^G9h*&;LZ@7asH($%_
zx(J(h97aaKU*(h1dtMbYS-Hj(3{&`|sc(uwE>crz<!r1<e;Pf=1+K`NDE+>BXV
zI&U`5JpU1wEL+M2g?spSB>f0;RYXLYI_E47zV(m%W?wIIxSye<B$`)9Q5<rL%E*xe
zoM>)mL<-8QYAMc((9+bzz;F_XP*zn#K`hB&B1vC&2dSJQilQTQ_6|~1Rzb3_gZ@N{
zctI((RVDOyw$a|*53xKdit~u&77|VN)7;ug5($wQl{GaK#8R}jv@n#6Q&tkEqoW5&
zgtCe<`Z_yEq{8*=OH+4JOgj^n*MtRev%dvDEi4Uxx_h4Jg-%=Wv+Ael@;~eN1MuA>
z6(WPMOBHY$Zf%&JK*DeaWMc$#0-yDPBJ6WW|KO8o7A;x<=R*i%JD}4LL8j9S!TUa~
z`gf7;j&?6Zra{~q-ZV6&rBTNnqO9;}qhu1wt1w}PxXh^Z=^*XDK4y*YOi3y|e)Yxn
z>rhFha}SNj0FBr_t0r#x<<Tz;eCg~(_dl0=5vQ`Ynu3_5t)+#*WQ?+sTsk_s%~+U}
zlvLDER+LLmdkdX?gY0_$1!^bH;*8o7*oI$Z6qQv{QJhOhYYROC2|P)cPn^%CbF28#
z%l$-i@+d9LC7xFViEdijyO9!NdBxOJmqVhDmR1L^h~-jSU4<Cwr=_*sSpX1G%Brd<
z$&b?6)=E$R5QrERH8tc%MrdwsC6R*s;!<)V5sHiR>1u7EdoTe)P*zz>Ze9tq=gy^l
z!^`Yz8wxENM5rO+7ddXdi`heH#ud~<m|c=Ve>Pq%Lx<;(Knx)L6>`rn3P&58={kvV
z)*S6*hX-JoWSIV*33|H4K2skNCNI36dv3m(k<D%V?1c^C1PMV;^I=|oemAmeF5mit
zFR|yF-=}iPom@XV&c8momYjGBA{LE85Ur@J{DC<a_WlcJQx6O($x2Nsg4&rEaq*NG
z?|ihC1N#rqJv7YBMOPB(Y@xKej`CPPvAE<nuYJHELd|rg6C}>NYy~4nH*mCLnBjq5
zdIkm=d%?~8)i-aVsHlwDQ|j2WXD^d3x|y%9S<Zy1^H?&wiY;4qQ9W-3fBfYenJ{q<
zci(d>hu(jQyalUSb9Mpm?>NCNU;7_aG=4;VYAk>F-@eJ5N#j|1<x*PqY@wrM4*&4~
ze2si!Ty@h83?AOb(Y6Fv-1Zf2yK(^&r%tCR(Z;d9Lcaa&dpNk^0}>@O`R=z?vE_r0
z=o^v#>R!6bTg5op!_*nOyRr{Ot=HS>bn0)PhTeAjq^H3cmKKK9=+SK7(GX?BbGq-L
z=k7wt(D8;HB-X(Ow+79I4B|8I8$eOpPFc_p#&_D_8OF0EcYS49r)4x=(p_1PJ5>ZH
z0Ww<@qdj35mNYWKH>SA8s$F(tMFyJGxXj!I{Yo{MHW&iOFb;JuCF5Ma>OO8;c0LoQ
zPN#UdonyU)eDe?Q;mD^SGE_XBKl$b=wtx6B)$^C}jeBln{Fw1fuFGfdp2G|b4N`mN
zMO63hWz(S+B$B$>m-4NztY*sO8C-JyboOuGPR~#hDRQ~;rrYS<x}FdBHd8VCQvUio
z_mC6G<LVn$(za_8jf45z{k3m$;mq;Onm(Dvqx<O?%HigFzsAaivpMU6#Z;wQ*?qK)
z`B&V^msefIgb8O*SDa$sffJmwVh#6w?h<CsU&#Eig3UV)v-Ga-aL>}|6pf$9^7F^>
z=_flGd+ybI{R`J|#*~>Xm_3G%UVoK_?%}{nUPi5$DKfp-Gx+LgO19&;uD-Nk&;3Uj
zJdAz@HH;<^A_x1gqSY=i%^t!2=5Z*syMSTG#u+t@F7K{QbtZq-VF?j%hqOGK4T6HA
zTy`Js;+&cxUie^l0G_*UN>6(;iC7+0W2Z1@Y@AnK+rZ_w-%Yfmo-@vz3th+9SKp>q
zid_IPUE2<m9w{N#BV9qb(}}=pkpzTf@7quCQtfKK`tAQtZsIuq^mji5qU`>36PMj}
zJJ0><DaKuSrX~oFAoqJDhx3-MpltRK$G5-BQ*V4qBobk`Wgibe^bmt7L2@LAZ`^(*
z?>+R7e0ZRruYKq1OsRRDc`GjE<8?pf>1~7j`SkA-5g;N_Vo~Q&^=K@H5P~z8TtijY
z7XI<EH<@w87g(`m7W+PoFw}93=bwIrX_4=8_Ov=Sc2=-_K@C6s%fI44_b^cb!y^&4
zG(=c<)+Bb9E~b6QN3`{i_*yl#VqGms9NBmgUM+!ZPuU5yENY(DQIfX4yOBgn<tl1h
zMI{vPgV1OIRl57zhlcUH?VftItx&0VHk>MKOTp#SlaGg4YBadSIw++Ys)NxR1?~$q
z-9f?&K0YSr!4-WA8x-AMvE&CzPX;Ef!R)6wvky}VEf~C%pZNH!RPD>&qQTUG@B@}{
zh*v0C>o(<&MGNje3jN6~eJq--iclN%;Hs@`+dfe;3>l-V)jy!5*_^3(=c|8SR)cL^
zJ}*M?r`^+Ii=hXh)Zzb5qbWjzB>%<dmXei8DO@51h(U*!)HNUT#<*eBr^&PPptyo^
z(zBNq65KAXwxmXzk4*J3ntbLUr)o0G&#U8SfA!bw>r4=hxUsMv7M^tmJBk+3wR;1t
zgAwk#ei`pR`nRm#)kG{B@sGpkS1K82`3+aou<_SC^WGso_r>4i{Mlo9_QS)JjGN7Q
z;}ZPzvHgy(MxyjK?&q1uA7xSD_c&{E6<a$KOd4Ct))!ym!|exYYU`t7(qb-|mCOA<
z`WcDpbGi4bi+N*58_O;~hnIf#x4d__lUO8z%&Xvvi_helpZpD5dusT>ckf{A`ge)O
zBkca*bspH*%m4h&7pN&LVDaS(+4$lw`Sp$j|Kt1LH2umo0O`hTODmdkv~zi02dOmf
z7b8;HfF4+#IHGhX%-fFUg!Yhth0F74&B5_~RF4Uo?u84$&O<_YN1$vU$JiRzk3QH(
zRn$=moew?Wd+izwRNfF4?`kG&ZU;MWjWD;{eR(M{C|P(y6W&n^SE8O?j8LLJ8E=8`
zjvxp?rfAr;iQ&@O*q6}dE+i=#WzI!cuwu~yn)a+`K;}|4wuYMCUjF5ko&3SK|Bz#Q
zf1mB`(vVclQ%Z2=C;b5Bi%)nVRNwP;1uGOwa;TkWAN(;d<iuHa-(PUuqBGbJq}uoM
zqksGtPP9k3>dM(3_fCQm3Z5Jp<fSKm%8NU?H335NiQ^=a&ZD;}@+mGVVBW$@8GF_+
z?fbUS-Y+Q5kJ8xNM`9>JZ~q`(@bn6rSTu$}P*PG##kg{=xxR!*w3nTmI)F+B26{-0
zNQM(5MB@?Sd3i*W{dD&w)az0h;r+Ki<{LMzU}8=kZ~bg7NmncSqs->MxObk=<<}Ri
zFcW|q5&SKWkX1X!xvY%h1$f_p)qQ^k6R<tKAk&Q61FK~zyI}XfORbpj2W#gpd?|R@
zPV40tbehv@V-0iE@S)$ARwq{u2oaElPN9?rQ$8$(#?g_nbnS`SREH^F;iusM3qFrh
z-8FS(NXLM0T*mw9gkQ+{g_%*kYx4DF;4!3#@qgOMP@@+BRDa<DQ9(+3ipu|v`i(-3
z7W=%Y&u5sB0!=D;>F0WUp7cV_gQ*pes;^cB`3ccWGA<|j<iJCq`K8mQz3#x)l`B*4
zY@JGk9!?cEmEe>t6${F@@n|z-=tD6Ri_q?SJ|?XDQt8EjVz`%Q*N!~mxp_pA{dD&Y
zIj`hMIkE7=cR%LqH!Npdyqb3(c#>o!MsdENt+S6*Dn%k?zT;E+|HmmVE@Z;I^Z8u$
znZ!E}ajb0sNt_EWyNqKSe#42L5l?XR_w_K6lnf82h{hwx{sw;h;|E!M(RuvQ4XauA
zuYb?pXfZ`))hu0p4I@O^y=xDtNDg_?BpqD?q>?FyQ%T}Ex#UGh=;|LJ(bvlmQR49^
z$OPRzgUDo(q>K=a#K?~ePW1ONG&DkA|DYymp>m|BN6o(tpYZf9@P7neg*~NH-<DmN
z`ggB~y>0FZRi%_*yz}oH#-Q1f5evhQmH3~0@`cZ&F;NvEV=ee5@xhp?+D+dzeNFL3
z82%6%q;=aKWJf4-hsxw%#Stu>=yKJIN9pHt2}GkYA|x3e@c@nR-nxf)_vJVE-~aGk
zratu=-5ni#x?v;xb~mxBImN`fBDS@6ySC@xi7A?F)|4R`#^x6l`Y3bzweHM|xNv+9
zhnxGDJGq1-8@geJAR<QC_Q|Kj$DX6D47edep9#2z?>{KvNkh|pRJ8Nn2YVRPzMuDY
zoS>?rnC^itHt#yY)t6pISLFb6Cs*?-(B0I^xXTxC!3C0YW{#zS6uUQnM9cI!Z2IT}
zD5|Enz8M**BI45!K_KXBKFqP9O0HS4giq@Gq3bx?4;-iEz(%^t*D$U3ZT|H{k7g`k
z25LHs9*oT)x)g(Oi~*JbE7;mtT|=Qx5`ItV!VCDJrf-p=;k2E~L}&122lq5sOc-9C
zwF8><AzffFf)qg?^`#c(5@sdn8LLHy-BAlbmcyUQWd?5ZAqGEy>n0-1n4|M0?Ec|l
z!2GCZ23wC|h1zFhR|64luL0beKLz1vx)FNZvnCs11-{U}WBQsoNwVn-`lXFSXaj;1
zF|)sI@u9MoN~fVJwe_0DCiTf!U-L1J4OFml#l>tq)(71u*uKAkmII&AQMQIzgYWZi
z4c*9Olux!caP<u<7<g+Jh50EqeY%rGO3>BX!T8y8nZCc1!$*&^@uRI2)OGRU2RkS#
zE2pWglfv5RESwtSq33rQb2KAo2oU+@OsOeg*QXybe&#GHN(yM(w~IZ87jS6DCK~!<
z<PEpb+tteM`a!O~?n)w`9wUc7w(UH|-bTstE0$8-Q%hmraay~EnC|<MaC1*TJN7lR
zbm1lRR17nFay9Q5dwC)9v}0cyJ-ceM!lIT}4f`ZKc6i1j(&$<Hl1b0u(Kk(-<r|~<
zdRB2gi(X#Q9O#u{m>r3;3Pl+OslJCUHUvve%IYREd&+pqY6h7zZ9F><G$5tq(mVc`
zvl8#}Pix<1MCNeOva5)9HxaF#Lxt?3XK;v*-uaX-U%8y^$$gwvo8sMir-0MfAfgqO
zRX@=D-eKBU{ZYM*CmY>hr{=FJ$R%ZjGiIF0j49(evf)Ku{AeF35v9HU0L{G#$2pSy
z>_2df5#KOXc#TBi#NoZP^^K^-0udq6-OQmA9lqT5?bt@qxS7nIIf<c;Mh+f3!O?xY
ziPcPFY;lCV(h7Ec@CFBt9HB3F43jHh<HpS#I=G+uqX#+E5#yY*=Te@V<lw&jbPXmE
z!#(UjbeyD!(ARp5hNc#_@7T}S8S|K2SH!Uc`)KJNKuW>HIp=WvqvzOouuUaHZSUS=
z$l*a>6$n1jXV{0f&E+;2mZC2UJ1A7r@Xj-sZ!prAvNcl^LecB%-U<}j!bR9qZn{_b
z?-%lPrc*9-*+}1pbh-HN$B`KJ6nKo>CDce^ezH*nt0&|9IEPnZ`rBSEv8F6c7`Sw)
z#P18lSngMnwt}xGD^E851V~@%mw}*dzIYQ|lPbsyZ?#18Q;P=Ar*-&&<PpGupEQiR
z)flI(Tw1Z+hho^1`J{N54@mc4eOo@TWrNb0E-MwA6b1U0h+YKfPiN}1cm=VvY{IiS
zU$P>ceslXP1hyt++s_cD9QAD@)zxswG`yS72YYDs?J$EbOrCXwQmY0<a)2E>_ft1*
zE>r7DsP|(bOqhK(jT@fl<Nd8bier0sk}R9VycrYeI&qkzjUA)}v>!i0PVEe4jxXWx
zp+oH7vx`JQ9rNeRAe!i8-+^PyzU+ERnl|vp=EEQ&F3k~=105VZ+60jZolQq*>>6g$
z%z4b8GmR5l-{$oVyBO%}VAp{rrq7+vq}pN{j~?PgOFR2_?;)>l8fTtSL+i1F)Hk<t
zaPNL9CZEZ;f_|QTY8?%oLx@O(j)o&N_Y4qam?MV{b71d2;x$tllRw0k&0E=b;1E4S
zNu@)>DWoPoP0j(|_F|GFU*E5-aYEO#(Ddcmi)RXYVA3adn$H=r1D))21(Yy4s?nK8
zhQ$`U(SEvLx_s*K#7mxGl%>NJ-YMwoX;<a4xa{p(KPKVnM%$W2F#gQ*xoB!JC%RKq
z<qorT?@^?L+=3DkZAUn8+^K2GYR}-TIkTwD?dOSy9_CQ{5N-8)>CK<OIWuZ`>zRky
zbhOJstjR@tPn;)|kZn~%?G9o7_Ji?@0n%sEOl|^5;b%bU3c`KT!g!4YQVU92VAp0p
zYFXFmYa0MkG7>H2+u#2#@7@0cJ~^K71F%}{6YhSw71+=XRq>_wi{uos^0qIrbjk?-
z=kNZF6Me&mL25o0Fckm0;b`GkMk7<PH7jE@=t<{!kcQawO<@lZjP|QB0kB~mG+MHE
zwo`RWeYhQMMhB^HrT(rjBf*~v`<cdM(!*%D!itj6A?XuBX#X%fO-Dj#5o7^23YK)f
zVF5@J5^WeZ@7S`C`nceO4BA3(0A3g!{#Pp~WFp_hW644r;KWV>F*A}=#MP_5$feUH
zKm7Y2)6hGtif%pTdXpS$yc=EJ@b1L?-28kP8DeNSX%v{j*o=?EgC82#y&)Do*t7_9
za%~II%rn8&`cdOrHur+Fp|rZl8s>k2D}?Q=?m>2i<Zta71(A0|N|}{FM1=DqWWR9N
z&)$XEqbkHhcNd%@&W=iUn#MQWEJN)m_ski&&hV*jH3i=VB`7Q^B+=i?a5BUQMomH)
zEJs8rE-6JO`snQ+!k^ivk>O}YnDGiNq|!f4cY2IhWu}bD<-hQiHj$kSjL%@N{~0cO
zrf=NizidAE|MvBtPBDVw(lRF&4h)gDa{LTecJ<DGI>BUhjrL@;$5&vgbDG#qpP@%D
zj7BS3vnA1=Y`Ogd-~GSZ16=`J){~^^aHpX$Jx=^Zmu$tZ?*Qnl?~4A+I#+gWQeltx
zS97P#m?`z;M(O>lzYI~o^qHZ-nfmNZbo3%dn-yu_{R-8E8NR)s#)dy;Y_8&=3s0ME
z8CxSdI~q;?)`7*eKD+g1V-bB^0KU}v`g1n5M6BTs-&R%Q^=%q88%8aUYmKIiye&&l
zD%Q4=%c}8|k;T703$|qy+!l3pA#6O-Epj~_<HIl}4w{g42T<ZMl&*}~#~0j|46Z=f
z+iM{8)BKjLnLK;8U~Q>s4yp9(tx%gjB@8)fderPfZN*F9C(Nf#wQV|Iv!+gb{&hdr
zEFrK>X{};QG#l{sf@?FLVO#zzaB=B4!f96WQnY&Cb$NtsdDwwbr%A;y<$T@>lkU-m
z-}x&<DEqN^Gwi{)0JSkKjdo2oHeO$<dYe8@Xfk(kn--^i))I;?GtFl`Z&FW`Yc>|J
z0Ux`Pu>@&T?8j>S73hFJNYzpQ{c4XlBV7cZ9`}4hwTIIh=A$OvI%?C_Ri4o4MuPBu
zBh_0O@oh<uXBK^WJi(QZt}BghJI;7G%y=a+-UOievFU5NKX~ITb?LY@x`MJaW2GU_
zz;`nqX(QD4pyLJ+nAo&#mhS~}f|YN*9FxVQI#>wVW28l@*(WSFc!Z~oF(Kkm{Rrp`
z;jPqR#%4Ub)bf=Oh^f<ONHoRj0@ahOV$8^jIxN4jV8)gF{7-M^NPQDh2;O?^pLnC8
zkbA#y7xT(H_|qT!18oV<d7LqU)gBj9aD~mHObf0$h>=(sU;oy9yng==+0!`^Vn0z9
zthk+<FFuoK1YUjkA9-zivmq=$AcA>VKc|RmZ@Z5b7fs}sfBvU@+|Z{LTlnX?e0!Dz
zoI&#{Bhl)zG=iNW4~)jp8L2^sDL?OBc4=MFD(VB8v0!4sv%6z;^;8v@{bX>$(Pyk-
zLFOc{48sC3Nbei07Sp2-xff{4rjOsA-Aq1UD*^jd4e9%2z!G|2v#?Y6?NenRDv8^W
z&4yFddpPziEY8NS+p^QF7y1F;9t9j4thBOCrx>$R8YO`SpgNzmq5_S6{UMz&@Y(Pd
z=1SRW&yE`ojFXQKIviYBXJ4_3+izIP&ZmFG18?raeiGVd#14g_owDI7G|uZe!_<Y@
zmL6M}FPj6TdRqd3`x+CKVmw{Dxyr)rgAIeP^AxD>yqMr(uo8RPJ{e;SX_OIeVK(gw
zr(2O26LG=fTfW783zB@}`~N`E)TMmo3oFT!l8UjD*!9%k@b~Ms5{*FJ>?`@Z@2}x+
zzIG=&+G7MSi6qXoU->>OD)#e7fAs)EE&(F9itqm2|6%auzv6G#?PK{}Ut+~s6G(|T
zlP6U2=U=~<?d>jo?u0w$F|8COLfj!Zn|GtCA$adzB%V+Ikq`OHzkZnE6bvT@8BW5#
z{`*?~>3i1`6&@ooCe577%t^KMHXq^Bt$P>}Im|t44zWlfRfP#Y{%|8*Ln)$pC7gHu
zd6efR*|m8iM_c-k(Ol-9a~|W%qiox_frgHLBC&ktEx3T<oCtMQ<;1O@8uN<2qDklT
z#bsyk_kaC&v?QWLhPx>pGo5j{9qc^ZLQZiFbIvGb`}Y0Fyi(3ta1NEZBW&CBDaSi{
z`SseLGj8H{$%{q&+CCC5;;aP=sLmT^(}quI?MonH`OG_K0prSZIJ9#!d+OUAk=Yr@
zJ3OqlYd+(PSlAr`#f5A1!qKV!NV~6R``@5n=s1Edp(SIfbWXYg%xW^o_j%JQJ0G-Z
zh1sN2h2P33m^DP^u$ty>Ku9{JX5!QP8m@uXDU*zY)k)ge<*h4tjlrI#0r^FQ`wl(f
z+nmZ~!WwFS_SCKixj+&(<RCH$JpCm!?uE2(8JDruatf%zk!N501pQRM<v40)zcRC!
zA^K1fMB1CG07&1*u*Wca^@jy@ENq(!r!0ssWB;rwBA1S!Pw3~QqxogbKj&<!3zF>E
zyosj%JZ4X=VAsxl42xW5&6&xGy*ubkM42{cK2yh(QonB-yACz7<u^}~S2>n*@^Vxm
zBce>6HJ_Q|%Q(1WGy59a96GZyYoEK&BE&Z_Pc~kIB?o<FDA;CW3mKP=P@xJogI<?6
zk;aD9H?Q<DcO(p+Z2Uft-VLwr@INXq&|^~uKkyzT3SVIh7lObvIUT35fh{FP<IZOB
z>}ulq-9!a6?fa0w{j*OXTFf8(>G#;Y>nPC(MDwe-{<<X$^rpy->Jl>9KV#A7xMXGp
zrGo`TJX)mWf@RlHl1vgW&O=0!yz|sA_~=<d(YW*Y%Wp1WZ%c~*%iEjB*>x3n;@@-M
z>s2jvORc4)*1p@;YAu#z%bVnV!5F+?FvRTOKp4pSnVDah$qX|g`6QV@AOu1n1h9>R
zv9Yl+@`B~vV6bIL*1qrEQt#_~_x%32r%qL!dtOT>pP36*zjyChs=ihAtvcu4_ns{j
zJcb59!rR7xHMDhWc{Yf*5;Sp+lOv1TG-Vz(Zrq5guik*x#%frbqEs%4twRG_ti#Ny
zEg0(U#;kR>;{6*J!&HsJU7z_cSUVLMY+8cf|G-8R6X5bY?#1PEM`6I$qjlmKSexR~
zJ3fQk*Uv{<GX?kF`+JyBYq0X>kKjEQ&qm{<IaoMzf*>oa$ZhtbWjMX_*Eo3c47$#=
zqoZdCY29Re=>4~#u?jfv(p#}{@nn>fdVJvC&*PGL<4~>|jhRy>!r1JGkW;x2YN~Pl
zJ)gt%%UV$!y8yrc@!K%E3Rt}1{kV1QObiScFr}qA`|@GsT*f=FeyCO&J=+P5UN`bO
zywmgJxB2=ZyhUQ=HDPmV=N}Zdbl$b6yzl<Tr1x#zXfy}O%s9Y}pP$RiA!M<T?f3y@
zuN$35=%XyD^nEX*HEOKrc$WKTB*4M$SG_J8JdVcUWwI+Voa2WPc(I-eUk|L55iFxS
zL73%k&(IJa9k!2e4<274sc>{{jlasb!=t_-bqQ@Pl_2%RJaSBV7eSF_VCUGm(05q(
z{JDI4sB_w<!-JBIRXTcK1^{b<_>l7G*ikr=Ebe^x*ou@*`hd1I%_Z=%8snTDR9ptJ
zi9lwlx4_Nk7#M%+tRfHkYlgYO^0=@-S{Q{pKK@x;e%?fsiw&4DWfG87VZ*KO!=jdY
zjBZ_u_uYIciUzpwn!9k%jjK=^PBCrvEYuie-w#*jZzi^BiiPXngHPOcIg&yH?)b<@
zaKYpTq%uBYz7d=wS_m=cJ1`nc<8_0_G8`AOjl(iTF60gACo0y&3*xYxKMT&{0*wm%
zu(-5BU)^6(oN@aA03ZNKL_t*AlJ0mBM!B1TYD_VV*H?I`&3MhrljDgH9H)krXwZ96
z)g*Pe`PPki?(t3NFT2=EOX%zCL(Rmwn2JOA^`3SB36@-bBZjtZ!hs&P59I-R4;Y--
z`V{`=Umn8{eSe?@>L)M5m2-yi<g0s-xamuS1L*A?!2GqB;_as%N58w>&tn0J2g(06
z9#o$oTr_(e33ymVn#Lmxl!ph=-P41v?rsd1t!7tuB(NCWG7A@9vK|-Aor^^are%Wn
zAH`F@co8o>^)y<SUWD3Wg1(+!Oqf0g<Er|w_i#JPwh`BFT!h0%Phq&H9SzeKVcNtN
zti5mso__pM{Pbs!;_V}yYK0*kt-87f!^49}EiHTuI`_PaBUQ68uXP$OUo{n*U)hT4
ziF2@M!T|2S|6%;%=a1vXt^4)&%Q{0BY9?Xhx>-1I>@<eD+Aw+H3QTOO!$4mzMo*fK
zmN5zT9XO8E>VEWZ=D;TBXzWkH?QY=|wCdrNGheed3^1D`dB2wN1*TPV_)o>q9HO6m
zR=^XF2v+S^JV*AQ(9=1$yhbZBS&;f`g7oD?azQD_;nQ5;x`|mv-`<F}bd7cN@*2pI
zJa#0X@8^qDm$Z!aLaFN`9bpr%Z-}YUWT;9i(t&OjNL{u5p)b*+1w6i3C@3G0&w_B0
zVkttrsHFwp%CI@SK|UDB?E8xXB7{soyFxJ|WLEYz0hX+%dUOM{S`V-{Vj>|Mz0dg+
z^c*?8t@L@mF6S_$D^~<j9XW17TeGf^L(^MJI$JfrmKgzNd>(TA&@P5Q$?A2H_7aEY
zn@CDA7WETmU{OmM4?ge{{Nm?N;Ds%FG1z?quWoO{%9Ts7bmfIOy5(iG57*$Li_XJS
z4?ciR&peGMo_ZPmd16Ou>u||stI^wb0t5X$DA%=M@q$)om%x8wJ}S;}Nn?WZ%>9zD
zydT(1Hz=FxHiTsu69gRFEE7D7H-4_!f;kDfoFXld$)byG>wQkPby1U53cj8Y@#!#N
zU$wHke{aS3#0^Yx&M&>5th@g%QD5LuNvze$<qlumT3Bl^WA$~I)c%_LdJoI-Y=PyM
zUWo%QJ%uyn1fyEc!*xsR@X%9R7=Ldku>B0s-`S4NfszQfYP{?A>+!;)kD{{#UT^_0
z_2Y5%>S=iVmv6d)%_{#%0K>`JiEqh(?zJ(7f0o9b^G-m0NO31s=ds;*`l+X|Y11<}
z+%ZVVf_EWOjGMU{SI$lGwZH#6eE-SUQ7C5nk0z-?v0#y+h=R#Ld->sS;QxH@F$_(<
z2%q`D)u@5R@X!D{yL&J=)Q$VU{Y~sT(VZ1#4(KEa?xl*F{K%n$m^5`3s*`M2Et9Md
zzj$F6ZhHUSnB2b`+Yg<AwJD5AU~G!CTt=F5>1suR3s?*d^rNf07kzzg_|89l8^=3`
zu=DvJ;jg~_LmV$m$GxAp9Zf~I+Vp9a9njlA7HH&WhF9rq9MVVLRFmUp&JzY#xWcmH
zJdXz-U=lgcUJQUc%ZY=Zr?bIdF_MM4rUKAmQ*q}-#<~j%Ng1k-lrdc4J0h^OtPc;l
z<P>_UIjs?LX}NVGm&qw0tWc@&s!#Sz_S{NiEmzB#J)})cIF0Tb=mEg@lkF_E5qiqq
znb>V@m%4&Eq^3+dM%g|bDPl*+LdOX3D%dWc{-X!STQ-*0%TJ+qZKb?9A60%SUJG$$
zyvSd?unGhryACBG3s*S#YtBc**bIs&;(Iw(47a$~QXJ^>p-pw{`;u5+^YZKYncrx6
zn7vP}&G;HA(sGJ4O<_zKZ@&C8rmVUaS1%ur7hl~D3m~-y1!Iw>SvzGz!^yf_Dxs^p
z6a9mI_~pYt#B*;Qb-Ws48|bUfSXJQWcT@F_jZ~cdPI3)$_;MG1pY2FVOlj9ke82(X
z8Anh)XSBl|)bRN=id|iN2XRfZfNkd6c^{APA~?JGGSm{!TVGSqZ4RPxV~qcRP4--)
zTVTa_UWhA;*^K8C%$YkEvzK3m|N8lRFn`e+y!YDGNKy+kY6><i8;4CVZAT$77~3)%
zGp5bQ=l|?avG#)5xa)&=pgzrVZK0|L#e&S6yc3z)6t-qO=Fgan>p%Dh_}I;>vF3(5
zapAN^SXi_yxCAYozrxm2C5@0Pj0<C%o4-gv@lAVYWFu1}gHEeCojM6(gX&2O@!?x8
zMa`IrShoB^j2b$DgQt36hs#*H;Rc*PZ5;aBj^ISE!DUxnjR|9#uzJOEG#A_Pvln(^
z<1KgL{K;dmaP6h&-uetSZ#jrnSG^aj=TFA?$<vS?--=hZ?n0NDgf-_+fi2Ww%7jMj
ze)|oyo2hv3b?Y#7<|3?F+KT5MdkiQ0Q<dlqfWGz<m~zpL*l^)|ELn2}#`f>S-jh9O
zJK2dlKmL38%|qYCtNTu2xUU<H^VZ^;b&D`#{`r_#=*9kHr(x3+&9jza!<q#!!+kh#
z_z+6<GjZYE@i5g5Xlbm$_8mJhbM<w&YQ-Fkoj3*c1BbEswYM>3IZ=hH&ddSfugT=+
zSGC@|$uUI*<lE<0g;~-Nt8&A5ggbW9k(HdHofX#+&L!lgJ|tI+e0U{ha@a^2bUt$r
zCkt6926MVc#-a8A2Y4356c53}&w&iGpMl0NOx!ZjL~)gJit~gB$x91|>~$8iR7{e8
z5VqXj`1kUB{%;_gFRB9n@GDB=9X7u^W1t45A|Zm2vbBFURDSx*Lzx?c=G0h*#g}Xu
zi!UG+i-sF(pMl0CGYQ3%DAW{o6=aw3EHIIA#xW*+0Y3T3qM2lXg|eT&7Q`e3-T?cm
z#D>mG`9uF;;6)N8oyE_ANOzYxL*!0%&i@g(8jpka4+DeYz8;L3wF1|!UxFF)7GhHM
zAPyXDLtjr9X05&vGX~$n{ZIZ1!)X}<W-Q+Q?rSimWg5<(GXeW{?#D2IV%<bscip9E
zs4_Tyct1||R%7*w^H4Aaw6sjZiGAB}s%IEU;1jGvo-6eNCR50W_xZTcJ{jXg&sk9)
z%5#hnv1$co#9wRx<FDyC{RF~hl3!T{(n)cL<^Iy2jOQe_Z|HB_mVAeC&K4bP01kJS
z+v{OW@S#JVN5(PnFbr^H=Wnq2g_rQkjt;C_SdXuL>wa_%S<GI21FmfC!1o_{1!ZH<
z+kODgKKDFc{?%KUyKFUn_RTM2PiG2Ktik)g@Rzt|Y7bs`>mUGwmiepj-dk?K#Y^TQ
zDGy-R&aHUy`OSFY#TRk9ZUM#|cnm*yZVxP~aqGuEi6fi7kJk@%YD@?Z{XKEkB?bti
zlS2I3sV{<46IXFmGnSt>%iY&CICb!C>^<HMz@l;Dbj+JE5hwR;$HBIKOqxCiQ=98>
z^7tv#))lb#SU>*!%l{ca|Jt|FS5?OD9XruIl%i?ERLq<@5oWj-dv@(c_h1Pq)L_P}
zIcOeTgYNbd*mw9C%1JF|&zpx@JAlDb3CH&DN6#=3Agx6*p{qwVVb1KCs7r>iZ`V$o
z=_{dj{6hSXKf4oO|I+`$?zVmaKvi8M=FFLi`a%hZ_U%PmS3fL(>V|PxFmD#hoyV|a
z-%+5d9y4dpMpJzeXHFc!fn%po+t`BH(<h?}LpZp1588VM*n`;ph4a%bC=+iX7c43t
zxK)-oO=qaYj*cBIB*jCj;WR<Z+?<i|WNW=3s5D}wzTQg%Qq*(m?RoE@oZZf;FHdYu
zOy*FEO6MwDCUykU`FHUiaz8vjw!US;y0c$k1X~1-=J2bmMeIsU>IkVs>?8{xrPGM^
zvSQd6TF4a(IX$E(5$V<Wk%Qx$I%$hYIharL7CrYPGIF#L`FO?n4dkz6SyY%Q#!F$#
zbBPdH%x3L$1XycPRW}B+X3s=JRf@y=_Tp4$KMHl@@S)Fq4js>Z8xOv;55@pVu?DR(
zW?^DuE!vMA!olP1*=WTY%$hq7V+upqv11qd$_ZMg&cu}QV=&Nt8v73%Lf=q1SLntx
z;ax7PimkloThK2z=mlH^n-MtHH3*#iTpiU}Cbx);v;h}6@;%pSVf;sA@GEi|57s-9
zjBC|*1a)G?AG@yOhj4)z=_@(Yi&;Ge3ucVNo;~|9oC1xLW}>;!fjvh%TtV*|CRLa@
zXD-?gY(vkm0gzzkid%5c+F|_pS09D77Gow($AXzHNC2b*9e88wE)12k{$nQ2M6+qf
z-V@z0rV6v>&&BBjJJ303p{zoiGIQoF@b~QWYLBa80@v>*DE-M;Op+A=CGbW#lG;Xm
z?A{OIk^l1*94*nU#(Y0@n?at}BB=RehLK0@ggN;5J@3P*U;h*jKJzAqQyZO!wxIPy
z(U~LX@?*#y4jBaA2A~A$+6zpw+Z|TeOkNj-lW@#@5ybV4zfA!_C?fAt5hmkL?N*W2
z%J%U9=74hJTei2vMs4Wi{4gLL!HzlHWLq)6ENAps`PY@sNt?>u4jEgyc>@r6H5k~-
z$0eY?SBu7i_($Rz;2mOtkc0!fMGO22x~kX0&y;@?DAw6n>6MSu=<bt*HNY|?eYLVz
zejC_QXGpe*Qk(#SkBgXJ$r!YfKcjvld{lC(uxl*o_;^#gNRg%f`5013fONGw@)hS+
ziXmldkj8=iX~!sLKt3-Qm}iYK$t|LPc%6=<W;AZR;~vbdI)v|h{}G%XC<`0u!ze6J
zv7K)#j#!Lqy@d@c#7mfC*#v=)i^Y!QgK+$cxGBNiKYl{v_+D}axNZ=<Y6u~@_!6MS
z76;5Vfd7QM^m*fu@v#P;DZrMw&N7Fnd_uY);8iWRvPg_WMQ+SE#VXrGdELoesH(&D
zcYXjzetAEBbEwPX8rs8UAf6@X%gkkx!_Dk@^Q}(UBB%&(0Y0n_FgKfbtXOc%ueit+
z5)yFNCMgt<mP?u8B;dhu))k46v0N@8O)X4PK(Ua-law(?U@<f_3~LOEg+jC)-yX~J
zk|s+xzev)2fs=ujRU~Ps<_dvI2{vA&WisggBvw$U<;KW}IKwB8Ed@z9PacOLFS=nX
zee7Ngn=8_C_@V+adD~e3K3crS4qx;LIov-HP7XWgm5S&R%2;WSMHe+j%Ye#$S82sr
zLDi`ImAXs6$^Fb)g}AJo{K4@NiHqDg5&xEc7;76V!bw$Mlm%SF*v1R{O7Dqm`Mvf}
z$$hLV7hY!{Ka$iVxI~kgw5Le2+1T!+<f}*`j^be>GMIpSWFZj9;uBY3{}J(^@eszS
zE_SRjC>E=Lw1iSQRsCZeoS8%OTCC`|u>N<dEBR}<sH11D$gc<|$+bEC$+wBdB4FnD
z95K!%60FQmA$8^RPM*g}hmnRlmt4&O7swF$TZLG!DD=6?a1LA<9lwIy{xAmhb+zd0
z>(db$!!AeU6~>gq&z`rynw*2=h1QP64#UL(?`JekXhlt_11C=RprLsR#uWN+>{N&Q
zBxoeFhs+{KG*|rO1gm6F5#d$!c+ZDEfmsb@9N+yb-2eEC7%G)fDq9qa1<;3fQ){s9
z#yhZLb_=Qp4&h(E`(vCLqLn~pEThgL6STh`A7$huG=dFB6pGb<r{95kHeXh&KR20L
zm5d-?SP1dCa|m+CBnJ5J#PgMaIg8x!g0Hguv;yIp#Wy>v)cLvJiOy2SO7o4trMh6s
z+3t5cD#{T{bb(~&f>~HVDb7KpXm`ExW1qX8fwkXJ-{0*BN!N;N=MoF&mUGTYm;Wp3
zj^vg9fq8Xx;9ghe-*+@#<YHP*QMML^Lc#q~&;L!|keL0y@WFuURAo&CqUQQop%CVT
zXlnOu<*pnTqa*Qhv>?pEBX`sEHMye|t3l+3l*#FxuhL?cvDTuxz7bQWOu%60DI7W3
z36mI%Y8sD;<HlgP=QIwVIOD&=6=Ec7Eszw^(mEBRt1ONlK7`&O3)=B(Xr7EQ=)uwU
z9>@f002IbHkN=`E**pCW#a2D@W?<Ye0Ek5C^)jx7frY`HfA}}}@R~+E{@fOP@Xx=F
z+ZS7G+PnqBHbtpac3m>VmCI$6t%Xffhi5iuxl}@Gfh2JcV!BTdrD+-E6iAFgS}tcl
zCTk4J<x(a?=yV1^)hN9C&KvQ|?|cg{?l_3P!4hUJS&#SJcm=9@kK@>xUf=f2i9^_S
zxDW5TdIetH{35!Cvm$P!P|6ixfra^cd=wL#fEG;^#`L-&Omw;<Kg+1?S!qbRGE$Py
zw$nks4CUv`>MHqQsTh%%LVS#$x1?R4aF?di?g_11{XaaX?w8Mxe%IN_M>&9f(#h=_
z#Kxi<u%4y-+yH&Udz@&pnQ~?fWhKCjQEdQZ1j9k;=kX$2eJen3?aLy;p&^MjM&!~S
zVieU(Ft&<2|2{0M!s3_<Grw6Nq?WQ8W8w=1$3vdlh?m|$9t@>?2UXJVbkL{fd?EpP
z@;L?RI}q(G4M&(G+*wrT4LqmBcdPhse%CNYiRWl6MFoskxC?mJA^kNg_5!pHHmSkN
z%df|E>zAVE<Pn_i8I0nghmGv2pR*qt(K`pa=hqnC^ecBp*+ctDekVGJS!onsC>YKm
zFgY1$PS4?i3?aeDb{w<N{z0_>@qti8@v7iA3j_JHe{#o_cq5nByuAFO34c!dl9l6w
zpS$%ns6}zq1bpHPe}ZMxCt%%G*I;n}uW_uu1|Rt3XR&ndbX;-$&8TnRhMmW{gCaM3
zvErsr;Nv$eNAt{OxOVkayz<H$7`6t5`U&{_U;Hg@n%aSfU)qgAlF3Gx6pe|xTo#-l
z+INY9wtDGn6`bYd^U1~l#i~IJm=?^MwE~l?+A>hB!Fd;5gDY0dL*L2Wc;vB7sG7DK
z*I#@dstd#D>Ps+i_;ozI`7KPCvjR77ya<KCc0B&bPjTpUKgP{lgDVz{M$$ADO~c3W
zgNL8T##^q%E06ya$2x{_#VvQ>#A{FD?PFb0c1z2{INg2<XL^Q_6zg#H6|3>$lh5Mn
zYc^o(o`1tYQVpbK^mcWi%XDC<R8|h<tjbO;olUiow^Ep6<ns>vjE|_oMG{ntA@*YE
z=S|?+OD!!s2U*o$=VvW4(}}e}m^j7iM+G?zE>=;ht;zdQ){SH~0z(MB!onawiZ52g
z3c|%!Run{k9>;Prag(T9Lqx_Ei$!tJx5OJUcTnuwoISW70UW(86<dXRjMzsqdull~
zNs40By<wcQzl^i>@4UZwG7ohMZr8Xril~Lm;qy~|j%ZQG1soBwdIrwxtC8V^0$tb!
z2}ssrp37h&uf4(%ePml9qnI^*t61PbuSsS^d`=cbP6zqR8gd~Pdvrk+!%B9xxxemt
zKo=eAaSF?eV(XI@#~~GR5<gCSVs~EJCB#bM8Wab}b5p**xF+669^`n9+e=DGJVw{+
zCeFs%xufyg?p~~4vl#n-vKc5=Ax+DktF#R($j%`NRdEHeL1~0-%)UmsHkz@;JXYi?
zRyg}7$K_IeYxRw-T=6CP0?LM&9*W#W3>rNg`Iom$!1*!G$rWFL;*wir0R~}rzgP<J
z^R|g^KmZhn?t)mj%eUiNmtb+@VSM_({{v=k{5U>+!#X_wl_zoEH~ty}ef^kr***BP
z4Xg3!>xTtRWdm4iv3%Wyc;&t?;zx%@<Db6tNsO)eSM&}PuyVsq=s&azNo|!D&S8=O
z&M_<{3!%Dm8s*^PFJS!@>vjOhNnd#5C_eVdo3ZPaUtuT(8m6ztr|(#c*Pi`3PB$*Y
z9UB&*Y0CMST4+P<td&U59K^~iH)2eEGd}vM-@}PFp2zm?iTM4yuSGEd8d~PzuA48z
z?qB^9&;4d6N~Jzv;xcSlG8KiU*|_fFh3N0gK22(*4(f18FfC#4u`X=5W&@I;AzZlr
zYFxMeLev()319beglT*d_ScLbHdh$w7-8p<&)&omdDt&=KrK`j3oZ-<k<d=2zRAr2
z!!l|eVis025{b+C{R~`~>XC)+qK}DQW8K4BW-{?xn*~!YLC9nx3O%d9q(<5}>N~k|
zDhv4n#W*!^QWXnB4to06>!o#%e8(t4E9G`;E#nlaUO5p28BlW*>nn>H>qsSN%7X^Q
z3T*fwaOjh25GS4hib&KT9s?3r$Q>(ip!v5XyY=A7+@gIk@0T)Ju<e7w9~5KAwblJh
z{<Rl1pS(i9Aigk0*g~{1fZ!6S;>r*-V16f#qPar1Q9CO>Ir6whT$crb#8Ua1G1{J%
zA3panE_l%danJp<oa(wD9o<jxoBzwP8Wsiqov(kge}}r;&~BS>`I^Nrg$7)9&5gKt
z$!rt|OI5%@fl4|Hgq=N%uo;9uyX%5#<YK|UHqth7c!33HqU>M#f^jAkC4jZG%O`bV
zj!~H*D7b`r>Kyr<rD!K((t=cQy8Mq93)U-{y^?d0UR=Ad5Tb~6g2>v$2xXY(Y+$d<
zSEU_@hpfm)nL|szN?^-nn4}6bXH11nOX%tDLCF@d<bqlF?VE2yez3vEL1OUqPoKoa
zcYOl?`LiFw-j^T8iGG7ItxK?Beu|&Iu-kpxhrr26m(W*=$Rr9_pV;WrA#}}nSF(Wn
zi6WV|2)jeCzlee1PQ1MPG=LN{7OudA(Y07|(IuEMp%L@u&Om|^PM<i2<0sGH<cVV#
zEY_g0VKSywpTI9(-GY~%d<ISP7ow_=0I=Bg#>?2Ub02o>+Ka(*if5mA8jCMpi<OsL
zj?-H<<4D(VW^nOsgh4KHz^6F0Z!fw!PT)+^iVf=)qpSTmhDt$_V{_&R+sBJ?1mZW#
z#fVC_dQ2Ki5~7u!;|HWz6Z2TB{!-h_z;YsyoQ(*Qpy(n>&F0TmD{{w3B}pNa&zGVN
zhf`i}mS49~Y8%}q*B20h1C})yJeL4y=fDyNG60q@dp%_v;t9693v$mhL_A{7z}|9&
zo{^?VWTG9^5Ju|&LE=JJD|Hrg8zX#B7H9dKD_%2U>77Q}H4t(*e==4`Et0XsSQ=C&
z8P7eI&#_({f6~sX&==05c5A~TrNZk<*R7Dj&*iZ|^tC*xLsQ#Tol|2uAI<Ste)k}^
zW97z|g_Dm<1NtYYyWhDZdqsFEd(An)e}Qc1>)r*VPji2UP{=~@9w{2Krg?xRw~&u$
zdD(yEBLxy4?jv?sJ+3ZBhx@Saz)_qydI$|Omtpn7$!KqDM>*A;CF_WoKUkqC-}V&c
zqLDZ`lgaC(fst~vo)R-%<a(HR;PCKyJ>X!UDmduLl`IuBLD82Vm;SSUL&&>^#KCZV
zVR>S}ee6=xo$yN0AC&uOaY}q=4ZFXEPYhJpl5J!yL&AI9ypoQ8yd8|^t<y)|!0YWT
z_|rf91KhcO9y$hwv+JgaWgG9r>YDBN-c#G%c-9Y`aZMH0UVI_8JiiGKKlUspU2qY`
zkE+KFcie`9ufK+daZRWh)r8S?Ro-D}goN2o$|}rhNIA=wT*Xc+JGsjAWAq=wU;MYf
z!-?J@00!M%r*Y!YUcC7HX8io2f5qS5_pJD>RtBy-+=IcYCXBDIM#IDwnBH#x8>NFo
zgWgk&F*vj9WgM+tj1S+u1kb&&1trf{To45}OWZUTXAZxOA3yjY`fLGDJ^4IVZMYVV
zwR)FL9vrN+s^DPv=P9Z^agcKcga@N!gp@Z}AyVR^u_X=5LK4oYXy-%irg#$@{a)lc
zBV>btxRt_0D92N)2nIorxb^M)B`Y!D9XyIzAKU)-?|3ew3LbfD863z~2e%J+OUa)|
zf#b-ExqZ?johZ0Lq%JX&F+#~w`;S~*8u6Y6F|xq+_7%4gJWARpm5j9WLUW6RCNao}
z&Cw~%<FR3!**2_ec*u<hdP9jlmpJ|Wgp<~R3R+46S$r{VOJb+pp)f{8eiVMmc+fmX
z%qw39VEGPuCs^a-$0zMYJjmou@4X*O97#uwu5YJy<kSLyxk9eUWufkI^}30E>~vyQ
z!blWfvPcyFh8sExM_GtTxC9t-V}Q@Y=Mjk$$1T4n>_|__Lvk*{MdGCWLFi6A6SU7}
zNwIIs%lPpV&ttf^4KKZQ92c*@2(<;;HPc*`$;SHgj<n1X7W9h{KK{r|5&n=Ek?s^y
zJhs3<^q0Y!>Br`f8v(wEl>>+!ryLbnb(sq!-!B}430^Nod@00jgs-Xs-m7C2uKuzS
z{Okx!=0ii3B3!X&bw16+WdZ!q>IF~ecdOl>a{n28?;rjOKYZi~Y<_DW_Uzb=Bq?Ct
znj3NZ>IVG7Km7=OWq^vz$#a)u@vI5Xj<q;{{se5@vISe;coS)3D<+Js$8hIKOj*7W
zw_LFttqWFS$@Fnfe|Y(eqytyKLO>tH#sliBX(CktFex)UIDoRvzN({GE@5DB7zbb9
zgkP`tC~mpsT_~F>Y<=qc*jXCFP$|XG&>%|XG6n~SFwlJzKYi{HKK+F+pj0ykkN@PK
zF<7=pOT!o(DlyO&NNf+Dd;K_8Tvo#N!)Fj4Mn)EF_Th;%)8pi^BY;V8==d2dTfG_s
zZF@0L%7P}<KX5AjPrn8ytt^M}jseZhIaD~4NW8JzDGo{zplmc5G)Bq*uFPcu@^U(R
zc>)&p3T2}4Stc$Tq-+x7=rarc)1xj-y?%O!FDJ9aiRL9N{G6<_yp1Olp2x?jz&|Af
zpJ8%hi&b2vy4*eRl?G@b6Be?vpir1vKz(pX5Q{I`EZay7<BK^OT<r)6>}ByJ<>K8p
z9kT}Xd=|V;QWnjST)439jJhUcL(2%ZuMA0N07KJ@x%_Uo#4O?iel8Ikx!YV4Of|I(
zDjlEdu!OTrma%85bTZ&VfU#1x4)NCH;tMBTNO2-l^H976a#`<?j2&UggS!4fu_}va
z<0rO&*fDjvGOripLW_6!!a~urq~Y_DXH-LF67Hj1)kvIpmnz%^XaAB@h@E3y?N7S^
z03ZNKL_t(a%tQS+aiSlnX~L<2DlA(x7e}{k#&C-K7bCFq=lKiT-f|9Q53il^cJ^br
zUG*%Eh>N(7e;@FV*FOkp=|j1Lfz!INiaeF~&!8qyR?Jz{ou=Hz!iDd7PQe5DkvJbj
z<dtgqxEFauj_(cQA?0|e07rHd>ap?NcVXG|X4Ie)-~9VmQ9Wt`KKhxDp>E&HxaZ!_
z<Jeoj#CINj5f;Fz>+i;@!Czq8KRym?dhnA+Ud5ds{{w93FQ5RiKu*8=&1bOp_-SnY
zm#+e_Xjyd^n%8#V#hoWnNHiX1&b$TIZ|@4}GXAvQS{yXo$a*a`(bPALMyanG!zqA5
zHAdGa=<ex9QdNtl#?dh8FuJ?CP%hM<+LVwMYG8*3P*q!tzMdY~qzX+<jY!Ia=;-W5
z*%}n8Yf)EB(c3@d4LN$;WZe3Z&mw*8JNUO}cLd}voTjD`fBL2Wh2Gyhhh2NOV$)06
zt{q@uiZvM9*nqySPV^6^ShD^qESNnPS1lclKmV_Pg=2kXMF2s*$UD2ATlyh;giaC_
z92?q)L&6WcYQm%uJY#0_iU3o3_~b3gS2%*0Ka?$>=LK=)^a-yK9#R$-eMEe61xFb8
z5*#Z&(#@;BSduZyJ~oe=8#SCJt{W4EBDV-R0n&z%qzD8+SAr#wUFv3`a3G*_MN3{e
zg>_VgoIEli(e(<fT450O3+pA$au_c}aw7&OGrhx1YXf<-o)GZ>WbI5$=?Zq_%L3OR
z#?Gd@?9aQ^XB#6wUOr9(zwmq^lCZ-j^yyq^kvv{LH?^22gf`TH7dT>Y1x3DXe!-b9
zUZQ;Ir?&yxAr4Xo&4JamqcFO@8r@x;7%HWK91)%jteThEM|~bk@1B+YPRSkmubev)
zuonCXgfAF<CmX;Taw>dL8p@@3f}BF#&b02?yKFbi!1`iX%dDKu`v<2q#y||7N$aZf
z=&z87`8{sl)PfWAM}0+`KsMoXFbV1#Mq^ZM75ck7(K9d%lN8Z3b}Xt283w}xy*Sg|
z2Wu^A>PMr>4x+bzC>t#)VoYNrstXp~9UT}brGENCbsZY2EjoJ!!JBDLtn6pEG+zF@
zBb;J7fW`JNB?T3n;aHKs)H%Y$F_OwB8o=5VOE=tx72|vG<3~23W1tl2=j1jCCQg}-
zv7@Rn*mD{Oj<&%H2jEY90TWthqPd|OLw%h%bmRnv(|p1P5Ri|SF}GD86a(HFgW|#C
zDfM=PSf`LKrt<G$Oz`E(8F48>5{WEK2RS_)K7J+X0y9T8Y#L2m^gb5&;oPc_f**<f
z%6PX?SX9dBBYmYxrDBROq<IC2#HW$06_D`=xa7`z(qwW8eeCSTTh@r{kIGo*`V%2J
zJCQF+JlyEoUwJFfLC!mCoB}k~;VxkqHx=KDqKteXyAHq_u|cF4m3%rjzR9^kj`RZ=
zV4<&j7ZFK-Fmm(D?}qX*B(P&M=Y)Y6Wdu3IxRGLyZg=}J0y|5&V>pBh0#vXOtR6?j
ze5FEEV4N@P!Vr^km;f$~dxjcuJNtN87)#Y@+}O@po*Rw!EfF(%2VCSB>BD{umRtEc
zh;IcSC=8Ix3FMEYpOr<3u#>z)ko&T3XT4bAOwMc!@^;LdLWso$5#_WHQ*UfSq!M@g
z;t*cHus(?Kel9mjTJpu6mLuk0T2D{5N>39TGdM!-uP{SyKL0^HcPjhYEjq$LA>2g=
zWmqas9QjD&>w}HXr?hirr(!EDnGp1NX`1?e@p&XU1rmkHR*>M?WU5@Som<;jf{DI&
zF#`oveq0(`5r>>7R_q}lI3J|q_Lh-<E7?U~SF-qoU%8DuqGJTUSgF0l3}E47GV6R3
z+Q;OsOTRjsEO88Va?RT(k1@9&c`juOK}aAA%W)Mn`GkaUu3TXNW<x|`AizA=PZS<H
zISQPF9MMEc?+9R;SDcC^n94gkLRsXKQ_RN|_lZOs#*5M)UNw;*seNUe$9#hGdL+qG
z8&)acLmiu8o4ilPeOUUsw|TAr3i3!LEahM4BeA(gl1<y1Z8{<s2R<LOL8aJ~dMi7L
zoD$Q2WO;=XkA$@-a?S~3vGB_0fnX6+p^B(fTu4qCsoh9ph1ohczPOg}EATkSIYUmR
zSd3PF6$T0mu*&(@D*J}9sfG?6!RWe6_P2n)qh1)-JpPNcLZC*}krb12WzJzh#+S-7
zIVG6Ba5>hkT87r<dYn4hhT(FGaZ?vy{kqj?t|{Y0TL;R8daPQx9KCHvF<7SYj0$2{
zz$A58chyx`ws0<*YAsH+cc5G@V)><4V$HHem^o!Ux=)`%-(U$A7L8NR!^O*H;P~NV
z7&cW{vT8jh*Yx9L#{d>yxE`~bQXD(g>A?WaOTZc9|6@Ew@*sU<Z6U_;m^vJ(orU}2
zWbn>~EGXHm@Xsx|=j}LPgSG}xU!=<u>%djE)MJtD6`_g&4MIJUr7)wn<JoWW373iq
zDIY+j3QsoPgZ;b#GOj)8>~ou8Jxrd}6dn>!b~*+s8%I0`VdTYZZtt!XHzIz#3KS>~
zSNe~3M~q_wl1lqiKDkx21x2>ub9^g<FmWX2H8zZ+APpXv=acdn(bzcq8&#l@UIygH
zte2<;ee4PD@$AUsGl>QE6`H_Qu$91yi%RVeB(J<?Jg5wY=O{zItP_BmU-+&ej<vc?
zE{^>eCP)5~kF03x?d$o)iVHH#4EuqIO)id^i$rr!EQd)dCzlO%68L$pl=jgv=i|*|
zsdeU`|Fgl@bVRt&*@b-E+fC|6xIrdzEhoKC5-k(^Unm2Q7ahy%67x~$alLy7b9%lo
z!5BP=y5_lzI8yTl-_Ml0&Y4WsaY}aCl}oa}T;xIQ(yAiGS31lkqLMN)_XX9KF-HAf
zD&Gc0FqY(~01|;$Y%#<+$wV>8dJ7pnT)ewWc50rw0GD078k0sF96jC+Yl@h$U>Vk|
zT!z-M)i{2<4P~dhEFP`3DAqM$)w)Ztbm0sPbeu$2-;lFef|*M$!Uf}j11Gxy`D`SB
z#1jiy)aMGrV7dRYTu08PYh-I3=m46}+lW8@$n~f)z|0FS!1&rKB#pE2$A9>7jIXW2
zO&|RnHeS$zQdJY)clX_B9ThmCbTk8)!Z>{7Be$cvP=!n0^GST<`sFaT8n=J&0~lXp
z(Y)Yt{Ml#j!k8+9v<$4h;RE=i|MY2`*HVYHP=_1u{uJ)L?JCqaOvK%v{Uf|@;}Z7;
zc@!M9x|S<zQVOC_ATqOFUP)?d1sdq1m9rjC9|QtNxl^W|QS*rixwhUeR3K2&5He<C
zv30E&i!`8%)^gG4lQy;TI$KYIEFUKyc_T;?viCLFM$Tb42dQ}>^bkb}lQmRZe=9;H
zGfntmg5eYrC4A60y13&43|bIJ2tGvy$wt~-DGd%g#>@x;56Ft9Bl7mweO3sHcYq2W
zpQKG>R7y5vVFKJ;<0!~Z$ijjLU_z8TLf#gUFd@4eAr#MT)V$K?L;1C*OZ~NP1DQ`#
zL8jG|amAF7lh#PRxo`NM5{ACS#eFsj<E><bxK$3qRZMIx@7M^PMU9rnNXD`#syQCn
z-Vm6@i_eVvRKX3+AJ%H0;aC#jnsA3*7VzN2S>aIFAZ{10hg#Tay(}zD^=RC1$36H@
zpZO#%n&09v&fje!e$_Z-I!nG3zOCBkv6Qi_^9%&XJ9Ly?f<E4v1DPnESZHAf;e*1*
z{n(z1C9ex5jLbVcz7A+oNcVFBD!JLG>6|Xaqv=+;FLO~qPg-{6+rmaR(npe}1!~95
z#OMF;(`YPMTy)DPaLdZ6NUBERlFKf^sOlnCzw49u=(URhAqx|}#l{bQ0XHt0h_qo2
z{_KxGjiy5O>sEEmv+>E#{s}&G-7=I@@_!a2@#IU&86lj7`z{qQ0SJRDyomXXLF@cQ
z=-Klco_OL3eD`1O!=au5%viD>HAi2<j~;pm5By>;uDEnDlTHNu@O0YOhG(C89AEkR
z{aAhV^=PUq0K+|a^~KHj{sRx8q4ffcZ%BZ;Nw{!^!7qMw7|Sl04Fh1X=LFh|<FRVx
z8VnuZj{adWcvcB<PzaV`yNP1Rl7Hp5r#%k}Mkho0F!o#6v|<xV2HWU9SY1CW3TjtT
za{vXLXJE2tFRU;bWPt@ezsM{;*;uW!Fb^k4U+-#z`CCqc^cVx#Fkmlh`PV`M|1fSo
zzOHqq6V_%y=f7mzvSwOVD!6mCS56o8K4>TPG?Bp!D)_l2l5`l0`NW1=(1%IL`veJN
z*-*J88%sK(Adr{6j|ehn-xIp{IXNasFW3;bNa7dqqvsU;{hZF8Oh?>a_?PliAtqFN
z5}ke1xZ+8w#tB=>rpX~J?37Ieo^(E&U&oF0c=5P;Jf4%eD58@{2-bavC&jg~Zbw7n
znrYzlVB3W{`(*6xA$&t(<6?>8)C&CQKZ*~wgM7xCe8Gr0S$^T(!XCoyrO%Pvfvbi#
zvH?f2ei<@#<ELXrZ3musX*bThU_Ppn1od^bNDS)?(NB&~e|ZNp!do#rgU9k#a*ift
zG}KNwz7>j!<+buNe@)mdoKwXCw3<>^{-FQ}18dvp4FC-QEuQ53**QH9`ECv$5x>f<
z14UwS8J?FhulhS5WDIR?3;-BAaRHk9x8Mgq`5Aun#2dJ3;~ESPbm6{leic7{<Wb!J
z<m*^`{yfk3axQBvnkO`3`)e=bnHOIJ>c^s{U|~%)Hr#vzwrqY5)_p8h&;hKq*_QzO
zz>=%COm1CWhXhmM{EinMMfdcL`0`)<B|dxi^=Pa%7&~qfdOJ>G*cf!RokBzN1Qha^
zl4!Y=2^~XiCtw@Lp{}M1wyFVZ)?b0UZ@&pgx4(ju-DNaSKMxblQ9SX~Tex7w5>zDy
zrreKTZ|lU}?^}yEUf-#6tlXubTT1!5%pJK(mVHSy;WJzaQ6PCIr*)ew9I3DaSO^!a
zek<7^`_f(|2bAN{m-<@YM68g5Rfbx4MVy=@5;JLljQT@rc+!z8A65TTaQeLClCaCr
zawkxcb7XbpMKSW(2xmxR)f`Gz%gM{o34%h%m)}97`K;1|Ahd2QTDe+lG=3f^dgtVJ
zG05WxViwp@@bY5I>T7`aJDSV5@g(8;Rkv?uz1+Om$gcRsF72vV@$654uazqY63;-*
zZOKtY^~z*y`OcX!%KK;>>$3^m?R?&u^}a;tib{(XSZr4Vic^g``Lgjo$U7a@V@dsd
zPs3OX1YBSG%j5eNMz@EMosu>C6XQy|&9c9bL5G(Sw=_LwUS8>A#>*dWg3xup!%cAt
z5l3Rj#P4eQ*vaO~^Q2@^nv;A>_=^E(WyXo0IL1xucktvbG4em^%X-n=`CFBwDbRmv
zKX!CA;Le*bLv6!E-1MH?uxddo5*kD3%y(ggj)a|$5vZ6Y><tM;1)%%@q4%$B(99i~
zIcT?x(Pa$$A_bZMdFvakFXf7igDEfRbdkf^igAJ?+#P4`wJPTl?pEnY^9nx@J7mlq
z;xEW=^n7wN#pfL(urk3=#BUxa16onXc1-0Q0|T8%8YW|0T{T)~OvmK$;{cl?wH8VB
z7~H&J86MyC5-i25jP1l=^Anq}{)3;zm;U5q*!%J?aB9F{$_1BVQPo~NyS<Ir7Cs+4
zzD#^zkt4`NCczK`c(UhrYyxXBaB3I6{NKNbFaO=wF*xgbyl34E3=a0ASX~2SEsE7O
zC=CsSp&NOvTL1v6Yf%^;Lb*&IYDh4tbu!uxA3*O=8S_?LjHap@%$!n($>*=Zw1xt#
z0gh~W9-B5jgKZ~zbF-4(WG}YA;p?#82`HiY7_dwRlBGPa<;JI=%C7N1oW!%BwgD`L
zMpi^ivV!bIqzYkMmFVc!5T`35n%vHpCn{~5oG*)Fh`y~ow@n{uAIdMcy_r!N^XQHi
zC{Ym^k|1O|9+L_Y(p&i$VW^TdA2#<&FaW)_Hj~B4?}<+#7TVQy75?B8b}R;jtgNro
zlZZi*t0E_+0=`BsjT<K<EW%H_f&ZB-s}r{T>vtz51mV~|K?pYW8N_<|y_}*#shB{7
z91^cwTN+yaCuMdA7j0DB8jq3W39q7jJ~amb4JvNGfr>nmHIUsKmE_F)j9g6?Jqkx~
zznt7K{!6_TCr})KchP7ODQ~rslnN_HjIwz1#j6#*;&m}%KB&;5_<_P#%ukqDisL-y
zN>?9`8e1;11P{9T3dI6S13mcFOV4AtaV9R9-GcJ)5W=q#g_sX&PPVVdB5(!Y2he?m
z8QGtMI~pR!JvIV6A1mTFcmerrBlCz={s_;xJQ-A<T#yi!WXNDKrkaMhk-T8uAitF?
zO)4`6@@`lx=J>FJQ&?Z!E-PZWV5V{9{KS!=E**#8z`wtI8ejO+KgOlgs&TlhH=C(Y
zhnw!X7lkbk;%9Fh^PXyLiWC5hMVi*)y|-<^PrmuL_-|kO2ehoY0W-&s#e3iPZuE8y
zW9^Fh7(Z<RW=|TO!#yGQ;=<ICL6Bz%0PCs(|1nq5&$@4*AKN+$b0&{PUw0?E`i4<e
zEaK3vEf_a<C1y^VfK_W3VfULmkpeJD0S#js(bzZ!qZ{hs_M6y<9;qV6G&W=1wb$Ux
zj^CoAZy08{2QO`Y3V;8%U&FNZH)GDENm#dR3f|mt2x<Rm9PFyak_9b*1qQng;)f4B
zj{cIBN;$*ucB~{I_H*%2q$*bADoa1BLzu~31twv>UFwTrxl=&nS{ZEllGcV3hW2vy
z3F{uegP@2`JlyF~(Ob*mc7INu+$+LSjuG$g9b28_ao84m6(z|?5{_K`5}v$CFN2?b
zj90fTpA2%26OnYpc$s*?Ejcl^^?kX&3daCnfQeZieuT(8lFnlFF7usx5&3x}iIEo1
ziUx9rDF48F{0OjC%qTE)@-e+@y_B#}X3qP1@H!Vy1cT=r<9|{pw(wA~3-%c+2DF9~
zfx5e4!{H2%33yvrt9wS)(Y(|X1&}e?FV(Hk02D8;*I&p4PH>VNAg+b_BY4lY5H=Uj
zy2W@+)?^{?$oXvO@Fv4U@dflTqJJ9W@eCi6Nr!sz>NAhw@fWwDdgv4$dHM}3zHm8e
z3*@xHJbpfnsmv2}o%9KZk+&E9t=Ujy=!JL=GoiELs?=klc?T}v2hIQycSc`}azBkh
z`OrxABti|#rjSST^MxENsXg!W{36_dV&7w1hX>WWvGX}$gX=aO!f&4W3BLK=`>?k!
z#n#u}K%rQJEAIFtmL)szz|(J`rncJau;hx{@b2~Jx%m>*H#DH8n7||n>gz|Ls!+lU
zk3WR{-3jVys*x0{v!5Wfywhv}6n$OdKCkM{9#qh@<*8Gxq5rF3Hnrn$&AYC^=(=hQ
zo_-bId+8uLdk^8h&9m`=Puz=vQ?KD)pL-jLZN#CIeYo|*pT$7gpzqW+{Nq3V2wj#9
zlO>8l>}gB!dmsHMy4w%oE8qDk2Fhg|*t-{fgJlf0?ZQu9IE0lKErmU~4L^F|Asp%&
z#-VZ}R*h@M@k0mDJDly@4R*KT;PJl9Z)N3Ag`7vhK>s9&Xy;cV;+7I54sz$tU6%o5
zByKE&-+fTmgrm8}7FIlH86<d>Pz|Z!yjv2Td+(yp%V2bZmE$RbqgYjkNQBnmNI~m#
z3s>!gJE3S`L|da}VJ-9e5^+Gv>fnr!r^hiYGy|pdZAE=uTHf0}mi!4@B3k1}m|+n`
zgVZ-aju6Z*q==(ca9c@Q1P?R0kvdaC08O38FvneDRSSV(ZULk_3Fcr2QokW9%|J<t
z0P+_AD&~!{s}=rg{Cg>oA@$tOghg0bxPDQ;AYNds87(0jLE}#no`CvELy3y~o;%`U
zYoRSBFeEN;e#7n=@PVI1a<^Eji5(A!a}=}WPhMXm<cMVQu%P$xE53V_BoK7RzC&|+
zyJfFU;E0^;xq#682Hx40ejtj85F_#@Zfar6CG_=|fT5Gva(Dn&ti1rQ|MF)TC>u2f
zV?{-Rhzl_ZLlZnxE%5c&2OKLG7Nj%hWqK_Rk#h)m!W(j4!rcX3o<H0s1`R6Z7PwjB
zd+$?L$josAKfZ8F89Wj!!}Adr@YEtYBp+Fq(c~}m2##htVcwGu;;8s{a!CL2{_E!T
z#j2^s@=I^T+W8Z4YTxVlx2LwDs&+h9ub7Wfy`%BT-@gFIwmgk*{^WUB08NvoV`}Lz
zj0KXUAOHG|f5Tlj-i4*c;)Nf58++P%QEqz*zp*Kr7Xee+y0G(DCyK<2&Wzdj#Kk^(
zLB^AY%n_$cNVzXv1d>7li7~Kg8Raz1&`1x(`lIfXR_#X`}4eB0U-rBVrbX$%U*
zA`$>8^@Ty9n823POtz$uJ#1=#v|P^YpA?X|#FeJ0Ghl+m0I9njo2x1!&cr)7f<zEF
za^P^{g8HIQ+18hgB7khzUq})x=HiE~d_6Wcb<wLYdd6nNG9r|&!o-@)Jmm<s(S#r@
z5R!!=Jw2UOK^ziyWtq-?aI&v_(0mzzS9B@y-p7%@9!ZMOq<vP2DOqw=2mUuNFA?k6
z#?7f9NLnWDu;}LcMUv$^n~RGTQB1{thAuWzgqL#jgnmR*REU+c3oI^$J4R{($QRr!
z;9K`bCBCS@a=yrJ$2lK9jz-Xhg$1O&5%_)uaL6lTQs%g5BpX=xZ{=5VOH5$>ch@Po
zMPX2hZ~c4c?@5xNP%tQ$%19+lj^jg{$_2%_xKfCH$c3T1#JN0vi_oxR`#F5f1}2va
zd;KpLjgq_K3&x0g@zZ?O@*I_0#r-DNP85l8JA2`y{yu_~QGO!H3P}M;Vvv?gD5u$+
zg<=s2;ggn8Dwkb3RDiJ-<<!CvDT;+6i~-8!694?C0gOpdFhHryzv4h~Zf4J$Zxt~p
z!-(@z=<sw%v^22{9_IyyU+9%Z&XFpUXfC-|B7|^NS81GZ6(kbsIms913kzT5$rU5Z
zq?1d2=PM^^Wyf}NNm)t$vn3%;yt>RHeWlzZjDnnxARZg)mT^RuF!`wG^Avfo|4=az
z+d0V>&hSOYh;q6MerF7EAE1-+w!uh6z&RHNZlur$l`f)iWb4s-EbuSC3*`YAaSpj#
zn48y7LXdu<vC79MNo#WpzOyY_lqwanfT&mf^%%;bUNL{no%010LRc$vWTm;0w`E?D
z_;B&~i1{w>isby?y;oc`VgbWEVnsaA$T2&klAdQtmbpUUERcrW5Xz8`8ETuKFFt{+
zmny`va0t1h8TI^g;Nwb#OfL7hMzP|g@<QDYEVxQMCPZ_p$pUW(3tw3f0#KPmwgKx@
zX|qGw-;HA;*P?QS=fOqm+3Z|tEUA9DK#eZ6&T@qO5ucB_Cag7--GaVJmK2GAxga3E
zxA1m=drB%(Ou4x0;;YRRrQs5WX;;k{{&GeGXGG12Ar!IOGHIIT6iCxD(z3h%9TY#t
z%}t3WJm?meTEPV^xj+)<gtK_#Skc+(DTzZruc2k3FG;1ul|B4wohRmT&K1kXD96fj
zmD7#HvfM9xPRA!qh})}VD|y)1TP6Wje8!3h%|tm_@(I%TeqN_cI^WtD@K=ubIgC$a
zUPog*>8d$}pZj^091c?iK0tqSq{#CzE+VE-!45wD=pX&ehP+P6H1k10D48>O;?cKD
z`MD-9JIx`87~CUROb{s>7gIWkQW8^SE%9_ZDf~m60PlZ{vSS@`RQKA4o3-M*zNniJ
zT!sqy0N6%s6Wz}88<%i+FpZ1AQ^tYS=Rh%P!?xnDm?OL<6Jma5c}};}dTAey7AApi
zCTL6aCOI%W@1jyA8}V1RwZUH+-!dIrc@Y7&nP|D<jbu|RD%VM?su$Rh{=};LKHhHD
zqFgQ`mAYHeModkEaPh2TO!{x2RGxQ3zq2fpTx&=Ha=F6y(LTdGYqUXR-cS?pI9BF3
z`q$~>7j@q5qHq(1la_&6isH|6goHxpg;2h%nPr`cM||6$y%jl7I;Vn5<OP=#(sCKY
z!^0@KyMfu17S^U19v*fdez93SR2t6iG7>(*NLKuh+NF-aZCXaDR6=QZn0My9tl58s
zvE!S+=*gcWhz$!Fth6mmWX^#r4Lon%yKwJ^Zo$fBi&5Qw3Wra1fp?~;Kwv@@UZBjt
z^JNWIuD=X}r;eh(<Zgx96f;)bfJ<jv>^giVh+udRmt(Mjj6X#Qkh;@z5nK@HLz0&t
zTaE`S#*j`5F>X#%Ur|tEm7g4wnpN7#l5$%b&J&U7r@k0im_0XsuyYa?F-|YJ!iqHY
zc=0#-iycyHqL|16jt2D49~j-nxuQ5vh=%qDpLg6!u0Ww4&P)3Fpc4rq&$n)STgYeS
zUxX_@BgPRn7DTMu+Pr*TmOOsa=RDlPPNA)&f8}ojg=6viaAD=6#_(VH6RXmuIb9?c
zjw6ia0vw>25Pju)Xe=t-QT2ZB=OTSw3_|QH2>U5HBXLQw6xA)JvoMa_;nd?9pUfPD
z96zL|m_z%Gh<({e9F*8(;#Hj`ZqQheS*YkYA`p!;xb_Xrg#dWhL{d#9AMvLJZs7P2
zWpfgEE@mnCz2X!PAgfPp(_#azx$y>|Z9mTRm)*6zJ_@g#+~TiyY5gpAm~o8vdu>}Q
zu1PycJz{*8b!P5{8pjXh-g7bJi$I^)*k}CiEQG%b4p6dFj0L{V3JFXF(Hd7cN?+Fg
zr1K0Hkvz706xQoP3%TsG4qV@m&rpRYakT)7V<zLaJMY4^8!y9x)&}g`a{z;BhE>C)
z^YEF^+=Js=o=4|U0n-<+$A{kcZmhZRd~_V%gVVi3jI(9O9W(iSeDH(s#}#XrqW{ER
z9PjAI<nz|!!*|_^_3Kt+RH+SnkDh@^9G=eU6W*PG9yB66(OU+XJ_|Q^001BWNkl<Z
zCC$AdsA`=!XE{c7ZpSmPzKvTx^?59wGzz9zi*Ze(kpQS3)r2v1Rj}5gZge9WMm1nc
z>r_l^8U+BT9o2{_Q>S6_gt166$j~x%2Hx|&yRc~9984bH2m_0%x-n>~v*_v`^a@xD
zj2=4yQ(Gsat~!CW7RB08XliW2<knV<8(o(pvds8aohQSS(Q=1uP8By{5Oy%JTcp%g
zuV{UMTAtkM4mmkk2Vox?-SU4z95s{=K6#B+kO2VUjs=gC1w{q%vSFB^{Vz&{`05px
zer-d;Dful~0HJ}E?(V&%uOu;g90XWV|L~l~DV`tw=89|K3pr5Ht#u52gs#&nkBY0D
zFBy^}kUQeJdZ#2}FE@-Vl)Qksw)xvcer)e!R<&32h%sdUXs85u#}2)?wjTc|W@0WG
zH*&K48EQ-8MG*GOeeNLrCy%#~O%aNZDxce=-v}hNd;!M3a1Rv<bt8OZl?dsf28N&q
z%ZqcaoP1M==|em44y4dLj5lkMRMnzs%qYMDwGEAEs4W84qN*m#9g`=GLosoQxmbgV
zlUva;zR5ikZDEn1v3Vk<wNAn4x+?b|d;#MoPR7)h38+f4x$7Dl(bzN=t*ukgP+Rm-
z+t@r2lUgQY$%V@?VN{i{P~{`W8>A)Q@e;*>9ofGHy0R<xS97DHy(@7vz0%lo!J`}z
z;yjH0<qIkyJl|^{HCEp0w?NM4@`4+$u#7|@yBZ^ptr0bi-{n)hMLHPn>Nj`1<UCJ0
zjD2ta7C(RTX*4Xo3AbJ}6KM*VVm&t8dK=cST!H3Vi$d)feC+o>f}PL(9IqW8h0lHH
zI-NI7Hm^;KxZ@L_K)U}mJiDzEANlwP&|u0K?mvTHz4$DidSwT0`_$(!wWiFx>m)12
za<wi4oHD-p_355hA`qW8aj!d09K@Ti{uY}qx&@bC*ov)hG~$mwcpm=ZOaF}dSN$F?
z>VFbnd-P3Qdi&>a<>U@*Kh%ZNsqOf;Cw`5UmtBVoCfB3T(2Ta<{1gv9e*{a`ti!ZP
z6R~pbIy7wi6`p*d1NGxtvHr?yP`~Rmp5Jm9sV!s5;;Zn<_pU`pR|)k)2k@1z{{YPw
z-HJcHZYrL8<0vke*MzVB)mL$>udL+H)MQ5_a`IcdMm)SNcOI;xaw1G{8|8#yKjA!7
z1jd|KKFgqX5P1MEdr-c)ubJcb;bDaQCKzAtu7%G6P%KdKKmle&jF`Zotk99JS$B;e
zy5-=Zf`<l^^O3_UIAj75XmVT%d=0$M=ANe`+dF0Cs+fObP!N_M2mIP!6Sfg}=992t
z8wmXLvyyBzze04MX!NQ}UCH(`a~dt5zbzu{Ef*1<qeEonD%P@oaxDJ6LsrOhh&b<Y
zu`HjKfo#o10^^y<qgK@tZ|`rMY;w0*WRjAO$yK^YTq1s*EcB^3$&;B|aqQ&ufrb76
zjO0+6sN7Usjqyn@Z-@ttui4xNbI6@mb<0<^6)=eYEP~t3@pEdL^74o|JnlYOhS(tY
z0bUEBY2I3V>WXIk^;aLnsvAFyX@`D-@4wQHJ3sa*OhgBU272-6!w+HKX^Wfh{umaI
zHyA3^;_Y8PghyW7iA9&*g7;iHAMK|*aCGNucx2P-ShV4GynD?wbeF2pv->%G``@3#
z#kb#!E1OT_?T%_pDICUEzxiW~UAz$=zIhee&J1Jm%w}wMm8rk=Y#ln1yA0>ZyzL?E
zrDH+cR>lSUpUOiHV)?u&-@?@#scy>$d4#V?+yytfe+>lZSZ-luJVU$=xj`0EzNXM~
zDNYjd8i}D~<}iPVah0D|;0u`pb2&P~pT-!JdfM>%oBe2=Ivr_g5dDKC0Kl{*mtyA7
z+j#x(jBMz%gtoRbND_l2u{hO!#tSSt(qc5`PbuIVulyQs_Z`B=?)v{QbzBv8?B9mL
z?n#(Fdjf{~d%@q3X1#rrFk13Oh6pqnTo%3><RU<(JOh&$bhLM(sksRzDWa~n8h}Ni
zss`1??Dra~Mvq3@wwLhT?|ujO{rol9w2Y%i4x?vq7`E7it2eAc0bTg{kM6_v{fF?#
z13$!Lo8QE+HRw394Vzxr22)i80HoytuDJdxY}@nz{_ZQ^#Nd=wSTel<Np&?k54?p3
z?!OQFF%1hQ)dkk)q^9+c-_FPx85|SR5t!h}d1%g{pfcRgpE6o5KZ(Mh4awWoYEb{P
zX<RIV2M}{hCD>>4GH{M@q)hV&=a+?*9GVg`osjL+PH3pJ)JlvYVi4g?2r(4&&BN>}
z7aPK3jaYLz+xd2s;Dl{yBsot07KKP$M$eW1>ym@76GDHa<Vb}q?;jZnHoYd33*V4B
z`c-rUYfeIjeak>9C8iQmi0g(jj6`3VLFg`C`|?A|1CmE#miOH}rU0aGDNl*%9>kH*
zQ|ay!Ifr-P{}HY*Y7RaZ;}u`nx&gh9k2J1fOhQb(f6C%1B7<(jTt)s_NnRPxI_9Bb
zh`|ID7ZRtq!cQhx05F9jYOAyF*eX`nqN<nxRiiO?#(3=C`fEJ==o2{7-ixu*mg36A
zO?d2~f5&6L+>UiutViS6@wn!yRe1LPf5zAT@mqN8xi^8DaoBkI5<KyPZ{Z)l^<&Ij
zbvY(C7Ex7KgMF`W#`o@f7&WcuV?upBF57Sk-g@ds_^0nZfP<$8*{==%N6`y~j|Inc
z-oF%O$9yhqs|sI!EXmtU@=ZQ0D*BmG=j5$SAnK<=PU0i&o1v@{m+U_)#~?c^EO;)@
zS&C<5-eb6EC`9xoz^xj~Dh^ZF2pO&Le>xh*PsepP-GW8q%h+}F464UW#?2cpz{3we
zjnrh%@TcVx_Uv!N6*s*L*RP$2U3(5g-QM-F-Hn|`Q><UT6iZjE#PkV`sID#oFqpYy
zEpEN>Y78FPj?PlTvF#qW%6<plS|+8O22gRv1)YW8CJlf8(sfGH6wMRI;`FK0epL)>
zQ7B|z7XkpL)Q<y4P9n7_O63x&M^D2?@4f;1w!DVtUflvytQO!3Vw0rlPFq_<V_gAl
zXF4(1*MrW!GDg+c04SrgvkT=k#o$ni>MG$35y^$^yz(Z}P$nC>NDhlU>Ac>FeE}x^
z61HKHr-TyY3Nn-rzRO#6zKs!m-3eu4>u$SN&=sn!Xk_I15#$aAkgG+K^IW@!a~Utx
zId|p{qIeO4{y}c>bA>jF;E*#PB_<(z?Dl8GNE%a7T+19EJFIw>w~M&-u3fb<jCMq?
z<N+clVrz%|_%uz0o91Wn$w~+BPURERNJ3S<6-M&Ncn)G7XLrZ5sT0Z>m%GB4wCfeS
z7>*epO7PGhD+@3ZUX^<>7iNph%pXlC0*D{8zlCBUJKcX2-}=^lD7G%ey`TRaR?nV*
z+PX%JZXAcTS6+>Ei(9aB=RTlNgSuiFr`o$wDh;83U=T=ZP+u&et*aLUT^$%e6>6%A
zKstzyjy|NNVU%q4m00yP2|ByG(BIdCuAY9ak>HWMp4vy0O2LlGIUPBJj*RSxM_?<-
z|IzbQBUl^l?@s1k#~$ZVyI|DP=;15oAeApE=V@6?&_4G#QKYrZ!+zIT=BKbS1kiDK
z8~*9<{}SJQ`V?-x?m|qMvlgr7PQu1p--pGsrsA6GuR>$}RNT654F2c;{A>L0zj*-H
z-uW(6nM~%SP=#W_6=O*W_kZJSD79XQ6*HQ!|JX6~^$h_?@z%2s<4a%q-)L{S3@fHp
zGX}_Zb&CLRkNR)-m%r;}z_zgxFu9YGC1m;Kx#L^sV9jMyaP`6z-?;Axh6W4@<L6-I
zs?}JraskY<*?=MWB}oC*RS8OIg82*Qp`pIpeaA|Qj)4T{FPMX#J%@0jy%Q-6I!~WK
z^SVVif6ig--hU8pzI_}UF1sA<qfTS~*daW7q!Xq!@!!*8OyW~ZxFzd<e^4Y#-r{8&
zMsF{kt@05EtxSQSp9~^TUf|V^3ATHkZTL2aUY%wA#cc{`H}p50JP(ONO3bNZ#BqlW
z#{l$i*#}$p;q4d$3LH+Z#^|;=9lZbPz?1sK=qtQsp223qMjmHC6l>OMTnJ+s3vwmy
z09Ci1jK2!GW5%}#;|KYPA{Glm5;pqOJ};Ie?|7(5K2@#r?Q5fKEATVA2o3!<^f|&%
zl4G`lv>vBy73yqbv|VXuA@l{BEC0BHl-sEDXFtLM%tmF`k5M?zi*VSv&Ktm%A0Y0|
z=Wv9bB({3!{OzOPX~Nt>Hg>o=%wrbrYR3vqItDt<puXi|tX{nus~69~iM@cSX~g6)
z25-Lh5+==?j|oi;cy0F%Y~Q;ad*1pDj`bH%HFyf$o$c6pWB^xPcLmHFhf!7T#ala%
zV%PB$8!umv3EdM>-+K(Fx&|@V@D6ff5*X8u?R!pP-9?w6vvCLuXHIapQyDL1CJsO5
zvmhSS?$HR}W$4EPjTw(Sc{{ygu0Y^UKKAIXCpB@HoDP00A-2)Mrw-;(HLT!1f4@T#
zj;M4E|0T|{dfYrFQb7IK0*^2INdFD)5D9z99M&{t9#$@#f-{{1ShH+8PQLyWPVL%?
zFMo9x3RR=Ac->{#wPP0s%Rpy34r^8{M*pNG=x=L-`@vAGzwI+vH2f02{@@EJ!=S2Q
zv17~YXr8kQT{~aK!85>l>#o2AbYrNt1=B|l<8)8S!%Qj`#x^&9Q3nwxOljAw6Def4
z9pI7)16{1D#+2zZF|HxO&mZ_cwjAyN2G8JF|0pb3Fc~|4`v&&!-;IN(dQeqUi!(>|
z;CN?0j4>z;cHu~;!OEoz(6;>*>^jwjojdlSY%LBSJ&nZ|E=P0CAod(Mjxr2-+K&Qb
zXJP4_iP*n)4|Z+ej_OHsasJG)*!<|f<BbESkrb;@>N<{n$IhTwU5%3mcA~wn4E=j@
zy0DUk59>-gnjkp%S%5H@6@kLnK>B<@W*PiOec~$faT7Q%+l1_*pJFgdt{l<8a8cos
zv=I&2FKq4;Tn21%8iO2M-pXMR`BoTRH-^qpz&KIdEM9Jzb*K;cR-Yd;3{H@|0uevA
z(8Gc5pCJ?l3%uT<;FPv<?}d2$iC*bXgtgF75^s29=h=n|HzAi!7EH#z5BZD<boHI!
zZO3>4zP$3|2N+6Oskh({DTntBfra-6nwNHA49lyn$M*2!NL+YSDHwETPFwOJW|@#I
z;RDt?<31hp?5HBqh11cP!j`g)5tDL^OI<aMSm!)aR?kWz&q;2kfw*v1!{-uU^g9nj
z$pb~D@B49Ja{5r5Q-Q8IQxB@*!T{x-)9A2`SU7VW-rn*i_U+q^W1VHpU$h)c&!3C7
z?Jr@|D_haqa~iu3cVgj^rI^t&7N?IMz>yQDuxIBUjG2BO&YRhSj$;RK=y)6U?LC0W
z^A=&+m?1p+(8D-%x*x@=8gv}lkK^3~s4iOU-@6C9cke|*>jJbkl(B8=R_xrl8(jls
z=1B<+yM?f`jM+e2<#-W@4WA=_<ZB!Y5bag$N&Ewt<$}_;h>;P^A$i>z@6&#8sgUGs
zgu}{PtAan2pz?(TS`QYAYdzP9oGlzFEc(KNSH*I!Xy(4f+D0s1z8v#rwc^y<&*8x*
ze~tcuUYt645+_c!p=`_e?JF<fOkW?i?QX;JmCMmw-Gv|g;32g44gmlf#<c)lhp_8V
zJ1l^P<{7y3;+3c$ID#KO@EAIVQZ%;A!iuFAVCwifJpJSE;r0C;ULSwUdd~a>e*Z_&
zxGLV@KbjlG+HxTS3s{??oTeElNdkg3wJA~yw<~6{Z#GWLfJxZVVPR9t-^F8rByq)v
zO_8Q4fW)7J&2HPK018RA%SP`LH#cnd3lAm%Y?=bb?aVpS%5Cq4#)};1vPiZH#C-BQ
z`+V{t*!V-K89AR_-k8L>orMvKigz?_5SHOe@jr6N+0MlVv34VU_(1o|(M#nyDy)9b
z-mfwy;h66fKh>V>ORGt-s*K5dZvO3DyTVSu?=l{OF><zxjPp)?LOx`Eww`$#M07hx
zoBzk!$#O^TEXRD}TqC1WAN{FgEzdiTI=U4~JKB|eWJC>YmOs2)E?;Ch0WgU{VvM&@
zmL~w?a!Zz1QdjgOFbU@lz~u^){jv{C_MA~FWTd_@Jdc()Ts}wq%*Wu_{WaD`-Bod8
znF^#3=dPeouEcNeAoxdY>f4OWqwj=kewXXNOEgsKD{YZ|Z!P&=!Udqse9a^U7-L=A
zEaxOi;_a4Nu4S^Av#P#vgJs_t2lmmBx!ligk$^V)l?DHaJ^m=G_Ne~|Ebjm@I&G<#
zq$Mj9!OQ3E)UFLR&LS87o>B$U%O@kfpvWaa>6~(Fo94(f5%h(8P;q^oC+Ph4xb?+h
z*f%;rFb3VuBvK`!Rg%i~D)GZ(PQ*lW>S9waQ=QDRaD-g_1h+j+fU(wu{vRoD`~_tf
z!rbE}d^>s@HJD$jWK9H%IQ6>sI)QO>_S?`ld@jIR^@q4MDibGix=Q_IqOO!A^saB1
zh@|iP0kmyBSOkHofNSJj#U$jgjJB0gaJF)f#FYMBnXvQnIVplqnV!p>^885ihcYU7
zRV>DQP8JWy+b24|RiALo`(l3nN=HmD=|^WPWa7GF1gwMiXVvv=SjTuTqQ8qXap4@X
zrW7SU*@}09wJx;tim-y}Q5MPZp@_`lU}B-`CT}pugRFj@J2bA`CKOa}C=fNOiNyRO
zL6g)K^^$y*HPpUGcC_lZb}fjP{WL;8!9%2f)&M!X2!BxA6k8O@f`hnB&F>Tqa%&=m
zGLQuv;J-1}1+}&LJv&BGys>~u3MdwfC=?P87L3WxQRNAsP$>A<tc$ZEW0n0DE9C7H
zc%u{x1(^SzwD*p;>#EL0zq$75O*%!rA9Y#1OO}gl%eGu_!QBRIgTX+!p}dqw2q8D`
z=9ioM5)u*u2_Xs9R09EHTmvq+%C;n{_uh_lbc#;jd%i!`9Br<36n?+AK|Xu0HOm;^
z7~>mb&b9X1utDbmBuN>Zb10NopuD_1{Z~<rvO>bLq{l15SAYvBB#yx<D@%etf)5+d
z1BIjvWo6;FUr8n>&PK`8E2G1E{+(Ak$7emQF3F2!Zl0I)^Im=hBf!pmxw+%Ea>zyR
zl})ug<aAsBv1!9GA){2@G)TC;ax%F$47`#cd+AuWaYxn_V$grXVA9TMsGJ0YvWs&f
zpT5k<=apacR<(swh7ZA*CU@-@f|Y1RlPEjz|CCIs80Hb3+-qb#fp(%D+u6(yONx3S
zCdE5pVUtHp!fZ-_cjNi?QLYobZO8@*I_W~%@P*aM$Q#+*dl4XW*%V@b&+|%b@v+#C
zw4O#9ukI06i?gV$%~g2`bvS($U+|Qal)UMqeVp{w1srMVBOExX(s`BSbmCMl!#UH)
z#VQDPG(Y{*FHiCr!>GjPEmE1Z&dF0bogW5kCA+n4b_SPYYY{#5mfWJqaJU@ff$~U9
z*@dxYdqCNm!bWG-fR~pdLu@3r=E95)C=oq07Q(44DHm00zDPCjyv_pV<S+9OIf=$e
z`G&Mna(NP4`C%}fE%=<xg(NutwR}M0Nf|-dGXb)e%rgcX7cQlW40=IBz0JMxoNFDD
z@O$u7G$(~Jlvh-sqM{rLeWR1*P%=4(LZJ-h<z+l3Ws0OwhC-5<Soc1iqtMjc@&&m}
zDv69LSDj$YB~K`z?F|_y)Q!c1kA4g{Z@3bdubzv2Z*M`zU^zD4|6x3E+YMN^WGZ&Q
zyB+PW34idJ-^0l*FQdIrr9w&uDr?^S;XlQs&bP4t+yKPE6Rf=X5&XfuYw-GSUq{an
zK)M%d$K%nD-j7pz_v8BaeFAsC=O(=WeRp8VqQ#govV{G`N%)&Dd<H9*FURGVUykzj
z-RK#;1|PY39yV{;iLr}sz=PM$z?Q9hF<25&$d8&PEx-pK`5<n%W<6%NB-pxZKj?=f
z7)wXks8^|orMEnYRU><`^GKU1M7$0kC;JKHnNk1=E_s)FOyC438*Y`&V>*~)6d2}`
zfOd3F)=@w8*DDR;BEqb?^dsB38WkLL!p-Oh_J1Q+THj>JKNlnJrIVr*^JE*dBOSEL
z?u3Zk_T^bWop^aCjJZVQF}g<>QeR4TGESCsxL9Sv5g!scgKX-!(M=WAwgB=`0Zd8S
zi#bg}NIHZbYW3*|=a{6qb%ZmGquWz~;G{^7Dk4ML>24Z~zZ+Q*$eiNQdp(ts&HA-Y
z)Mn?`5k}%qu`kB&W1rQ@)7nV>7}hcNCJ0m#rZ#FE6Za|>jeOc95q>V>k7zBL;TF52
zDec5(CCBJ5MzC(faga*JdAq;1{*rT~gZBx>PMw4KGsdH<{T%uR)xB^Ob4p}WSQGbh
z3`V(tWmoae;Sm;{rZ$UguKbdU2rZ2+{xe)+d`IwOIrx*ZyCz!2dCXgpJUX|*tCCjd
z9_RQ<0*MmOWfr(52kG&Y?k=^qEOb>Hes73W3=-z0d+T_7;*3~!>L0MJs2z<Df9@}^
zvg!n0-E$I2c?}-<?Ek@C>z8Bgx=Yb>_#GTMKR_e1fjxi;3$DWNKl%VJyX-Pdsq4kN
zyN;w^f+(xOeV_RY+%df$&%S+_f3bl!LdxW3x3KylNB7{HTsa8kYm7|!0QeFtot(Mu
zHq02@iogE57g5#FfUf=_jGT2PZdp`~|K~6N50=00PjL78x%lg+PGiitai}Z^m{dGQ
z?ue12$D_Qez^>&S&@}T>+<e6XjIKGIKCPPw*?Y`cvjO$z_u|;ubNK0hd<oArFU0@;
z(~bCtFa9k$dU{bg^BUmncKrR{d<(rLkAc2k^!rZq-u^jUy66BF+`I;_edkNKFce=(
z@j!VU?s)J6==kk7@U2b9P*YXFV98;`=t-C}V-gC3-PpN(2ikiE;8BK2Gv{F1xJH~k
zx)(e59YyWPW{fYJhSh7E(SB?%b{{#75(*eMZ5C!sY{uzByRrAkX%t0$$Z9N~@H2&+
zrEwuYmD>1J)K@?TTS4Vob{<(t#6@I?VSZyp(Lv#rZZ2dwL(-9s6i9ldnUOsNV;rl)
z6}v);M9SiUv@;!-uiV05F1p!E7hZD`4<ZBuHKG)Av`eNtdV{@^J;uRPA*wFT2=8(%
zi@u6nA-c$|17c4rlT*H;3-+M7vpTU3iZEGro=eTiVymKpFjX?a=%_KLBxNr@k}qkj
zbrPZ-CczY0Y}v0QQ7UK4=<Aj2Vqd{t##e<3^6i>;$;vFib>h`}b8<_27=KbR;S@G{
z;SqDku!}Q=0gt2SaLR|+4OD!4@d1so)=k?jJ{~9CJiO>Q^DFZsn1?Ewr{dP@*P*Rr
z5L24U@#AMUp~wl4t6PPu(l<L@iWmo^*L6qwmy#P{1+jtELtUPWcv0gf{GF2@l#4hg
z`5wH$(BqI?5WGQgXZ_LcIa=S<_z1rVmcl!lg~`6rm{3J;;51H2E}#$=2Yfk|_?cDG
zwT&JuV>W=tbMc8FWEs}nav$c8ZN%Bens7WpQ*!~|{a=5Jy}cfNy%$6W>gF7Lv54#M
zy$AdM^RMxfqxJZ^KYc%*efDJ>>nmW|;;S&Hr5>YvgJ8z?@!@i+A|NIeC^8GW%ZkUP
zD<`Ep^>6@sI?iG2oE2EUU>1_z4qO--!lc&uIDc?6j-Bhk)=fJxZQhLVlUflN>Y=I8
znar!_)DHaPzkCPhdWR$koCm5$<N9?o@XWKXV5sEL+j9Zk7kV)?G>Go*ZuIsIz{8=w
zWfE4dT7@;MS7K~^1>8^<9((L5-1VtHMq&4Jczxd)nM~pp)eYk@XKWEKyuJ(P&$r{m
zsWudSg7MR4VM5DDjG1*QKKi~JP@MoX*4%=R-+w)t8%AQz{5hyCb13;LEL^?_6Q^B*
zM?dxuCe)N+-2C<U{YUP^xKU$p$3u@`>Ew|zCx~2SM>O`cnAq$}SGSRiE9Q8dWOI^N
zy1FC>l&Zv=@yXu<xl%zBd?{;hN*d`nRHx<@qr4jP%1`N72vkp4De~q%goU#{rV}2|
zr~^O7I{q6aWI8ESrUPHv=meyXxtwdbj4`YZI&1{rAURLvO<%hbr6etLP>%I%5Nefu
zrG7w{)lywefvrf+UfQQ5r3&B(Sr*^8yA(r2;!4agzvNfaUJ_egGEskN7F7UbY!}+5
z`idzsivlGxCp!RMbDYl^pZ&!fKJ-wNK^I4&uhzwTsGk-RUiUu!Eb*d!g9|I&R<Vci
zcMSv`(x=Hcyx^yZ!xXSQIm=)%vV{dBW-t(6CVp|oCsL7ZT)>NbtdsXZlAx@t3>B4i
zm^p7T7R{Q30zHO{Imy|Pm47e~LJA1>MZnByiVH0p=K#@B^9$k0e(n{15H4nU;i5J=
z-kieK@{<WlY73n^v>W8Nk;76?q{W1eIn|VX(lHKFB<ne7cZ>=^N8uihNLv@pC8LM`
zag0QB#|c5X3S0^AKK}#!$5Wg6k!FBLZ%+YteeBct<4-+;Y0cH#N$cpG!<N_HLhFjP
zxaRVeII;6hoa=WetDA^hubz&dJ^5<*vV-RCRNln?%0eVu2p;EEFc3|~&Jj{5oX6qU
ze}*qU_7Y|;zZ#$W^FPPR$qgtkuRw8d5Z(bpg99j(S857SuaFRQV#_vx_YQp>Z8+1}
zYg+nJ2{V^pjhYj0VC%82)L^;ur1#lXR#l?1vNF9-&jCY2L#V8-#$aDx`i+u^M7#%*
zLM6&Efc|2M^#=e~sKnG6voUkZ1k9Vi0JRkfF1zMZy!?~z<EOv)IUfJ{uhBW^;gTV2
zdFeU);D<lP;l4VIZm7YEb?Z=UJBTC4j^j*k9ab%#W>+kVAo;pHCynr4CoX0*WP+a!
z>?N_9dnf{V8RF#C869!l-jPf+j^iPWJQ_p7{=d)M001BWNkl<ZRaq_;5TX3ZJ+5`6
zxN>Sx-btbJD)@-TJhvk?RU-E)IYvoZzbE7<846kRbZTm!KaNVmX(G$<SY=-y^)&|>
z3-Gum!bwJD1WE#$2W-JD_y}$aO<feI!h(EP?BrNK&!B?)v9J0ZIh8&zzURWDVJud*
zzzE}nOh<O7Z;m<E*US9MRw9RePyh1#Li&P?pzI~%cx1=t`Ymcpa*FrDd`_X#4k1`t
zUbXxIvZt56aly?pit`!SX~ARgQF8;SUa4*tJ6X&q480vo<dI-8?UdxHhsc189~D#F
z*v19PNNt>R80<KK*S8(V*p>#2p1BlPubzv(3*9K{TW44w%}HS~7GcK4hT-dB{3yzU
zfXWA|9pOqCIb&~e1$yfMdDuCPtJ-K8WM?mGZ*&(m;w#3~R_hkCTR>iorEHwrCoiMh
z_9{Gc=M(V-7~R*7BXR470FOX$znkBuHa~mvIC_d|hM``3^KU+nzxuns$4|F;eB%BM
zNKD5N>_|{sQw3L6j>@t!R8&=?JSoH4n{LOMSD(PKUNx_X0{}7}r6Lwut1Cq%lW6LM
zQ6>(J6AWU@E6?Je{_d~v*y|UtZpmb{pFN4%#xW=_6)|G;SahB~#g_|-Q1q%PcBr&r
zw31h%a#AgC#Kx;G!Sm0&gn{5xc|!pn=Z@~elTSQ>pa0@X>~HIVPwH^reK+DqU;10r
zFTEb~CX7gp6WOf4=M2sbHega?Rd{3xP*&N3yKh^E=YH~S{L5ocpnH%4OI9<ipD&`f
zZ!mq7O8UeTz<U%uP+M1x1Dl@4GjAO>D;k}=q>^_kh*g25RvDmk87RrSvz0xqkfgAX
zA|(bh-oeo4os;!XMuvbZuZFzHP5*Ra@v2==APgfuL-c7daC!wx0Tb3~R0IIX5Gb;j
zQJtDscB}<P(l<>8dPhi+aUfu=XWCr@hs=xYo07D~FCs_>GFU1TY$7ne0g;&s2O>Ql
zh6`_R#Bz)u?GBR-h$Ot#Rb%JKj#RMF#C9y6pB*txj2s1OwM(FoJlgMLtVILD)t*mv
zupAWoCL1cJ4U5E}!thqUJn49G+#0zQ_ZobHPp24@&fvdh9F?6kjv|WQ5feR!!;t=E
zcl;gy<&hP?XLcW%LxL_Yi)W0Hi62$`rp<i@Yua%Xy;U5?aS`~sP*INl_9OVuUp$45
zo<5xGtH6q-m!LfHhFA2J56ho1E)t_@POuzGaT3d|7t6APmE~1p6XJ2ry)NKH#UvM*
z3~*GCxxjxuXECkDk$$?rH*FnaJL<(9?4wdVWL#Tuv28MQ@I2|?^!cfv#G)23F^>md
zabk0jU(n)0xv^c?h%U@ys^Ie;@FkD4XHMaKdpmaR+K-VfEzH?f^-UOATLJHZlCQ(H
z>*wGnk3EL(e)BPuCN0I3am`pbdny*JzZ;)?U;}2Z+=%s;j0G>4g8j;ZM#c$+oP<2Y
z(Nyr!!$=K@c#K<k74BFy87JENuzYzVp8E1B96t5}x^Mq|eDI-4v@V&5C%^h_z?GrA
zwguPUdKVU50Qx(R;HjrxK~E{0$DAw2s%vjU`Me8orEWa`<g@6lpNea4xEhlt&%!M?
zU5m$`d=}Mn*Q2>(E8aTT4l{}al7c+T1xPSz+CtoM*IgJWIUIQBH|UvqBPMok!na<2
z341GM;?CPPVBc3CNB1B-b0`grq3$#I#j{&*=OZ7(oPDQI)_V-!d*XGR?k>Z+%hzK?
z?KF&Ns7FP40WUuDB0l`y`%yi8H+-oBzxw$z>9g~K97&R(kPP9C7k`7rx2;6W$SxGB
z>Tr6`CUzPVsGhx?gPqC}@nuyElfODU7UxCqQ0jo5Op*!(HV1=xK&$c=o;!6RO^jqq
z8eLy*qssttDOc;j@&x~s6(Ts+IcG4Di+)NnaNVE@UHi4F+-WB$4I0(X<=BSiP>EP4
zCDRYlOD9++S(N~(s5JI=qIb@VixieZ_0+|g%j!fWGLyL;N6Qt6OD8De3znS5bh=<x
zWw0Sg>!XgJc<mA2{LcPEJ{ixeX36d>dUMVNM@h}#)aY`liodzfe3;!KkBcrES9TP=
zl0e5wj!&@Nf*us7QA}{6=kY_2oNW;!ugfnCY`d%v@*%S0F#e}zHFAo-IpaGTi#P-1
zVx$AysdmZ_Xc5JQJpjHxGUIFFHI&|pecjl;bu(PeXm~gjOF(r+xh;-3d?-dFRz*yW
zBTWeAB8^Q}dvGC=E&S8C7?!h!^~n$cdN}<d@3{6o+cBsAaPQ^K@N+BQQ`{hU1~i>U
zRke+eBhDjM4`B>QF{b!fctVS*V~zS3Fn7(BxPJ9qOs}lQjeF1Ixp&&}fsfpS!L#Qu
zWAREn^NTMbJTeX&KJZzr8hi$S_LV0whzoe*-Lts+f%`G9tr8=;58-6nS^WFo{yEAM
zhp{W~!`QXw@yedlO0(dnt+Qr%A*Dc?S}lI203FF88<Q<P%Bn|V_N?g`SzC$oCl6ug
zf#Vn|c{GllhB>X{&~^GScJ4ohL05?dix#7~t^yvwV9y!6wP`2%bDVX;%te?!wjrJ0
z_hHK$@1VbCG#1ZmMR~!&_jlv1x3}SnyB|j9>yP8bou^U*`7|sG6%Dv#_84s6wim^c
zM_K(CtXMb;<%QHKPVIjeJv9@6w*5HTegUqs0rRGh!~WfS&@)&vg#`d4WtEsRV>ZS&
z*P`vnKI}Pi26ZiyFn7iTbe%eiQdKRE?AwD2g9#>0pM^=I8gTB!LF_$v0wX3)M@{b;
z968&I)|stnKe7+){T^c`wxV_7DD-!o!`?mNso)GQVL~eadkpMoBBQLRz4`({bRKbW
zrwcdFH`=D0<HL7g9IqzgF({eR_`I_5_Wq~10L}iDq?}jKWn>O=kwfMs*{s`A+6y!5
zB2{6W!HfQ@iV7y?w4so+BEDQ~V(lZWL|-uGsb8mExMx)Sg6*w1IAiBXFIq7>B`XzP
zTED1|90cR0{X^T7Wg<M(cqZXS@(K>#%%jj(_K`)Ya%G;4U4;)g0T@1eoB>tmkkdw)
zQYZBEmKCL~b*o6IEC$j<-IHTt!wH@`&Xq64dJ|o17vz{BX=BbXcqqZ?%W<44yb1-e
zYou&0tTK1rTycCt`KRamm`piK3xEvhX_3o|Id8m?dP~e9LCb_`XdPFN1G{(OOlL34
z6Whu3%kwjX9qcjfi>gRwKBV~cmgBO1po=&f(%M3HIZbtv;Sl{YF-;tlw`qX10o#*t
z=C^5E_DwyHEbr}nUVex^IImt9+64xrX362b{M@IoiFh#H$<1<;hxY|cnzI1Y$27ou
z;KG@M*tGi;W-nNXQFRsQI(rP;b{)b{DIIt8)OlzuoyP9NZSVlfYer(;yg8^ZFXG_t
z9XNWnn@6u|oP=4;gV?<HBpf2QYMnXD+iFy_7vfDnaq@~lA|*9Q_N9Zt9L|y%V4O1r
zXPAaL^AySo@WmoZ+AmFj4m+)xKhuZF!zK>ZAeVG<M7*fZ`CWv0B|zjuTwE{(g=onp
z+7JlCB}-CMC#NvEb9^AQ)da8lF-(ChX?luaMI2^Fj)#qQ<Y*2N(G&u@U}1g20ll!|
z8TI2nxIN_}AN5h$IU+nwxF!kkLohAIdI!<Q98E!!TJn$`K;4s?di(q~L)v-;8;uzi
zAf&6_nab=oY20;O>RhgJtTr2~WxHjI8)w_awG{Mwz)nU@xNo*FF%!~H?`h`K$&vgk
zD8|<Ff){-&&P8Ja;N31UiW1pWRIq-*zH1jg(m3`<@*&4ABo2KM!>Gh1*tn2@wasYZ
zpb9j&Ff18O%bkuJFT%9nnTM+e-zQ&oUZPuSYu<ZTen>lxwrDqaOJjKx7fDf3BsPi`
zNBYI`gY^+}I_5(1$Mh)<)JFM&Sro%y!9JiS<(!=1I8UM|6Wd&>ea2o64m>PL6Ncx9
z`7HhlrBBQmcGp|i8*F2;3oQn+qzwr=n=7&}%dVtje2#fe<s>6Zl&9?cSv-E1^e;YR
z7dm<Uvqd_IV*QlA@4$`d!HPcU0ct@qg#S4wpPfC=g1N9$;UtJ<3p$J(j`2A|vZQqi
zcDBFm>j0FB#f+W3nFm3x;s1GMIFeCx^I5q?Ya^pJ2LJUrI*X>P2h@0!VD<RYR^2lZ
z?B#>)d?bTlCNjk&#d$&zuf1_x`VqFa7>oTN^dVo;1PS6<i<c&Pwux%biHC|CPAVJ~
zQ4$o%sjI<2Yo`;X8e5WtB0p8QN$}@$Or{u95lcFVpJXwK!jNYrG8Y{5<)o-kS41Yc
z&WUenYE?<t$m?}$8Ix_Rf{){kg|}_!6#Fyeukfgf9fPsx9JgUSBK%DJk(it-q^}fz
zVjCXBheSj3v8$z*i|I3~tZYKbR#=q$T*;e5<nWMKfbtL0J=j(j8&JnQ>lkCjR?%Ee
z`~cD4)IiEg%oCcoqnI;#5Qfy0xl8;>CvF`f$~4U1D)*|E<QLx9TX;9t-C{28?lai1
z4;i~iNA_DMg;pvjY3F%P!cuAlGExqCkYF^f;zNx$`4-{BI4kT;ZsVNb*__k|{GR+N
zmP_XhZ#zdu(ciT(`Ax-wtpXwM12!Q9B69U=9?C}&pB#u@&a?mN;>_eE*$K9KseBiK
zq<2BJe)%<u*n<6w7ZWzWYfPk8(7v4tk!aE)9V5TC0z)t%$28}I#j5L@F?H%xj2qpA
zvM^y;RXrw6nSxOxYtsbi%Fr@u6e^O)7K(65-TJ06sH;d=A@4oPYw9pz(qv2;Hwxv6
zI9!x(Qbo(FF@Dk%Od3B16$NL`r8k)7I3xfRD$v~Ah=PkS39|C)9x52k%R0Z-%iI$a
z=GDAl*(K*#7(*9>I*>&!8*mJ@dN1-pd`HEU-kI>+&`QL*skjkrb^Gk<R?9~u<yCtT
z(8Jsh$NDLmlwO%tG_9D`oT=EJb>pP0i<r>Rtkf0m6gYN13PsOjro3Z!d34)U_N-k9
z=SUT!M=F!Oyb`~tVDV76D$dpZWMf1U0^23ZIhuP#F5(E<+3}zPj!84vgvnFknT~65
zw>E)2rt^TpMs&$3#sSa`(2NHtF$zWmAbFrZ=e;+!C68jfa&oiE|FRH^?_={~6F_X{
zBc_O7<O5_QyE_uj1@1EbXk@W+NU<Rj7=O>qqq=*+RO9KK;wl{*VIIAxW`END4zZh#
zDavu2G@W3_{J`xaZd1Ad9;H$dL#1LAySg0>E4K^KXBs1eyBLabM1;GxE)}~mr*mJ`
z&e30jl?#xF2icdxe+{PxOyQKxwNx~|UFdl+CAapOzBbQt&SE)WYettdJ*1E3nY57h
zsz}pZY~zJAWEyl1&WU|>u7=Z#<LpS@dmu?r-#7|WrcT9}#yYljQeKUTlc!?B=#iGd
zHK%xw@~Zl@o*6$1Wr^%LSJsWhl&MoOwy8e1_$)LvwS2)=(cl_@9p4$WSz%!(FBmzX
zX50e&(WgF$)=3kvV&!}s+Pnqrz5$PZ<}+9^s}<K=cLh%EcpJwCN8(Ta<TE(*#<Muv
zr)9H*3^?5WiND0ev%kfzGyN$zCE)72KZ5I5UxMYAU5mL*z1Y0-Fp8Xjg>(uvEqLFf
zpTHFhr(@27CFnc0701r^qf{z}ADq>@Vjjg}38hjAE=iD#oQltU;x24|=~Z+LmXIWr
z;7TYKi|{2fK6o4=g%m>Vfudhr>rs^*(;ttkDtI5+K{zx7R2sReu(QM}eWVD96QkXM
zkn?nzBh!znHnk|AU4~ds*+qcCO;$9V#?5%(DY7!l!o>4A);pZcUk%ZH74eupGlvF9
z)yOvC`50%hi6d-Der3zpKABj9^hYGCZm$zerhv`pX=B*f%kg1wd=N>~7g{3u?PCj^
z(KF6W6d#mKoqmX>w>H!|%8nVRmWXf@e0ZJ^pXsS=;lnFtZxdav?VXKxAx+zc^d|eW
ztPb%N@f2!~7a1|#*^^lC(L841?N#CdG8{pFDMpx`6zB5poAE2k1rEl#DyGuUB)49i
z$oRTnESn>F^WbJ&q^QellPPv}mBqeIGKFK_$!D|hQatJ^uw>l^Y*;fN-Di&B{Dpx`
zya^xDP9KDKoHIJ>of&&8h)TXu&fu|-BO|Bh2vgQq+bWBh*hzR#Go<Dc{;UccO8u#S
zr2eW{2D%(HdIkQ_14^#6d60k6fjde7@wEWw)cwagKDFN}e<%5smR^yb;U*C|6YWlo
zS=~4h4?pr@ESx<98#Zo0_rAAryeq+-kNzR9UN9NUuDA{rXSQMgscxRz)9pqcxccr#
zapUTFSh{Wl7L4l08(R*dX3SiC`ZFKLys48gV|*1hy|dSRao)k9%)`sA5-x<G*I289
zNEuX<lUZzdhnXukpm5-MeCgkRjY4HLO8xy9w|FDwRUX3s{nuZ|qPstbTdrDwH-C5l
zl~q+JDDHMHAFA@os_?4}p-lq3`oy>JV$TJXPgst>`Q&wI{>5)`qE{6FJ{`1m)eTrw
zbqJsT;`h*xGPu$JlA0D=zhOPbH`U<Gp>25X#kbKjSi;yD%W(Cji%?bS!mpot3I~P~
zR5!HXs+;dd{Roeze*Oy_JllijNtfWd^{Y@-?8J-DK8?Mny10<ALF>&iP3&4nCNVG0
zoUXn?0@BGclMu735g#SMxLA;Yj;_sX=rzGo+sOVhA9;%mE};?hL9`0>@`j0x$l6PP
z7|g7Dcyb)pEj5B|?;|~OM9?qwbbn5$hOAP%kdyJEA%%b`$TO(`y!wyYa6-sgRTB{@
zm~4`^tgPx2FKvvnv;&d;#|2#xlDA2&&<hQ!J{;>P(~7O*#Lkis{!aU%_&crO^NpdV
zbC%p$Qu2Qv$wOZdQNqZD3Q2;x(55kL@f(DP2l$HuI(bK+0K5u!!S5aWi?r2L7cV&Y
zFE`i68CgQW<VU@mr6ZB%q<TTZ+-3TO16-I<v0(0JjEX(7sc;;!^P0sanZ@p4$z+NF
zbTZ8;<n->o<`RyfyhW{;JOd7Kb=8<wTspTI?;PmG<*OIs@XuZX3gsx3iqJd6x#Ke?
zmfJKhdWC~2(iFzVHwX`?@yPET6{gC+wLBWzFpsI_^xW~ix0#It&tC@79IOnM**q|(
zX>83!&^#N>tMZNbzXRnY&GB(=jd@g|%ndSmN$0-!yoSniqQXGeDSY!Qe~X@;J}kfI
z5AmMMF2P$rZNqg-D)9Rs{xYiO--5rqeFL6*_uD7|Tq=@(zxu>C@nTOm%EvFo-+%UY
zG=Bd%%-{H4^lg0-KYZl?y4%}OOd_5^_{9c&Q?x8d9JnY@B%@c~782^_adg+)Xj=Ln
zeC$IX!0NdZ;84QY39aZnu^-(-MI7CC0Hdc)LLmnjatiI(dED;c(RKbjdiw`3aoS9r
zJ-id01D<<`gBLJ!b}J6-*oN-@K@9fyqJO9eIES8&Htaue1dBF4fJ>)0A*r8?kA3(q
zbRO7>w{{(da|Hk;)HjU5z`6bC95D~KUa=4r)h+nIhu)9Uv2ECXGQk7)-i(?um7I)}
z8XHaIKEe^Ql2BEHJdiXS40*}ag&)Nwrv_9YL;*$qXA6TI=_ArLb5j!J_maRQ@Isq#
z?+^b^6O)c1E`HK~j*>r5tg(|sr!VWs&W+MRCmq!;Q$J`ka*`LFsfT`GrwfH6wdX0-
zP?Cs3f#{X4is(f*vMB=hS|TL@ndj)d%Kfqv<uW#7tckSLKfzl5kR1(7EU&lz5s|Mc
zgo0glk!yBG^+F|JV=f~qyYjk&i)-;cDVEhGE>Pd#A1Zt3y4V?8XJe0iQYp-FXA22%
zktI-i!OaO*0lEP2(Ei3grQDfxjcfI|s4xbEm7C*Lwa@G#5pK*wj(v!2pE{fMU-s`H
z13{_cgZ1O5&3Z0wIqxvSf{{1#!^#fp-+IAGaEN<){#L6Nlja-jZby5u0V@~JLQ-Cj
zORv5jtLINkHy$*`4vZ6TL^!d9$P?MoTVE2>#QUyb%-O2J`Sk{Ye91;%=8h2b*5?>g
zMS2u%F^6P)Bt8#`LyB9n7u!9Wg^MM!LX_9+#)ZsdVo>?3###7AjS1lmeN5*}JwM5%
z`P*q(!oT9+06IFmFyyK*dvYVT>^hX5^BzORArwm<lO|6Nn^uA?Wr?oy?da_v!1$>%
z(SGn<oFA;ltXb1AW!?(hdiy>2^vCbVh=OOm>2IP$ii`?*E)pw=Y&+=9*Hp2yyMBW|
z`>TJ#j$@tJ_|Ru??Se58(L@L`V4SvNc=H<r7&Gfq+<V11eCx4a;zE(G)9Ye0a^zsW
zhUN)av2r=)&T2(n!x&5)U5&;mb5Pf@8_&P=Cf<7OHS9fg9zcSwvj_0ntFPmgO?xn+
zX$0yTr()r(7Bq~XfmsuqFn)3?YAef3A|`aWpa3!$$Sw>eA#rHW7zHB-hB3*PX2~nA
zK<=x8&fB(xmG?F%Gev|Nn^%}BA($OJ)kd#|X}8WKV}giCj}x=({tEA?0E>!InO}+z
zE)cvk*cyUR$z49H9mXJl-T{mkIUE>)SY#4U|L0)UyJt)Y8WUb9Y4S(d!2<XjkE%e@
zg@>%hBnfkI0q_@J#3zHUPUTjUC?x)c3T4;MLEzM+YV@T3yrWEim7gfx0fn(~a<PFn
zBFUjw;Hi!xH`_N#^l*+JGR-E{+%EyC5{4{l%>+Ig-&-Ax>^d<~BA_oz$Y64bS^0(J
zIK3QFuTV%zAt?PNn6%8|pTtE__6J}d#GTG8yrykP{p=!1=A+A(Y(HgRNs@G)*h|{V
z9O7#tw>M*w9DIuj7dAG(#N0zRcflV@1Krqn=r}ssPN9CvBCNh-B0AdJQ7l>8I<Mjj
z!OuMuj|j(VH%r^<;zng$)sKo{6C-iq&HZh$#1Hs|_??3bJ89xCvWuteV0<^ostU6B
zMJ`9h3)@x5j`3#Ng2pl$*P2!Jv7AW_q;3^F;=-BZFpl9cfG&XbP8(s9s^K#E2>@JD
ziOb$|KPH}g2|s!L2nH@3!ZYud@X6o%2p+tC87>SI89whzDDfg9Jve&SrFj2_Dfri~
z{}=;F0hQGS?0)%I_=kV|XPj$ZiDgr3!1*owEsGJet4n5#pf{OUj;FcBo9AIw)z+c!
z>=8Wo<X#lVF2|xNEqL?DevDi>1NG&<#^jmPaO&W*=?@9RAt@A4D5T_2EENF)NTdJ&
zBuN2<LIHqBu~@{2iHq>aee3b7@BSNhpX%o7TFA};u7uq?_Tjoq7og!+ThQk!;fj5j
zzT^s29DfC0{^2IvJ7F!#3J!hUohYkYfDv_-80ZI*(hvp#luARO--?BEC=Pbx?CCb_
z-tjKldW(4bl?&+T8xnb=A|uAutdw=(<l_D+x#%%Gpubj60YaB0;`v=>9LSuYq?)}f
z5Wj@{B&DZCf)FMVaGtJ`0(3#9sy`(o(}~MsK;-VD$X2O6D#Wsdf-SbmwrbH5*#JOz
zLbggks(7?bB@@uRGG-|5oP*=}EM}U#0p%N<msLAWMEvk=A~}1HlP7Fo?W}R5Vw85^
z867rxbh(1<s>?j}a^hp6nzgHzQ3@odPqOsv|M7#Sjh{q|&&+vgm$6+$Ji}d<(lL`W
zu%b>qhm9M}uaw#22TGe4$w7XTI+u|-w||ao7d;<0V3)U?B|1o~adEHXp8ZeBZRXQD
z(y0hDEt{HN{8V%>S1vM~X8loyjCe*BtVeggh27gLao2m+WBb9gSi5!w_8of?eI=`l
zwl4ceWW<cdigWmiy94ZKS*VQ8;yAW(#;&m-&J=>6*o*v0=Nr99i|wrC@+xk*Y>f&l
z!#~_!aaBBSXcJ#L2eR>P_(<zFTplJ43O>cMYk!JzWSlEqQjVn??!@{jJ^1ohpTGbT
zl!kioldpdflO~MA$VmejKh%k0;Ow!j3(;IUf$ay|001K<Ey73McMYESkAKFlGkqwy
z687yoj@E=^51K`|6dGGvzTj9<idF>#6De5~97i0K{Ls^MrqOdR!=v}#f~Ct>Vd=DT
z{Os{3vG>qXOj&U)u3WVY3nwM`{`Y>4Q+?&Q_J*4=clu;3TDA%cXEtHSmfaZ8&J_$d
zchz;ce93goTCf~zR?NncclTi9$NmJ%nuk!)G7XDnj>Ep)yU;rb2A>?H<Mc5!FIbQ3
z)~&$G)vM5Zcry;P16N*i4Q8~?#Q0Go(06<*UfHw*l~b4Fy33Yh;o=1-o;r-<z2#WD
zY&tf*_!^S2b1}E^0$zCSZS+=7z?vnqF@C~Sv{dwA+nz)GdwMZx=~96Mzyy|(SKVtR
zC==)!f5*<o?<2=#`zXVUEJ4l?9Ckb<S+vb(a~$oMIXix4HAFMofYFz=;=)nSn@#kN
zzlCWDSgpF8_@!KGFj6B+O3utLJ1^8jlaVe=RAK71ENN4d$P}DfXCIzb#);ZzZJ~AK
za8xpicFLX_Kw}Ljq^1&{aG*1G71c#Axj!Y+Mavoa26>?DQh76`mV*||j_stdc6qi+
zXZI{WH;@=2?J3!)qn7qmTThn@8M&Moi}cEA<Gsc>@>NJdVhith44=W;+Re%?LIodl
zidmF>bR6Yk%Ge90CU$(9uAyw2Tl~VQN7h-}+Z|jHoyW{6cC_4<Gi>C<dh)`^Bf?YW
z^6>D*L3DL>qJIdOHfKI2jv9gXV|%fE-*FU4AI3dT2DwOSIFHY};GfR6;Tk{7la^~0
zPq?hlu@42%%#OE?Bd(Pl>mqt+iv&J{>ZYA4!e3fu?V<bFUs+K3$qHxe72DXpv@eT3
znW9?d)Bw2BFPeuqESjHeUN>eo{`j-MhoQ4=n7L>LMknpqaijyQuX`^xtXYCZvzzh5
zZ$5?-odW=X_4oe)Hcaisi<|bMSX_W=j{pE507*naR4T*WpZRmFY#KnJr4>tOPr#n-
zTXFP67dG5+2Nqqj0HX#E;73oqn$}0!9<8%x^FzSRN>OQU6S+XaP9Ra3I#g1Q#-=9J
zRF&bv`Lj6J)eFF*ys81CTSlO-^Bm4~_N0#(89h3^6bYa>)Ps{J&SD7G<Q_oX$QCr!
zSBK+6ICcCqsv29+P+b8BpfuQrlc&#Muo%K2BKbl^HAb~GqfqR_nYMEnC;=mzN1?Xd
z<9uf?%H1G3y8GbDDluwQ3(EWe+S<-xz$a)NQH##@4!Fu%)K!#lzOx5N%F#Tk1yyAZ
zUFXlCv!@TSqvj>AxJ}d{bri43Trl^!y(z@==B5vFc`{mP*`U72E?l}q2}BS+YSM~V
zXt@RCFmo{(fqB*XRXDE$h`YFKax*gJ7P8)uf$?ue@_AUq1rjA^D!@|0gatX4$!AVy
z#?u9FT(mG~42>j1K<r`*N1ZUqBfVTg35k_;Bzpq+Hv4b=DAmSZ?ofUh2D#Xx!^{mz
z{92c+KUv=tU9G?Zo-Wp<iB?Q~H2xTl=lEq7lf1~KM6Ks`Rvy+xVG|0<OnD#6k>ej$
z48bKY5yx?zNvK%|5Fe6?BJ1dS2uORgm{43}vC(uOe#QLfa*pT5khRBf%xvSvqq24c
znj5NcuB{DygT-9mbJQ<<$sk@_(|hK6RbcU?gn8QdBDToR9S}1d#kLq`3@RdH7l{i&
z=1k=RU^7|&%iXO~^B`xAOnh-5!}yE2@6a^WS9xOG6fXu`jja`LD|X;<;ur+LaY%kr
zD6hikF{4lszI@Qzc^0S7_n@Ys5zQlN(9>}iXFGctm+JaPRQg_Yboar-p>gzB)K{fH
z=~f!*$H|kYFj#V^YivPNT?NjaIfd@NB4bW+y4G2<ym_)e5rhtCb_y70$iCzz+sq0t
zXFTf#IY!NU4U^Qe2`6owm#9s^!AzeSN6(uZ<v9WM-@S$l#Npms?)8~f%Ww%)%dJxA
zMbybF7Gko7xmQx3(0dXWLm@9O=I$^U(k6S^;_r9TA#+?3!^Ne$$RQ?!@E?_TeiuI&
z&d($^Xzc9dx&X?TKb0vDTT{q_NGK+Cn5#2kI;AhnSefY~Cy0sD?2L+#*@Pcrou?fJ
z(|AD;Kf)tVRud04b1AupoibyVJS={vW4e|b?)BdVlxl`N&WPJxgu^pCHN%f$oaA>|
z4Ce73oXdm$qW$8Qsm-&-$}SZDf8wkQ=waLD#Op=y02NQaWA4b4DbAs(6tV3t#(9zC
zT%>7^BMyivh>Htn<}?dB4tZyAU`cdPaDj-74tjxN?(L=SUg2Q~%!{5Nn@Vmaj?%XP
zWH?`;mR$@wu?;mRK65Vj&I&VLe295nF?_%wNym0_&<h9wzv!l03~xMd!DXy`dIP2R
z;U;T7mK}=%gxYG+seNoj0^AmCs$gM&rH%0y57W+)z0b(+X$~VtIx6cJ$rtZt>W@}#
zMWDKZVTj4e^81sXaw$#!5qWvn#Umq|nKLe|oFsm&ABZx1WKwFUEa_qZ>@-0>&x<fV
z@1$YsN8ZW-3{O%y7z4V0zWO4t4x=!Sa~wM{k9y=_s?Iv)#}*qIFCKxDFJhn3=Lx7l
zda&o&|GmtW87rF;;9P82BH*|o#YWUFRW!d?Ct)K!O$@kfZ1Dij8EkZ6VRk}dAA~G4
zy*;6*H2aG6i{%fEq6H;2b5;lEO;<Um_;Xk}A`!xWh+{MD6&4x~BaWKgTwVnt>>cA4
z+eG_?!92plsf4LjwmRk&!uE5jch*{hHR&CVt>&PChUM!pfQe7<1Akg+^6=35=;JP@
z4b7KJ%dwVDg4K+TI5wRsED&KG@KHGM87AIX#;kCJ2XnqDW;C}sllxPDq#?7BM~^15
zf<>|_&I;5FDi#5`3ZZSwqEZeFvTFXxkYNZi@e{>Yq(8k6W9S+}fYI}U=jHKCokU}S
zomlD@k3v&(%NKYhK!)b07Kdt7@4RSoCIS`R0*D1noH-v$7tX`jrfQr!a~935OE7O@
zEl!;}i>4_Hv1ob|PMvH+5t;P@kd$HOf|VFwGk`Pg7bJswwSo^owTzpH@?sx9HU!?I
zrfD+fPOit9bDhwITYhq~!OgxDiWTR9vdTKlS-1rA=d_}Hup8}Ny_rHUzV=T9GHK2-
ztXw=7=st_{J^d_CUai5{L*JieNlZH`<2Bp^b#vvTMjoE;8**G!7}B&E(XMK_L0t4k
zGPBd!LV<U7O_EjfaIq0>!RHj0RMg}^Dl^2%AQ+nEFuswVjMV23a+iwZ02J(qxRmZH
zrxM<JJefNuIdM`T64NVB!A>xYE+;4@^h(x8WT1Cv(&_Z26f2u}X1r^IU@Kh;P!IAk
z4Nc@bfyA{w3N%%GC}|vz&V)^p$8n%{9peR?^<cH5oJm|dzQqk=-_w^oO_9d-qt4mb
z)i#6^8T76LBHq#y>jeTIj`SuxGV&4cX?+>1tfquBqOtf!_?)PW@c_kcE<a~?ie1iD
zYkb5<GH2|gIZ@ea!mPzuwRj$q-gdNKkXvN(pr)Od7;C4Lv~uO_wvnSjoQYh6IbKtq
zDuAIEuKGB1j72g~{5ZituCocB(XY|)%|KL;MS-VrG(N(9K<^#&_LJl|C7lB~pkU|`
z*<J^nlY9M)7U93aujn$rOUf~O@p6nQyMWW3eHb}mHZEVa1k+omVN`VqM^B$eQdWtX
z3zlKwoM}Msd31!FO*oUO3YE2(Gk-A_%$<q4!XVC`??Of0Xso?#4Q97a!}RIX(NZ~x
z6CJ?nD=x#_>C@0UeLBWB6maxZdl=8=)TRkDux!Z!Oq)0wy&dOpp??U4idxKHvI3XP
zn23Rnv*_v@3j7`T%sC`xYu(vu-u#9JA9nKB5TNvHoP7oU@FVZV*p_B2UbPaVDhpV4
z!~601|M5XIS6AV>2S15FeDqe-+IpJ{)Uv83eB_g##sB!g4e8HM1euaT0fn+ek`aKL
z-v1FSnAnJ<kRW7b)VEB*f|=t*QkM|Fr-UI4jU-8IFgi*JY44J&z;*Y16n9^<3}Yv>
zV)5LmC^&~i1x}JA3`deAC=?1DK&7Dp%v^UXuAV=EpTp;aOob!~@OqZBEGZx{5A?I?
zh2F#qc9K6+Xz5}h%$41~rZMB44>iv)MN@{8h!C|Ksnrc9;;^%mNu-A0sVLCnJD!`9
zAQc|Nyfmp&Vs~m>$3Ido@=_lzIh_<CLNY$3KvlpAnIbGYQcco+F%E>+#a>>umANSK
z=#?&CrX<pZ*vJ%3A$CYMQ>^g0$llO>=(z49z4T6aw&sp^lJ#y`z+CTy_>@BR4=u<p
zvOY+|IeT91N@+cHVMOxkn4)4!4#^&fEmXj<!!NJ#ljCe3ipe~mRamM+LB+JdlV_@B
zUu0)*NFu9uJg1&R<7KLW98VQ~h`GS=c{YK!bIE-1GQrB0*}crg#vUjX3Msd%0)t~S
zaq4)2LZbM(IE1OIHsboFQ;h|kOG2(lmD0P3u`E#XW*%#yRpW7d1fKF)pAI4>(3=u=
z5u|rOjZ_?ugfp|6wAC8{&Z%E7iTp+bp6lmoo{ZmIoaw;64HabIVwKxy4+=?@ct=+x
z&WoNNK-09P`1BwBDehP|9mSHv)Kxd(?#nMhZGAmzt1AFFw64AlpSXKHTE@@BCqMNO
zv{WW)T+z2-><nDBZUx4TpNfY+`3G1ttr0FMLw$V%>KjI2*>(5emK9R~m!P)30rd?H
zn7Q_5JaF}V72P`7c}$$O2s0*)LF<xh@W-FH7gePpEZO)V9@@AJqoyv#AAagVRHMi|
zLe?ouW7h7}#4EJ?9T<Z>z(L|LdCp>V?|unC`RViMA1cB5AuMpc*mt5I3zuJpiS8&)
zogawCkOVMd%2HGueID&i^D(ZbfWzHI)Q_Ew2kv<fMpgj(-hK{0etHM4d*A)ouzCrW
z)PDqX-+c{_{pcy6aR%Od`v%k<`!xWdR7$Y^&WBOH`{#Ii^J%Qu_<oFU`!D?b?RIRu
z^FAz|G7=Zg?8jq|{S>FWNhWd3s?jrX&7ubU!=L><_MPvCa}L!_Gw|>|tMT2he;3{5
zWALGeZo=bV{}1$zXvMvE-Hhfchy9y>gP%P829E97jCc1R$4oB)RNXWUcinRvCXT4U
zk!`=lkAC?g1}aD4_IvKZ^p+~LAKZ*@KK=}P20XNVBoEr)(%aT%C9Sp!cx4KwL%^=k
zb?6g$8XHAqLq$v8-{@s`XPhJp;s!0Bm*Pef32jI@)!sW1EAP^a$WHQU9n!({xWZ2K
zDL-)usZlX1eQ7GYlVo?1El@FmJEXr6EAz+DGiGu3jKVoL>QYfetMV{-Y$GG0d7t&G
z_<f!q*xMTPJl>3**)fGPKA)#Ak4-k_Ir9BMc01(8WFB1kvA9gIt+GA)jkA8DEoa?L
zcAxVwV?U1UV*)}vf)@@xU}$&j=?aR-q<X6NS-%pPB|e<OL`2H&?rD8v--rj%Hh?_?
z`6eydGJl@<S!^D_)P<Mhwrkg*+V$bpr+<dm4kx((jy3q{x4(z>!f3qz{*8G0yWc`b
z%@o{u=X)@wCc&X?FXP9*{4GxGdl#E`AH@n@M0iv;O~IXa--gMJRXDceHT>v_-=J$C
z{kcZt=9+(<GdSnEq2o-O$-x|c!RCe&z0YE%sU~AgjrlO~8ppP#Dy=1lcg~Ko>T9`x
z9XrGKF~K<YYmJ3d_dGg8L1lC&zvRFS1z=J+64zd{3U9ym4o0zDL+Cnx4kwQu!ronn
z0auPiE0*AupMMEIe7y}V^ZyYmrq<z?JG*#J%HkbIw&I_@ycwku&^>kmmP{Xw-|X9k
zU;gxQxQY?@)XFRIi|5}&X`m0!{^#RBQjQN?vK~+T>euP7HRR~H<@q0BYoQGFQ<r1)
zr*B40p%QD>%*7M`_E|i$ADHuZ|A6^pzKS<bhb}>;ME?+%Sv07Zu7aJaO7%5hHn0PB
zzw#>#&AbtR@jw3zAG-4z)K`=NNdfP?{Vv{f?=3j8Wh;71(Ijl21PfO!$DTJ|!j|I&
zteVpT=MvoV!H?p?+fU%jU-=ijwDmB&>&27b{#U%TWjB8Et*_zVfAlOa6oCtEd-0#Y
z+=P*1o5PSpIC#7h>n>e^%Bp%?v34E~A3TF8D{jCw(+2Qw|NPI`S)7iW)=W1fODpt-
z#_=fYJA<~KA-J$>*V}m#ef6`ke0np+&%YF7OJ{I?0J!mihcUF{Iehu6U&9NV52U~2
z8mU<-Ib40$0~pcv4!-=AZ=&v!tFU5v6Glv4inXm}_|bR1fuBCV3H_?GPUQ5LLepZX
zE`*TN>D7tPlD1yGc{`L@f#{C<9su3I^E95j$Q(3@4s0vF2RXNDq6$gQW4iEU8Ka#O
z<(MY1g#XYz3wg?GO!OFZuMz8Gz@*ENl)ST<k#;5YwAq)8M+GfSBkvV!DI(NGnAutP
zvir)iP?4IG#aVWB*orG)njJhU3R$jf!Z3Qr_u39Ksg<3MiQL)3N)fyfTt3)B!_FJa
zYU<J`cCta3j>S7Lk-KXxvI#YGk>{DDeN1tt=AeQYk^VW(WflV}aXKX%$)N<~c04)9
zfJkF^afKV~r9CMo*gA2D@YH&jT(vjaYg>w~lDhG@|L%?0`qX3ir>}np?;bvjo{p2~
ztDS}wv&LZD+!dHm-i5Ya;F>${N5!F+@lRj<8lK;@m-$+YRq_e0xb<F)>e`O4eDxbB
zpRo?B=g7nBO$p*CW+~M>Wq+MT<2UkN7o;&f={VwdIx6yb#Vb5>UxXXw@i3<4Zx8ch
zoy?mOmzoQeRagM8c+4YGo4Htj!ND0nPIsSW9<9H=*Gw>WJP}!9&Z-R<arSMzbGVDq
zXxqOP+s+JP@wyxFxlcccI#)zT=LL)%I|fxXBQSQ<D6}-!lT}D*E;K!Wx^Z)`a@+uZ
zv-JoH!EQ~H7hzWU0lay*C-{Iv<;WShbYdr7*ma&|^coBYc$DMndp?Z6_{_uD_0m%~
zH|WvcHGuJBTToNqgt5&nXlkrADa!*$9BtKx0Mt7^hKF(zJr`b1TgapL)OP&O&wUR6
z`1Qw7yZBDrzG@QS9l8!~#Y=CziPv@>jo3`hRauWU%PxU0uY(&J#Ol?{QAo-#b3zqf
zduJayyE<_EWE)BVibF#vl{|)qhA=c#1eG>JgGF6I;P|#z;U+J_j2VkDrf&~+p6$ov
zX)`ct!c5%%fd{c-S`%vO>yaP?B2^2{VQ`=qzEFh<uIfE7*o7B<yA3NYy9`&Zy#%km
z@CpV{fvMxFuxaxibab4@iIZnAL=LZBC0~OXtrO9@cr6}!@NP^QS%s>aTAV+)2`}#L
z$2|{!6!+h9IqJ$1OQ^89N&U%<(fINSukf=?LH9t$q?+9kk>k0mC2lJB^_a``0B@Ja
zl%~l#J?goT1dzdk?xm5HAL(a<I&BwOf$@_|iIRuBn!&TpdB;E{7sXQm)Fls}&ME(6
zh$30+d*NJLY&eC7#>gh#h!J$3sH6Gl)D7-z3yTBE#7DyAJh7!$zs4cB3BuyLE^6n)
z?gjVDnMaBNna{<2l%33t#!lwZyU?*eY20<p@GRV4jqSy^SSKpR*vGPC>G)webh0N-
zRj+)n$w^@JJ5D_JFdU*rAh}5l#?OhoW`G<LN$mh2qZ|1`EQ!)LoF&_7J9}t(l&<Vh
z8WoS~5zVOg=df+hF`Pep2B*(;!Vh)hx3BNO{MDD?@)dKi<<(cw=PEIIObs?~-i7m>
z9XNUN42E(P^S&Cbt&=cq!D>ACfqT%}QiICsYC35mPJG?^D>cx%rpF}lWp?hO7ybMq
z@d_OHml$*mIE9DAoQ@lkR5(Md<2l!hER+jb5Hd6KitvlN?C7lSLe*Xkr{|;sgJMnO
zqVvwkLGC8^))}5tq9--uaQn?`(B1{ioiQGxr%cD_`U)J|{5<~kTi?UizV<bAj#`D8
zO+$F?mp?-J!dvm_k3EF?QU@;d_A#EcmL#97Y?y$DK6EcO{rKOo<wQ@)F0KsAuf7`F
zpMM$yNfP9Dm~;7cIQh!c=q!?tD6T{rtIiei;!nPb|Mma=E2gZy0h2})@vHCu0Ik<N
zh);g#F8IE)=<Dm392YPz^I-saGIlVlS4cq_H8^AHbn5;VRrxiHnT+O&0i10+g`;P>
zF}k`O08qdMeE&bbh4QhB#92K{0C<nOmYJB@(2l2*X802aFz(6~s4II4`%d&@`NC;9
z_V#hq)|BDQ>2oLn=q~{y>uXR^UWUP;A$S0TgBMU<Hy#bO)#&KyLx0B!ynVC`4}IW9
z?0)VW=p8EJ*x~&+II4*6fA7~oWj%&EkHc;brWyMDi9I+~x*cnmOvg+6I#5?Wgrg_V
z;^5|&QF;64v9^B)KD(z4@C6(?*^ea)TCu<95UOj+aQ5^$6yea{JBWtnMpRc<qI97b
z2lgF8*U1<0#4Ec|Ra=YplgHsui<f`#J^Z?I3_kgPJ&e|oPvfmq-KiNoCBNXC{NTnY
z$s>Eofs7)~4jG90sprULUMiru8wD<42;1x!s2fyu0?gi<BbT#s`~NouBguf^MR8kB
z0GeREmV?@;gy$srrn1=nKI$)6tAbR=1f3ZME8Rc$8DMN9N_5s%=8~0aBMQP4Ai59X
zmcdUOo7*TB%h@53oh!jI&*z9<oCXleR~(FuVmXKF&xL#r#<~#G`s>T7ymMk}VfKCQ
zPyR1?hq0z4sCA`SMUHH|drb6e3NZchY8QqaK;MI?vC+kZ_6^7Wu6OaYUwB%q=K84Y
zQ0h~>BcsRh8e_pxkE87X$WDwboT#Jq&%77m*xPvy7m_B-ojwjd$9hoi2XO9u7Y=QG
z6@^<qhs*q4{P{PI!xsxUdb$@27tFxB?YmJ`U5<07+fW2B&^Lg(rbg6MRpLT#9}ez6
zh*H-Z_}L3PP+41x&eKN&{7(4MIm653PuX*-XeV2Fm^Kz`dkccqKj)Gy`6Pdoc-A{u
zF?QCLk^iYYLPdJoNMz6Y3;6>Dt8fHkr^eKeh!KxaGO^#QDZDJgg{SoCPyiNt@ydU_
zfaZp=7~NQpdbFUnvI5D7Q79YeLB*(6G*w){`5vHW=n%g7wXdMEb~HZp(Pq56zY_qE
zR5fAUsyW#5+Uq#qH-xh4W<2odqv+f6OZ?aNQ>dyeL*Kv<${WVxszuHC_wQ{7-SM83
z*W$*j7T{O^`>PQDc9?hRO_);LiKkxOgAxiDKV>3@y3_fps_Rfu=5X@Zc6{xthfv)(
z6(65C0y~fP0j`XB#>1n~*hJq>b&ON$FxF`_!O1Gig(AX|U;Tu+xbLppux{-dv<&XW
z_kQ*QY9`Ia(BaM4*LDF()d<X+HWF{VwiyF1QHijGvGdnr^tny=*B|^6+ji_g<J`6A
zdiQ0#w&eiU-f$}}Te%b?T?cmVKaP?Ey80_{?Z#^{YkW0!?bwHb63~C411+;J#m2P@
zaO&U=v~~92e19!AuAYP+eD`s*^%l`_@-V6<Eyac_*JA$cX=ppJ4JXd`sU#MHW2hT@
zk96Sj>u<vPbt_TRcM{w7pTt0aFUHJUgQ4Be;J-E>f%hdGId}}qHr#|ORxd?kp$j|r
z9YfIro#(r-;_4f*Y<dg!?%Ix>dyZoE$}4c?+LdS>HxdVSZ^z(>8MyD>JF#y0B{;S9
zCH#8RE(|E2Re4dZuyn|%WN64U=e~#V-UI4#TAaL9(a0;VoO59!`YU5X1y~@dDB;Eh
z3AM>43&;ACxVHEdKy*LPJB&_^Z86e7(!DGiD?JA3sf*;en6j6Ua&V-XEYu=n1hBr>
zi(McpVzcu*qYKGm3b%{spx@hC)T!fk(V)pR#a<S8WJAp#glERqu`khoy-h~cR>oHg
zl}J~?T#{<;yb<2%sJ8H9|BI6>%N_pd6_&<RB~xwRth`ElFaD#8BWrj2UKZRozWA>)
ztjzC0ztj=i-xOttoJQvX$XHqbVS%~$)HWdR&&NaSU~QH55B}KSjbrU4Tz$h0SiN)}
zhRz+s!ISM6?CZtoS<8_ec?nOyu^---aQw&#%)fjiu3WPeEtP%Px#uv704{WPW6_lx
zv0~O(?Ay5wyZ0T%^re^K%1c*b#`q>2-n#?sJ?SIqomqT(_9Gh;v8|Ef9L1>=aMsqA
zE5*NQd=tZ(cU@k8vX}hh!Y{OE-yxBldgH`C=68oxd^6vWZWJ53kaUp#K`@;=BC8ut
zA*i8zLB{YTO_0d}#XcO^y9?X4Z%3zVf<OEcp51x|E3dl)58QGcmR>RyFaP8lcy(VJ
zDjQqz@Q2@zH4A59(-YspD|^qt11M{3#h-lskI?<jv)FZ}7o%pb#JxAIK~2LLtX{na
z73cS1*YOUtOk0Y@BTwP+|Js(y<AAE>+1M}#c<h(2qX@c{#bd#h@5O=<XYty*hfwkb
ztiAa@ym#YztX(!6uRZZy{C3B2)Qw+=M;^H!%jZnQv)}(WY&qU#c*r>iH+|MDuLG0~
z9*}bMUq+Zsmb=43ucS~w0-#hXq2x<&NdYL8xT;PPhf=Y`&LNi}5+neMz66K#+oQfz
zLdl13r#hfiDx%~mC=wLHH&ctn^pdJa_?D=1C>D$9kIcFRg@Qw|sP@~OLm{EQb{|eB
z5Fq{C)`aFxDlmV=)wu80)%flg{|Z}A_OLU#BthaFe2|gx2;YDz7K<!j5*)Z#N^g=)
z3Q0OgkeS?3h%~Ud`AH!XG4_6w0{I-TPUJt)ONT-511{tNop|j`;k_4<x0eUJBBkhl
z8SWchVgk%(>tlL<kfig5utaG3<J62%aX|P69nDTqMnSE6RLroAw66JB%iOY6FFR&I
zH&qSh?K0{)CB<6Y+4ed1U??Cy<vH%9yH4a{4~08PY;3Es5ezbV1I7l_RxUM=U%~jF
z^^>R%B?{#GkEl+Ci-!-HMfRtnVnem%xrV_+7P)pG+w7!p0?hF}!k?Sk1V#Ar{MxoE
zq3PW(i-F~Ytn7lfVCJn*JYMW$(KtCVk67QxKPU;Mc2O9FV>&Ka-{7#$Ip*6^sR*A`
zVD7T@xcBB&_~}1?5pN#pOq0BGa3N2mdCIdc#P>tP@U@naK#8%4Ifi_j&~*xf7%OcL
z<~0cX89QoT@@hxld($V+*2X@1cZ+e&wu$qA?4XNHxolug((q)tm|lA4Eze)Y=J60w
zZZg#?H7OuL2}KViF5xFZP+oRG_<eSSdLoslytWzlKm0zt`Teiq-P1jAiOYPex#$7s
z5+p9ICnSCx3W*DOUbIPE0tcV^okv<b1bIruB1!=}sc%XOYsLH1XU+C`TjzQHVJ<tV
zs@MXTXBV4Xhg)T-){3gKbL7q!DaR{=sJ|gL>Kge+?jd!+6)LfC`6?vs`>}cV5tJea
z)PxV{*vclja3cMb)9#}9aH7geOx!|&MbG++3#8%akp9_1Cml5IpDillm3)@s0G)92
z?d(GvZRc1AYkRQbTpW2h78^Uy58}%fkvrvMmV=W6_@2>>|7A##r&9W;JQ^2}hU|iz
zQtcoE9OMx1NDZgECc=P$E%Negk7zDVR}g0niA_1L3-1}H00I{c4)$^sB`DCawx7iZ
z3VYF&Q=1bu$QG1pIH&qBhlSLX$2ZXN<`9RPj$uUu%GNZeikBQ-$Q_>tN(<BB=GMEL
zq+O;U9ByvSJ#mop`LvOXZK5#dV`-qI2b|T}v{$%LlGnv7sZR}^15j3p1xr?-qU$i;
z*>wm-FPxJ3WfIn1%293Kyp4-oyU_fcL$i9**!oujGHn!h#~9Kp-to-OAJG!dQi(a3
zyCCp_OWe^=PAK%(5dZ)n07*naR8*`xGcN2Ge)Ms|BGQN0LOxMvtE;6fvX@sX7$?-l
z2p^-l1uq1j^k^7A19O{-cyrqUlwzlW*c--%p8Fd;Nh|M%w_S*9iLqRRr9)4jHQU1s
zBj;n>^h(E&sg(k7F5R&z$*RcvBD@bfYd+njDijKtpG)!HqrAES<3~3laX{P2qv+}@
zW&#KBC{)&A+?W;=P{g^@r_k9uU=z6xyquy>!O2Qqv;fpa3cu846jopDIx%6A-^eOE
zFIXi;oLX$CoeM%jKNOK<Bu$hpwBFv>2-$l$GOI2L07-B%g!Ki-vKw6;;I)sabGkA4
zf!SY+#sw5PD!UjPDL^yC=h!PupV-c(PsUDJf8|8TWG%Zj=EXID4PM##aHg2$0>djG
zplGE;rV^O;7b#9`|Co%;jzZu)X)EMx%%N>Za%aY;aRBqU2u-MQg0R^%@?{`yVzCXR
z>sUMX|7`q5gs)`Ki=E%8PZTR$IIx{mad?sO^Twr)dPh<3U|vja?T@lwosWmcH^osd
zSEi6g{56c+^j>@-;sI6A>0K#{2gnIYpka#|9V4>1gze+W6X`_@EAbDbgZB#Wh|hvg
z8BSs!&gXaTqjbsl0F*#$zaN{_RWN7cN<WA@!+R=5h;P&)GJYABVkfsnhCY{JH5C|{
zLND)J6hoPFy54{qPm))rn28JP;TAht-qAD`agfi>HJrU-J$<PJ?-CTkGj6>vg<42I
zYmoFL-O~F|>8f0zkg#xaB{E#)i{>vhHMe}h(<K63VTvtPa?-YO5m^hFp-_!=*WZr!
zTzdr;&K`%uyZ2$*Wq0GYm1D4N`(CuJy9@WMpNef;_F@po7=Gf~yYcz=T@GJfgZ86)
zaIU9_5zV7eUjg*?5Ar<KjT3PBr7LmG9q+@~_U+hm<Xn&osvwXcV1kXDFD*nV5EOCv
zxH=Ddhkt~X1d%3uy@MoUrv02D=~!0wAvK@g)iBA*I4mU)IYxzn5StJH7nwm92Pbk9
zp$74JJY_0_i7t5PS0|if>)M-OA&ExemmVXzr41((Z3l8pMRalwpdA1yxJ7<G#CoQL
zsc|-AI`+^ENr<n-V_Lt~b7YGHmz5_omt8EdUBbOJDI6jGoFF8yDZduz5fAFr=Q!DE
zJ18rKIh_<u%Gar|A$`o);f=_olQVG)tZx~e5aFX4nEb<w+Yn4Ku55dldF*@7F+#`1
zKcH+t?<L*o7#=Rkn6ZKl^gT*GnqS5Z5v_SADlS>BNM6G$Dzj+rVEe-Qq`a3Hi1ncm
zk7YADvwv`eWw3#g4G|t3r%@M@+Z1nx!$PrT{FcNZze(k-svCjk5j7a->qE)sV`cD(
z^>YG`j(@N`QBX&mu8UHgk91B>4=A09H;4K1f#!C@5&T*CtX3dXLkae_J;_e`ll(Dm
z)_!`UMha;aJ5gV~u*t-hDm<;PXbzHGWo2iJ^4Oe@6t5fK@gA;FjjQkY02b7BVCS*(
zD3n!U?RB@~J=a}{#k0oZ$bkds87xU`1S5EOv@W{__uO_pRxg{6q4wiA*?s{@MLn*$
z^={mF<q9NS$8qFrw<+WRAmP<A0Ddfju7)++I|t9ZH(ngciC1{<Fmc5#xMRggyz=tP
z*ng@M1$azbxEwcKyAfkXwqX5D8*$a@g{Y*8AF9v>P$)0p$kvzeo$r1h``Y@^Fl8AY
zy6;xp|KR&Dy|Id?>p61}kN@Dic;VgSC@(8W9IN2a$<;pgomMx&V1ewJ&?#93L(%RA
z;gL1IRzjRaJ2*R@A|w;YD(TVx1pV;Y!<^y2lvq{KBL!79z(_AnN*X9H_h6XBo8eX!
z+)(7?k$Fi`XS%Q)Q0LT?mi!bB<hNQRbpb@p5qdZ)zwVnBp+X#5R*j9uH%zDkS$s#Z
zv7k9b0BU?RNjQa7t}kg$^`eN^2tn8!%9dV8UG%ldK-<7Y1qb_Cj3FZ3jb1rpbGsaT
zob`RU$R7HXN)+ioZ{~Dz0<>7uGRjzFLmQ9zdMg1uGQJZIoAK<NRu_F-<hA}{`9S-T
zGuY}yf#9p;^0vPj*E<;`G&Aw%2&X8PIf2+X*E891oYh&yPTUSr+$*+$`4R@489Nop
z=Zvc)qp^7m8V4LZg}O0#@7?#}y?5S?tCmhj0?K`{PwLMWA1Q{j{uEg<lE*1|tv&2K
zDSxq!TF;C>2o9;K?IK6N^zNVTC+D+$nD;n=Sox`qk8(u3a3$fjz4YdYu1m}U-^>^E
z4h2SBz%r?7K2lg{zae>qPMj&WQ>hXxyyi|ke9vuIIcr>cBq>8pVE}Ku`U)<LS&NU~
zwh9U4_A)rh`Nh6YY<}Yvymfd0kN)1n7=Z!IzTz(2G`9+G?K*?^f8zHrs;m?hszAbo
z5hnn$13NT0f$3I4Ji%EVP&c9xWyJxsojQi!{`y6n?j3+jhOl*C8!o;2T8wDlg;U)_
z%1~Kn(Q`J8pN4@WTX3MK8WWov&@yf^TIwrO!p^H1y!@dQl_s#PLwaz$@_XVrABs0m
z8b?Wy=k<1XKw~B~jad`M5(~Zn7t&rSX9{aQoXu$b3kIOS-ki>Z$-F1+Ts$_N)Pd{C
zdDSszgVcxJOdmz9{%D4bqZ<)^R#1bxwy(J<UTJE@SM4Px%qz3X4}tI1xS8krTER2N
zY@$`Ccw0mD)dcGUp1}cp%%Y@&>Z^AHcx9?dD>RHv%>y~L5DN2?4o1G=bf-SUyd3O#
zqmk;%ZDOZ&_VdLGW$%565AjKfc^6_;=F`5ziNj#vLp=H%KX+<uJ&)GO#zCxJd@^1i
zX-HJvC3EJ$?m|Z+18Et>0BO!`5f<?}c3Fmq2MiX}S0Yiyl!4-N0P=rgjm{~KHgdBl
zc1|A0aE8`a%^Mn2)HR|p>B2j^PGanY@n~!sh4G`CP)O{!MlLmM#(%Xe(O=3r+41sy
zIxg^xT_QdBcxHGiY{Z##(U<3M@hrnF+;tzh%$_4VY#7^JNT@<n%c#c@;^QT$#v(3$
z70%|kx3Lso#fcp(V-=TRpnV^{{hu!)Nzy~b!3%iyiJ#$(x8BAZ?;b*P(?~GilAneY
zb8y=m*naRNx-Rshe_#kDUyc>47vP<@4q)lDO6=?zi}@3)tW6z|Fp28mi;^C#05VQv
z6p#vh&r6Tvm9x$G_^1CEfA&X@VoGBb;0oCO*4wz^#;dS<^9~H^zQblg0M6s&mKSht
z)GDl*REueAuEWMFmZ7G+kV{f#XM*TS#iuo}Q~yP90-|3hRbCi_e(#f4V&PO;yt3tG
z=1?*7EIyxDjqH%|(kn7P=R8!<dgXj^6npdBCm08pNH9bb0!DsVL6bN*2`ZIBawGj!
zVZ?ZX7*~!vNrX;~DZ6=MG^cRW_f`4OhGcYxJ=ObgoZCC~-izK;9MOCB9hIbm0+G)+
zHAdE63J}Vosu+XF;OtJXE-2`jpi4Uy8W+!V7&)i_46x|@MRUh;8(!vO$J;qcXWcVr
zEJx*#PSgr}E;IqVBTPjo6!*B{76qO$%hQPmQS;f|5D%!uq`jo$CFV#br&i(rA7}3!
zEZLRc_kB)>_xk0WbAFTa3<i?`%wT{4Fu)=#fCY95>~gtWl1s`|NoBjr<)Tz9XP2rZ
zla@-BXceuZELk$O6vL8S?qZP!VK6h8c{%5tW5?q^?mgk&?l-30D9r1=_k>^k{(k3o
zPWRQTVrkhW+6IO7-lGBQ$7?-{4!<ku`_g=)5yzXQo)^nz)h>aO14i~%6m|G=dA4ZK
z<SANITjdLuk0_rr6%wERm7Pl^C)Lm1!7zN-da`X$Ee!oWujOp9=DDwIKZ9R-DmWJM
zYkQZlw=}`MnIt!_o}{R%o!c)yOMPjsaVGDG#!oyAJYHw)Ud=3(KbR!MmWQ;A!k?6C
zKCrOa7MxEXI^MhEsN_i7u<ysnA0P59`?oOxJuvD=f6e&bIN)vX=giP%vEwb0Saaxq
zZE>qkdNl8`CDX*Q6`@HqW9qn~Y}`}zQ@j8&LGNcDF|}#38>OwB(iYx%ww*uvlaFj<
zw`?9D$cd}Bc=wg3nHzY>PBNR4yb#;TT#j|tvbhtbpg{f5w4ptaE;6YFpz|`%ed~We
zNV>Enw-@-s|Meg75B}lra&NDb+ZUTbO5#g{{J|gmA$Nx7&FaEm>B#Vk<ma(I*~h>C
z^E*`6S5Z=4L4H;!W7`9_AVh|#q|Zq0%7lC06G+s5zD(FrzA5WU+bJ*(iV*3xYCD)!
zB~jL+K}=I+byktZazx>|XA&qCT!h`vnm28mBZFMD{49qx$-UX)1bb^rjVDboaIf2K
z{96sup2()(Qeq~$?UjZU-ZMKwcy*|QxJ8eWyI$eBKRuz^N;?KyD;vG_<Kx4Th6_Yi
zTYFKY<ltplxPUILcEX~~4iJWJES@|$te(RM(w2RLGWiVKIN;;W5v<g-xapdbfp#U8
zY6v|}rd6<dvVxs}dKM6hO*<j*$ib4K&;RTMF;fN9VNvMc9aM8087YJJ5Nr?a^&PDP
z+XqxV-_pDG4G$Zh;I5|9I`P{ZUY@+$dbX{J^m+W_X?qb`1|FL2@s<55yOYL7yj!Kl
zUL}}qKWXN{qEYcLwZD2_{;@S-8QST2?59`=;h8tBpPJik8_LQ;?ymCJKlw{`f<=^;
z6jN4G;I=mCtSPtfP2rkJmfW_s&uQ1tMq$RaFk2;cLJJ)?Ur|0|Nx|aX$jl^qw(U5>
z_?$iCd+Vb#H2O0D4}HE)(lpX-y~&hk0)5S1;A`x&#f2G9`mxKJ&uQtuX~wowJuh9-
zU^li@%%}9Y>0+0Jto$l|=?CAX@5ld~J7eo;I|+pe1p|694GBK{<A23}{SW^U73W{1
zr8LRfPLk}z9DnxnM-=5p*xKI1H<6RXX?t>4TS8_&+oV^6kakwaR%%7ZDW1D}fwq=r
zYDz<_uJ1a(#}kP2=?5ROvXjuihoU$*aLYvsc9$pl%b)y&{#8L@`aV5t5xVLMGx?w%
zQq!cd-VS<p_Upd2Kuk5)fz&}Ci%V|;Ya5v}tjEGUpy*ssV;OKst6ta+ETt7c($5ok
zc-O7)>BsAm_Sp8k_I>H=Wmdj62s%O9lXPKyLBOl#G@5Lh)qA}nm6n{``TUCqzbN#|
zU(aLeSLtm%B<m#6RKcWA_bQ>vz=KbYwk*w=DVxs&j2FKzcws|&G%QI2f9a>y|MCWS
zzb!d#kiOc_Jiix!wZyF?Os`<wpq(iZmlhwA^gu0L!nD6w(|U^z(mST^<F*lYCrehI
zY&SF4`mbH#v4_mWyN@RS;Qp$Gv_%Wv1)Yla((3D8_g8^U_090>;ZZmqs{U0KXFIR3
z<>(ceA=0yhBfR*bM)LT9ZfE@5<O4n&wCMGFyKbUJ625D~+!fmPEL%~!ljhn%xSot|
z@!2o_ijN;okd@lxvj?Lzx3!TKkVXcIQz<POYU&Ucfp!HRu(tQT{;G31Av|^>T?TBu
zDcu<vxc^%&s`%9LL`%rX)sB(2-fel9`__WTZf|Ds-ZzT&yu3CeHGk%wwVPvMk9R&x
z{`Q!bE!ovqsjvBQSek#W{Klr;^RM3y2~b|&N_%|;r8UiTG?Wtv=kd+o_|IwF?PXvs
zNK0M0?tkgkU*nfwK1UFVlp#)Cewy<qk8$?=ISS+sd+``w-1(Zb7mhK#l|@(19FL~<
z{E{W$0Dy1}K^VoW|D`WaR|%ePrE5R%dvAM}ik8D%yLy?`FaAA0zB|kKgHIV=+941S
z$h{Rl{PaFCgbT|4fDj-nl0$xeKB1uVLwXXy&gf@6*e<6oKFo*xONc<2y!--kBUz@m
zw9HGbmfhhs@cAkaO1a*o&L~f4<5C@Lhs<D@%2B0dG$=r^oIgXrP~qNygD?rEZZEOH
zKm|yJU!Hu_!=JD_XqwF2UIC~SO?Od=r7wW#wXFw5(v#DJl5YRwL_JkE+$2aEXSP?d
z{=~tV#|=sfPc&)1q}CT#J*#6KAFP$T4O}*_SjmrtpgNFdN)9~eXl%wL{Af_J+j-xc
zwn9e?VZhzy)n}(VMHlJ3pzYqG&m!4l)2`iVUvPO)<Ow14<2+50?8*l-ufib{Y>jUT
zcXA;uJ5#dt2-me(>%Q=OHZqWQBB`KX*|tYi_BI|GtGvE1mBKu>tCCK|i(X9Vz5+W@
z61HF4|10-*{nisa+yg(-vU3ygq~X=px+Yf|Sz6Dm#iL8ZyMk)z<JHXT<xknA!6y@F
z1=cpYu;;MHRI*V0?8EJDmC%oQw*6t*oPs~wb}Sq-{$cD{(UOuNw-@-ugGoA@O8EGr
z&)83<SJWJSEb#TZxE|if(BPR+wruf<!2AT4B^Tub_AZ9r6|u0`J}>Sm)F^DXzL29E
zx!9#^iw0m_{<Je&X@byrug8)$Y6{cNClGICOnB*3Gw5J!#VVDv_%L0KDW&1ZuD;qk
z9*zubf`qX%9f?gJ<-r~(u;(L4^U-s3<g7C#tGRTn1rf+5E55=C6+Cm}1{aPrYg#tf
z7Fb-{)`?*-r<8L~Ug2D4A^+Py{wK`t1{wI^-}2`}NuE4j&;Rki{2oh*pf#TxomxA)
z%#H}k%@xr;@bxDwuJNf=uM$2uzb-7Lbbk3d5J0AqU_D|hnM&zjM%MESkTONZ(Hs2e
zm9wm@t@G3W`w!`v+co_XNhHqis1($8^VZAPsj4dF&cFK?{7vtyOLMwnZ64CLkF+0}
zz-6Rh_eU#QB{}H>Nnc;@XUDiCX$CC}1!;lXj;2QVUgF)`b_GHO*tGuont^+r@T4Vs
z!qdl3;X%zkFBNONewg`Xx3OchzGTh+w;AuC!87%>beQ2LCSiEoq<`D{{&txe_sZMf
zCR6_mLVFy2X-fOs^69p<e(}|d94-w98iko#B0bNIv}y2Io0ZU57U;J~(&XSZa9?lZ
z1zzb|38Sy7n^_w&1Mj5&ew)9eS0I^h*z{W>cSuX`<M$E$yJ82(!8g3;w>#BXo@%Kz
zuTC0OoB-oPnRJ=~Q#74Ajz|-H4jf1I&2Zn-7mxlj+JMSSZ1H$N%lVjcfA1?047yKm
z@C|Rv-!ql4uGuxvG-C!S&DLE;Jo4K$=`W=eTMj;}eVki*F~o1>=DI(DlKyuRqt8s^
z+5Vx?BV8`q1}&l=i=0xX&ubZ~bX)3qJx^*eqZ*I4X~T?Zw{|AhzmE&JyN>~q?laxh
zwNNk!Qj$ugkivOtl#~*gN;$t1Cj`M@kbp~{GbRK>uHE^ib>NTvf^X$4J$mpaLjJ8)
zuW0>Ak4Na9zUGcMDNKK<mY#MV@HxP~)pZZJxmSg0Vazo%U!L)8gHtOAVe_wsx5jp5
zXv^6D!^e-qbbPwWl%RUrXBbLXAEj|hlLY#)CB4dER>qd-J+kx+U;!BE{q=QQu5KSc
zmRVH|RROcyKb>~n&03Ya0#PzZO9RqzFXKYsAelcNU`57xA1^PDd8Sno4w~0a=5_G*
z%gbM_@Qvl4`v6kYY{TPYHZAt>9=iTteD+{5rH$zi00+v)PqS}c?H30Gy);{~##HYe
zIIw)evVn}YlRkrwXJ1t$(>?Xx=Zofk8qtr7>gkJL4zNoy+e#)mOUto!h_qsbA1mxR
zu!cWqVdt0A<LoE{$!Y%Km4^|REoTd&Jh=8)d3pn4nr--FjVFn>1#ti`X>!D^l4?6i
z_aXIfCfiTLd_~lOv{?GnBfHHr22r1VWj6H;oE|*4S1)#K_Ugg9<Nhns=CzYbyPA9u
zX8ZM&(U&A9_S*7QEix2q9Qg?XNU*m;4f$qPUFt>X{~jYXcK}NiicuvcH!({CJGE5{
zq*Ccym7Xm#_p@EPH`^8FzQ4aVYz3?qpY~R#ZFJiBtjMX9qdwT%o6b!mBO&ZwMv~4w
ztyxzz#?pP?=J)U*JR>Sv>}^<cU8r_a$yw1QG$NRTomP@#(PNBG(Q7%SEgxZRlUe=k
zgP~Glw&hhDDbw|)9KxDfk*~nR*p1D!q04h!)95&X)~DTHqPHQu`Ze-Zz-F$Qz6Pk$
zZO_=3)6bN}6+Kp4(&0(^Z6cF@SaHD>ypfr{=Jf@02F)7FnnGU|mOib9s-H>ztt3@%
zt6Q?N#?=)HO_!Hn6y4HGlg+ugE<FI}%;5z~<EviT*}AaUwZPh@%O(<!U8W_2R>j8d
zV$<fm=OaR<D@7QAr_*E6XrDba<d`EJs-0co@Q#so$;_v4LACS6aL>2cy%c!-+mB^t
z>&jvnVd>7`nQepSZH77ZV%Numk5P{VEW7udjmj{o=ZvR4gasl}nmLs(=wYmE%PdZ<
zBtzZPK7~nYl<nDAtY}y5G_D!!_8ig45YBa+&tMup^TXDUPc(Wg>w^;y1s)OT97LKn
zemBbavc8Puxh&0nMVh^vX2$i=ri7=6JU})I=e#$BubuVR!fhjabG$&sV0#!9p3@cJ
z`iXlzpbiz-=Tnpc*uhitE~WV<nO}O^V%z}~19tMHK+czq+J~9_@<>l4Nc*e}!y^Q7
zV1JJ;1wJAAS3;`4mnM4^p&Z%YN9zB)VzQO0=CtXRt_NDDJ1oMjzn-~$;lLAtUD^8$
zQ`o`W{cQV_UbU;wE^k_9rXM~XYFVN{*OH^K|N4#BaP9XY{Y^7B2x|I-zKr==I|jkD
zj7<>GRR9|_oNxBHeryA^^I4r66}q2BNJpn?ZaY}H0~mgt_O?77UeNUTAwuJds|lg_
z(VDRY2-Z^$Y&n?VuJqxFOLkzwqLqVv&|_cP&n@4v@W93+kB=%9S~|4bn5tzOn?@#z
zFONN0(G5JbnMBAtmi>A>?szf5*p>6fKEI^sO6#9af~9qt>1M`P8jkv~)yGs?d;hMA
zx#yij$9!KO9gL;)_^pj2rs)Bde*Lc{H=W!`ZLc28wP}-nA4socdGOI{m&yJ!%37G~
zkKZYhXg{{F&9*r$8=roKuzgZ``e50O^VaL*T809vkMDmw8folNB>OT!OFAmvdludv
z(oI3dHeoyA*YcHSUUgq6pAp`%btJIHk)H7k=6J>yCPqsP?&-K_dLTWt`_906S}IZ;
zT2zeo((J_?EoaYtJhmk9%1F^`<EaOwrA05+PmJgla|ZB`Qo5BZ9d4W;mF|TYv0I`Q
z#t1Y>xnSdGHiOJObR77~a7$V=J#JiePYKV^XS$oJjgMCAgLVdtVueaJESfx9dOq^g
zk}UP>y*@w4Jh!bh6Wkv6GOIJ6empF?b7<2L+5r`C3%k8%N(+FoD_9P__Lz2GO}j<E
zed<b0sDesbWk6GIA*RihNDG&kYO2zK0%~7hPvydHYgJ(sXj^~<rV3)V@x3zF;mZeR
z-azP&Fz(zo`1lhKtB(%0(x*57xGt?`yVVMK7KnJLu=+UdR1ov7nA143!Pa-rjOzzC
zA3qpC)`MDpY*#=OP<txw7CFjKeZ+dlwqMeF-=7$zL7lx|Q?k?;<cyzI@$-#wARcB)
z;4F2d^|P?WuO~B?ClByW=XMsBPdRbPV3Y7=Ep`A0^lndj05vlRV<h0$n-Eq@iz*8T
zy*^~#>B>lkIN{m3@C$^4Y{Z(!I7|j?OS2-yC4O$4v84B!!ad6u(~@`1x3Syw1%-Rx
zw+OcrAGJ8q3nU+&c>1aYM?<8+T>pa1!(WCXfIhJAkBg?4{_Er^27@!n!sy#1k$&Ek
zE!$O4Kb_jvg*k$xo7#&COD{S$=|R;v!s;XRmv#Jn+FPi$uLW!c+xF%clOkA!QoUaA
zWW|bcWmN8cwTbT%pRwc(dQ=W-@tD<5rY*cQL0A_u;)Il51HuY+mH}$TTP^i~u3y;)
zg~4_*<s4}Y_}&ZD84^=Y$zYgi{3!y=7-~@e){??4`@M%G>62-OsXf5$p+n6gd>5@s
zpOry)7Z1|9Zvl!guz7;HHIBVS4K|Rb+lc{iSGJkt>${}%VpQ%=JKT9(_PvSmWAukf
zF7&DpqX%zN=YKEUN5>uPBVbkVgaI=$j!&8_EOlwnvlDrVABd%=VqoKp@LX|+bOzA)
zV)7-c;_6QBjo-%3g~#UYc<-}ci+*i8wvF34f-v&-RDCSgRPt!w*9#mWJVvOJy0pNn
z<5ULS9pEdOk<@!MLt1^(f-jiFPGg(*K6(`QuO%;iRRN{tyFNEe<IMIqBWnxy{2kOs
zUms~t;w8w^y*j>-Nq1=8@xAxZVo%_|?(1W)TU{5vImF{5p&xryA764HT^Xk1u<4^b
z!whM^mnQM>^cT9=;PE$0MvAty<Vz(c>YBxi@lBsIVyn(-k2wwkzIl{Pt+54uePQCd
zjg1z~j_C`}IQpJPHqtYuSvVNID9#N|dhs|j|Gv1T2C&*n{Bg$2A5yd_V3oRqAy)gR
zf2&jqy<MW7sfq-VTgsUuZ3Klth&Tfe9<r9o;rNj*BH;i+BpK`LWno9sedsVnxgije
z`OyI;R(5IXJVI4bHXvD@8)JBSg_@SbG?eE7u(dMH!00T+b)B@=76Y)qwM1Y47}@1b
z9BQsY2uSR2(AzUeD!+<j9SsBn0-4;U@9REx!$q7p)J8ZcfEYs$ds$0nbM$a0xmiIV
z$;3b(^V=!9x(`vD7Y0(yj}I}iyhCIAA*zdW0Lj|iIK$H`R5f?eSmFE-<o4<e10ypO
z)wR=BSBg+H4vdjq-bi<IH30$f{SEqi2TA2sa-_YHP*9Lc>@wKX&vvkYW8H0JhXNpD
z4EOf28V_^iP#1aG4vmw8{mgGC>FDaFBrgj{u`n^r#L_km?cLOr<T^CYPcl5UL`7pK
z%~gc}?5xevKRiWYO&cBcWsV%Tm+9{xBV1a~p_Xa_LJ;5IV4!b^WNrnA+ZzeHG!FIl
zvmLsYy6OM`AOJ~3K~yN<a90~SVTZ=izJ69>LAtxUD2RlBBvZo!%x%VL@9d^DKii>k
za)gP+E$Umls4dL{B<l-P3{5Ul-q1-)brB%hSzn-kXp;Qu7P=ZM0Eq6a(APIgsI;EL
ztu+pf(M<+=2T9~qaHONjrE!npzJ9hu9)~*H$;}ENQ!&N|`&r%((%scXVU9!N%*Y@!
z>oM9oyD2YlXk45cWqe_i`qnP$6pf2h42>^P-q1;FO)((Z-B_e=V1nGLW)3w~0ub9<
zqqk>>Kye+%+UpQNGPXtkqXA+$WgP8lCL9o?l6wsI_OnSYhr2o)8fBb`fqoYEpsV{Z
zMLCYHXGaH_UfZX&^N91<IV8)|qm0h2Q`g!}U717U#?mx{WAl{Ob<kd0j0E;J7wPRE
zC#SNRBh8gaAhEyBqlbftq8g5OHXwjha);greMGZMIC-em@tIVVp+`Mz$Xt#d?sRpX
zU}B(;g<VNk_YsP6gN}cU4KTT~M@!dHDqI>@XGa;HS*5P^DD`Ez0BkN#F)%tyX?+(R
zE{*$JOZ4=O5~*nBXiF6mNX9mJc)uT6RKuyRMh9zBd-U9U#9mejr;j=~0}jrt%Lpfq
zb`uFXI5XbY!`zPK(BY#L<pdEzGB-NF#L5oMoyVvw%mQF-c7&nn6>3_K(BRUzy)sST
z$P6X*T^^k2?HeV#yon<%RRAQSoAi9$PpYt*lbwyuLM^pN@BJQj!^NCB(ncsCK*Sk*
z@QAfkHph>2krNJrNHWpi%lx*a^UxuRbG<mz)Nz=qqKJhv%TzZXrm-U5#hGdPM`kIm
z>!7{1#EXR$O>{R`X`Jcn8DcN#3%XJg%q`*gp*FID0U*Zkqh3}MSsXjurLk~wu#dT|
z6rJ7aIMdj6i0Tp-3+E>oo?51=sf(scg@v;Wj7(El(@uN6!op?x2gV4OHE^i8%HT}z
z5Q)4Bj#xM|*wfEWuz(|7ZA4t08R_j~B_8JRp-%F%U7Q)}XKph=dsjNnG_-b8Q|e;j
z!X(3!OH?*?&|F>QVBz{4{X<g}RJYLCQ0~E*a9KTvT3npj-(;X~h@E)K356<&kdjbt
zF^4<b$;onP9P96AIU3}Ug@x0@1I%v5Y3uB!EZ@SJc?V}|%PgFkSfsqZot7HM_I5WG
z=pS@&rrX8B*zPL5y(0vRYdO+ZM?gR#y2U{60P#o}M>?CmSV%61G|t4B7#Lt_U(nUn
zt+CL-nJBHDhg>X_EKQFxHn&b)TRP5^)wk1H<Jg|UncT`|4mCSi7~fl`r)Q8rQ7y+h
z>K!bMZ`1#<pJ=3vW8Ez-7Vb0L+sCHN;c$1S#=^<LeinA2tLu=HE0APvY>27VJz6@C
zXq;J@8D(U4jk?yu)R*O1I5S6ST|4cyB@Pz4KAcn0%n=u7644DFeLVn$HJt2faBwEM
zOV5KI_OnYl<zgW+%HYEu)@2UI4tF_Nh=YamI}R2WYb+dOVr7?>&Z92Q7%a4LW@~wx
z{?Qpq>N^b<ZZFZ(GeV@Y#b9B4lZW^Eh*}F(DIw&P^7J}d72C`bRtRemnIU^b_-+YA
zRz9t*ZG-~?A@-T>>18byp|QD@!blJy;;fAiF~66hrlFbI;%pEp63dfJtn5=!+emv&
zJ|LNjE;BN{LUCn1?JcDsU@5f2(C7^LW!1E`)ggpnb3roLKT4#ioVK<$0s+DP)-1!l
zgCuf`X>D`pNygV1@9AeRE05;ZR&qiCV2`<h0oLN#G&Z$RoEt`nI2)5A%<m<sX=tLZ
zGzUS7#Oe$aD|?jJG}2yI=+GEjb!e=pqrJTXBrJ#b7#f)&ue6%>wgv=(%|#d*93xU#
zPHS5mA>q(C);~l%x0vSEHX<QGBDT)dz#x0!JepeC$j=HO<t_^&L#)XN^-V35<%L1S
z*_s|>ZZ}SKT@wxExj>TS`WzF>dz4o<(B9xA8q<k&My8f2DzBlvy&5E}Oh*|So+7un
zlD4*H1hBOnV0d7RNMRYRZS8~v>~GC8J}^uyr-<g(HgZCOM0A~*p&@pIximJnQWOay
zQ@bpWjj)mmQ{U7=MSd2DI6HF_%xyzeT@y_ec|elX#sU*dyOdQo(B4=ANM=%-3{Ngm
zSYAVWdkp}qGYN)<Cdn<Xq`kcrB<!q(815e>E5FpxxV^wc{}B5*g*3OckrNgqq8rQ(
z53&<*Xe`bNB2#;;O^&jX2vXPBN>yPPh_k;i$;@Vws`_SH%5y=aU~7@_g>A~J8fb4S
z1yan)Ek-65DJZL<qrDD^WNj|R;J^gAMHRHSw}X`IZcH=OGej`2#IZ*pwzte!-yq3I
z0WB?#u9NXi=7xsYCdZ<2pN+{8mg0iihGuGtvVa7!r75O2;#Af((OTiq7~EN8bbf=<
z>IT}I968Rj&G6Vf1!dK=x7Q<)tj|jZ`W@SAZ*NCR$^P~%0}lrvw}kfgHiU#ke2vkE
zeZ;d1Xl`vIJ19scx0xO2XG2Deu4R<<$zc|vP}A5#ZBZCVIzGG_rJ}Zp_DUycPVOx-
zJhMhgRRitKPC!}=Y%@4AOMY1m?XDa*7oo3rgzUl!O=E0#p1%A2q;iV2Y-MVb;rox+
z&(3%JS_lyPO!xG-IMYU9b^sv~tc?vayC<n_Y@wzw3?fB*d4lnkJt}IOX|Hk~V>1<9
zWN3QDp|PnLgkUMS&A`aCOJgk{*jj{vz7ZmY4$cT6h;7d?_^^*;PO+nF0jcB$qhI&1
zmz770D@VjWv%P(+C6ydQ4$e#rGq;<hrlFa-k_eC@u{v#VrnbPvnH7eommMr@brxOA
zp<M<?X2>tCrmd~s#X=Yw7$du|jJCEmf&oEvYmU)ACyB$eqK80MKFuv{<c1wAoF5!y
zl?V+@EtI%exH&b-+)jd;`ey3Oa)1=cwOJ;XcPXoBpuOH%bWO+C9Gt1BrM<n<!I`Xm
zhDN5zFRh}ztr3A>Ye_IXI7+0TjMmn6LIHz?BDaVZ3k#<Q2iXnf($vyQL3R)+_gESo
zW>scW-`GOAi!<A^<IL_vsjhFLv0UNIyo)pS24|8Rj7~07SYB(ea3;pk&@_1^RT>Mo
zSAq=pkCI(rW8nf5gTw5^t!+CkM;8lSoLLzkVL2J5uCay60*A)k`3YvXBvo}yG*{#S
zDcD?Kd|{i?>UtMvBr_?4Gmh=S+H8Ws!ATcq+8iug4>Qy^LO8$F(Y1i+&LZRegQRRM
zjBPMCGQ>8yG&HwT;?lT2Im%KzKy5<{)kRrAoao{t(;G<_XYxR#5Zeok&2Lg#RZn}f
zL*pFV438~PP+CKW#+eiY{o_Q6%C*1lZO$;*Gf2R~!ub9QW4!|;JXpBJ{Lmm<GRLBE
zpN**z7Nf$&83zmF%Tr9P#i*=pqOH=w!qDy_qjT#n&NxZsg4kwgbdLPe8jXb;3oy_-
zO16cC(VaO4zV1in71QQoVKTAK$irS@S@{|ZWonn%{(ja|5gM9XDUJkz7#owr%<rYB
zZESXFOp;ugVq$fliaLXZQ~OH{&#Zc|a4ERs;7qB)LfBdo^!E&tU0`U8?=8@OuaB6j
z9=gRv(bn1J*=aXh7{<*#$EVZon~%Co>1*SG4C|}NdW%e$f}v?@%7qfCw_1==KZeHM
zHpJ?0mw2)6=oZ#g_LiFeH)~WG)*er9nWT%rb5Hkmc^3ZpykuD05v0$FIZvjxV|8+!
zVO-Tm@u0@E#j0hZ#v*+jGjBT2URu1RUXLvFUsD~{<LbXgKU$lfa<xxZ{`S_^BmObX
zPKJjTB|I~ZZRcM4{JP2{N4uY21{vj{_2cp&46XQd>7_{#Bi%~4dqI86IOE)k*7PlP
zBX6Hw9H1L~va!L#BV)^U^k#+d_j`=I+<$B_q}hNLVLDC5*1SCG-2w~WG=*t)?w)4M
z#FKBE$V`Ie_e!@SgFmGIb$r?yJ5xKqPiFEj8y63l2md&VSZU~0i~6*_X*3!3Z9VyE
zwI%MKJL4HEuPD;6Nzv-*2R=FZY#~jKN=IHBOvg!|Ev9|;@#v%3|5wvvNyTjGWzvl$
z4@=gW=RUy3EM7eLml1Ca9co6Wo0oCF9K5mBt3SIB!sgf2fz>IEN1u%xJipYBO*;tF
z6keKN{V}o9{uKqkEO2uW^W6Y5xgAFjl1aLpy<Gb94iA5s5)LJ|){ai84)z&2Si#O0
z+?-y<E7EC_?!@WZ%Vfyuy!aS9u#dE&yPrGT=tx(VH8uj1AX+WVxiz0fdLZ5kB(|gY
z1n#B%AjtE7ba{A8l9kSLrYklWN{?PVMxAE1%GgQcJ8cqS%S*iuusAY+xXqI%Qd`03
zaaN2X)#&Wt*6DImUsKOSf~~JiplBS<3rBwYQ41F3A9ldcNTTpw@CFl~uB`E#>3v9V
z`9PX2>b^FpVQBV)Nv8Xro4$#$@B3wGF6o8Y0iwtV5gDdZJQ;nO26kF{5aeXy)6dmG
z*S#a#p~KIcL}vJb(M)sr(c=HhE8ULM?euTQ*}?Rs-91oI{`oRov;5tMvIo-O#p?sw
zS)gpbWMs{4|6sL8|9<1jAc4o+%1ph#p9X@A^V?sFQXQrB{8>8)KUO~(AU&hKX`CV*
zpJfI*^>{XpTl%yxPq$HgWmv5Eh*5#d$ifqgWaj2N5kl+MUkKtr+MR*4Z2Su#r5O+N
zMn4%7XpGG5IFhCyY2(Ggv?(6cedh;&k-D?zprRUHNb^$xY9*#EL-kZMtD}9&A>D(|
zt^oYO-bqrd1G3)i$6~_Ll%nH6lQr}!C-5A?b>9g!hLueVq{W|QD4GDJOm|%2H6+`#
zY^B>RqyjsO9Vwl8rSPgzXnavLTb+!caJ##=9KKYwGR-$MC4Qb1%~l6{C1`b4GLTwc
zYE@<HMf%3YH49g|9?-RI$joi#b!292$&9NgvB$Gzrst4u0;PTz&I&rht2m!5gl7y@
zxwVvsdCZtSp+%2hj+j-OEg!Y8@Y;^jB?kDUMW1cQN(KVYc%Jy7Whh*ba^KpdE1Lc0
z@80xCHZ6(8`fP!YwoPM!8UUqF7M3|!hV5;manxXq8q<-Ju=HVXe)*8Yl6S`Sk)}G6
zKihU~kF0#$9P;*zmi7zr-jqRF{2zpJnu3gY;KOFykK9p}zdB7lhKsEyWiP^3lHF8~
z<6yaIW644XIeUEH7|O3qLsang&ShTur*!<+J%gM1GHBnUSA8tq|Jxt-U!Tu<Zu#-f
zL%Wf^;?+K&4#LDFh28jNVkK~Xrqkw5=TR?or4?zVv~;Yh$kJN)V{FCa{1%SeiJqTj
zX}2-#KSr&nBt^~R!6Oa-!c3>)inOiG^;#*dT??CbFFq?dAcV&h?KrCb_sCcg;O7(L
z;|h;d8-;Ny?rSWx5^_fjCULcG%a&OHa7&2Fd@W~Fzj>tuBpjzvNt&m6V#&sKI@@-g
zgv1*S6m!C5N`tm*+$Ly>w5fC;lD-=b%R1p!3O4I-q5G%9s4o~=ZM{NumE-vd1rrE7
z*>fgm^Ia3hI()#W8<=ap<VU(+1rt76!6$E>oLl{s^9sX<HMnkX`>oXN-1(H)Y`{@H
zRmDnJZOI@{Pd}OV&Y8n$tL9dHg@<ly#bi5#4$`h2D@U^VRnDo@X0Oa#=Tof>&(1u$
z-r^h2<{{InR#<b{fbQQOv6E}Fw)4IuUfmnulb(5$4D2@km+*|@o5u|T-i*gDe>)hN
zxpiBubF;#>Y4ij|VeCjjfqfQNc;>Wd5k3<Cf4mHq`I8)lAHr(qla=sa^Y?S_OR4)S
zOi~!)56GFWrRz@^zj$oIqW*d_0)Go(B^PPdU<WkcqBzsb_FX-m$7j+p)2nMAFT&SP
z-Ln%(YmuI2BZ@Z_sQkCFFbMBG&o*>MK22kpNhHdyGw9gnNgBs+>B!7?<{cU@JHj`X
zrUQcuIu==a7w#b1s{HtAe@?@5A5Pd<=EGWhx6iwKWnwE9J_sxBd8ZNHacs=MmSMUa
zl%1%g(Ja1|4=9^)cvbn6trKYzWXB<&XfkbMn<APm1D8*oNt$-S(z5d3G1IBU$rccv
z#K(TwXJ%|i5kNS%kjnBB0?8P$c#?1=pMsnK`>_PUa1N!#xkUG(q|~;8?Ysu5E#<0P
zUZYjOjsSvTDk`feE-IiPzktHRLc$_RJdj0Rc7Rwc;gacaW!@VR8sk*_1t=;iM8=~e
zQ+BD@n%6@i=v9s{$l08Dg13}p<rYy@QA${(*pJ0AK&Q#o894+gE-oY)i{U(K&CsPe
zQhKyk{3?O)*sWt6;p}XJNK(=Sl3-RYg?U*-V=<7Bm0L(vMHyLv6#LPb&P7EqOi@uj
ziD*>&r!7OrUnEilC@3sKq~au!QqQey(-em+9jhRYo4iQXHhufrwh^a%?(!<_<j;V+
zUDj8Ljt~KIA`w!F1S#yk#t%G|TptLT<jnX(Fq}<R5Rysf8*QQNTnchRL}PIm5RBf`
zs!t_`Mm`o0DC%8)q=Y~yi?Z?x3UjlF?(dUKNf1E_OG+upj}YD4C#g-<uC_a~<m44m
zURp>Z7A28PA#tAVr{w0Z&<h_ZI<!ecmxN$;4waP^WCxP$#S)&j%Jrm_WJPia$Rv(<
zB-wd|<OEVg6Df}z?fJdW55)vx3->iPcyko%vor$<p{qKPf)XMkPBh_sE-0y_qO6cq
zbf1_@lL1?)BT*=lPeHa|U*U`t1S0uVS5*)RN_O{SNNH@|*}}Gx2+w;9%zc>WT@2U)
z^(A9E#+ri@!VH8?qlAD!h)7nDL^9>W8rR0~(xmSxy4A31Qi=DGYd*i#yrnG$oUv`p
z@+;Ac&+4kTjqoepUQ}ClU={|}wig1MEG1f<!e-Esr66+ii>W9pLdK)S5-Cgq;<aVR
z++^By8_V_l9$8q6Nz68(bZOKJXIr!x{3uNrB3)PENFEgx<z$H@`_Z^FkA<l!am&ie
zCLmId4-3f7D<CJBB9=^H9&=?cc2!cNe8{zJZO;x5**W>-gmC4kL~q*%8pD;aq!i@j
z7m*!E5=$iYcmk6kEBPByNQ1=|wkd2;NuRo2P*Op8X)*cv`4kowk{8Khe{YwRHYoHw
zTBt683i}MEn`=r98e^>M`W&OQWGE%UlCtvOF$QiEt~v@ATr7S2f-iOgAq0WETK?{T
z`S&<`_%OGgInARlzog>)yZrE}LVotu9Jjysceq-g;;Y_ql2S3?HC$t*Du`!tc0AWZ
z1(m$^&9}II<2FC|_H&e#*N`7uB-Zi_@1Drzlds1}VOAY7<pe6jmH_~jktee%_`84q
zM=bYz!s2$q%F5jb*m{CUN|#4dmLniRx(~Kh(4!uvtb#{b^ATQq=eyiGlFLV5^pZ47
z6x3r<RHBgzl<=MR-(+R<5$n4N_g>1SM<r?wjcJ*_U8zyku$1JPH@`>O&N!3H+isr}
zrEQmZ<7y+1`o>76BxOy9dG(#|a_eXjpM3t1g!9xkLV5Lk=iL{X>U+RW+!<HVqE|Hm
zNXiJe-+q_u^&w`~_7UJFjgBnr03}l?EX`<Mgk@)_zREFezNGWOYJ?zwp~-m#aFQSQ
ze?SQ5cdvu_6}<o6YmD5x&u&s80?s_PY;Dk$DWhves{tBE?}wgxor~2`dM1_;K;5~Q
z_};Y!KK-(vr1B+<9Mbe9CCYq+1vGlTK$vIV{B_>Bd4{9suW_g(%Kg4c8csdSfA)iK
za{SZ<+6s5L*FWu{U-i!|Yvb?!{omsF;bUAoUdb0<^;6w_o)@n_Nor$`rOiFVm&L0w
zC09ORcp=F1@BcksKGOiZtBlUBdV-fqE>g)9x8C~=au*&ky%8k<oO$C%TrXVU^RZ=5
zqG)tzDox5R%-F^zbP|Wuz9XgcNIww(QjQ;LAI9;clefOl@yIv>b2}hYbe+4-cYpmy
zR4w1*{?xjrN7;n(;ijwa@czj#zj!#Q%{H&Hjh9|~hxe~H^V1LS5!d>)WoGHsHe}T@
z<=VJbyw4t8S{y%6wqo$eu_-I;t8+r?x|Ec5UgleuYx(NYxK1p!A47WbsHJc5BCU2f
zpB>r_MtW>dI*Uf-muk`A*R^iz&LMq9m`a&*he8M~8*@D~?x{piRaI0>kkaT?={Dv5
zZ-Sn|kCbvwDG3(T^Xq@-M;xgyW_5au#jU6-AMZIUQqDq2<Bx4mxR~ORt<g31ya+@<
zZO=(#_i9WJ?wRV6PN;Qa<<&Jvk|LYi@Bbz*oNFPmwamoas$&~^0qtsEgt__Vdlak=
zF}=P|AQ0rj>p$Rf{t{n}FMAAH`KO{=r7-?Qs&p~$^zHBST=fQDj4ZigGj^u^I^ZNt
zy1FX`XP^HzNAu_DpWZYNjo0zWHbE;6yE^nFP#8I(uLU@B;|*SZ<{H2JYrjHQZ5j1d
z#SA=p$X=2(pRxJU1qNSSRxv{*o9@hb7N-*ZyMT%qAq2tF(z4%C0PWg`&dF>Lv`d!u
z!<=<lQZK*+9oOFBT3#Q&`;Y&3@=m?YQTd3eU>9ercS*H9%lX_%{@EY>1v?3oaQFe<
zV3zNYU|J%lXpy9%J3PGiIsLN%PBm`x5B{6qr)PYD(vGuS?x>)&<rv*nVMZqANQxkb
z&Ogmd&t9XkG{n@*0`a6jPZXqr`P{g1iT=+%ps?#Qr|Pnpm|LN+rh}JXeu*<jn%P{K
zW_>qCN#k*zI@?9p$%~xts%CtAlFlctbE3V2i<d7_kX&PSc@vRc$mQ#|xN+q?`Er}t
z<#o1JW|`Q|;e1mzzxcd|q%u^SXCXjlbMd+7Xv$CW@L>;I(Ig<K?KsU#FW%%-cO&bI
zGi>a}ECX`&7YK9asb|RDm}fIs%FU-vvpBuPx#wTuo3GrawYr3|k_eMylZ1*Ix%uoR
zlFMWCj?R)Iz|P7nliLx_H0AK|=MRY^2!so`^4zmj26p(mr=R@<i0mS6-hP#5E?wZ_
zr6<VVnP+lomt#*p&GCi;2710`c`Jqp=kesz&v3Z?5KlgFlK9#zOPl-TmezCY_H9nK
zHqqSP$jbC6dkIG$#-=5KtO71Q`w~}AAEL3f9l15bdOVwJPd!azQ#;o#pC`1l!0dXA
zj+2+U{roeuROB){JwrT@%h}7<xpD0xZ8gO#&dwn6>v`>4Z}H+YmnbbNB$C`<YJQD^
zs#adO{UYa&wX?f2%j(uX#r0i0bMYiChtG2HNF%e86YRxPKKl%F?9xr%dgE12wpLM6
zQOx|v2;sUTTsziCVNEATn+h2ppC*}A#N}t6<LS#6Xei5JZe|vlSHW}7Jwr>&5iXu?
zXK`|z?Rd%)<0X<{Rz7ck?_2!k@BMfD*>I9~Zk^`CkM8oqdp}_L&Y$sF-#k}u-K6)!
zpR$z*;C%^>KKE^o#vbsK&w8l4_zf8SjElE!^XRKVp1FRUdtW|820}mzwHnn25k!Ex
z?(@8M`zGCuWz0-WkO=1T{5Rj@TdzLN@=z~R(-Vwk3IH;Pr(b=8Z@zMy#<Bt`N+XO6
zj!}2|8BSG1XgG0!`p7QB(<_KjgtJe*z^$h*QW1$VF|*<UMxDTg^0@Ns3p{iA0?n1V
zOixXdQ+1T*&Q{Ymx<Gbe9j`ohnt_2SI?r6=g=epEq`i*S*(tW8397p<aQ#dxQ+;1C
zv9b>V!Qyx?$(l1%tUu(di8TU9@+#Z8{n9NioH|NQa+BGmbxJ$VbNy&Fr7cHkE)k4P
zEwaD4%-54qZk?~;uRi#SxVsPEeC$bHe&GgPwS~+~&#<3RiGTrbDJ7Aj8lJm-n=9v!
z6W&{8eszo7@@8Is<u*^8=wNeUn$>77FTDB^Pn<bPV|^=UkGC*8K1$7rYg{>VjI-y@
zQX1T4dSR8a_A^{)&SheLlap7Uqi}bINaGpae*2qTJK0WgWd$2kW328bXgYGyp|P%z
z*_m1PUHjHS&Bfi)`a`^Y`vp#SH?TT4&E{U5;>Ke<eeMulCogcmtA@$RDP7rA*q#!(
zTz&opuADnfTT2V4PaI-%ev*w?h*OttaO?VIs`8Rd&#usR<{Hmld4i+etsFjfn%w9T
z^XvPx9)FVC&tIpbCXbn!Iim3-U1zUxyrYG)7q8G>Dwx{}^YoPyEKE<63>EOyjce@9
zPdmP({ne5A;U}(h@x)QiojXl#VuSgWO+<D9PdxoRH?N+jFtE$)!WtsGfU8eEMN@MJ
zS1+F@xINEaZUb+=`%64~s)5m=Ax4LXS=x@%eDn!kzIB7n+5%>$XNksBw4c7h$<7we
zT)aYOS&)Uj9G-vSInJIqOiODwhZ~EToSdbi<s>h@aD!8a+evOMvAh|j?aWo4IMT(r
zix(*iZZo~G4#6yroPUZJp1np}O+IszvqZ_}_{C><;l>qe3IsDV3&gErqWUXDkjBI3
zdF6#?>8dYgc4~@bRuRv?{vF<W@e*s}LrhGJGcmualL(bp1jB_~yZst(+<u;xsv=4X
zvlt(prvB6wj#uW>c>F9iS-XtSt|G#DoPX+hp1bx0CE+O3^DAyq1)Zn}Km@Zn{^SjA
zJ#~)y##X}1J$yE@LT&p=zH$3`j<i&;Fg1zHYv9(kZiYu^2u4bI@%c+kjZRSAeSxbd
zo0uGUz{Jun2nc2ua`E~tp1t}6m3b*9rxplAin((0HrFqmAuqYf^y0cH<Y0)>J1%M=
zf@EgoAs^rA<@lvj{JVeh-}0Y6`kK9@ptkEAuf6;nhnmWm8J}Q3>2_6%b%7^Fcy}1=
zxmEo4T+qq6lb1Su13FRDh$zhxTidsFc0uBN(Lt?dJdb*E%At+T%}kFC6HO!-9iF1G
zwGJUuv|oOc-+HlwKl(p^%tlnJ&l^%SEmHZGVa7!O`#crGWZ(d)RFYFt_VNG#AOJ~3
zK~!Wig_J3h$%I?!3DM9{$;8MQ7he4ihsq+9cU|LGUpvph*Y~JA`wGvUs5g}}tw6QT
z4AAw&i@f#pA*QD15s@<9|KazEtc^0Zk;i-Q-X<pmg$+k||Lxmsjz6MjWR|3qbYFUz
z%PnE<4lMKPo3B!y72y1f-{Jb9BJO`Z%H?l-i{o{LX)6TFgp6#Y1mg7H>!s%KaZ2;U
z4$q+y-v0KR1eONr8=XUnfHi`uFi1oo#EDDSXe!PnkXymi*DjJ5iqU`f4g-_3JpA%g
zKDpb=UJ|0)%M6TdaqM`9^FyT>UILKGJq8|*&~)rLMcMAWi9H_P{gjXI4bs@zO+=&+
zq?qXIBdfZDhVop;#6me-x%Dbdp(Td5@_FNx>qG)!o_Y0MI&(HzkY&96`b!i=gbOyN
z;wB|@TzZ*XC(0O}-R9QoZ_!Z^flw~jU;7THDwEv3`!(zPaVk45@XK#L&3Mn(<hEVn
z)oX{5B7jKlF*Pwo*^%qK@zfDwyUTq3>F3PMPVv#tKcIJf5s_WY+u!?TO7~|NSqby4
zcV43?EGew%;N7=hCo$j8gTYCXpaYzu5t2*}+~?lFI3qoG`Nc<f*@&f(1gWX1Vt#6Z
z?q^=(d{YSoLdfVA6O%KvTzrMw=b8!Tm2>;G*O2o)Y!n^mo7WEG?mT#2$=EKx_{BqB
zeD7EI;hWd^>XQ%INfpynD%i<u=jMqjR+ACROL9DR;jE-Xv@}*Ty^!GTS1&O=w@pKB
z0kf<7oO|L7B9`RD#Q?NGOTVYN{^Uur0@CZ7K%}IJU;ou_vC#K`K;u=ufBP(nM3j4X
zK4fxnkvl*88y<|zdAA013K-+jXCE^>y}*MzAM(lFKH>zBWYgSOO#k3CuYB)Us17D+
ze&SW$f4Y&ocOP*4<?nO8K4STaG-C!r1Y+Bajf~QC`AxoYu8Ey^iW}c}ozg&phBMD`
zp&^H)5QGCMW+ukj&OgF$e)}1O5UkG*F;5*w8jG~3GFg8x9A<lEj`4*(-ul50Y0L#F
zL$oy2Fx1n>>2LfB7n|})*Onxw_7uPM{_{-rd_|z?3U54hM1!-FOsGjBy!QPc(H>sq
z?t=ly&OwMUZ~p3!DBYc6WUGW9egAdxBE>v&`4}6Ca-KSpPhRT<jy0Chc={$UovP%s
z&wIG?`gb^76`{E01ecFBBM5Tr$?LS0XR|Uh#GU*7Opo^Pv!8v;{8p6Q+T;Ajd$*Vy
zcz|fQ$m>rX!@z(^-~y3SzWx3839pW_lGVl!-nm6~KvGnHnD^ek&DKOOk49!lC^(mf
zzf>THi<iz3iw1e>i8hi&tvq?GnT`uD^48N`^gigJ<I1aCJzPu6v5S<ZwmAFLQ|v?o
z+<fLdg*8X`!Mo2fH`qhPsh4>5ayLSzXgl{D-+bm6BM-k~d|`uVJVw`rXF1tWNJ-0S
zuAgZkmWUfa(1krg=aVn-R7Vb<-JjsrTkp_RoXx45Z}a@|G9Ek};lc}Va;m8qLV4VH
z?c1EFNO1S=*K9-+?5xi4>6iDIof_eTpMA*m>JE8TUHr;-USeUWhoYm;@y(}?Adobl
zzQJ41o@DgVSB%cDl2h8s*{({$<((X^4byPqGA(7f1cI<SH_b|-m|uGDZAwDWa^?m%
zj+St*cbXU8e2c1xpzg#C-h1&h!w>H>x0N6q5Hz2;$vZcX(f8mHjptwF#;FE($HUd4
zTQte7Y~xqne~rbV9&$RK=G~hoNyT>f>WfdAn4RU$&p+Vd*gPpK3@cMeCHLsPdxycv
zIUaoW5qItn5JMnD4h^;Wj7%)?;=A9bIy*r3l~;M=i5ea}=;zc6?{KcE(35MZs$^y7
zMZWXWX@>g8sBUT?kdow<xA1E}e3!MMuUXG(=ew_8A(>2Y?&deB&ka)EeVMC=DoDx{
z%hN-w2C6ySRO-?j;LHo(=EY+r+`D_9Eg2>x0-Sx}+dSW$!&eVSdFt)&(_I$QSYmVO
ziOsIDNH@<=mCj@`Nmg+qzwv9|W})vsvi>T+`pP+78MVhXUotU*)c39YL+3Zvb&0BG
znq)Absy%vPZ9jQI^%WT+hcJKr<xo$iiAqEy@udO&dN9h(E5`|1%-Bfj5N=JT5-Xuf
zk#?odOM9k72tbGw17Cg4XP<vY?@XM^;w-vPo+m$$q^Z4=>`)emx*G{-+9mc-k<tdf
z{q7qq_uXN5VS~JqR!(-55-zEuu{@9Z7WY@fWs1>9clhjn4-X#ovl~r<+~VFBU(?&u
z$9}kooa}rqTsT4?6rru9mi&SWn(NCg`C3aQDgBQ_1tD3TpJ!t?rma9E_ZS+Rqy6MX
z4!2a1N~rTkR%ZpADd!NiRPSYym4#VWH?~-un`U}$nS_L7e3ym!MWXIGF0X72_{mg)
z<@tHGqj8gWCu1zkFVlJY2}XYUNBp9H9*JagWr>xoU5{+{w->ngRS&%bV`LSSkei!F
zS5qNB|LM>9<fBg*TiSGIbi$nD=n`ar?!#?7y7LP@`Q%gX4@{GCw|Vw9rug~KKc%<7
zmyziWTDwnBT@<3Kv6bT796CE&$&M6o<kUG%oj5{GMFoxZ<)q@f%+AcRy|u;E#3aib
zJ46Z_Ien;#a6u(a)rC~nwNT<d(P3)nD?a}00X@BatnVgtHCiXnQnI<Sz{1KVYfE!X
zOwSWdN(3p!dhhb-ojW`n+oG~8LZqOS<7duu^4KA&$|`86Eg>Z(8%tx{yVp<ez!a4=
z<ysO-C(dd(pQBy1Jo@SjKEF3Y^PxlJW;u2oi|(<zAJ-<JzVMS$=R4i%W-1kBYdhxV
zXbA>-`-#W5+0SX>=H=sTugnums;x4!>MN>fp?GhAKl|uQ{_4*^q~q*yQi%i$vy*J@
z>@hnz!Q$Gsn|w-*Qw}a9S(=?@b!(S}*(qyf9^?MUAM*K^pEI$QLup=!Gnbzvxfi3W
zyAweUN4siVH&H<AjG3KR!trzGId$?F6(yCl)D^QkKg|8*e9m^&arN?1e)`i-35K%h
zJb8{Yr;gKHRYFsf^Q49StyLCQHp~{iun+SayRU3$=j_?D9PMnOw4#xU!T^XAJ)eKb
z7hiqB2M-oG)?J&P07^;o;d4~vNUB>p$<GRN=uo@f@>h_CgiuZ~CtGv*<3Iju9^Ajn
z=U<N?gC(4(m;9%{`GgPt^v?;_byAocW@lrC$>|x#hu8Ngj6?{CUGCoblHvYGjIM=f
zZ!Fi_M-rSP9ymPD&abeww#@j%6g#m5ja_G`%7e<Lb_ya9I=h`8OLuI}ETnS^YiZ0~
z<1c>xDL?y*pOfF*L4I}+WQwt#&-v`$Bffsr&(40r*se4|X@7T}smVze=H{7LTqifD
zm=mW?k{b@v)Y?LBq>#?`I#P)}<|f9Nom*yNdWLXrK6Q<qB<Jq)^E+SgS3iG5oBON!
zLhf<*<BxdI)62ub38I^ee0+bB)924|?(7MMzW9W-eedp)!X%m6<cm-5GTi%srDP$s
zRaKlhdz4Tpo3@sE@(aspX{-P#*;}9D0~cq;X4gsVZ!<GD&(`KTlao_y?k6a(Z6mny
zh@XD)1%LfF_h~!aPA~w-eZKnSBOW~J<>A0M@nnMarCFw?=a`$BVkIgm%*myp{Rn5z
zoT8(tj>?*P3UVYexy!>Z?=vts$a*S|!ib>j<Z(vt{FG1b-sjVg?y$HEM^2t3KPy0E
zOA7_L1$4AGI57oPqHw&hw6cYq&3^v!<1hH>Uw=XC@h(UvSeTt=V|$y~=}DH?w{?-m
zyaZClS(=+=bz_@_*=c6xS4aX<?s5N<kGb>t7tHQPD9Ov_^o6qoQiAsOX0juNbhK8P
zn4x#u1ocgwY>a%yFFyN>55DL}1O(+Zhv=@2kYC$Mbzv6m-Q6U%7r8UE%abRXIRE5n
z?tb(!u@vlYu6cbp5aQIS4u0_`f5KN^Kj4#3zG6=laN*)963HN4?TutbO6h1UMV*IK
zaniv6#jV*j%=9WOZ=rO*-@(E^{eX^hCy*%>xr81_7gf}C{a4|uRRP4Dc|`RQ>Z3zP
zNWrOrDa*Jm1~hu<Qkiw|S|_d&q+n`dg6Fzg$qEk8(pt~dy&0r9%0&N{{PTY^$$$U5
zzs8yS!+bumZsjb({FT<czAaR9;LxR8pA#_2V%kec63L{M1WLBIH(6O+XQ02I)Zh>s
z^W&%^sJiUw4wcxN<M;pYKXUbjS2+IlD1-aEtS+xGJ~GJiUXn*&Zn3_fBo~P8@1wrG
z;s`nA377#Atgmeln;vIiaF*fWK4!*e9i=3b1j1R)V}ztpwkcmL-czJxfKWK($|=fU
z{o8-Z=OyL5^xhA6CA!2v{qg5WYYSQ(0!}0adAZr-1q#W}4r>vm5QKw4qj>=-nIsU*
zavz`Osw3sn7!c0awxtA;qzDoUIgjWOfgs1Py~2^mB)|WYyEsp%adeYPK{zXH1>dB}
z!-YV~1iNuTNnsw@GLO8hFuJ!i_92mM??$OEEF?R!LScTc)>kSPBc>iNBw%l6ljQ};
z;9x%k!$WMZ%uv;Qp2Nkf{LcUIuQ~PFZ*#jiplKu+a2~M&q!N3quB<XKHq6XsoJU{&
zjQPz3RZ<d*?vu0@Fj{c-Tj@AkC={}+Q<6v|T_189HXXV^bz+d;{bxVn<}d$OoC>MM
zM3R*9vCIWgLfK^;Zp`L?{=Ltb53ce0&9}%8{|Bbl1(Ee0{&=j2A09cz^7<agAh{*g
zbksz6*gJ+4amJ?>I9eXy5B~LE@cIvbo3Ccp*h!4>AOFKWo_*szhQ5Bpg&Q~N9sU<=
zCX|gh{vVI+LZq02tRMxYWh8dC)9BR4B$QuyL?uZ9;b73}ks_Y34w{P;8*A&V#3$+N
zn_*yZkj2S4GrlzEdTNhcqdDHofARZ&!u8+i=2AFFK<x9uFTUmnue?UEc#Z#SaE+|O
zL%evtl;8f*KOq13Yg~Q1Ow*c3B*@OrwzlpenMe`N4kHjGlOdjd{yL9-{6F%O;Q-C2
z&O3n_K~YHwS)nXSit^YV-E{~|CK2H*0s+TXc6YW|Tbg5Vpr6FxFdwc?JLf+==SU!x
zjI)ynQBji1bbOz1P!LZ>*-b?#&5N)SE+L$Zl1QqBjajWbSeHdbMKPhQT#E9tSlf@1
z$W0Q-%^@o*LTPa!tIB6&ieM;20D%yaz1=NVm**K87$7z<#D}Z1#8de2S(1$JlL+Kd
zl#_)lE+!a{l1Mp78r_dNSQyCstyWE}PU<89kz#9egQe6I0|R{w4h^t0JI&D-p3qGl
z0s=($cM0cJlbaQyu(XKy?vjH?$s~Jm=gVOMAs`^>|Kd|_|MvHIR<7|+|9n^{Zfd8@
zMCxoR%8Lj`Hp$Np5R2}!vA)T|;y42X(+m#vF*h-V2o#ZwN1a8P6>QZBSH)f|x=Sdx
zp1kZZg{8&B_vRcPQc3*<HdW!2S{}~laHNcjPafj0{`vpJc=!;VzxgWaymCr<7hzkQ
zyOflbk{t??3WZ2UlWcEpvOK@Qz+gXv!-K5OPdGLR+8-qli|rD~DIz~BL_ui@iJfhu
zFTZ?TCFm~!DEko*4q8Vwq$Hk{?&3s%NV2iM&f@MQ{R2}B4ED1yHKmI*_BQ?A-Zt5J
zMMT0`loS`jq9hjGW@TxS;emb<fgwKs*)mebx%=@K{QcKnVn08?KmAv8n$Bc0Nm%)?
zOtHBgr?R4i;P5)(aEL@a!P@FNiJ39_238pu=wW(v9SMYUOF7z6!J~(Rmd#6@!1;?u
z>XnG?LZpO(tPuHSWhAz@u?@}sGE&;{FljfnjMM$%r>b75JizW70O6_j*Z^z3UT<tm
zl|1#qZKSiksv02~{^ZZtsJX&#{^oCUscf3R_;Q+5GEOX>BD&Pazx;6zZ@=>j1!1*K
ztMu-IkX?P!?^U*Ez{-2ww3RYRG-|#LmrBI-w^ZV>7|E2R_oF{$CeX;`OHXp;$~EeX
z!q`;~r6hr5Z+nZGfjj)0zaHk*H(n&XJ<6YcIKuhMm$`K5GN-y42_TWl1hIHh)8i;M
zMk3`TbFo+q0#W|*$A3;i^9in8dXh^Q&QTNrM1ZZCK{kpG@!j{|p{+ck^k(Q%I3u^X
z-@nW&Z+(}W=h_k3MZECpo4o$gv($!n8Jb*ht5UX<BqU?>KOE)hcfQLz&tE1WV>+oA
z92n<`Td(ojtxM#GoNCL)!Vqg&?fmk4-=d?Y0Eu90W|;N-cE0=myL43MB7tOVmEMUc
zZ~xNwxO%LCU||FAeB%b8tYTh$`z_A2RU(8WApRfD-a5*TBhUBzWaecqCP}3dGgx4;
zWLad3CCie`3=KnXXZO85JG*ms=j_?H`}WM6?rE^yZrg2U%M5LqLAIEgN-C+CZ`Fm&
z*gujYZ<X9Vdws0BHzDGO-`_8O5s}%?;Uj06xB3n?FTIX{Cdg&8=7`DA$!7EDnI69W
zYClW1-pk#aZ=xisNmG=ru2BU0zWjoU*{gZ*?rn@4lCT!=3bNTO_WE%}5uDujB_}%L
z+^}dq3+B(Kt~5eh(>XeWqj})IdssXsiSCc00NLIS&bRy6e%I~HpER6I=Xu`$><H85
zn7Y1h!bmhl$FS&??beZV!Lj<&C#jviira2kMOj$G(DUT1YN?*fk~a*lUOb6XHH(Mt
zy_1<kf*8R#7zWv_Sw#~@&b2LEF$f^r*TCodI=JtVpYh9wZ{ggJ-_X+4&1WC(WcsSj
zY+1jYv){bOr5@8JhD=?_KR&XBxHW7!{^bWSb{U&DZX|j2Yks`Y&E*ScY3b;qxuci5
zkt4`-w2;#WW|18Y$JtdM;_v_N7u>yJD(~$0(h5pu5Vz{MC<AYG^mFvcS(a|PjZG`(
z62w5y=g3(V;A|#?K<E4SUPT`|llgP!vT)%diUZPaSH&I#Dm0%zNWOY54?gf9v#Wqm
z0tJ<e2frjbX*KP8KBq%y<oX*qcqPPx_us>|g(K)u!q_3`^#1RtUAUF|HZLdYR}hNE
znSDP}cf+mRwP`uYV3q^>PcVPYZQQ$k10kcANHl_x%aR;9lLsDri20T6e764z0tL~2
zjx+jD?tkE3CR9ebxbG7VwkKJ*U_J{LE@otT)JYbFqBwSL^mXv|d%M{5(9e13{`*+7
zU?Pgq#oM18;kE}J;_rU3jl*AjLVItPTsDVc<jLi9^jwa-VIXvav2$1P^Itqdb@mi{
zPq)!>>M((^OL*ws+o(+`&PI#&OJ`7r%;4@jH&a&>q5j}!9BhfOV8J{VEL_Z}3TbLs
z(q(^VJ)a+F<?e@n#v^wwXZPoy(~}VxG8W>ED-y_(%jM7wgKRF3p3kF;96#**h@SXJ
z7A=_1g8B2Ph^gfBIdt71m&;=4I@w%~##4JZn;Xg_Kl?db7F6)r*N4!BLO!20ciBpx
z?xwSxY*ENvIm-FAKIBbO-sv)xsW+_W=Rbb{|J8$>YG~o}_dg&wWHJlp&tt*$vnUFi
zK{S`mO0$6`V?vm;*~Nn9^M^RzUB=IT{xfb}GL%n0--j+Rb)*|o?9<6vGRW&X`CN`{
ze<#Pzcd~ZtU2MN;K8QS_V8A@5TUeLLAb=xZen@usLVothL)>@AMyg{fd%pabw%{-p
zFPP7QdGn}F_^l9a`J)xzTI-K-uqDAS|K{i1wqi6NfAWJ97jhXJOTD^Rto*>-43gu_
z>B}tMu$}eGt|x>*mvoTLWH5{z-+uTGO3hT}&zsAFd2=Za*=<P$97S;M$SyilGx(cF
zA7Jjt6zQBn%f<bCcDRS7H!NWOyah}cT8u!U?c5&vs^+0y*u&Ly-pUOO&Kx;F?VL^A
zv++h^euFRGdymmK-O0}$yr0`PEhTL9@afy{P&9rn^XJZG{+!vwd^r>#UN@Wn@;}^5
zxzF7Jid`YIiw_pYWiuJ4;@5fg1m9i=@DG3g3+~%|9XsCo+$;vzF<kNHJ1&CT-XImP
ztp0=@_@x1*v+2f4g_IGa#|rF<nltppUA$V@xWIYH^k5IHv=g~tyqL<;1YONnY3u1H
z5J`~?<Y?~bMezlxEGwe5sgbOXD=0~!g-s3?0|wvdAkh3#N)kSrT00Q}0?{NfEk|2-
zABmz;^qy9Fvj%}klB)7zjBJ|b<`yz}!)D8o0g8{3k`z51tz?Ct^0E}|&CO)-Q&CYt
z#19><Ep+ts;S0w}1Pt0bdz}<FUR;Xk?W89MrNs%l+S|zJf<#dnrKu?WJzX@nwwsBb
zq7X}!QJ#v>eB~0|{dr4&^03uZuI7(WU0sRZ)54Y39^yr%lq92MdOB!qX?He8c;qSs
zia$g}ML9-aH+ck|?H%YsLG=ZxsI0^<`e<lqG8e!Kh^0y>FN)IA*g#j>9682PrIZ&%
zXt~-zSD!ge;t$2Csw^eb*+f%!o~p`9B0(RDB4}^ALTh)QNrx7sqN)mCuA7GDP7<jk
zeVy&()DT56Kke<Ego~<~ICePUWDPe=FXrF=+wW=5tB&yrD=F{=6HJ&qfk-IE(i^Ym
zoyY!_gN<2=i<0zow39X5gx?>IQ(0bulJBRbxrKDrAXZdLX(C8_Ydij+Mr&IaLO>{<
zqN=QjbVoByZQY>yDXpj=7F6kMYoWa>jV}}>5!PsH?{Y>+*4l%DwUwZ#KFTVqi3JQA
z>MxTI#7G7OZ5_QtQ^hF#opkpbq)IDDg;m<xyYTy=y*op3Dn@%t8)`T~GLWaGvkwOl
zgzGz+KTKt11wN6X@oJ;BP+p_BypotN&(*6<WUW%OKNO=luF=%oVS$4}G*L=fDn!fG
zEA(XYj_vq@F{&$z>1b@At3NN<lItVBV2tXjGV;CcTxo8%M(HZ4(o)i`&14J;0;WBR
zH&b5KAEvsx5})2n{iP-XiDH8J9y<DTic3rAZEGT<LpV`FWl4f;UpGxnt>lI6qgZQm
z&GwR|RTM`xnp-*u2Njyzx<Sz>tE?p3(M)Gw7ElPrQdE~E>1b}mABoY@++qbRh0>}T
zl75{Fm#$d8shK+T<+*hE3V9_!RdqE=ri-qu0{xxzt5Fg`l}I8<Uwb1hU44!m!--O=
z$`Z6ST%@BfhZc-eU0I5%XJ~G2CY{N<a*?o1pixp$MIxlq+H{rfbQYodsjR9ZsARcv
zrID=Rqog=WPdZQ3uai;2kZ$MtyZ;NL8$Rd5!}WBuG@45=6ophtC5fOxcVC`tPY1nO
zbGKlstdf#MfTl~A=*i?!gHfs~%Fv7qEfS4(Kyjvbd?6~TD)8mgTxo13Z-74>BVpM%
z9$Uq=UsO^>2{P$^!XY2&yh@M^ojv`8<3*I0q{#R8(A?67KaxPnrtwFjq|<%G!amyD
zyYYn+R8|y|>29OBtqVgyG+B(6>!mxLG3ixRQWYb(^S(Pc{`T+q=+ITv!b*r~*{uJY
ze<Hf?ZN9$HLR;fiy8CU+E1|R~My9ummezLiLZisW8QtCRq58uV$Ah%DcVJi*6Mr~P
zRYeKe?sl5mIxz%95=Hp(X}Z&95~c+rBqAD_yoyg`5&jVQ-Y#-lh^mTGvOOJSgpdC2
z4%A2zU$%#?j6rd6ir)5CGP)oXOHol;M7Fnsrq&L0LlBA;QBj&g&-BpT+)BoNREMoc
z8y`b)swzv!^|aI2(rM1(C@Mw8MWn6v3wKJ|$%zo)3xuhxszl58a;c$-K)i@RzL&0k
zouc9*hsH><g!1A9*>o4pEp29{Ymm4eNt95Q3e(-4M$h)r*_%Q2hpDQnz)z0W<|exP
za-gUbl~$nix6|I;?<U3A{+i=bgSlxjUR*|LGD1&#Gc6rvj5d!&>F?>FsigzMvgh*R
z2+d7x7{X0fUC^`mRy0b>O6X~8G4U4g1*23~m7}NIX=t*sP^zp+JPtT6-1uI&<Nl}q
zI`2-!(T%(9*h%87#_h~q$m`}TMY?qo;jF70EXgW7=6&Xim(l;|$6*eyVGN$^72L}p
z#$+W8b~60elIH{#>n1Y^c4aweqADGiH)<%}_lEtX{00D}*}#8DtUpPg)!zTFk6b+U
zfIwkCP&RxL>uy?1SoE^%(+~LZ)YSr>O=}CrD_Om99YYctC-;2GpT9p!PPj?<U^I9J
zB!7Wk_ia3~quFg2bmCQ00ps55VDM`_cJmW7+XD^fIa~$wdE2&X=GU?T*=VGxY7ZTD
zo7dufKwCH?&O)sX_VMm^97G<{`<Acu8PslNV3R1Y3k!dpDZ7ua7|`@j;L1Qh9SmQt
zH8_QV;<y$agAAkrVdzY{aTBFi_wdEhdhC(?pw||}5k-D=;sAmNr`o>Hqw#CV_?mb$
zpy>hydd_%x`6-N30#R0O*iJ>}B(J{vCB3>8;|59MW~|(Rdg3d7Jl`^aumOF-pwGD*
zl$HU0?=m~s_17v;48W3W#D&2wm+1q!PboR^_=*+L-Kvc=g(?P*Ez$w!tCR>8PMr>j
ztJh?40C5r{CXHSj9+V`ntYmC0h|>d;2>Z7snL>FBX9b<>yMJZiy{0U#3I5)Atc)Bp
z#vU8H7de7)8AR&Dx_6*Yd$K`SC|-~lSoIs6vx0U96==aRhi(B@vT65QNQdWEIga6O
zx3Dv1&+luITL6R(k(Mmv<Wjg7;W!4eaSe2kzhn=xU3+43f$Tl~;nMb(`1#4c2Xzwj
zxNGe4<3WD94PhAMb#u*_=F{wV!+E2yrDt;oGH9BHs;Yw~@7GLB@!Ib-!S^rtq304u
zcW&P>&~^$~acxFXJP$8+?F6Yk+qUJZpBs1wvvu3)1p{?rpO@)$Dv8QqVc*NA=_Bq|
z{6ZaEvyDLnC@iJ?g*!Yqi2*WlNOy6ez^@DCZ>I(F^@GYsoQ+@r03ZNKL_t)-?dm57
z1cTY08wUo#6pL>uF5zOX7@)I(b~P|XGjtuHsK}ekJ;}1ya0~o(pezSZHm)5oy%6pu
zP7W=Sf5<jcIG`OEL;f;7aw2Tw1d4{Hs?xoZysbWjkw+*RnyNV0$U%D`p1SvYk4btG
z!g(pX+eSg(3=D|_6CO9H%l_>4Wyj^h0*tA5chHBM2wX#egXv(<kH>cl1}>6~4koLg
z!d96-dGpd=krR1o!|uZ?Z~KD5bWli#jFZ+~1Mul5X%jd}>@}~K3}EnLz_sG6{fH3>
zxJ*m48MBVsLNG&s#k{aul|c>*h|J^OOy7l3`+cJ>lzAP(y#w#{!uwlhPACF<y_^<^
zGGXd0=FFT#)aP<NXaa|D1fVFO_#;$TmY`Wm#UEwt*b#)RomtMhJ&R@`we9R@ap)7$
zXhz~$2*Rmq#*L^#Q>62ju?q)0?fy&}g(Q3DJl^2o3F-{gkA2>D5}R&W4>GN$p-6o*
zQ1;$7tUggGA2ynK^X5=nmasreT5~8}Z%>apt*a`2pC6x3v+UZnDaS@J9Z&Q53Hbe1
z0_V!h1r*`&2w8uH_A4izu5@j>B)JH$yzD?BubHrg6G~Mvwzk~i*S*)vKLg}v=~D|b
zV(bJ`VHJTu^@o@+eJ=CoTt_l!0gviq<fK_Fn0Gzp(STQ0vThZH(qZFRv~WJ7t5UAM
z0e_@~$>WAOcBF-inK)(`nk-Y>_Au9K2qDmdF{aL*&%D`Fh-mK9=^c64a+WC(Lb9_$
z8x^>>d2QaKKjG>HpmOARDq;c0FOV@t0ki-$Ln=`%*mA+dZre$5r6fck41=1{<0*><
z%xi6WrB`a%yx7f}&Dnv_>u&;3!bMCNJq(}3w*U-$j2Jh8;;7H{HUY7c8m^x^kBN1a
zsMdR?%c`mg`!$b#6(Q3iP&Exz0igyMIc5xrpz85sFRuC+I(jnm=g+1*E^X>?0-%dQ
zLh8qUwU`I5CB3`f&3jU1l_bIe%V!LVYw8$Unn1Dbs?Z+5BS)YB<0Q{1#7yUmg68v~
zX|9e<Ls!k^20@?GhoYK;MmsJzcpxW97Vn{0it^%w2cy(LjIra!5b>MG{E;NnX3yvP
z=@W?f+@&A_AGM>#QI-gLFvJy?i-|TK*k<Zrn=izu(W8m_BpKnATQHiUq9p0D)qxp;
zt0U~V=Vp@E8~{7Mg+!AjUIE04$|#P9u#=ux`7lOQ7P-FXiQ}?2+iG)nYua&W0H#TI
zJN(IK9s5;CRt#tK&|+r|n?NC&Dx)MGvhVh^D3RgPC+VnwRHy#|Ehq(kB-w6ZUk<KF
z_+<C(!6k`LbCBlfs4y!pWXT>)*#8fvT*VDS_MLW8V(V_O-U^zMn|60)4t}v&0I)|m
z3bnJ>@$juTQdye9r<&_?Tu@S7FjZAlMZ>3AyF3+zWZg_2+`58jz=x`U60hOj`?gXZ
z@}p^*G%^BB^Wjsir~g}RtBQiA`tVuT+5{^0NX())RW*j|r;I?e=u$NepQfQ{8a`FC
z<g21-8k*)aN00aYB1s)~8sDL0rvW$u8p?zIvd6a__9v6oD1n$OxvisfE%qHF$A
zRRvAe%w3h9k&sD~s<=R-C<>~gS;rL=RQ!n=9(d$orjDp0YVCqm6ve8T+3g8XR8&PZ
z`L`-*nyRAt%zk{<j#<UJMzem~Nw4am$Ns4Je5TA~Ycpl3TA<?EuMB>QG>AcfqAI9n
z)W-HkRW;vKp{klAN83(S)s(f*XX>zQ>|*X)KNn50?M?C8wDsM#En7#bqMGvZN4aj!
ze9EJmr749_qLeN7J;Lau=D0&3oM6@V2beu9jwoo|mOX$k5Ms={b=<ymg1P>~(swvD
zl*Mx<IIIWDC-d{W*AP_AWWe23rQ*|U*_!f+6c6FvM<1rjuRA?!nu`4td`;SUYA3pm
zKPZ~nrnP(5GG*tOX~C{doA&4^&GcVYVbRu~F|Rgm`JrpmiUn}tl3M=uXLk`+6>}%4
zWiOh~^i><|B$qW089F!I@d&ePik+&ku&%Z3QMDE?n_UU14U0z0k2Gs%pjI}XNAJFw
zsCB=Cu4<U;<}RQz;rff}kFfr}zhUX*T2iqv3JN}7fZHDV1;dM?jvj<iP&Bh2Rq|I&
z(@=sbw%&UmBa-fJba@GjqACQ+#`E_-zmt(Q<%E2yC0o-5ZC^5XUz)b<(^Lz0RaDJ~
zPg7A<&Ee6;Vy7R)#I2iddw>}u%1yovgGtM8;ieg*@u`}*3*NE;RaHH*wawW{%PdUO
zEZk5v4OO-ARrY@~@R&a4v+T^W(O{Gfcizv$>Xf4gxti?kG`0QKvO&ArJ$CL*Y*{`N
zzp9!Tfr39=!koD?i3crzQ8l8a!@1|)JE#aNZk*CWOrAN1A=c`CCkFUTo3Ql?3aaYE
z=Toh9e3o6P8d|KJ+wZ-L>Zt0-#;&y4*Nwh@C0kd{l4vw_VfSGVVC3Y^;ayS9<X0-?
zDyoXAs;0hiD{PwPGkxBI98C)_Z|z+yomgf1mjbo3Z|2rT6YyDh?e?o$vCNI7s;WBJ
zVIEhc|J{#j=~%&(kEt8gv<*APS-4eMH<g)VE3I{IplUv@Uwb#pr&OEv>~QaX+#E-7
zazb}~+`u%<#x;vOxg}5A-lZ0bBzw#1JU~<sqpg1&OgFo-DkOHQ@8$*0O`c@VG7zWK
zC~@LRv@F9=>=Ba8yDTg56VqPq@W(AlGx$ds1{0Uw!kydKQY9Lw9ac?1F{qh&6HAg;
zc>dEPEZg<~?O#5_?uH!O?theQM;k+jRB+_8S2);H&7F5`X4aTEe-kR_<5yo~cZY!*
zDrU|02PmryvE$jN*mtp$;Zqi|X~p&U^F4h2-s}8u@+##M7qex~P<qt_seB{Pyu5?<
zwEIv=TdlFuQLJCHh*15PpnxHQtlD}PlgeW#WXR%Y?}smNSRcs)>t@m2tE2Q?;f>ed
zrLiZ+%3JTJMCqlZqJk^?KjHN+k27WA&8(O+9?{##+poXQrF=EpZl1vFFTF)ysD|5b
zS;RXpy$ltT*|L5a315~ypS{P<J*Tmky1F74OkR2`H;yYJr}$|*yO$jwe@iw{%&PSp
znOs{;<C*=u@y@4o=KZYOb~n*{H)R!NociuvK0jx0+uipwXTnhSbq=FA9s+z!UT`z3
z=8i}7HS_N4Z*c5V8>8pk%+d*^<N`?oU1xaxvy*JzzJZt`=uh{eie6rR{w1ioj?Js)
z5>j<e?)jV#zdpo>1?yQcCP_YAME=S_UU=&>`jjwpm#<~P<XZaLFS6s6H>pqiSg>+E
z3nmSr^XggNdgE;_cV;Y8R~(!67_gNvPg$~&%I>{<@WVM2R3^?_OPz6)@2<qywsARe
zEz6HP-{I3eXNXmd;+|WU)6|)xCLwtH)#vFK2D)JoD<8$iHH-La$4i`UPJ8TJ^(Wc9
zeLdgrc!7(Zc~oDL4cj+z;JsI<oWF+YBg!~p0jZwrWAArgvT)gAch{Jn=jfiVIlAaZ
zynFII-hqO`)zb&~YVQoTmb+&H(Mq;#TSHOz0ms<|!hC>byn^+cH*sp`%j~|`&%9L|
zSv+MJy)74b{gpSk)Y;AVUwy&yWwQ_#D2GyYJbKrBe*3HE&_m@s`p5>J`Sl;@)?LT9
zNnd>HgN*j|5Us4HVebdL`sqPJMYY_z{T6ECD(4S=#Va3t50O$fY`cvy<zbr6?BV%0
zc9QWIvGwkI7#7Kr8W!h!&nwI9z67zdvE29QT}+)in!o)|f6I@byurr@E-`B6a<(j+
zj*{=;-53AF;f5~jHY=sKpis}j%#~Z19FKAK`}cVLvjg~3!?<<(W`-pFoImg-uYI(O
ztieG1(ju(ddMA^IB*-+L<jI%crC%8M6GPZ`|1YRYWqJCEr#ah_W!~y_%p6h939Ah?
z5M#rgkFajxMCyNNp{n#4ik4#Y{dciw##E{v{y9eve96o2enYTw9NV_sOler<@HZdu
z!8b<;SB>Mgt!wb-vkXc4olP5ZVj~cg)J^BEJMUopkRn=ZYe@wC2$ANdZ4Z((x+$xy
zq+$O@yz%96rYu^+itEQA(oO7m^%WZZW4V9Z5(271XGbqu-({YD>0Nqq!cl>#>j*2h
zZ)4ShS#($W7;|_RFTM0WdfuRR$^w2lWhH$VcJa&`JIMu0xM{;CCJim3>Fj=9d;8CH
z^czxaF&iE;cP(pX)e#6lSD!-b(a(7Gv%^eUa5F3CPC%rac<1%kIexi|+Nq1!uzWUw
zd@o;p^ai_5XIXpu?JS!&i^)kCxBnYned}}jgftatf*})@Y+zLH5#IjsGV8W);q=be
zFzOa^+on|v4|EbN9mYE^zrvYLAIsOTWmI|IN%XQkZG8Lfw=A1C0=Iagaov(tTtB9Y
zcYd(?QvA%kaRWC@A5Ju0LdS_O`0aD=5v!fTEgM#l^5xk7)rWlY!%0%ZCbNC>DwKSV
z^0?*}wroFF6bwUU{G8?7vH2!OMtblU59QsLU*S}z%97Qam@}@5t}7>b^|g0s?8#et
z5>7ip3o&=uI_69qik@y`=X*Oid?mw%JMSiy?WL%qn1egt<dePUDXp8vts9q<(6a3L
z>|Or6_c9CDZ)NlHdHAO$sF-+w7oK?oAp}+9X7jVZTLjGq_`?ey(C<sJV*M>luPdRo
z{s^zV`5|o?l~r5sqeOI4Qdvp;?vHr&%cIWgvuqmGP?Fp4+sa43f1KW$ncTG?&f~9s
z&APiECZ6t~w5FUxpT5e+doEBmaylDU&Z6o2GgdMdWWn04+`3{ep_Vvh6A$y;ldsTW
zNDHbwiLFwQc*t211D1-)ZP?XBcR*wR5l-Um%Dy1^l0KfJ(jsLQxe*1NX*+8;WoM`6
zK*X(9c&Dq}Ox`BTu9VnBd**pP$*o&fbmz(og(y5SD=Zn?<#Sb4Ir91Iyzue&{P@Z1
z{PvHp(b8`aF0Nu&MG8=;tQ|p7#0RL<O<zc|?I4do_5?qkX`%7VkNok)cR0NJGk*22
zzh(FNW&pyWC`Z0|i=F2bZdy2%aJ-89?pe?N4_@S@KObk)wvEIB3ZY~LbEees`J2!3
z^qZfWuf(z{IBpuJ(A#pFcRt)lQFWR1SOblUs&bD0u#2Ly7~dXlVCMAkgkmL39G2wm
z=l{rAWh`swj|QM}+)Re4S9$smPw?sfvxLgVuzmG)eEiZM`SwbTt?L$%?`b4FWHRHc
z;#7{G#gI@xz4-`R@4lV3y&v%OYhST=!xm@Txj-t@3PEznXsVP(o_g{HN~SGm=7?e@
z->{MEEBkrsiKj`I&tT=O;TWh4n>dS-^eLWr;z_<e)<C-X9M3%S9ET3?<H_Iro^MYy
z5GotR_I0!Q<dvuSx<1IZ4T}k=f>_0HrjIP)qgS5h#rMA^qr@l)XV`P>DoL(zL9bwZ
zb&7OnBYSpz$F4({Shjf^!{REjiaLfxx_R!|S16vkgt4VTswOXC!|Y04ed=*ue(x(f
zGde@3EM~);N?v^ONsgy0xMjt4R%O_+ZnwJQ@fL%wtjeVEW2hTFnF*t+89iwnqF=CP
z>#el!d5>q`+sCS{w^JPv_(CzJ&YZ!yuixbHXJ6w=m-%>*c*Q6le)uk~?){WARsttk
zJ-Xfxi5eyhsbT7@SyWb6GqJ9euFfvL|L|Qd=?PLH)$xmhv7$rwAcb+qzij;uJP+C4
ztGxF7$CQp7DyQ=JW98ia=+DtEe9PW*ZB&e3$Za>&^5P#JV_#b_cdVU_?!M&F`DA;W
z$dt}x=FkY0Q*NLpcbQ)2q0!#n%STQj(Rqm9J^LXG)@~%F2$pPpfSRs-{O%7gQMG70
zGpa*OTz&`hQVl%z_){3;)^g+6BqJ7VVOHcEk3aP~B39%b7utV?N>B3%o_Op}oIZ7c
z|Nh_pjh%<;@r8<b^pP!m{Oser`dvGJ^WX-vQX!9gZM;$A7%ltw-IF_*f6HCe#(gZk
z?ICKq5AwUmU!-cmt;`%6k$RONE;W`di-xe{rDu8L(;ux@PYL3Y5MRIhEQj*Lxp7J@
zs;cwdyKiw(Pf!#!D+ormhc}*koLxsx@bVMC;mt1&q3a#I_J`lI=lE%!ee5?p|K4}>
z=QXxG^a!~VU-0{<-)82Tt<)7qSh4kX@~1xIwU2h;i^QbV#HP;FUE8^R{QlXuIC0=B
ze)H?cIojBb5KuK?HnmCvPygX@cJ4n%uxu>1uepv7UwD#l8)DqHej(9VF|o{Ljx;Ig
zjRz4`6RC;?9qe|h-lX~X#i#gU?@@L<|0GYo^f8?|1EGWnWUuhTOK(v%b15T>LQGz|
ziR&wRdFl^O(^om06*Fpamyozpj+YFf>*V)j;}sm<w~tZNrcqioj_qq^^2tk2@y(?$
z+tw|{?+>wL#R|HQ?&O)5-sJR^Zj4+z?>zrBU++J{t4}}1b34AI&q{LL&}#OUD6gTa
zIF3*>s%wXniYOfU=6zm$?+bQ)_BOx%?VmW)+K0$@@yVO-(H}2!mhsrtNZB?OboPAu
z0cX-disC_Q+>m3>&Nuk=<Ii!iy@&d9XUO@IY`pz8(ue=d)31HO>@{1cD~_@9mRspM
z_#tn9et<wE>ajQ{Vk-v6zyFYz-u;XpK7WT_{pJ}?Huq99aUM5cSIo0dJjs>tWNx`(
zg7eHd`#E>!+EAUV=TGqMS6@+|DQDZdMff$9+Od=AJ^48=e0rRvE0+=rgjl_GE1i4a
z=TGnMVdb_vs1Eh<)s7c==bHn3{QA@U{+S(g<Q2e2m=0ck@g)*dmoct5#OQfzSvE@W
z^dJ66b8HH0=Z!)a3PZ-vpvI`@iN_x2vqKln%AK8H+exeDWB8~MgcR_Fix@eq992;m
zHf}1J)1UM7dk0vxdIeF{p!v*RKKSYk)k8|G<UGUIZ@<8fuMhI+>reCBKfXbSVaZOo
zF+mYZA-s8(ba))TU>e`6bJrg15v;Qs0WV+9olXbB8Bo~w$&LHWbqjWeCEe_NwdcBB
z<F;6^h#FX>6UY@cx6)z0SXtoacZ#bRy8-7oPI6e71$#zHHW9Y?j6Av_Ffh#5Z+R~C
zj+Bjb1K;mELpq%%ZS}3|2Eq{J^*n|FDhge#7dU;k0bl7=0^@^3;x*LOl+eFwGc%MB
zv2c!{PXi%1b8H`%n!CtL39Owe*dKGDt?r@A$Y)9S_R!VYLvMdS{y-2FgDV%$(B9og
z{lx}m)m5T!6eZWj?tRDT?&$^v%Eneh$9YaQG!Z^<ly%#O<I8oh_jnUiubW0q*%*%Q
zeTkf}jL~%!l+Rm5%}hvz!bHMBl&-w9rUJ#9&ZD0*r!LUj)lOqameR5cN{81{Rj081
zj#7#%ia38hi6X9$PdBoE|0#NVyGdI*(+%C+ppefa3_&beiPn9Alb4$aDaY6_bqqnj
zieh9rapVLIEnNszArdmk_V<zQ>7l!`hrXaeI1pyW4Qp9gljnFtKfzFfNJv2;!}+rp
z>F(~Ks}CX}m8#mIG@U-o`KxUhYCA%xjIS9%Rn1T~-gXDkR2fEn(%e<1%6f72BQNZ3
zK6{41g88gkGlvp!j{cy*2hBQD6FP66IK$PBHd^v4DM<!N18tX&apY_>Y1K@+icqMV
zHkasyvwYOlAvvS8m0Ztq?rbCTX3yh>$=7k<+dYW3^E78v)S%(4;V^;XT6O4wAA3p#
zyC|Y~3;Z(Ix}kf|38)%7op8K?XWwo{7YfxwM^Ro;#fIDOCXy^8)O^u;6`Z$4BcJA@
zPxo-wszsbFo5?4y{2%DTo)WdLvS||CoIH7jbWfN*BS<I^X5#qaRCLbbzOrdlgj7-`
z#S~2(LoyZRo_mIoO!&x`6)~)KIHwMOLR;Hq4xXs@VuUqnR4j1S^SbHR`8>K$BNQy7
z%&2EyeJg6~Zi4r1LsdXG+`*|WFIDYk|K3xyTx#cXZ-Am?85711qq1u@_m)kiD&ixV
zO2CD*$EQ(1be`tjJ#B2i?-BZ&j`P?bU!xm^=F6wJc)5wtg%%dX1AzI;K%)Sm4Ly%x
z2n=1fXfrn~*suCA41qz4@uSMg7&mgy_#CB?FyTlsL(2mE_`zA~FKS%4+Fqb>36+G|
zmMI_It?cC5_+jsH3ugpnV{4%OET^tC6WM=+jdu*guhr9+?xnY<m!6&;B0?dgsqVl+
z;zhT3(Dl5vaS2qVkCR7F(a_L9OSeuq6k^zj;ZzPc*nUSD#T7};CCznbE~MIR=1KSV
zLT3*>>0X59r?|KprSm)|8=46Fj<Npsk%ZJN-+cZhTW(sz&#p_e^Q{;8vZL3Mqlxvp
z;l`nXVOyRevto^5kk<_i-P(j@J?PfZbx)8VY+!|Az9ij{D#wZt7{bS#6&r|L`Hnw*
zdmLYM1Y<^2kz*O<Qx%FsL1OVzs*5x}KXsP+j&?3Jbrm-4OehFLpzG#qq4K(pp{SIW
zR?>Ot80W7xlQ?pQsZ(k|9B|^T*@hqxEo0r5EoADC&>syEkHyeb$o02y_CgC?;SLP{
z2tuJa)k&SVkDuXcdy=N?YD!Z<E?+fm)X;VGy#2oC9H);Rr?IJtt8F==5kF-kMp0Vh
zXWJdOlPoWyr6q+TK<{tm$Gyks=}VLDv(o};wyqF&q^Wy7*UIU$Ep+?KF#;3tYXXrs
zXG98hV9G~F*WINc$hnImC17$6Bw_2o>XcUxPDR@KD=*{mvLN$k^>6)Ftlt!z2k;ek
zFC=9OdG6O<uOqDRW0wepmvDIu<7T8%;^#bpSa?9zlh}C%iq_)o!l_m#uF1id0YM6&
zKix;Nw3MRaQifKQa@B4^H_)vJCmgWPp@qUkLP0Wp?!$0h{uDBO&73`bh7aC+g-hK!
zAx)>fKTj0PiKV`sMbio{aSG&fefYvv#3Erby)vN63Q?&XGJ^8b6vJw3Xl>kYC8`Fx
za4GBUZie`5hLog;jH)Bk-imJM9Nn{zHIJ-kq~5`AUT(n96P!JLj>8|m$-ztAgaR5(
zExlgtS?wBx7%{4rqOt}mi^3dfYoYgYJx99hdH%g0(1IcQyP8o{ze$f|%>$;C6iD~9
zphp)mv?N7fR2|v&Hgb8xl(}I@<ClRSMGZ4<R25&p_3s?X4CRKUbItV^vYajbH(hC@
zWXT9Bi{qr#0NL&?TAJ!Paj27*pMM|SA4K$ZfU2NqA;wOeK=;MtG<5X2Gxw52(j6Dc
z#qXlH?=WrJSVo7=aiz7Hmb8yCL#t_yl#tSLboOLXf?|OF0S<ri0#EPJxc9Ct)c@<#
zwD#$at(&7Uo%##snXrC6-~RbKrp{Tw$v;1at}3V^Pk&aBN+k(g&6CNRK_Z)1NEO8i
zbaau+>wqAWHAoaC2>QFo<#VnctmMzOMLm-xluQv02FPX2r|KJeKT04$JQAWi-4BYw
zrG209e?51IzrXh$PX3?&FU^hhoH^Rh%g^n=2!x@(165POFvtlXsZ^9eOO9;bpnmU8
zqTBz4xm`#2hqK*Yd_e*?;f`!gH|gihnG3X?_=vanTqYEb(9zh$qzUIap?=4oJ~@s*
z93|a)nac1bZmb(cJl@90+DeYdz%C6!!Ay{ZKS(Sbq{}eK=h|pjD;Zsupl8T9@=Xl|
zQ_*gqFhrCwW2#A9jZzlO(%I9-xw98(IsOsv?x{CvY-;lK>ms(!m#_bk5A#9(@&Eo8
z#+7`)kJw{t#dD8>PJh-QnM@M&r^)7}2O1Yxdrqjtqal2q{TO;TXHVC2;p2Ds;cN?`
zV1UNUjf`*0Qd?6^d0M5iBr&j+Yb6=>$1u=^;h<k{R}&;=F|;^IWb_F7TN|whnO}oS
z;U>anQVdx`5Q|0#_<iIux--CVrJmz*eLY9o&++{GyU~JS`a7H4!M)V3Bo6Cdg>-i_
zB0ig;B`Ja<hm&b<BWDO2PyN8JkA1`Bm3Ojg@hrYObdkIe<ORed5&XU$bHM4?r`vvC
zI!mgogwl#Gs>)Ivv{aYZb%Nmt;eelhT{j<{Epp_9pJXCNpu3xF-ayfF<kbL)WE5Y2
zn!IkH2%Y|%P9m8k<ZmUD(;0jHDi#mV^Y~-mAT%F(u9GvT&vD_;JJ@}`lVCujwWW*6
z9eIY=RC6g<OnFgk5bJOg(!8E`Y0T&8=x8NcUPon7l3^o;(0=u}Db|8yBb=yaNT`+n
z`sTZgTK)iYCiroU+XYD@-%E3^#^_<yTn!gd<jc{~lL36>@&>V3j6gsmXFvE^+Ekz@
zIt`aDa6)b7g*U!K35LmaH>0YG8{=dnl?+7yhHj9}2dJn>k*KRBto96UhZf!0Oh4gR
z5z%0PTt0_Dkjv}DVln(aKl!}*Ou%5#aMms!$$M{qN?Nyrf*TwpQ*`@~Y|{BYz(nmG
z+68%^#h0*)C=$z_0fhoWcoR8+Kr1dO`Db?;({0gNV(lo`DYQB{iJcwW*D1n1W_zcT
z$=bP_O^?H{`&&)`J=3+wf>Q>blY|F?qM>6c%Qmq8Y$GTNna*w|-?)K!6KgT_ET{H-
zO?{`%#A(wwz56@bGkHf;*>oRu*DdD8h12P~behY(DpSVQaA4P7D5+z3q?_;e9-<)~
zW&Qe_xPHcTO8sdLpSnP}WEi!vG`o*nz<JY@Bo0|~{pl`hCNE+2f+;jzJWZrxC><Bi
zkt#3dbbT9@p?*$v1i4|}RECY1L~Xd4m)_h-SHI4PiPLC1wU5i~=G0uKyNztDj!o-t
zW@5F%o3Fi1eS067bQg8kEhBL0dp`R97`c2umz%S!ShI$Cvt}@~$j8y+C&}oRfk^?V
zcFJ64jvc|o=`+Y){GRu|JV1T@Wrj{2wDJ1@03ZNKL_t(t#Hyw9nLKs`E$5GNr7O?q
z@#DF2bPtW)88;CRCYUm&nuELd(W{$pLg)_<W%GucnN;Ir$Ll+|(9%oskWmziRt}uJ
z1d4($lwx#kii?*!Ne0q1<zwV8ALeMQj}05vFs33wPo|&qNA}X6s3p~RnL`)a89iYf
zjVJeV{M31ZL#MHE^)jZ6t>N;CW1P8IPk6{w)~#H^v`OR0G@apWV;5Saj7R?QpBT$o
zzCGUHjPT5nkLgKzUZ-^UBy#7z;Y3fISkFm5-+hv%jyx+jZesqlksR3h2HzdKia%D&
z__`8yeZL>w5@w{NmXV2GKKN)S!BGnt6Kdtyg%+nWX8A4Itj5%7BiZ@(`&3LA$AO(6
zbE&f*M4sM!giW_>Vo0cmeJ3t~&`FB~o42f|T<zw_nac<v=<QdzdGlsQCKZkxKTY1S
zLBSjJvguwXFWbocNy9jEXfHiE1wG$GZ1`L@-Z-1?ONY6Vi8FP0gik*FfY#^)mW=fA
z`Iq0JR!?KYsvDR-Wg>d>X--|~LeaBif>mtWax?M%s~ox5jL2swnY5Jbo>%zlM5E&f
zb~VmUC~GH9Cw*!U=ekv<Paets@4n{P=>}%6+Q{NLvzRcVn$w4V<kZQtj9$2wWecun
z%J|V-I<kwCr_M8B*%p?~nT+4aa^i<CIM<%RuA(7aY|yh=5_L0KzwCN^UG<!-zsi-~
z1b5xGp4k&Cc=@S6aj`A!0Jef+4q}C%cH9(()(mIKjSIN&&0BoF|2${UUSigYbu5`X
zn+bK*oH??W&WwadmfY3Ua2|c=9+u3z9{u7T-uq@h*<dl#>tcMr>k#o_6DjID$ANP#
zD2h%;Cd9_g8>x%+v+wvt48_m5DU-Ok_dA;V?N<j1vi=G-tiF+<NrPjj&T;a58#k_A
z!-6@p7+n?T#PP$Ny4cC`^_#hF>@ZZlo88}hOKYF*(C99U0X162v|(Ys+j9VjKzF~4
zoDe7iMozhomg9T5+>u66Ak))EHa3Dy8%&&e{nfW<(Ua5_3odv2@bz3mk5_Q{$N^gW
z<Of5$QK`>}ar4b98B-JG<grsEhL0!Gd4Z#sIvF=%ELV>2VgIr7)J|W*svG7pdE5w^
z&mG}PXU0>h5JFH=Gnz>H5_+<VD;LgEUY20@*I&~a8pg&AH#4b5WykApbD=|L!A-Za
zZT(8dl>7Mf!w)&z+-=Sn1j<;mW;w%(6pkN1haCyzzIwa*n6qjfGe%Z{(a(`RySUPo
zC6h6@Zqah)P8&hv=@Yc3a|k1Y5go?nbt{SVH*ox73*`DCHG+-nRuJf_=X66GC_3HQ
zAUCbuz~~~KL+6@VzHvL_D&vftG=m{Ao#V$(Qs30i($#C3KWhfHr6G<VJ;a5^3^#4M
zg~@f5h-@c6e)Bz@*3;RYdX)7~e}A573zsv0`be73p5WZ2D<p=`WYg-URH#>Z@%2yW
z%G#AO;owp((@S#19M<1Bhm`0gqi6VW?{S8Y8^`4XyJ=HXOdJ+x_m8`&Z_cr5^DWGu
zHj0Bg-{9M$jR-}M$@^Kcb`2BiiaB}U2!X1RR2YryJJZ6b36p3!zKa9L&rvyX5jWj9
zpQ#hZ&~f1i_3c?kO_)UM$vs@|%D9Cb%Vxn`Z-|~J6R6_0Eh|YVI{oPezTJ0*QIn@|
zY40vtlmt^p74yTc{bYqgI^DyBh3i;4Wf*6U9i*$@pg*g!a@{&6jVk5%-b3^Yh4`>(
z{OtAxeDl#)^cwc8iMM|_pOteEH)X`Jd3(lX(2aCbGUen{PM)O<NC@583^Sop6a{6(
z=rO{x@J@Ia^*SXEJ4*+5XR!R+PR@mJFLx?j<}|9)9AHlixzF*m&j`;ib3=oCrZ5F|
z1zGUJI%IM7QzOnn?k#+_&EVI%)5G2_UC>e}$kM&sUArG4sF<;aUo4FBFaP#38F{F5
z&5xVd3M+&53|J*PuFao7=mv}L{YOSM?BwZB_DQesva1V<G@|lS<ZO~bzDK4{DL967
z-Ct7DweB74QkTX-j|otHLFTO5LUGGCytC`9L$@0!_$%yY@Q*04R?os#`<Ok2eGR_7
zi(#+Lk2gt^m&q2kS73WqAI{Xkpxq3j7i-{VUgv^=6ZljgW2P<SzPoPXiU0LKbE?xA
zAjd&BF%^7V8~tg28B8=b?Oq)!1KYF`_MfOH4eVR?@1Mj8xnBnGa_#T7ol8JF;F`f1
z|Lf@SV(nl$_8uvq<Jtnh#{C126ngT&{**!bxi-B6uNed@u1U9i?O%gK-s@a#N!Vxi
z<teLp@yE@!T$;QEiNQ#}7Jag8<aP)5YuQc;iy}Xzr~mlJYT++{sh#ZqRKzt0SGder
z@*CXdujN}3PYzA8zj*?tLvtZ^nx_Y1*noIANbHv*L17z0`ZlnjRoJe{jn4sZ>qfg6
z=j@X3jHHCqi#u0rp0f>DI46Y#_TcRHvN-gX3+>~w@a&(`GgmzJHCRY=xsXLEr2f5Y
z>CCBM^8tu>u2(SU%gtm|aR{)JQ}?%U%9}F%igZok5+=oag%lJd$}KT>T{y^F8K3zI
zERQbaF1<$QLYNhz9b0!t0*<qIEKa!pstPB+`-H3M%g(zm?N*&<H4UCpOCR9{Du-^v
zGdeFca>sZbTX6x#btIW@yC27brN{KyJDOd?Gs!T2`=d#ew&Q%d`<x{$@9t;&I(J&g
z-o-8XMnMu}Tc+!PHcwtvfuuhhOl6-al&v8c@MLWP3<?s11ASrm6P}}XQszEF$J>PD
zLqgh)WXoPyR|o;BMo}ultH1m2obIyyNZ=0UynC=r`FZzenN3-*!ISilkbPTNzN&a(
z%DPUtLY8=R?L{C^41uBG=+${3st2}RJCGDq5IDrZuTtNJAv_T0)kvYR3-=h?{TIZ+
zK@uDx49PwV+Z`-Ky4F$<qOMKffMm4bmVsxy_gcCVie2%Ra8Z)6Fbc2&oa@b|r1e2g
zhpv4N7>Eg3u5!#849kRM4>CtKmH<*=0to}K{Q#UND!6i!$i%iS37-n&Xt!tkrUy%L
z<yIIEge+I#rk67AI@d@=9FI&K-}Yj)_gYymg?MiACa-C?+jSBz#gx1u3^&AC+K{hx
z^)BJCjdntKuLti&N=Id~Eeq)yX&@{oZi*8{oyg!mtI(T>*?c&-j3qls?uwK!$}z)1
zGKmHu%Tlpvlm~-uN~w$-Jyr+_SPS^HzPp_cs503MIu<1voZO(Z7EU7G0{+4>xvK=%
zAmw8NjLAMP3)P~~l$#6=Ud0q938oP!&KkKwQD93-k@~S&vt;8KxZ)a?t$4@altQQR
z@{21jw-3o->=)zO6%pCEoLlVv!PSq9BL#A|eYOzm3%5wiJUH^O^=^Z>{Vq#MXC53Z
zXxEXQlOQPWd5b?gD65hk7C@`@T>=W}J5t-StUNYg2|=+P#R75XaXL1T2roD}{UY%@
z*hd<1Djpr+$j^2g;nAlZq`j3l+0i|~Dg`*(79G(G*BE3y$_C={U|l!BmhD(BN6A5c
zJ8XOX78J|A4Oeqc_mUo@d!2S1;o#A;V%=kYSERPxin*sPOMd2tE2%BHufl%81R@NH
zM#XMy08NFyZ+}M!b622^rG@&JaZdU>PzJUhWr%ZWbr)j~q{+k>_q;b~xPeoGP8TB`
zdKHIn+3p9Eqm1uJ=VcqQy~ssrFO81kq<|^ov78*(Hsahrh>v)M@BQwHcR$6K#Ls|Z
z9-9#ZWXK@&48TQPx;=b(5pQ7Zmatsbfvsy-<~V*^n1ub*Fx~lF!{-Yz+CEkoPp);m
zFj>6TwcbkNHRv6LKkEdJkq^Q{AwOO@{zQK^USC6ogU>`*R~OJy$Y){yT2X1qKfB<C
zjd?g$rq}`5{<xswW|{>-LNPNCmw2VX`0Qgch)I7v25Ppa*kFlk81ht;eZN0e#<JDx
zS-$9cYEmi}E?gxq1j(vVj4t=n+}`5?FSf&&ZJA(T2WpcxMNv?K38qgOO-FMJd0hEA
z=Y;~QkGZREMr*34!+L=@5{;f|Oo>026K_zqX>s~-rRLCYYr+>RW$oteEW3UTmrtLh
zJ1ZH8kmy8lzIy^Bf~v7|nL8@P`OEEAP?N631`(-$#g&;OKZh<$$pvzD{wru#mJf=?
zxY<jo3wLv=t<QSMrpmZEH?eKwaw5G~xzOB=XC!3P<yf1yeOt#U8Y8Y-#MoFL^=)a7
z9-TYwb|pI$!pG#f%c+fYaj7LO$q>&+wn|&Z03}1mP+O$Z+L;ERsBS8Y#+P!wzR8g_
zNPlhJdw4Xz7mb+B16$^E`p_`~L$Bk3?Mt|D=s3N)oKWLLT(g-1+j4B&t4FtYNoPud
zk4U;h39?|-YWyANY46iP@iTMTT2kqYwDjr&@0WFg2~Hzttztr~lZ&mEt_9T1TEU%L
zZl);P$f>JcsEVM_W@LMIg1+=!2%TXwRxmly!-dw~0XC0f`s@vx?qgE2n^TwCB)$|c
z%}Rl6gihV8mE5^yHAUGb&Rp&A_!xzB7PJeXs|hx2TtRF7S<*SfbH6k0FTh3yl>zQO
z4&72USn=|JfMnH3Mpp!BZtHQ{R72%#Tsxo3=gyPUT|dXegM^WC|4tkjC_4}C75Ie*
zV?4GY`C}mu681TMu6To@2kWH!9lOTczlWB=jt_FogP$HbxaY-SJV+RbhtGlf_2^s~
z$d{YUNXG~8UO>0qqjcQko6a$*%Eq<Rl>yU%1;0IsimW5Nl|0$UoO=iP?iNSz$iTkJ
zlgG-rP@x@4ExU3AFZYgg92t49^|s}el~Pb``w5x39xutv8d#yY{S@$E$9C1>*y7XP
zoM3_#?hKCNcIgocc;2320)<<ha~wu^Z!h@jFz1Ys3)}SER4{_G4hbEWZF`96rTu*W
z<9>Sa<5LuZ;V4s=tYh`Oi4+$l@hO0!5{|}5rIJJfK2wxJ5DJ9}1Vf~fMMQ!=R4qXH
z@QG~SvYP7BBJ=G7RyD+-L=m9+m^yzE!%K@uB@_5m)43v%u=O53K_C>tuc^*s8JvNr
z5TIy8VsVO6NrHaORJ7(N5(yKFCy9lF*y;|3qa>3_qM-l^0==)3FTehQiqVrPi~7u~
zR3DLW(0V|kLMReJQxpQBC`Cm@6c?q4N5d!xG+&TN&_^cg?jZ67f`me063HatfX}2+
zfIk=^m537#g$ekqCw|+5O=}bo3Wv=%Q3&(@U?^<9IYadmPb7&)L#7K0f#weo3I>VA
zlO&>HP&Dc$&tdYg6et?eXarReTs-yzr+N}hA64Nog<v>JDw%L;QB)$)IH_cUP{3zZ
zt{@N$lT0S5n>33lwZ(uy^#w>IQzT*$i{1jgSl|8OFagcn$z+>mAQ&PXj*v_y2xzK1
zkS{RqP&Cer8#l6I<`{}n3498~tH&^J+87e?1krH7u|3rnF!dS^SU$vncH=eUS+iyn
zGsl!MW9bTRUa^GYfC{L@VhNJ*nE9p=MI{=Gkx0aeM54rF5$7G6p-7BWGETr}58M>O
z;V^-aDNDbm;17lgS~69A0iq%MwNo;C1~rrBGG%B9;Yf^NFv8p$mXp-7plZYuNfNP$
z2|-pnp<sYmJW0|zK6KK2rVUSFpc0Kn(Nwr{{6}_Q39w+&5Ubi}>c$_6l1e5Wx&#X0
zsOd9dOV_6DhAB$L88&GyGlmx<Z=iIiTm=45Ia}7RX5*$+)^n&J6p0YB<)E7VYMN=c
z$wUmFYGTKg<Ga|~6lCs%8ib`M)4xr_*Ze`kL7ziUGL<3`2{}5A#bfBXtV5ga?}=oJ
zRNRz<kmN1ffKXJT@q}yl*tA=EwVuQ;guoXFkxZqChl61G%alcHSv`Le#i=AdMKNtB
z9wndg(xdu<BrKdUg{Be?hlxhxB;rx?Eh(~n+BS<D%w!JSW8Tj-tJabqxqWy~IYGN1
zSUPPAZ~KGwG%&tgn+9Zlt$yVDWjsVWSBMM3Hk!iz2ch5oBZmtZn2^|5H$cb*-wRo^
zQy-752|L=j^be{f#ht~mlL}<|BtEgux$00H8Oe#zfC{Zep)4&s!IBeE_qf|#AzuzJ
zQXfuo<j7nyY&j3Y<<I$Tbsz>gW{aXQWht~(Z?bCB>`iG0&@8oMK9bP87{}q<(T8xF
zu@2grP@JIV8mWTfJVL}y<m_t-rWjo6oE8eRS{D(dDJV<%4uOg-ca^g8V!DqtbN<v3
z4yF6i4UI{27O`U9REAT+)^IsHUVNQK{}ArDZ7ngMN;H(?xhJ0DY)db<{^EaQh-jcI
zqtJ46Cm$ZpuzJl3MvorJE!%G8`<?Ie)u9Wn(KxgV6fMQt+wWnaFGl;Jk9gwE-Q4-N
z|0DZe{59X5@8`b1`v>;E@>_PDY4zsC2x8UaxpU)ELOu<@-o~?ke4b40BL4aQB^*3_
zk>SIO_|vcdofExLHg4TSRWe98tntB%&#>=&E3GZf^k#BSi@{VKzkK*+Ui|mp(C8n|
z-#xm9XMgoOqT?5^^7=6pRga>q{}BK4zdV7jVifB(Z6<u~O};sB9t?wNtMBL5NnuX6
z3d((r{PwXINXJI-&_j0;%XO2iuH}bU{~uoY<|3LZxt9`R%Y(n<=v%*MZ+$lgG46Qu
z5q7`v6x9p2Gp8<&=GWNy>eGC>|2z|yY~{{HwVb`&4&4`d{>@KJ-D*K*-?WVd<z4*Y
zg?H)hYNMsSM@qB=C3UlS^q#dSd7XUgaUOf}RWik6xnt8Zq5&U1ql1^9d7ehCng<`c
zhosR<N!=KZzVZ%=KhD~_9%5>_Lb|t`w_kgnlUI5zK1`jOeW?r?JCn_uH*#dhS@xc3
zL(zh)dGJ3oEzn34Nwmh@{QI*XqoX=XHLcapxLG%F<Mop$?~jm)SF+>fw-6}QPM*ho
z507U^X`WyG*MFlvm*lqlA7E5ckk_-k``nZ4x!B>pD#xA<K<CVXLyVn!18R}Zk;}b+
z#<XR(aO>jHq%#H=cfZdo-*)gn{nNwrxA&9s_t2SBdE@barLA}tzr5!r`g*eDn-B1B
zPrQi{t>Az8-~X9QClBNEYy9xxi}a4%z|zP;e)ZZ9Oj&Uk3&RKb)hpklSV@Wr(0MLg
zZed2vNdD8E_i*<8-;)gWais+o|9{-Qcbr_+mGArAr>b+eau&)N5RyO=NkYP4lCj4a
z114j@Y4XhT+)1Bz?|pMW_uaYn8;@t~aUz&tV{DR5#+V>L2xK7yLOJJ_S{<Zrb*|d)
zk8{@EYwxNS;PJfqd|vTEU3JdhYp?ZNafeg&`LA*5><0R)>R9#Y4|#6wUdAu}9Dn`k
zschKON_F2+?tbu*ECD3UzxX<?oYBX3?|zi__ErwJv}4ElSWq)+4uAa@x6*U0oqBtS
z|NT!troDbLx8M31DzFTyD{}95e#pN5A^hRj|BQy7W`@n4!_J3n5ZBVJwG_`Gv(BZs
z`3Vk8T*8#vgzYCRxBcBWsoTAV{*lw@UjG<(J-LR-XMU2amd>PTs_5SLHh2I0WxCs2
zIo#Y%QGWTt8Ww)`tIRq240pe{mGiFo6Q&=0g8Nst@wLDDQ);@~NKPE$d-pv`+X=&>
zPh8G5m(OPNTPtYk?59X2pStl6Sv0<a?yfGLee6Nr-+N3wH0IX^W}WvrZoX(b$4(U3
z{Q9Fj{q{~ynSUu?zWO}6dkVDgew%w9eu3hU>3sRiH_=e&<V5>^etP#K44=Q0OU|Fi
zl-ww;s~W*m4?oWS&MKB(`e{D9Xb^9|zLL(q0;X~(H~--uGO5;5Q&Z$;Kl&l-+lTOX
zfB#1`Zr;eSNt0Op#67(5?p|R~CBrN=s2m~o8x}k|8D)7PUb#?xUH&0_i!L~#l`MUf
zz{MyZcEjo<%4Kd?wG|&!sqT-;0a8@x)s`q%dETRBF1prOI$5pjZ?YWh&f7_DG-~@d
z<{^gE(bkwsjFB8G{G{8!LEa7U39m;Lfh+VxWTE3$ES-Agh{^yMV(_M79S)#uDAT-W
z$NGgRT`XyRIQkK75k<-5j@1N4P<qrznDDFU%7%2sSq6WpnPOvgdKVcp=>1Au7AT_9
zPo7WkMMtL4veSo*Ss?q4xvaPf5*v!32|oKm&@0vRlP%M`M%rM~Pf5POGe7$&HK$&}
z|M;K(H-CEb6%4MjeDL}+JocN_ta;^^eE&xebD*P-;)$cYzi}%E4<Dg^>baadV<-ko
z^|0X_T=OD7_}+JT?6r*?JG_OT{`4_6Zg`iwzW+m3tly)yz_O853BLN-pYqLb{g`P>
zKFOFt{k-|^L6)9#DkG-O=M>Y(_5*E7`A$9bv>s&r<{cb5(#)9Cma<^-Ajnl<k00O{
z58lJ7#u^q(9f9dN&W24}*nj9S44cjJGpD*079FZiwCrJfYYmG|pUBjCizy!1#Np#z
zY+e04Ke+EPnvXT}<m1oL-VZJNH}LeYS7Y-Pjz&dIEl0Pm;qH4L;KYy_OdVFu>~oe=
z)A%<3^shf+%aML+EA)4@NpkFF@6jG+Oq#+KpTCw<XH8*DZ7*FFlUR0YEq8tU|KZ+e
zw{X!V=Tnon<Z5auw(a4;yT8X>KYx~ET?H_C7F~WbpPN_BgFk<U_THj*3}+)b&b#zd
z4!`{b|M<=C(lKHI3ug?cr>&6>HgD(P;bun7IGcsjM=*Wf5{7rI;~W3{Jy!2*ApuO)
zU{0Sko^>mp;r^fgoLxscl_-AM+!)KwwXgB&+8tC>RybKJsw>&H>Ur+^=~IlDy^s-A
z#ULQ0+Zp|=d-X|vwPG!+UU-xr-S-%co&6XL#}91cN8kGw)-(@d-lSnnp1+I*!`t}Y
zcfZdI?>6)4%g;@}4Acd0zg2MT@HVO^FD2K!mE*kyDr!e_-BsuC%ma7xqx+v^&hjf5
zGpLHbV+Z)niZvW>+{l}IdzmqQ3`?)LoGs7&m~Z~e4>8j(;*4>X7-Og%*1)RA@8i4w
z_I=juY+>tfS21DHg$%17%mwF6=dHIlV&%6eRj_W}*U0FZ)2Qh^&e&O}Qq$W;Tg6ze
zzGw>n`i*b$?fX{nx$8bfJ~8P^T+d-1xcdiu=l&;X?@14u&;8^r+_`KRcis0G?R|Q_
zE&iP_|C3kIz4lSQ`K@oUr~Vu+m{AWWj<9yaHX08fp<((3oHxCJ3G**ua?e`+*Z=+&
zt9G<yvsAAapCj@tUb2{VtKMMEmJ=*lFfR4~K@F_?^`m_I?q9Ls{PU^K8=CfRVav{a
zG&Y^!{7bK7be%I3WikccU%8SqmS0Rw^$;#zIGPpj?1B1moH3!2SDyVPcm3=c+I#vj
zxjtTc<WY8a^pleqkgI3j=~LOT@&$hU;6rRV)E+vu%Q1%ObLVq>`)VHk>3zKX-aZP&
zD!y>@AMnmIKjXXq_G1Rm{RGoT*Rt@6&r`YMWxn}O-{SlC{gRgcem1UrHj9N%^8FwE
zjD79BP;BS9pFd7hzsX{WWz?MWm|J&<fBC0x@XK|5T)ymda!EpEu9ue{e~_QOx{HPL
zXOlPnfV|h3tUqX<_0^I;kA;t&WfO|Z7ZDAY0Ci#Ga-ns(CP<nkbk%2<iflM+wt(RZ
zB;7x2<S(1jB0#@%9xTU`D)8Kjp8SH6Og=oLPJSuiN^2)6u0kHtLL<aB*pANhCl4FR
z)^AWC%D?4SQG@N&GI&IVi<4JvBRc-odlTzT`nY0Fbe;0<3NMk$EmL|df>lD)WWbG;
z;jecdb?st9&`n^YaRFLRA7$DTwQ)S#tPV=^f;3N3;XDK0AgwRNGDA_fBHlyrk%~Rz
zML$#*#MIcB#UXT=iFf*xo(IBOUo46^&AZ;=TmSeE{M|o%oBA^@XW`T#empmpVano9
z@!7@W*}7pJTlcq9U7h~ajp=D)=l-Mg7Yg+E71Hx-Mgtkytt*)gw%9@Ak>m8Vx6oxP
zsjR4A<EmE}Jo{{}ynH!Z-+q(U?gFtr+ajF#$!l0PYY-p2w~oC>I;pP80Y#1<Yo({J
zNOyNXm6bV0&ODDBE?dC9t?Sva;|P^idGyO>Mw}G8S@qUB=A3&GpI9`7)vMOfi^1gT
zxa{*^p!<X8czOMPHNVd^;$-WirR6yNeSP%yL%t%%;6Zg9Z*Ad3S0_iCn<+ROHO^5j
zmc85eFlEuXES^7?rRSZ8ZP`P+smFA+(9+pM+u_6H>Kf7?yf5}~@W6gLdJ5^c#RDdn
zr!wis)Qw<xeU&yCC!{6MkoqbPA8n?)vxAo7J=E9NGI#kOaM9e6Y<zzmyBa&FuBxE6
zwvH3Wj?&rF#gV2KFoxoZLp=Dvql}pMNxuHqUuW@@;U)g9?NaQ}nPQ=fmevy#`upf7
zNA_FGfx`#+Ca|Ca%}1N)?ChcAL@$+<IR*_L!RU$8x%Q@;S#s)d_8n?-Gc1b^E>P$`
z#;+d#8PC1CmLibL4`$?uVO((K)!cB^g&f<zmwub1-;CeaM{jo@{Y6VfzM3KRl^i^B
zl-|w`j<)wPxFP)k?%uY8>^<6rEvDbF-qW&;cbXcw?2=0u)47EY4jxxCJ|Db~A34D2
z=?mDi;dScAo=(zwknVf~HT|s|?(C(#@gQc<5I51-zjqg%y+!rKsyS*Z3KVKaF{-{Q
z%5DC5#jv4+ICSt3{k=ULJkrjvp|wmo?{j?atPyNlzm}~BI;gElsBaiX`_V>v`g%Ed
zsL4<IvRSar>?f(JXX)a3jGc2X(}vfubm_TiqOct_we(Qv>B8nJp_t?PuY8T6eMeaH
z?t66fd0o2<YSZGX{lNQd>YKzR7hOX2-Z$CP-iK}7$ajA78fRU6HUI7J|CkB#lT?vO
z=lWZ@|DFe_o3)g$|Lxzf$Itmk001BWNkl<Zbk>*v33na`%d3yv%}z6k+yC^xaMLC8
z$(cGvju^y(3$Ng|n?H+f-cM&Qj2Pa)p@WC%>FcArvy=X!OfCXrM9qv-aV*p~45hR6
zFde;p965A^x*>xw#?aH%Mn`8qJ>9(|6&0SpB=N}@9}_aT@%tz$;@Gqu;#Wp7AhxfQ
zj=rYyLwY;RCE=N)kc8D|yt5+B(eR~$45$3qA3xBPgEVNQT}hG%3rj*31cr*TRZp~Z
zb;$VZQb@bnMcV1Hf`TQEQ5vr_=~;DO7jVcHs9Q-33oxs7XqCRD7}OX?tA|npgxDyE
zlnmCh6BJ<{Z#}Ny<ngf9w{iW9TvLUiQ4gr7qFvWO$~*~dRK^#X+@!*W<0_fuc6hu;
z>&*BX3`mfwRc1$6XUyqS5*8&^=}TAjDCvw8Y1UP|Mie2k1%;qspfaB#88vc}y6)Cu
zi!f)=1uULBi}B;eQIqu1*`01Np6Kjk<hTip89j{3#8BHXnBI<Nj&<jlJaG&)Rh4*!
z$<Qf-eZ4(UJ%q`pjHRKrN@3(36D&!71m`X}lhYQSOI=qZZ5=)Iw{B-m;}AZ(XgF`Z
zze{~(2X2`f40ZK&bRBD^hZ-h~A4PR#9;x2NVT^&Q+6GMTahlr;Oq?{1y6UQwMqdvn
z3RO&-F^%DaYB7nSaq}9wN1e^QhE~??YQ~s^Q<r^?3&wY`V%<K53>ic&I|O;6qm@Bp
zrZHvwXsYsw8xO#kgv3~O@7%{Ja~5#MX{R#p)M@0LZu|nT!P2;YCzIx$#ilpkWa^n`
z(X?kbEl2mzRX2rm=bgqmOBZr%-(I@=<>^7~6<`ZJyz%6NJp6hiH{SGF2300lES<;O
z89sSBr;Hg%Wv-WPyAHEp(K(zx?@VTotz^&sR_g2P>27bPt60T^Q$|x=nQ&nLZiY`e
zlliBf#^QN1Qd_L7rNOlG(z8#qvDq+f+%RO-OM;3I#08EWKf&0^vp8kcQ1UsQB-8<6
zaa?PI)qoS7-HaYTkuf8OQjs25bK9iGB$&jqZ|4TK@7%+y&p*R6FZ_m;?{2|WKIlWn
zhJ+&9-g$$~2iq|wp|AT8@4ml_4Xa<{sb`+!g;(C<Sa+XWoe&$tcC&S76X!2o%IvxG
znLVP&u7e%U*(@qp7khd2rPW+>`weV(?G;Y+`=by#Xm+$8re^e1_N-q=YxPKSEqm$h
zIl}(_@mzf7Y!+R#oVKl-RRIw`NoR~@^)nCf{inBb$CqzlP-^ycx3w~4@=PX<9YtkQ
zVC$xBoVDyCrq4K)^X3d@)6Q0E8-~(ztckYnJX0o(p{A;e{X0Kk#O(7pZRQ*<STsBR
zY8DGtR*Y;qe%<IZm{M5Bb8l~9<ukvecJ2~}<WsPW=ru7|ats|*#gQXT)D0WMs38sH
z(}QtzwYD*2@^mJS8%1RfD0K3|OY8Z{pIpOhFRY+o!PE|;;@D1}{^c(@UO$1+b(I<Q
zZrZx5m^nT5S6flTkcw_zdG<-x9_(Z0gppw&catO-HnfiSSN?`)R&Ha)jLB4)HrBke
zl^yF>^Yqit^6aly(0II)jqh#Yj3pOv>h!6cKJPRJWh>_=Iy)IL_7ujB8lIl#1Z?pH
zC+r|*PMw@ij`nS(e#)6Fm_3W7OHO0&_I;^5E-0v+o~z=LFMN&bE}WaLa=W;pRzI==
zPYSwV^)4#_QoB+S)veN`1Ny{hy*^dps8`U#RTr!N_r@nHhQg1LF?B4G9O(G<bY#LT
z<LXJC=Nr$X@Gb>cCbP!`_bW+Wc3BbAS;^;WkCRcD`lJ~YlZHS}SMN9()Aw>V%;jfC
zR_A7k*`hw1>Bzb|MHf_I<)e7;thvv!s4D*I@K8m8iS(tPwso=vV-jKtYPC*ewN6%@
z1Jc?j1H$EO>2I<=PLb#dx2w(^Mp7Gz+#X<`QE(Uu9=d9aR||8)h7bSm@??_)IO!Qa
zZ39yE`vh;Yl`6$@<md!W?<-ut{Oy%&^>|vlq8zvPY!p3|f-QqaPviV^7jxRo@x1@a
zuXug^K8hIHk2W!K_Cgk&K9RkfHnL~$9){04n|V`5@ZRdx9BVzy_5&?63>ik_j!m?j
z=y3*I=<eV|ZYWEaE+RR8h&_kX-@r^~?job1uC9VUGnjd^NAberkFx!6J7B38F_SU9
zAMoUB8`5tC&kSGhmmfKFn2D#K&8*Q?yti&WM~@z4Uuz$AoM7{g160-zrt`=SHg4TP
ze$;HvK7A6~*RA1bTQggCHDU|hG`9A$c<B-b^*6I)UlaZP1;)>sN85%Md1J#tNGh0i
z>H@|MZ(#C_IgB4{*t%^GeMLCY)<or~*_=0bGJAJ!r6;MSuX#Uvj<hp)a2*GBZeiE%
z-7sVd3s0X)U2P5fH>_mi{&o^6HnL^Q{_Y-XYV!Q{_18F&<k+<K9S$@$vbU+1vzMI5
z;GTUv_T+D9>FlSfwt-^D5q9oxf+S6jwe^GOJiLQ7Yd@f7!Ys_u-5fdIOMA;<hE6+^
z1=B~eYs+SK?$|-?gwr{D?j+W|{3}*%+|S|0!;G7~h&khG*zn%_G&LXQz4tfLQ+o>M
z&7aDFZJXJ-V+;G*`k8<B1uR)SkG}oy@zksD(A97KB*XYE6l0*JsfqEYF6N90^=#g@
zlYDJGt$Vj~xU<OM+8o<AZ=t(rlwEuM9zS}Nai=Y2!K^Xt*}R$Vq?W3_7Pjm@OijaJ
z+7Inu%k~`{>8ay_3(jNmm?0eAvz<e2oq?SjAYWZaRrgUg?K_G|5(W=$VCVbq@XqFa
z%sqPv^XE*(b{t{*zN6ID820RG#P+qbzqy;PBfD7p-Ukeuxrno7jpfa!ALgCiEtn+7
z;6XKPTEEeMV?EH*lViy_6Zq+azvOtI^**Tlvrx2DRVKXi<|{Py<Z0cxp3VCXvvp?^
zOFprjaaAom@Utgr?dm6AT~B3C6Px!oxwuf>FofQw9lZV9byS>k8dWX3IC#8=uGT|T
zO*)HnPoK!n_3yHO&kpS9GdO?2EH=IJb6(wWkmdus89jF?^QI1C&C0jvIChx#-~WKN
zstJ7JoZ0Nzw2|FAHnXd#LrsKIrye?KCjC3!;@LOe<>0~H^bVfFiEZ!DUpI_>8{X$w
zG0(^bSifltTlaOablDObx(@K(?!)Zcv4ys-A}3l7QZsP@=bS!?T^rx0qYrxfs<`m1
z!Tj)n=jpKqQ#F`n7hlYx`E%I0@>y1Vu#5hpB}w|(*Vx7R%a${waD>hKk1==g1uQ#v
zA<5AVJn`!{IngKIYi0r)7(4w;E?RyblN!2s{IO?fZ0};*rmakzdp3*b&7szIvVG?s
z4({4YSN$ZGoO33FbDeD2x|^ONbTl_HV&<7FnmdX8n>KN*w}7FCgKdUo%g!f%d@ozJ
zZsSmQJxi7>rf}dL9)EE)U4<M&>T_({yq&&^da8Px*}CU2MRJUvJcH!e9=7do&Q=^q
ze>^-}tqEdY5c|mPiN7g(*F~B0DM|7^wU(Pu;o6W*fPBu<W6Dn*k=-|j=(vpB)=xs}
zdr{8Ha)Q;@T;q(V3>){=Hu!w#zFxu@g5E~wP4#T2Rrh2OBC~O0Lc0;YIDXyXMW0h0
z2qj~pcv2V`^;yejj4!%0?g7>cvq;}g|E}IJ*<{Q=b?fx)SXR1->lb{R^7dj7BQs_C
zdId(G1Npq{ehWFy2gcc6`cj3i`)K_CdfXCuvcFSj&I)#b{1Rd*{R~CnBfRBWgd22G
zZ!LP+`vcuO*-I<l77O%eKYX9d<w%TQ<}VcbDO$+q^CXacHdzsp{w)>?m|QLj4%rV-
z7c3^1BbRf>&HdOm{n2+U6s;wf%VP}8T6`HdUwr{D-1BYT*m@)i20|kpqtM@vH75I7
zjr7ssg0<vwIc%YTH3^ByegNBAk|dp3CrP?;CbF9(N4~0#C6`{q6^ll4&%b<^?JYeR
zQXUJ1G&z_gM?RNBs{TTuK*3t_`8*VhSYyZ;L!nr}Bsr?;MsU$(7csc9lF8%i`T4y+
zW>-shNw7+>E|6q=n<VFc53j#a0F#i<r92ml1*|pcZ*a;=PqBzK2|1JUM3Qq)_!bKN
zSZl~<ds+^QTrR~!+F;z1xw)M2KOmkYm<UH>5^}j*I-3ggl3Jv;AB!QM%VCQtUnb)r
z$>njgI<4buW+r^+^11BCu5p-%Uac)=x-ls~F38K|D7Ei`HRN;Y6NuVg(=i~)@GfRw
zmTI$^qPL&a50YG7;aVV<&%0kQXc#w}n{K?Gsy#3AonO38(Iml)Ur(S4g+gkh#neWH
zW`BPHj3J--g7BWp{LKKxVn5d8ND?UaXS&YfOVRmjMe2ja^asn6TrSfs4p;5NwwUTV
zNeq~@t%+im=Ow|I)b~s-pW16KM}nolVAJ2|wU+)uA+;Tw#(+e8KI1W$%QJlH0&ctM
zTH1g65O+Vn-X+0)X_M(iJAQxWANeHpv-BLK_YYCL#=Pi!IJGCwOJ>hWLM}-kvayBq
zhri{T=r4$cVyCu{@slT)OHzyqsxQ!AC}x-xv)Gy8m-%&)BtZ<1V|r*iWU<G%+!X{x
zSJXw9VZcP;R+6LDDYwOG<lwvYDNYxQYDFg=JYHps`I5p(v49+UkL6D5EbD&S&2N_r
zZc-E}+_J&_*y@mb<ePq#%Q(Jb{7ZOGtH=;st3GFA#^8IJvN9#BO(x_&0?a>8S;~)-
z&-%hJx~Y4V@*IqXlS}hzT|uQ+x0KJ7)H1&Q8R%6iX9SnxrR;qz15vFN@#W5KPn$8z
zhVm<;AAtS<?3Zm_?&|MathvsVQ{+eJTBh4THc=8(J_dd5NW;fXW)LUX+junHj`(*l
zTb9>>UP+$u6DCn?Kg^-#_R`2xmVQ)RhHeN+1*6BE!mzpunhx!!=~!n;QqNirV3%<K
zDH9M*G6~SV_z3Mj>T95<xIQjl1GbBkLH2J+fEX~CeY{un{L;tni-pRD;Y=Alh<$tZ
z(Anp=5JILtgnqe6=7%-nBihLTIy)IhC!0i;wc%J+w-Qh~qP3QqA)}c%yo%ks_tH}^
zCxvOCxQNvWw@kbmzz*ZWx)hran?eu#D$mD<Qz~0949LG8<6~rv;PS`GK_MR>*S}9H
zf4SItvi2Ww%y0;$@9f``wULH?H|rztK41U}r5V`XJ_5c{_njmj28DHahXGO5mGsGq
zs&dyOoBGgbs>b+e_Fe8m8RqnY+DTq!+Kic2SNFPtkAsjd@MIPdREQ32uvx___k&+5
za#g78nj2jJXSbrD<k&E{4;d+oRoj~7Dp%C^-k4B97ByotzDN`r?poGg%{cVz%T<14
zr+x${yo|E#tE+x@*LZ%Ma>}gexTV^RO*b)6E2LO&cJ8M`9G%cG^j35*%d>diqZzHk
zLi*_k8`O+3;8_hD>(&n+8E}k!Fm`s`Xp~96cv;EjaTK|f!`VOsp-cnK&K+HEgkJyd
zluBr?#5g#I@-oZp)n>eyu!zt$Tw)szM>SgT6D|5>6o1kN0;?2T%<v3!TV^Kc{Xigb
zMhSTTli~FWryEKC&ad^yI?5F$GG2&-5Ra0Y@$JLyozO1>8x)*ftP}l-njKMZTS~{`
zmtMb_K0y5f2o3{Z7-(F@RJ~@Lerz;W%v;pP+lE(y_fx#x$}g~}IOgp?<G`(A#Mr5L
z@9dFMd7Mr{-cUMp`qlk0B2>m}r~|yYh5l+OEnKWswr@TAdY8$H?S?Ty$7mm}!dO<;
zXEG-!*=iMAH%2Yc_`Kw8QF5&kG>G?}7w5?_zUsbb+mtN6=~wBlk;<V?Ph~nQU6jUk
z0bJU7tTT>qigxztCz1{uy!<{l>63V=+}7wVZqVX}cIse_@=f}$v`Jx8vCWBFx^ftf
z<?-^OhxmDUObH(vT&sRsFWpX6135pJi&}UW18k5iwQe*SmqgU;)q~)8{b;X`)p!6B
zC_o{CrexhoDd(fp{ax+{<3%&950&^s8lC*Mzk70QJ`UZJ-*L`Bi0?;|6>py9+J00T
zAGf`lYJ4m@C&Sds{ZUsQKc37dlkMZ;bTYnvA9Q{nua9ewCkrQKdkiqZfcXFW!&O=O
z0o@Pb=|4(8|6z1L0%ik_UFOE`(+7SRKl_h*{XWMWD7KWlZlgBk>{913Ae)5vV#UR~
zlHx@bNdpu>YOY*r-|BiSN2xCi5EE3}I8T*(m8GrfrJ$`<Z3nD<R52?BrTe*3{Y-$L
z6k;GT*7|QMmY4C|rQrIfcZ?grI<2~yu*n!P7Wd?b0B&to1!n(SY2#3H*8`cXRjUcs
zT?_u#?PV;&_qL>M_}OCo@1GL#Y&8#>o`Os6*-S>yUWl)?`bi_-&qQPRezEIBFCEZb
ze;m(x^`=~Btry{aK%-Usg#2ne#8hq2KD8fD+u4+~3H0n>aKd@LWUH=DFRE=x`%qW0
zUFg}E*Rk;9L`xk>$?9lXFL5|F9!H^D#wOFg60x+6D?T)R4UZecM)<3-6&9gv<!reu
zkiKu!M)Voj-9Taxwh0>%@S|-C!Ptg<9KVXB_tyv~v2n+f_3_Hdr|4SiX$9kjZKSWU
zj@i&Z0t_{D;*BA$+MWWu%lN0G>%s9h4#6+zgs{is`S;ewowQHJ@gW?qbPP|%+W2}J
z|6;o7N=y0iv@z+iwR|=jL*KNK5@LON6D{8+OJj~sh*c@J<;Do`DH*T4Oat{X!Sz_y
z0en58<ME_?p0T+2B`@Rt*SRV4nJh^8UpBxMYdSyr)o>HYDnIQRj2VsXS31Q{mA;L2
z*8}-nOwjU`sjwUlnT*0$gs+Z6E|+CvxK-(r;D@S@6bBLH(m);vk|NDod_kAB^Gh%8
z%YJ?7Yn`=bj2hzsSGR0d8J4^hw~CC0BSXn}!5vVDwiF;L9a+^^^DQ@{ve+$^+o&*6
zLEN!q!EI3x8oWWCR<Q$?(X!<rTLMOFDQIdaU3rm%y=ld(!bg_JLT4=ZvKb!IUT+<h
z`^mG6s+%d{w%^>K;gD%#PeZS+s`1ibW-_R}UAZT7oN~Mlt@XGCK}mhoPZWt==w)|j
zrsA}USJzYN%qaUSc>+n<kbZzG70oIPWO(Zb!C=%<yW><luxy=(cw$fD3R+eVw+AE0
zDh8zp*H0HN^0ZZ|+kg?LyD;V$U+idW&G6FOur39v;4Q3+%3Y##fiBXe^5@`CcG+ba
z<88^$7kqOQ#xS@2lLh*atp042FX-FZb;qJ{x=N|KprkOC{wkPDk`vwA3|ZsVXpQFA
zkFR5bmsuI9(}%=4KY=r<z3?8omdSMGarZgT(|7Sn7*+NvUyv)sv9sx{r;FvzZv*rd
zk9x~c-iNW?r3He^X<;Gp(~3(O2U(m&!=F*I?!7ZT4SE(=!gLozX_Yt7>Q-Z@u;M~g
z8vBAaqQlsJW4j{6!P$=WyxT06=!FTL1FR|vBxc7iEB%8aM%jgnwQ}8t@Aa0hiq}py
zC4{$=lwKg8&S_9ePi!*SFIC4|f~`dB$14#yx)$l&YHan(XjH@pLYokd0@n)k>Y?+Y
zL-DS$!Z!>bB_f)^N7b_9H}ksk2Ngv_KK$FbnXV{}F76~a8|M@fiF^?8i6Bsd5;RnH
zDc>b!v9we4eQ-!&ml?aYw#?z?w50|%hQtyYkcLXeE@xC}MeZ@uvt~c15A&ioULODh
z-*b|ALpEv(WwT)DR(WI$&ApBEB0<FXGl4Rw<5LQ(jBBs^jJiN)y>+G7I}jZQY0U9F
zxI4nX4lb&ovr6Xh%#crHHfo=7>t~lLDCiRyKG^v)N8+$rwa$ryq?xTy=wzyjpTKUM
zUm4d|u<}_*W1#wrKe>-#^pWwU@N<3<_@I~BNI#`XrD8|do$XSEQm~Sm^>n`N-YcG+
z9VxwJyjyMHf~B@cWUIzF{kX`RF(mi>WX|is#oWw5ZH86A7ux=_b!QdJEZ#S4_O0sz
zm`l`xkCDD%9&!3IqVX_p2!3hdDZU~x50?~lJe6qXV=k`0kM(W<XS;s#qA^o3)`cIV
z3LuYj91>%H_SKyUY$cG@#`!=ZpSKmSHw|Q9o?bMG0rFoE2dvXe;Ny;v$Iq(5-(e;C
zRh^U<DLAZxQN>de70JqvecMpBz&5n7Vm}gNoa!Q5a<;F71P*U~uF(~1<*rFr^(D|o
zm{RfAg)p6?vqIMY@>7PX9xu?9w;7GAw-F!1UC!{X=&@Dza0R!`{7TuR2{CcGOn6sr
zk?!~@TNV&p?cyw6HPH5*w$~L%Vo?Gpu(e_70>K2yS~H@6a?YIw2)CW+f`hcpl&Jzt
ziL*(NGhTx_=;&W;2!Fjn98!Xw*r#fv-s(bE`U$U6Na5ZKE8a+qOH@AO=_Kg)`$JsC
zi41BSH0U9(F1ey`uwgTaGh$n@*uJ3D^o*jY@C<WCVEFnKBn4&osOtgd8FQxOz6r2#
zt4OY|msxb-x=R2+f}IL<%Csd|Xnwuq;WD=4LvTc;G)c6oXRu;Z2Acpgqw)4wy8=Kn
zjd8LlcXaZ2-<8!h^~(B-o;7C97W7aOpoC9XoR-QK7I&$zZVO+<0IS=g5~#zsbmAci
z%v$Lu#R6fn5#O__K!|M4D&T_nW#zv27va+imMQ?c*pL$PUSl<XVppC8iyQ;+n0O6F
zs>-yY3m5;LYYyhDIP%3);45y73?Q^W_{6FU2i?!D(7KeO?MwNNV?0O#j!7Hc7d@hA
z=<ga0@K=R^*Qs%n7agB{0p_pEu(~*tamChT;-p2;aY3*NaaC_rJa&Do_g)<<1HTp9
zRyoASH$QF|Yn30FD6W)=JCSLH7-yf;|8)WpLJD&kPsLH!R^<er|3Y1avBP(l{2~*_
z)aR5RN;@S>U<)FTG;sY?To?YW4Q(sajzd<*NTutjM$4#YeT|xAnb1yxRrZJ$#+1P!
z)QyXO&L#-jh;3M7WII)f6;yjRFj!~$?xKc6zl<727Xz78<qEYd=$FZL4j^e0uk87)
zBNf;^7U38{N9}wiBtrg-N=6aiK~mDRh@DyE>zzXP*RzzsAFZEtxd89*joDQuXoGS<
z@vT1E@RByC&<M3@>$V|%x2>Z2b&XWIPzlK~f|_l{cq55TCuC0}br3x}_QBb$^r*2*
zb)8ipsHE=lo$?QzNEQEb@Koh$^oow|Q-WQ5G(n9k7fQAt6efY0yI`+%R@%=GZo`C|
z-Ox!$zH~FK=aetXY&R~d#ENXlfkXz^AC)v+2}u}>jIuAA^>;e)j0Z_KlpyeLU2wWT
zSyI*h;xN^|79}#h3MYk6a93>Fa}x9oi$t}$-~jV|1A7&obixT%cdX-El5{j>P?M8b
z`Ld;FBixnEm&9=IVcu3P8fr;yvi0YA30FT&rU&s+R}d&X9G+JDg)Q}&OxR!oE%qyz
zSg$92FKv(}9Bro_XQelVw{>wYlTBm@Y=z9n{iMsd_#-x3e#b`#Y{6Umm7DwuQjxue
zTxT>x#FQ^;etaBH^<h;HPa%wNes$Wp^8iI{$cg{piYPRW%6_sl_y*ZdRyjxT5n5>b
z@$mxJR<{dxCLJ^WPb7y7;&l?wZCn;oal|N2rMtTl_`D)yjLR8j+RRz9c##gMgNT)Y
ztmvt15e7!9*ol)m9$BN7XKh3XF)|ap1FP6F!JU)7H2*Su^gfEoC+}}VgHp{(WcFjI
z)^UKAdl@b<ccREL=3T<uNkka<5%O}^tu5Iu)`bk^c!`LGWPLCz?GfrRJfBvEUi7A%
z9>R7m7q}!bAQZvaWrdH~qMQ=S@euk^WbZCHg@~L~Zv(vs1f<-}GNMUf4w7cnu8cYB
zm96sZX8J0jqwW7A_M=Q6%~KhXov%l6>SVkI_$V;S>sR@Vc9AHSmIfbxKUfKjc?-<!
zBtb30O!9Y$pULoAq7n@KII%63jlTow;3K{Uis`XmhBg#9Psn=|J3<-C#i}w~2exiq
z{wdeo+e&u9h)nK7*{(sB6Z^D8c_QQ|UDb_)f26N6o&DKkb&WJO513QS68jj+{#}eg
zai^l;hp8yIST;9)2ruPwz{x_>K>0VSTtfXqi4A^Kj68o~heWJSW@}bP%U-S$H>hei
zt$h8ATH2?dje2hZSDD7E4Z+FVY+Uu$Klx<>2a1oz#_#oNi!55Fs5>+qg&4bFWkJVE
zwF+WXJ2!K;>RLcsjo@Y^QK&Y8Rw#$)T9ehQAkoy^$sVB`1{?K8&Su1`dJ^~*+Yuo>
zcagpZz3PCHF-#yKcX~@faEO_9csoFhf}l(fooo!5?TR*(eic7K;@7XZ=vKJfkj=6@
zpzJ7Kl@juPW~sMkGdv<4+t3&yYO#MsyjmM*&P4QOsXrCdIvL{GWEeBl_8|kkOer1R
zBS?nc6T}k3en#0sNJek_%4(fnrPQm#kyd>lZFj0;yw(|fH0sQkQTKGks%<-<8#dM5
z9e+lJSRIt0%)@0K>vD-a7+!zUVM<y`!BBGFCn()1TnWrZv>x4%lQ>rDl`7vRPO3T{
zJLfLBU%Hr($`D0ZiyBoDt>g)9WBR;bL|8uRfgGg%gG*&m*I;#2fG$(Og{#jdij^XB
zDgMDoK_1ZY#s+5=gTF>4pg3>IeU-gj9<r*xBOTJw=evW%kFq;Ox%3IWcx)AugRwO5
zsvdBCB%YyOaV{&3wP6fVZLGf*g{~4C_k1Z@1H|<h522M1zl_U~rNr^qg{(>mK?jTA
zDAfYKFteV8Fc4%k#6dHomcxR2Il4r8s~tCXY=6#5Xa~7af#MG~D==lHAxL-`KRWsn
z8>UAsSf(zcG&wt{d6B{~Sh_XonH<l9ms045LBbnNko!!As7dLRKEZ1A2PKV>Fyh+#
zWn=|}EY7l%001BWNkl<Z+u91L7>vV6Z=cDGWuQX9FQXf0qbGe?ofy2`1m!`-gSFPv
z8;Gv;E@@rglMQcOqH*n2X7%C620AS#ci<()pZ(H_!FmIaVeoI<7K_dFNSGvb@^XAC
z`-y2vLn#hzsZW<2E|%*Ghk&99F)pW&aEn@vY4rIdquToB;aSBnis>0M*dRuDs(x&r
zSj)1R(b6(R39jUh>Zj$+{wcS0?T>g!+$r_h(qvP9pfGNoe8<CTzt1#Kx{~eVW}KG-
z9Bqg{?sLQ--fna}E=_tqdO0gm2`$|Aiq1GY+Yh5?D3vY%^k?gMw}j)TyRxNxhICJM
z-2}E27YMrG^|`kktvGjP0-JJZ>$p<Nk##Xq=YFI9g|HOMwBoiTO~5nY&!H-s<wm!`
z9gm57sk|I+o0{yaIy_Z<_S27{a%)zX_)KmCOoU&z?wZ9XLp+XtmEl>Wa*zet%Ht-n
zJG-0s#P0XzT{lS(lqry`KAIaIOSaZhQCY*_!Go!<tsybEgXn1jHZI8qf=XgEhRTWx
zmu@mmP*GJ&RX)L;t#g4Z10b_rUwfr})7e!7q4Dw=!O!HVt*Is<O{$XMgMAJSd>ojJ
z$%-yX23fMSDPHM9fu*9dn#z1G@HFQv5^S>1`N|p^1`VdJwmSQL&6IvcMLr$VDm_^p
z=+kTYib}83tbe|;nyQMNnqUZ@9&4;6$yJcgevee-F-e}<>MCCZ8GTO57ig|Mp)D!I
zGo~|xwBfl48_AXS@++A#KEe22DY^*ox0D`@KjY<TmkEAa)zMAOqwe5RgV>m!I9T5f
zu?zg9p?vT)IzQ;EVUnN>W_t2rb>NkD3Ud`2Y*=N9ATD`(J!kafM6v`lEBf~V$Ckm;
zs4?8(X33>e01(O^*cuW`eC#25@61~J2Uhu{#=<&3(GxjqDbI^eXx6!tvOfacv`zw(
zjeOWxPgm>HW1z;jVG7dzC-S)>CW;ZdNDpmA+X4~2*j}?FXFdO3799^|q)-NbAl|w-
z=p~B$+hObJdPfWC260R2(qk0bm(MF1vH`#4?J%@od<M3-1q)f-bAI8dMf#0&?I*kG
z#c(0Pl*Q9eF2gwNZPVvk50h_OvLfol(ekSap!R9q&gESIi9mM0C71Z1hcJ*)=T+(E
z7jTrlsJ;>fy&YN~U!3EswpLBpv#|r8RnnNqu2Pzwp4UWJR5^OqQ;U7jMc`oEC<hsz
z6K!CtO6NeLD@rjk;m+56xSwJXSxUFq{JpB-Gx*#8{ttZpFaDhW_V<6nltI;Q@M6&h
z09b3W#UjOG(HnO~J$L-kml>k!9E)Yqm0#i03&*CPRf3eh9{%*2@FSj9EQWP&yiH_>
z%;X>c+g~!aK5=6Tk40N_GU%;SF+%@8DL<LhdB-fK+!YI)ea%<-!ufO3A3x6+43b>-
zrgp@1{`_zMD}Vie`~`pi*MG$9kqv;QX6$L)as6dft1azx?4nY##Z(R9`de@2l!gk0
zxncfA*Yo)cre_#PFetiy7>g|yIP;U&aPgT_oX)^fKjsX+ddKHzsMJoJjbF5irVaER
ztR~|78iPCEVem`-dId7xPSa19%8tCBTxk6oKY4I816K>K8N%Wo@!8DagM;^3)!>RJ
z*Cq5PgY_$&0iVV#Hv5$U<7#Lvi+4TcYbF~@kDnzG@lSVM3RcIsr!5l?!Bd3y*ek39
z{m3jjix2wnyyzrad31f1PV`_VbAsS$zW@-`-ti>eeSsA8MM8Tt77lzhzF!&ea`;tz
zZEup4bz!E&bvEuW(o87crEzHYCg3T!PT1EdJCc6_of-ev_tkbBCKas%{l%?;jAi)n
zI*)B8i=&E3pQC)vNZ({_RPqh@bo}|-q8qE)qxe+~f_SCvMZ1MQ(4g(g+d2M=X=T!4
zQko@OqZ$uKKl|T(xos3vV!R`}j`ii_aP9nMy|kij#45Q1J-C02Dul8&(bLf-KIwF&
zaB}fX)fKAn^J8cK$GZ_46dA_Kgh;RmC#8P;6nwv%W!$eu_;1U|eAC5f8=)DF@56=m
ze1gK+p{IQq|1mHrp{22O1uecdFnZj`Z!F~HgSYpr68Cnl3KBvt#F<o4XE4lOawW;;
zU-1t=d6T5Fl44IU0OMvZWck^1=s2*2mtJ4PiM}GW!zS>F3zsmgvYWSFeTiMiYWRaM
zT+PK7ETf|R1RK}A!OQP#XY|a)oI0Y14exiQ<5)|sb~qPabRlEwi>!L}CAJ-GW#Sp<
zF>Oc{BTpGm<AyhRWBqQ57OI9$;))Aq^73;pbF8<B#BhZjn1qUo3g?tyut^1T7oNu%
zGsbh`$ZlR+xti9#3eH=)n5`>cr@d|>OXmz?#akaR_q=7)kudR;p}hOri)=dBhRIj6
z;M`@LK4l~aHowE`t3SZxa*UjGD%ajHm!_?&dF|aD6dbCVVOqn?h0CZp_!|G$Jug5-
zCAP1Z;S)~hv)5h2C*}@={9xYv&GWps?>MKObuMSjIEBumyLn^fYC6ntF1z|FF1=_e
zV=Cd@HLG~#?Tw6>KA+RaSFm%{aVL94?J&+?zMP4}D%tk#O5WPg$b#jUarI{|qrQ0(
z!zQlf*U!F0n;F8=C39)oyNkYJijg%5qb8op1?SEu*WJX5mtUi?y_fMb&t&PMITVi_
z<mFf2;Alr5CbAWs@szm*FRPNewPpI~zfr-KafqxO>BINYky%GulC&PnuM`jz8O}^J
zAK5i`=+($dn<;T%<w7O21AOGV#J@NotM*p&AJGvxUEIV6ida^ovC|l3T<drsPot)!
zJkIiuTdmE8(O<^v#I<B!^<W(?a$k5!!=pYlEPPp)v_z($fHWRs4gX2)EmA{>n<?K}
zXvDXra~ZqW1&PFd*G&moGF(8*1b(9L$1>|;x`ZG3Q097xf69eG7B7J4*eJcktP9UE
zKQ^Kj@DX9;XexOFW;34-ZNZNj<7T7wlzAUwq5Gyr?rDd7#Wtv2&$@pi`NTH#NIC{O
zfA{D1tg<&9bHq=?*MecR-oQ<)^~$v_T!NSr@vIieg1RD>$<wlavKa#q02f<QXj^ez
z>Ee_ZGRP5raW2!D3FAvJe#BRJSz=@uaP)Iqpmbra!#~Iku8jd3VOA=dyPxQv+;F~w
z3HI;xj6)0|on2-9^w-ZDIJ9#UQ!coU+rDrWb50pb(H5y5GoQcx<ImH$`8^s=zntrq
z&!Lzf%$NW2FB#RlpZB*PqQ1U?LQflSz4kf>k2Lby%P;fZ_Jdd~tp~Q!luTgWl%eTR
zMa$)1{B!0H?PhCpHGlHQw==Q|CN8{!YnDvnopn37^5$EaP@4lR)k7z+eCfFitx8f^
z@CNG9)35A8QbBdn&(5uzshP2yn=V^OE?>h1mt4%KsvK3rr*i4CGpWeuSbW8eEFRs%
zx&wyWZoHDZ1m<0KE7zQR3LD;APj6)%m5HHfYnVQDB>N5><I^|Z#MHr+p#`SXJr3{N
z#n>~i;Eo$U$J|LHvDVUlbT6w`uj9bpZM^c*3U(bij!i14O8VKgeG4_yF5rgC7h}6x
zShMOa_Uzx!sue3(w{;%{p!MJ`j`R&<{_L??wGXFXcr6!AuVmf&t@M+_m;yW3uVMYR
zJ#2b!H7i!G<ydclLeDWaZ92s1i{?|CPy5!6n#CV|?PmH8ZDr??6ExIRk<^ak_B*bn
zWA_K_InqHspTi571%+ryI7l>+H_B>b8hw;qW_LxGzW7$#11=DiIz$xfRj~l>DQZ~<
zcQZOahVbTcjTOJZ@g!S{p0ejl^QRM=lR8=z3;4ADv&PJ*!;MCG3+y}uHw|7I+w`y7
z3bmokVS<TtVyr^hx3xBE7cuXWi|d*GqkYurSX$^WT_cNGHeB^B-&c(j7+P#EHog_G
zUfMq(?CmJaWY(4v(Q-Sv<htTFR$(dM(N<f@Se~w1g|Nya19GMQ>G@OFb%J+^UeQto
z3?BtEbuY`Oq&^e#<3E?fRQd)crgSPehg;_gA9TF&@3lq5lO(;>EA$*~ks#(j^AG(~
zPeNNa7ZWLs54f1*Ca6I!5Am_!CSGaLpzg;p)%x77?Ni4uzx64Ht-^Q}7KjmE5(})S
zWbi9w!G2k02mZuaoCiFgZjDz0h{OclPR9&m75||f=m~4kTg5v43L?A-<p@sG`KsGf
zRdQQRF9a<b&*PBD_di9L1z4p)MV~FQ>UX@rouA1xedL2*^H=}yZ#4GQaL0f7AAIuE
z5loqTE<<zOjF>osDw8mG-fZeeO=4Q@Q67JK1@EqYi?!PhP%QRv=-@#*Iy*VAe;-Y4
zC$K<I$1$2(j+5=!QM7d|p4GrJFRtMA=boXpc06N;*O1V~y0_P|W$PB2dTJQfkOK_I
zcdg=Y{?Bi)r@c=lP;VCHYF=Y3`HE_0&Oe(^T)3P$QztQL!l>+Qn%|!mu#0`otX}m#
zn>K7B88nire8Smh&*7D)p5>i&>v-+;-_p@<$tAtKyK*IOys?sf$8!vA$SWOXz#GG%
z4X^TF|L1qu+gZ)^fBZLGdDaB_x{h<Gv5Ai3ZS32(pW|J<<SVL~amHdUTE2`qQztWV
z{Ad!}!{I}Xw0CrH=)iuOTHC=GdODBO)Z9ivIb8F>J><vEVd=s-^!IcFmSfFFINH`u
z>(NH`A38#B(NZk*a`bQ$-F*dT2V<wtA$R0`o_pnOR<C@6ZHJFx`%bXuPzPr%S;oZS
zRdo0CmEc@rb7H41_$%jB_NC9U>E3SjhwMC~Th$38N?5doo*}a$JsXo{d42xIg<W6a
zD2MgD8$-~~1pR$4nXP*H<a#haLwj<t@*wOu`XaZIA>u+hO8~_k_BFbq^}Jy`#>P`i
zuS)_nAw#HDOPP6E!Nn(xKqp4FJwnNaM(T5(U%|<*>I~r1CY+ecjNGc$F5t&AU@3HM
zSj6fAK&S{^w=x;nXG%kVxvWm`PET&O;FGBo1R*H?{87N0!ay_ZR~rc3BA^}WP0NP$
za(LVcbc~5?I^fsmFFpBj0~n8?!%h34#!>OA-v>!hCGK=w!Nn-|2)Ca#W`6Fcwp^w5
z2xL;ciT+hQ61#Rj?dbaWk@9F2&VplDV0ins(YUsB42O>LF%>uhd(&z##$gp4^(e7t
zp&@a@D8#)FdmMedaovKr5{>B*)|&L=f!cvp#a5&nq3Vb5R-%tC2ve#4yj=<V8ru>c
zWzrjCre($iIukkk4us}K>pGN4^NKMfx+1q&7t8%%sWNQ#g?b8CUra~WD=RB$-M^J5
zA9;YEzjlz*W{#%2>jce*_p^EPCRV@n2><%fEA;jDP|VlSP*XvYB;<05!q<?~QN~yb
z#UlB9J`<~-&i)*Ot1HP>*HWDn>FX<G?J}MnoR+#%=J8j5`6b5H=Fv|AD%F=_1Pq&W
z7W0R<^4)L$Gmov<fH4Wy7AcaWsv<}2ph48+<RRZ8g&=!aj(7GmbZ{+6l912mGv6)J
zS17tb@BK<0(6mrdnWyd04xV}RCp`Mf4rb06?~E!*{4b^rn>e2ZquTl2zkZv?R;&k;
zxFBH@Ln6sfxGY+7`Ru_FYuW$)i~O%|e1|vp6uIs8>lj{<oe8v%Gl>VvfVBlkawHkk
z-CbQ&)HG0CnUKrn$t4Ec*T%y?_!d8W=o!YIe-)Q6nBo#PItU{{TNOYK3*`_lkcxqa
zvn!jee26?@osv;eLB#jb2_gN*ubk=rQd&#ORK<+r-6$H;+c@6*m@fPT!9i$vWYnQn
z>yw+%bL|3}zTonf9bv6M8v;yy{LqPp01f?F7ceflyFjdywQ;(a#OWb2**CU3Qi0#9
z$|RWSD<=Fk$5ZLHjC2WR`PeYSNTSNF`Bj*VBbVHxB%sGI;>R|4?{GC*Hzv@(Hbu2Q
z;a7G9633fAn-e6@z}q!;GC}ce<f_oLLHqFK@9IKAH}Kfo>~ny&+;?`T#xXJp&;_JX
z^29X4Ns)Laik|E5k{S+28Q;_N*c0&?AmLVTN^wb(y!Ue{IkXM9L2MS|td=mtPTU0_
z2g8^q{S`Du#Y;qF(W;41%-UPex0()Qx2=*xbREW$a60NzPRUCqY(Z*_u=f@fM0Vxh
zfv2d!jJhCgB#+8_Ela7+rA_KvT65t$*VaUKoy9$8acX5&Vu!O)tL0OY+ISTIzmKKr
zzRLxWPvfMxgDJ(uS)B2cb#Z=EPG^=;r(ejOmz_atM<3Is*YLnM_OYv}iLFb%!o?R}
zKwqwooo_zMj@3JPc1;ss`}$w9zqx~5Z~dC*-`PQ-w~MWZi`;tqE&TTF*Ln5b9as!I
z*013Uw_nd4b#L<UuU_P-XV-A+?O$O@cMa_u-(XL3HzSHgiZ*@tz(4R{DE9Tx)^aR6
z%Ox;L<o6C<GioZgfAvpj?}g^w@9^ZxogA;bh&#XhRcc2~AlKDGuI~h!cC>TTpZ*C4
zItOFTcCb*8@vNoTU&I)A`Ik@er?=e3=miJpYTwTzKYJ>DNFrNKO&@~5@u7p_m^n+h
z<x^*K;zSP<CXC>TA8ZGpqv-%$gRbDVFJ8}z7hYgn(=Luxf0En3^c8AGOd{8PoP3hd
z-QB_7X3I4<-^8jnU*omkZlNE`-W?mb=8K=<j^Z$$c;-b$&cA|Boi(0f%?J+eSj&k*
z5n~LycI@U87hTB>gGcl0=U?S$S3h0t`#I1xf;;ZKonJruE8gAsTQ)9V#Gn7gpV8de
z$y-l7&bzHuTz~yl)Fnkmn`8X;NLyx45|-4#*<cpv!;mk?)OjC^3-;F9;OvnvtYoXe
zJAU9EYRq&De`ZY?y>q_Q?)}zcI*S(>bh;IO31l+H(?kU`@jETEU=plwsNyq)zy#PR
z>KRQUc?j^+XI}M6l%7xot4nT%pGlc963qMw|2q7P(vzOihH7^>=@n2Xv-<1wY{SHW
z`&+t#6u*3ND4$eN6--<ZHj3d=zHLCW9L^z4xALPBL<*--ctvAa8}j7%P=0}q;pOC2
zW0dGQ^05F-++lE(9h9Do@^Xl6rfml}cco<VG!>uGeb19|F+_U!l$6O)&ZmqTGq6KB
zkEH#~Tbm!>rRU6SLcj6$kJGcFf>C{gRepty%q2==K*W;=)0uGD`^sR$alA~y<VwXo
zXQRRKh*G~S#R(;QywBnB)f@#Yi%MsXzR0JZe%A<|R5pc(^Py}8nBEq>`rQPOiM+=-
zTo{NZULtFjo5T-<2R}y)CxXtL#de&WIGqJC-l)$i_5#^2Hblni{#b{b4*>3O=)(G%
z$yYLb<VYH-^R%}#(RAztV98ZAFmB99l0r94O-*$6^g~j?n6YE2$rU(q_%J7W`T;{#
z?I6aE8A0#yCJr?pPm?Kz5o5<QsG^^J2M*I$G>jNCmWB#T<Do-z^%bcdJdBFI4vuvd
z7&?3yU2R9{?guDRC|Gj2BusEA_UZdvCF918p{6qZ`i{P?HVzzYrh3Q-Mh>Z^wYi0g
z$~-MCZ6uZTj2$za&SS?QDbm{7PQ$Pf^d4)W+vXWDw3gPEHu{TDKXfD`ht$&1dX(n2
z4yp$YrJ}!+V_p3W9X6D%<1KXe6@4%f1?MUlHev(~HI;O<9pz|iM`l#eFl;2FhSt-1
z_#iDEJ=6^z&d4FPw6(O7tH{yTdW?d#RMie*%;*vHb+pjf)RqOYgkht`Fr><I;9w)R
zqK;7`hf_fxjfWc3?NS3Im5drQhPr%#g9i@L-CqO@)%8OdJ8}rej~=4Ay_<YhEn`NH
zBI)nq@R25Zi-sY?Ml!Uah7-q{X=*)AQKv39%~UI7I<M+iXlO1dyMQS+DM7Cs4Moa3
zkM56mYX3oXQbkmdSp0pL1j^ixc@By%pXp2GRd|&qz^IuSsBcQt7Zq`Wz#rre`bR+^
z(t~GS7ugaN6@-cml!_U$EWtYDU&YTduh>9LDF!87!=yKWxd3`H@KIk8MfI_)?U=Bu
zKA94>DI<k5o0CX!G67?KTZQ95{#4cuqR%I36WdBz-afQHxZkf0d@{^mA1<@TFyt?W
zy~IvWMnt!>kw3(@DUWgROG!FjmErB=@(#pXX)Muv2Dx7z*id|yC#(EQ7aY>5obOvz
zlnoI3gE$|=d$pEb#;+u{xu{f50Kqqk<K?B&JY~(xg>4%tq?}y(dbv2jz_G%re~dAh
zsWWF;R1v0wykk;>S!Sk`;$e}eJi?We(td#jlqBi&!pVY1Ih#FsF#Z@XmB-_A2q`=%
z?<VR)nf7rI)SpWNq*k0WLgfH)(2Ma<hEY0h`IQ;xOg3~1zkT%K@nHa#oS|BLn_wkC
z^J+A%QW(Sm#TN`BsHPF*120%ffY5ebE)ZF)|4AEd3)x*+1q`>8Ohh9M8a60WGrQJ0
z@xb_3xrCwvs>W06b%0`_G*FkZQB!6tC5xM_JDryI=W>0WZNyjy$tlt`G4A*$y#m>?
zxRZ?%kXLxCeBy1!wU5$s?3X9uKh{;K-vM)HX)#wyPC0ed?n8WA6Du2LgTzRQ-hARK
zAH%h5CA`SL(NmVOZTVI1jNh2P<27qpCI^+3SLHGP`;%pK@8X~G<CEa3x5H&se!!QN
z9fBy{Dqk1d4rIm0Ccz~3jdJl!W8i27@zjqJ(<;$@8GDJ+GsL<_>Bu$mG6aO3o+v4r
z2C5lK7*oTz7#IZq(&k~FgFu6tCrv-%N7HfdqnHZ>HWTv|@F&Q{wN@-7A0As1w<u>V
z2J4+H`-v?eI}j{0AnP0^Ch3!b46d0I&Mg6gn(<6#V^B%hhGR<bvF@Lgw}2lHs=U|x
z)I@Dv8wYIZJ>8{bER`6;q#f?@JGr9wDGpXSU&O!Q6rDXpun|m+?xDvuXnhHf;)GhS
z@)Z%G=L$X{rqaw~>-Axc&1j{%3APh5UQ^x_b_&xVm?*h~xA14*B4083ufoFFo_LKq
zFCk87qyA#Ek{~qJ#s{?E6(n@)0$-Ta@L#o|=s7;|0l^)3R@)oi&$L{)ex8QLGs8+5
zcxV}_U8GY3(Ro=lW@u1~c1BCn)i*Vd?v{19y1s-yCVZJJW0|Op+a@z12F}m0j<;BM
z6925SW$_WA9rN#eLH`7iPGE8^qvv6={(cl=LR(OMV~ib6*0&9Gp7EgWi4VF6p`P?j
zA+@S-isPuF>+*o_>%K=Xvc~{R$Dq=OvH@o|j*fMFXuEKz=x;}GGHCfrc}kmjt8p<Y
z@*Ba*#T}1^@|;lb?kRO;v-$vjIFZvhsWzTTHKOn-xh3PnDxYz9l`A#X*)SJd6ffE*
zQ8`TE>gxl~wAfM{8#BWGLm;gGEIA5LE72-?=weQFFSj@m<THh(Rq-NpG-;Fih}_XO
z9@0_c2Re->a3-)P&7<ha@h-XxVos)E(UZC!#*pk68`4moWfl2WdmU5UH=B#4r+wTO
zO;#lMIw_^+`8UWDWmevou%aQwS*#1u&X2W0c%*G4rhcUW3$MCg&?qxfQ?_r=<->mS
zN#PUBTFNnp=q@{5p$jgn6{zv_VN<t|8%9Me_mEA%v(X@XClXt5di2h!X=<m-ZU)@g
zOcn>Y_4fh{v_7-X0Yuj4O0iGt#__s$F<j%LbRaV6^Fub{$>{2eGBzoZQ|ZDLbEH0@
z3pu^&kY31~S<y}Jm!7SV{`!X1K~QNs(6RB;3LWNbjBg~Ho~?&=Z``k{AiiV8QhXa7
zXoXA9M-~s<H=HYsvVU};jvG$<v2g^weMW&@bL1GwXjpx|O|Gj!#V*4d!25j6TnQh>
z%i+EW-^WkiCgRnowi!?H5s$24V88CK*qe*prERSyW=tTvLCyh%@m$C2@Js!a>_S%Z
zbr2UaFswsa8${?A&SJ9h@G(}-2Bk7ue_yv%v7zLIl!^ysi?_^QvXqam0pmDm+>_AZ
zOONYjH|hZjqw!R{YNOYaEs-}(PbTaaMG5PokjNOx4SprkO5P6bx@6)R<Z_Qw2CSni
zm;`pH<Vk5uZ1nVWF{5*#=TXTowDB=TY+A=Tr$?tUUkCV@=wrXl8V292A#F%~#Q#!?
zim57&#`UBgLmQemT@$p`I@c-?-w~TE*H_m~4igKB*tqN&yZYYvd5Upgi*J#Y8j}TS
zgLU6@Y>Z!>NqJH$9Imebp;pwCf}Lx}xyRVdmqO81xDWpZ0V6Y_xYD%(pNkk_d;Jn?
z%Vt$>9Vl7_u?{ob%r&4ZW23L@;AmCgmG-G7bOI64T^@*DZZ;x0#VGoaJz=A_P(!B;
zS06>!>3g57eYA9|g^<~Xbpah`xFNGT_DFx(&b3&#0_<DFGglc<W|tY}D7o<~%vsYI
za+i$U?AiJiU~f+@w!5*zB9~xhC^J@}=`mB73I^6m?80-&%9Ha;MIo?JZ7FIsT<J*g
zMbUOVTAa)xxBJpnCy#M$grqC(;u(hX7rft#odpuf7=pWWReDr@8?RnCp2as67S3lN
zJJ;h-6{Tk@#>iFg(;0*E_`hD|b2`oPwyv{n7At)UA^d6+@uD;vhB_?CZZaN9pCXs|
zu-7`yWo>rW$huz~2{9AQ6-8^ac&+4+{;IDtN3jj-(RLBSD7^4|r2SPa5H^HHfV(d`
z{7SiANmBfX+~WU6$)mW7R}B$67ZhcD2bM+ekOWw3>|{k>1sC7e^CiJYXz4aS@A{P^
zq3iW3!YV&CF+CLn0+}UWiRpV?BR-tbj?;->sMV`TqA!J~8duA0;)HHJUoNL)_U%l=
z`j{zg0-h8xW2FYM-X`U<>npr#ix7@I=BT*t@x%Lz*kV>C>--@0DUni2CT9jwj5I3N
zN?guZ%*I#wMaz-Vl3lJOLMmV=CGF1`xX)I>;3ACjZks7tf@OByub5=_tTMe&M~ZhV
zZ2$lu07*naR9;pwDzTSnHt0-L7lJzRC}TH41j>loOe!}k)j>5hADelB2R#ck3KRVV
zN-&cN{a?pGyh&RZxLk%*rs&`m=aHlw*oF^=uAop7dorF!s~pCbX=~MCaT_t?wytz?
z$}rFct_$7@OdWKLlG9k>FXY@r`jGM5>`Tk0@Y8zqTQAC993L6q#@mDsfdgY`)fLT`
zqhUk-g7X%RwlA+rix-@Y5z}xNO1Do;HZx6KSKCRLK>gVbWfKub(ji<e4g<9G(RQ5r
zpOJx7uuS`TIh3Buyvsx+p4pdKi}qK=X7POcnYP!F2P3jL6I)g~ex%J{b;H@QvpE?<
z>|bN5Fw!zBzmYLK9@^fGOC(ZYJN~u4RdE||9!yeXbq?jHYHd9zrW98y1{q28k|$8H
zLECa<s1Bpxy4SPbNs(h0L7^QbVXs4ZXPvHf2$DXMIB|BZeJ_klzNhBY1b-z4`c)h8
zcN5rckl=%A6xY7o@d3Fnl1eNIR@s8dvd?Y)vK%+?z8L4%K<4b>1_9lQeF$bEhgwx|
z|HcX4M6t`LJR(I_z=tdnsfa7SBp8}BPnm3DmcnTqE$evDG1J?s560Gic6rFU$y~5{
z?cvD&>e%4@*n6LjuPIH*^?_%mhUq^Goos9`Q;K!;bgJ~RVIUU)T%b^GLs(qjBuU&g
z`KpgxcFknhit1Y0w@A^3-+|_fF!eL9HvKJAOya(;46@%#Eq9HCB=KCP*G!pf1bo{8
zc(fMGzlX<wpOf)O36B4M9>c)!{}qpxn4gTt2(yB<BqcniHW>1lTHf!X*Rnj8#jHG!
zMO)0SDSiq?NMaoS6L~Ba({?B25sJkEzi<C2>%&Ex@$>uSu@o~MXFfKM<$U-f@%Zul
z<3BGJ>WPz%<^Mm4g&AhQdo27YaVEskmF-yp^!xPTj}{A0=C9#3iNob$^M7nC{OCNM
zEPwpx#lqh`5C6x+!m$2Ize^)5zQZawD@xt=QB=qo)J%i4U2fzY{_0CtQ<XO)rh}jU
z%lFw*7{MQX=_ZEPSCAyVJpJRlS=CtNmM`AHv{5yfq`;fMdWh%WJH!<?e3?a4hX9sM
zZ#>B_US7|6pZOve&l?9A_OE}52cCGHnai)`^GjxeF|_V^hkG7)lA-f2<Mt~TV~nAz
zbvyTb{~`J(F6NJKxP*KH#hxSF_uYGFteeiC-Tpc1Dsvdx`T6(mW>e1)zH-M6j2=`)
zlJxNG1NZUffj+))%WcdYTaQWlS@qN}cz)dhF1z-NoIP^{P-N4}XL<Cc_gM6)o4M@F
ziCAF&h86teu@y{PdNrS4HV0!2Eqm8;--Ayvc<#mAcIDaRl7!Bd?L2VT!}N?_$XBkr
zgqn(kzOF_d`r-W?te(u*Zv7lXYVz1(JCEOeFCUy};Lh7_VBFAZjOpQphaO<%-X5;G
z=@#aUAB0H?y!Ff@Jhyfqmt1`d=gk>|rNHJ@FYw5(*K*cnH}I*&Q?M2eY+A{KkG{mD
z^ZtM@Tr?LjwC-ES{SQ1z{b`@zwoji+MJ}PUWd{%5^Dtdw7x1NPFQu+BM_*?nKfmWe
z_Ent1mv8<&!|U=C`;YVF{SWZ|@oH|n^(H0^tH#j7OTYLDukY&Q>Kkt1^a+D8roigw
zALH3Id${PTn^=0<I4lLWzWpLUe|`<~KY1OWId=xuS`Kb~iw7Tmk@06=$xWBc#{$O=
zZQ#fEK2FV?W!&-ErBoz_uC`q~aMwee7&VVCUw=9El?i=aM|kAk2icR1<<6V0Wn_Jx
zVt+f&{Nw@NZK>eqTW(>>h-$Dsta$t(UfFh>t8TcBGbRoOQ(*0jPw>=RJGk((x3FyP
zSS*(9YktGS&%Dj)pSX_CEuBfRXldN>TkikKbBtMdIk#Q0kfOD;AKA>kKl%mvnM?WF
zHOt8v=x*P~13!3>V<S%I4{yAZy2^w?Pcy%``^W5L3}3qS3yiAIQ!I4w+)p3iou-5v
zZ@G<WBdf8tmzBSIh~I2#;WIbf$%09PfCBHm{7Zhhax2TO`XU$39fJjSuKf)UJ-Lcg
zFT9q|UoeYe(Q;(>JKTHU(~MkjDPQ``Vv0peN6R+u`nQKL)6V73Zny}5{;osZ`>*%X
zGVE0T{I<_hQ(>@$HXi=o-E6ZX`Pv;fGO{iYwv*=`ypOjITW-AVPNt2l0^7%`ryk<@
z4~}r<4PWJ~$qj&I!-^+(WW`37eD+Q*nmZaW?0xSy-2cSu%vpXt*Djk$u?Wq3-{G!%
ze#P(wm-CgY7E>r#dXDYn``><m!qjv4t6MHjlUV;ze)x^=b7bfo{^rY9Q<*a)Ne4gq
z_V?J_KY~C0>a7f`&5`7KdHS9o@n&O@Tkre|Q%6)`Op#T;dXQ(|JH)4M`Xd%jZUA7@
z>re3Wm)CRtXYb_VGe%>xICKAFE10$Xdak)(Ce}jhzO~$S?=KlT?{dEU2aB;5x{vMP
zNB{aD{gW5-7dKy$#>BoR?)m4tXl$6l*YCWV>WYNKbnwu3e#qwDVSMdNH!`xJk|gQn
znfvcy)q#Gl|Kjb;7+r@k1y()vGoD*_kjt;Xlf~19rLl13Q#|t0`<!#-ZG3XUcmVc)
z@G1{JzJlq?uHo}bXJL$?b?;j4ec(xk%=;u?yz*?4EEfLwhYxe4r=Su=x(ZM+Y8HQb
z`_<G}rLpiAKe&gDU4!`2?Kd)Za1}|?%X2@uk2m-AaNW(fF>72y8fTvV1<$SB&!yMi
z#<{abVzF#~^I0DG%{mrdeiN53oRspo@pXRk*egs~auwGsKaIo~TK2umeGfcI!|4}s
z+o#W=B1z-S{dfP2?r~@FmFq60w!+ZYb%dYYbwB$nC-T)>u4Y(m8VjGe_s6VnujBSx
zZ{(C=X)OHp!w>St?k+xm<1L(e%3w@V_<y;3?*KW9Gw=UXGqYJaXQ7BB1PG%52!s$I
zqC~PyurcQx&N;pI`OfF_IsESKbvoZUoX_WgBM#V{6PRdnkO2uvpn#-J+Lg4s(uUcc
zotdupkM62^s=8MazW4p-*EZUj?y9Gr_<f$LuI}N<-~F85ZE5C`>u=@!Wz*4AdFhFJ
z_}SfCIBVn0Tz$@h7-wF3jN9+Hm)YlD!%df*fdY=UZR302{S}SN*7L^AYhx_zd6^%4
z`=_)Glrf8kW~I4d@<QHn>uZ>l;LLAs{~pgBt>d;g+{&DZF%~}Xvmf!$%Y9sb^KG0y
zC&t2O?zxlSJoz#kuDg|W%Vz))yPp0dKmFa)oPP0ZxpwVhwALKl{UqQ2@!iZ=bv3tM
zaTZ!bZ^sV4^X*?ycj`Lca{UF=<Z=uh+t2sE^+Wn5ox$60zLHUOIm$za`T4iM&ptJc
zx4hxT7-uSd{QgI`v*k#h*WY>@^C!nx_@_I6%>BE1xaQ_Jv0`qFh0osmEAD!H4;Nnh
z1}-{%2AYVyTkhk>zj=bwE_y9D7@T?e8NPG-@0jwc%X!nLRWZ&S*~PcN@e}e3*Yd6#
zH;`9Q8ffEtU;7?MC!EG#-?}Nr!r^Z2_||vWtEcd`zq*+zqYCKi0Dt)5_ju|MyzaI)
zSuA|)t{?Nq9Y@%7^IJIUl(8rxwmtYOe);f=TyX6hx%iCfXbt<HyPqHY`Vp3{e=V=M
zFy`^dfj{$~-?@uPD=z14*Q_CmG{v61eDkY6fCX!K|LZOVpjztW+h6`qy2dZ%18=;J
zh8hJ?FF*d;f3T}MiFdyF^-LaB0EYPe?f=CS2P1BI<6BucxeloC#9cq)_uD$S>Xvu1
za^7e(nw@|86?Z)N0&A~{aRwAE+wSALzj%lx>u=`91Pi-c|IB}U>rTe6xSDrezlKUx
zGu*eIuYK{m98>vt&XBCHsridf)ow3<T#YjeaN52{CAblQxp!<Ivop)0`5Hzw)+b;Q
zG1y<EOpZ}a4M|n087dYTu4)<^n<ykd%`iMvWT;%FzM+ZQLR`^Sh6foeRj93Rq^>3(
zE4N&tI9Q@k*GNNMT!%)LVT#2eRBb&?^|i4KQHA0_5uLB4si8jMwaUOikw_JM9tVn4
zw4$l0@i;tchKB|-cpPG|RHnABk-A#J<4}o0Z37K;H8GFXGR5K`s-~XCy4v{GMO2|U
zFo@3Acsv#did0pBrpEf1A007RjCpKqY$TtH`6(qlHZ(R;$mcANrE-<JhDK@&d5_1M
zx(4c#_uy44i5^ot4xtKlG&M*)hCD_T2L^l|i5MCvQVw|>EHYft)HgO#lkiv`8e*ti
zp{~A>+8V**P??%~$>T7^!4mnJdK&9P9&2f8s2h<-ts{m82B@e!O$`n43`<804HhX!
zC&6QBh{55QN8@{y@(_cAWeW8T_I-j;Wtf5D5c%2$%cHKA88AH7*TwlVitVa%H8eFk
zJq{L&RLBV)HA8~~lp__}HQ{l1ut=$*sc&ecrr>xS8V=)Qh~nUIDn4Q!>uD76F)&bs
zd>x}2>yp5)GSFWn$`u&Z*x+~^7@$I)CNG8u7_O2T9}Nvn36D@IB{AG^f*7ur85kHO
z7sPPPV=bc^>jjSkM7bJ9H8t2<EQ9_13C=Xdn|&pZO-+d&VR*2}P$h`r(jbE=J!bP*
z92iXD%<=W;<4jpAMjaPt{t_(A#u*<A2l40e$&tZh7S5DL#KIHc%<-_W5c}i*b1V#T
zCIbrx2TSB?8Umb&d2Fh$O=7r0u{cQ8+(8s~s>l^+YHYAPo;1!RSSan9g@sXNn1KKb
zCC)T9)Wt=m!I=QB2a8DzdssL~If90UMr+s7;2^`5=!7^^DVG==98P@iL^v}Dg<3`#
zyw+8U{Y7-5vlPxWo&XCIyVhHr83_yjr#N$RSXe3!P&y$NYAU6nlg7dn&gd!weFMou
zXQJMkzxdSDvp4<@4D;FSZE+~zuBUIYnn4mQ#oOpf>ehSuwru#_+^be2w>i;a+Gq9i
zZgGdV<WGh{d+L>P<&Sp~!=%RK6^|%s`&=JyJljUxZnySTC+>fIPB>vtkKaURCp+GV
zdbPZ}7ODMaUO%yC;v^-Ndv!vd!q^#!dyTSh&$lb$$KliO2`6Ra<A{{iSL!gUUuN6m
z5A%wTk!YPL#!uA$FCF*)4~LoMpS*w?`A)?DSH~;yq5pgK@i!1<(+xKn{m=ONzwcWk
zQ#_GQPe$jj#1;-ST+;hXvEf8o_vyMlkteJ@vhx3N+8^)yE5V$!9)gM@V9cB^xJ`AA
zLb$QdZCuoD30ct8Uew2b%|=bL0TRG_A_miuIKgR?(&E-JuVN#p$=G7h@R4TuUA`7U
zC-m$sV6$mZIpZeW#v+P62$m-ykFpGE^M8*=|0y9+pcyr~f1EkA8%I89ARkG#*F+_*
zM1QGL&!kCv_Y%s$jgs-K9V~spw-eXvV`Ue0m=5%5b&+(Co1Wd%OOhQIoU&b_1iyGm
zY^jw$mT7D0*&^BZ0WAd7LJ#JAYOko&oAL`W83A~c4kb-w{m%ND7~fEkg_jj2SSh!k
zg&u_1&Q`b+MxW+l9G;F>9XODcOI}k}N<YD6@t~biq#sE;`f|ItOjt{vl-HizDf=Vv
zzy^cRkmqEkC^n;-cy$j4IEPnOeNw4P(2eCX*x=&Jz@khxR%@YOS;sJVrJTbUTPdth
z%8mCjQF-PGu){DQk=Dv@G1ZdA;IG6r73$jVEfdNSf4q&`$~~0ea84u?1wQuqJea;C
zpqRr7FW8J;Jq<a3N_dn{Rc2x+!Y)DvE3W8dIJ9@F5-PDjc#lyzx}t9icX9g1@N2Qi
z<u{4N>D+-A{}SJvz65ZXm-h>~g~!^sFge!c3@M51k>uG_>KP%Q`A#9M5dSM5m~p>u
zp{%Ku>>(Zy@h1MpmxSTN7d6Wxm4~fj6TMqWg@+2`2vlH%A$*KXi2C)4=$bUnnP9a~
zxM&Be7T}dazS}?+W`RXwT3QC1c?t&cJ-IHU#;oiZ2TM2(lV;LEik(`GkOq^e<UJi}
z9Z&>iXwYCJYj7=Oj|UNnQEWstrc9mp55uRtti9{fQIzfEo)$~^2%OdXAZ#e|YGN|w
zxAO@ISrcB2UX=-M_e7ozETc~$xBYBz+XA8aPkUn--wgHOd_)4UazLY;ejEkyuZa|8
zL>I|Q4lNFuaI|cowR`W7#t&|*U~N?_AzUVb&<rNTJQudw5xUZbUuD|{&pP^jW#WmI
z;e1Mjq;GzWm$XEj$DFtV#>!yyVC78wL^~cFA4<fkwsC6xN*mppw*Hg4*50yuFiB4;
zKomX_4ebVZ>)#?yEOwco(T=W1Tk<7l&xU5w$GC-|=_TqU85%ks_Bawg&z2V#0}hvL
zWAWKN!Ky4z5@izyw-}-9Y42~MAQSJgzUA2d`2HA9h7i+@E@X_Gv7Fq-$9+3EoEbrJ
zHl74#DrG{{$}VvROuP%KS`>T&dwe~E#5Vh6vT`}l_^t7MzgUt$An9Ra$i+?)SN5l|
z5}ZnkawT~RRg(y)l*rr4*iu^^#p-s<$Q0=LhITZy*F?EmEbwhNro)!lr<{x$^VGW(
zAn}Rh{j<g%28WHV3}0?hA;1o;Z964o)0Ti~>ie`6)!5c_(uNaf8*N{DkF9?kwz)i&
zO1pLfl$<u5gb(DDcC!+cvhS&hzuHhwUd>=(8CN_xAsyTq1!@cM;rEyUKj2M5kicQz
zqnH#1R$N(}CA=DC>fk>K`o<|@B}iimdRlxp!DD3*3JxU?9U3feMt&o&d|Y2k+M<05
z$c#=b;03@TInv?x5mvl{gy?um3o4VnC#3Ra^pmvHyGL6oznvJ*b8;!G2oxpg1#?7<
ze%)$@64&s?mC`vd$M-2Sh6pUjtFkr;Fx<&0Q)Do<4jju{H8gnU>B!MhO3Aqu=9xK_
zbW-?!keS!*SlY&$^mlJ8QRs%eK;X|-cM14nbfkR$)#939%w99&ndB$N^nF}gxou*!
zz%Js+b~4`Oo;390z{+KGp`3QTV$JX^<TEl!9Ky#S>s8**$ptZ_9FImd;rkwim7Fh$
zvBSjehH=;-aned}^ecQ(uEJTZ35=F-n#M?Ma`o`o@+lLyi#fS<!{#sTeA2^SA){Mu
z!qczIABnwmeAe=$0t^#D<gFT;qAOKwjS$}mj12rOh4I#p@oeOBE;hc+HDjBkp2kfo
zaJEeNb8?5Pg+4xczU+ya_`H~?+4V$$E6Ors;>jjBYrAm4BmQb=iE%`}kbO|1oznIv
z6H^7fyi=IUUp`(KzAYYU*Ibs^%K8BI6(p{xb_+Pl-03j9i-d3DNSp7XE^zubaYEvs
z=5H)_e!`?lpE57@mBm~rw_!l81jS6oid5Qx|HOr>c+<_-vFhv<OdkdNTRSO?pUdlR
zxrwtEjAz%bml=+f4G?d6(kjQ@<^;v^3$9{fsg<r`#Sb1?^a1($v8>y;iH+;lFtsV7
zrS%XMt*IL~kE<?R!pr;GhzI~TFu0$?oU?h!hX8zqq2h{P7!^m@>Qo`FnH3DT$S;$a
zmi-gEm?Yx?wCCHQ!{gJkb`;JS6kr%VTT%R5U`{8-7ebx-Z7ly5FoS@w{w9{#lSHS*
z3^@~MJspSR8kL)sprAhvJkqk^6>4as6Pa`!|6bu|-9%;>e2g|;o*<>Pkt-aPrz3~s
zTF7K{Wj!+iI3u^ysfl-)J$%09#7MY`NCs||LRsICULyg`UY9bO3~LKHVJ|_(P8gHF
zTKJ<KLkF_k;dO#f637%jt3g7D10h*mO^O*CaM-v``H6BXjh1gE<d$kNyb66gY<GIH
zx{)yz=-=8`V7Gba8AA6q-t0LOURv13#|@RlqD)Yr+^@jc`1Ok|liY0#TiJXJu{!ZE
zmJ}~mj{;Y0zkqIRU+D;V_Of})#EvZhoGf0PnWDf5VPgvW-R@Nt1s>Y0f{DV{v`=A$
z4KRWEI*?1ch45*M-y)9*?9Gx}`m&7MI8=ml7$3mNXtb2dg%+2D4|uuD#kl=#Y7vtz
z=P5CfOxh*9nm7ydgE^;79+fg0oU$$xu2wq#aL6ez!s;W@okH-BjWuNzh@}&Ui(j9Z
z7+iVTP26z#I##b<MQwjG2M+avf{CZB;QC9?Wbe*hl(k~QoMmjj`by3{YYF{_TIlL4
zd43AuUTR|9#E9YhmQ@DUcl<aJf5*>z6{k-e<iUp?W69?CaNSwc7(RB8yY7FEmFKRa
zDen}daybe$HPjUHmWBoj`5J0!3dvJuie=|+<kTtkK#rPx!Ri;GAYIj5^1AnM!5nyM
z%T}VsG1Miz7xFMPG;CU;R6K5NT`e^=HRP2uSfQqdn!J0Di%mqvVWh5<aU92oT(z-h
z1@2fQ1gbNi?b@gXmY|(-2GqtTUOUH|EC4Ni!?TiQGD0@f#s!-LR6$z@a)>q#c1(Zg
zGnRfRyK?+F9uNQ#G)@jJ&kLutiBxBF3kYfv$ja;Q+to|T2yigs+EtW9I%xx;5*3^a
zea01zu5DNkIy=j6zI@&c(2kYRGd|$yThat2ZoirH($30jt32OwtT<kz8(AMPiP5S|
z*=uf!kc?-2#Pzo)laCG-Tx?nngdolboiEzNn|#r>pDVVrV!_jo8%@ZqQvPF;tQf=i
zf`>ifSb<Nvde-OzA&<{vI^HsT-NOyM{^ne2vS*U3`RwtntiDb0BKbGA)ZRJiV`&{2
z&gEE{1+z-)z{=}{kqWV=-Kw~c$5LkFe_9kh)+a+h2>isJQ*P+&SoE>j@g3+t<FA>F
znOL&7P7}RLY;t@HUbWCoa+sKOb~K-r#Y2fBX+QI{7+{JS#1#LsSdA^=CI^XBUcqT}
z<9oZ|F9cC<?BwTHlpR-$p|E=MZU##o+Rq|>?N4PTgC*efs`cyXdj2u)yZ-_99PC9Y
zsGqQq4}Rhk+<fIa>WSZZH*3Kn`deS*#X~th`I(P1UR9BzK=PhAh0uxj+1&_<zbWLT
z)bb=Ix~Ap8(IYfBx6s`;OifJ=T`6(oNH^tb<eWj(v-yqh<-$c{$rm&~`P!Fwd~Y`k
z*IdOrU$c&4e?PmQ_$@!W``M&Nj&rVi8|%g%;Hy8ln_}5Zk7%7|#*B${ZrR7qZO`(2
zGFENVG;Vz3+gLlXg@+$~mP!?Jqi6BvH@}vN4GpX~_YA)I!FO`+z7n^*^S!K?-T=DD
zkH7hKo@?&0P9%NI6I<%e=mwG*Y&!XRIu`nrN9wzBtCMkmH#3*CjkQLkCu{}T8#%pO
z+Q*?uAajcR2(pAnI9^B-smBr82Te2i%%p9boQTV?zgaz24jTZus%_?;a&vr^t|Xi3
zBq!TteG**RVktS3v9jvRoUIG5a0<qg*f3I0dqq0K@qEQjc1_}lPnd+D!`ZAiqD~T%
zqvQFy5Y{YS$i;?KPRU?fixXMu9LfLPcy3i9%X(SoGJS)Qd6gH^Y7fRcPTRCBCl?u}
zSjgsHctwUEWXW+dm?+O1&r^bXU*{S8ioQM_!;A9Hhdju0D!`}_?R0{3ucUnzPuadD
zJ_oUzWdpf57{-U6aWni>9p9%I;k=(Wl!MG9@0lbT<^&^Dm>V+nCr(IfIqb&uRlept
z`e89|Wyly?`FeFVR)JH`T1|9dH-Ff-3xLM4jokg??=tbaH2~ytifs?x#a6A!k80+|
zTP|g6ji#rpEEZ`X7yr?i`_-u*O2^$lkCVQ;M{Y|yYjAD3Uv~breBu+I;KG>$Jh!jM
zmp17)X7(9uT$<yvfA_b1_0E^L@rL!}8^-dwTdv}cFMXPS_{`_|*#p~1-jB>V*T0AB
z=lAfnAKgQ-91*%yu2AK!AN+vH=iSWLzy9yM@AVsLDkz2rI{3+V@8BqTJF}|xweZa^
ze}SJrwvU(Y{W(uHcQbRvC9E2EkpJ&9pXNXBKERcit|o6Zq@AJcEo<#>wv@MoO>rA1
zqR{n4BD5V`8@ZRr+k4~zu~|abmOp#T!l~Fh?1t>*ueoDn#<ZJ9EyvpRGvBrRY~*yU
zgLw+ZzYG-<2wJq0vm5_mgg`a+nzA}d^C$(Edy|9?gsfWj)h?*5JQ}Z!l-Cp(My>un
zB@-Y9E=+tGf0Fz=*;NoDPC6}`yM1Kw|I?7=-xBt9>vR#84VH7)*7Vm}@R#;y4-kz*
znz&F-2J3R#`HG>bjK3=bcoxY$eK<=bTP~FJZE@X<tDMgntTVAADLa3ZD$#NRo$1>l
z+{=A(82JL<aFUpjg(Y;zk--)jmHv2!3+=_CY!&vDZM-P3S3N9~SF~*9jUMzgxa1<=
z`GJhGFv&Z55>Ir{M;>WjM=s!8d*9{~pJLu6zld{k1nuz^`*51W5yOVG=7lS#Xh>a1
zo!hP!qSLtOa#!dp#s<DEWHG9PbI#H!?-)7%Xg^*&{<S}bOlZpfPY7ubWoc?bBnvSV
z_`hEo97^z)ZsmA!bW;=r9Z5I*pn3S`-{HR>eTMvkjeO$mS5t$gd*4<b+1?W5K8+^N
zIhWtWvB!SO{-HuFhrpDAk=xEB#1HVMmOiy8<br-yh=lM95B;3K|Hj>%diguJar1fX
z{N^92h~xp9hNcNb{T&<~u0ne|wd3beTT{=t+CiER4N$FUsue)xfC5dm5jCSvp{bCg
zPriBF+Ac@ezGwLSC!eQr%yi!WiI1{o%hTMyv)c<IBiop{D|z$Anf%*7e}&#sl{pip
zF@3>O-u3>es6rh_b~by36DF}F;V!S)pc41c&9bb!AMbeZQ9jnKTsg4?nOyGlmXZBp
z#^Xf<v2qzP*o$DqP(ElRNTxEb#qsYlnoJT-u8h?bsSmW9D9DVSe0TDv_bSSzUn`gK
zQTxcdPPO+`?LT_t0kqYtj{mX#FJ<tQrF`1Ka;()uGOhzYpV1tI8~^|y07*naRA;Zf
zklov!BAq9YE3}=rH&JNNf@cy8&JJ-$+g{wtu|8tu2?q^q<@qPp?!LbS`a|eK@@V{4
zw!w@wlCG5<&+zW<9yolGai)@MCMBEOscxk_!xw`@D_4g--Pv9O9}F+b*PXPfiyNcB
zpiQX5L|=CGDadp34vIS`unKd_&^VD(Zb)Cs=JqC$vb%E=ff>fG$l}52*x&o79G@Wg
zkUU8_Ri=(@j<Xst%*mKmVw;(A{jKQQ-cG}eVfsj3l)w7!KVXIT*PZsU%weR&H+=jH
zu+?bO`Gzlprw^}%_7`rH7&7H+#ut+qF`M^Ae55$guCH<d<=X&r3sc35NdoVzJ%gBb
zRJGfqNkCutk(@yKzkw^UQpT77UmPA9jC6W;(B*dZI(pWxg-tcEbIT(DJkzSU;_o-n
zSbHbOh68QunCs;i-N>~|`uOzc?*{oZYD%HBCg*{qP`q6hOp=dVIWSy@pzMsOP}Db#
zVe-^z%%3}x>ToIUSt?R)n82)=Q)sBGq36&(qRFT8s)e&zwf<a=?Agmuv6o$k3tYbb
zOs374&8hQeBpdgJx$nnc<@?W6dB@EgsLQ(zpEe#NICbUuoVj=|6DCfgu~t(mRRBe$
z+=p(M!mKG1sjtnGYnsYmzv~tr`1u_ijcTaP=V{ryjphS;`Q?vq=k_1{jC&v7P9>RE
zDMfP2T-)2EE}l(d_U9#DAtrs?S6oIQ1cx#Ok@g(e(jI}XJt4s()i;@|D6=Wsm(9{u
zBp@aLIG}3*+B>Ig5PA~>8!rhMjwE}+gDWtkXv$_MS?p+~+)A-G78WFvNOb3|CcHu2
zS0h6PU;ar!B^e{Ozh69n@D(>B@(Utk6El{PD+Dzmn;gR>=A?zCmn0dfN6}tB!RZ%+
z{y3I>;|EApgn;C!&1xxDpPMg-bHdOhW@6;GF%GAKVFNj2m4r#KKD${H2U({Ws&UN3
zHc9zp(#Wg;rhH9#+H{|Ve;K{YB<C?EX(#<{U!e}`qYCSXW`*CxNbFbIh7<N&vd$Oy
zr^iFeXhKIJA68Cf_3G`x)KYGzAE&bntW*AX?*Wd4Kl_Y`#Wtyb5f|2%@a6R65<FR(
zS)g|<5?WaWoLrc+VjBO%*vXGIdFRU&%i7X1I&xztZH+y%R-i0z&PQD<?W^4Qa5A8)
z|3}{H;Mpe8vBsB88v~h_MH^#Dtd+PGSi|I9=VLA&wU9wsA&d@%F17JPjnJ|HB!OzF
zSeEQ4o8MFtOj&yYm1^LuHLF=LZypzKya1CxY`>UeZMzvPL!qvT=~E}u(9p!RX;Y}l
z!@Sj-dCxVA`Th@o&!{O=sm&)mY9~{E!h}hmN+D+eOj(73+)(PzLZPN<EN8A*#hIrs
zr~cS>zW1~H=^L(+tMqbka5Pt6y^-9Z-R$0ffR_(zuDI%Q#t-f0yFa~${!)pZFTBKQ
z7hKNTvreV2eGmIPk1~3~RE{0k#*<Gy%j{Jb(D(8VdWvPoLy~C5&REPPm#k;?84Gys
zk3Z+Zt<6Lls>8i>N0Zoe`FctnFVe|0E<Al6Q|B#V&FVAg-m{(N=Djo*M|0U_m#}=n
zbPny`MSJ&vNW95xOw3@Ve*D!-SINk#-2#Q^uT7f-`Grq<;N?4F!jaCPpU_juciSf8
zP-1n%qwmcO{na(+k1}AKgS!U;<<78R7B4_rWsGUgiy<9|$eELvOn3pSrK<dWTA7=&
zmD7c@jq>HP1(xF@rmjRy6s!{1zXqf*%JHF3(Dw^D$*(W7)v5IX?djapZNii*43!g8
zD90-xJ$d{XNUgM5)j7f4N}^a{wiXc>gA#PH=Ov%!FymRj@>U(iUK&|kB>YO5bSQ^w
zme@%+kn|ZX$+{gdMn{&e!aXHzS8e<;CFolpv9?0c#FN#F57EvgK|DRWw%Xb=#xdtN
z=C-uwKRUpLV1<|rU1`zB%YO=AN8W~c5#&J(q$YkD6WL5z13u!K%<pR+UQyzj+#~E(
z84c#S^Uha;Rckr^2z;k>X7JbQ5Nji+Kch3_Th8V-f7pqxi!~1)LPg0uWPHeJ(Q6_I
zdbN$4pawP(hq)qLHyKv>I!YQ_KNYr;`t~Vn@4Od3vQH2zf{A3zxTBQE(vm(SewoBv
zx9SNYO$4%*r&KL#HeSQV^H(vk+RQh<^9y>0s!W)B7H_<9BT-qic>YXY+`5&ytJg7S
z+$a{Ewt^MQ7O>~JC+IEbe1BiC_*6aO252LoL3&$yrrXPWMO3AdJXD}mj(k36oh;H3
z)oMg8pC_kspd%{Ps@G0yO|@D@YfUa+AeU3<Y6VoDTn;LgYSLeMW1}@Xp|8P?VQi(V
zRjN@G(^Q&DrE2-l7YgK*LR;Py`FtLgGeMA6&&eEFm@|@|eVvE1xPZ_I@#uZ_+63A0
zFBRw&9w*vdA2jxph<}k3P>d*tDI$WTd0gI(4@Af3gDgBb`9lD;i=yE6y1n9bBMY#U
zd)NZXoIeTqJU^60S_Zw7@S!wMa=SoK5TyFLgcSO-T!;+E3FZN>%>PcnfA+Pstz08c
zsPd3t819)ogmh2f-|p2L>1!cLukcP38OEcuN9gY(`C*m-A;$erV9BXOA7|2<jEnFO
zBe#`5EbdJ~>x!+EtupZ{<jX%H>3T{2_`OHGB=^87q0HmzGzgaC*2m#^8JxL^l~*5S
z>O1Tc`amk}CHnUZ&1_z?W6`&n?EoXoFH>gN??kC6=qqaotu;}#N~CpC_vBGZqoc^y
zO#p>LfhejHMcSUr7i!4a`XFXr&0n-QF3p13gcVLWgA+KO71*c7Jch+NOGC;gf&92R
zI5wP0C#ulUA(&*=J0p3Lmq5Y&bVp27-as4)5Pg~&vsXaMYxJkk$<`ion<l=X0){PH
zD$!`5Pxm#s?m0}DAPr~=y;z;f)kC8{0pKEO+joKy8o&;04dmKlH?}Lov9~HBbSD8$
zC&GrwS%PV30%IFU`DVHZ;e;}$3TepxtVv#mjoAC<#*2Li<bat)x0i~r`or()jUDc(
z8cBb09Ip@*GWvWwp0f%!XKuGyIu(<0e@lQR&M(0K$vt;VNF_|idp~?V=#*}fn^E?*
zjNjHYlRU{)yCgzViO=nA!Sg@!*`K8w8jjI0zOC$u#4%4UR<g66+-nn<Rrt45v*p$B
zDYFO!`Z4>3l#BJ4X+M#gwu$j%*t++C3D$+|p$)>kk;3qRPKb{ZyPO?`X9uy3(I0{j
zV@IRcBu>q5gnRZ#aNEYW5|^aeOk1Nr6Ia$>oB%qB9~omZPlS^wk0(pRZ8I^eL=r``
z7IsX!nKtM&cE}^+ly@=M#_Dylz7e=xCaDum=pen=7sR(`^AXy<l8IIpR*uMj@{gCR
z)d}?X7aSw$CueUDCG2SPw4-s9q*u_)EjuR~w9n}1c(iI%CDM_tIL+P)R|q*Jp<SRE
zhxdToG9HQK5`(|i5fP%V6k2`?3~a9{%<Ex{28f~vUDfuxGO{L2yFy5dKS@7hIosUn
zPnrHM8+a=dT1Ro^t>u%qV%<gAQ}GZ&Pg=*7d=y2A3AE3X$a-4n!0r_ihuf1OH7NOT
z$t31mJNsi2M=Xz5PzV9M2MSD2<8uk{I189I3Fsz1q(xT?R%RIca!6kesWWeAQJgrw
zl>MVEUs6#rc{=%ALFR03{VUOxom~olw))e~zG*uM)&fuxyP^zJc09?iH^~a<+9aUN
z9u^_8Ql6<~TPd%N0l|mVr^FJgpV+WAkt-vQ7}F590Bdb+_;h2*n3gJm<d(8yIq|93
z#|7ls_!Qf8oV@nQ2VZx>Xs&G#cUCP*xmVH%op`^(p_1f`xQzuKUYSoex-6qM6fiLt
z_Z3)x&xeym33=U^%GyfW-=UPG5yzubrjgN|(H8fuUbIP^4#Q;7c_vMCYsa*>6|@xt
zE5Ub|`NN5?d#V~CtndY+Mq4Bj=g(vixi`_-A+d#+BuVn_voCWzrgoSkuP5@^GeU1>
z`;TQsIsMApwazypqNKl+&Cu0RM73Ha(p7X6Ir~{&jD)0ODV3kKvwsv-<9wmD8#2jT
zzL~^6Kpp2d*Eo?x#E;?0tY}rM@n5Y)Jjxtm^MS-lZO5us1&>m8!=!dLQVG6C)oL=H
zw)xpthRbxm>{cQ-h7Nl{e*DCVpE68KUAYQfc?xt<AXZa?YF>?A%oo4>4_vx#9hY8u
z8I!B6Y~O!`lR0FO=N~yP+H?&=ZTsjSHn%||7OlORE0<`ty?hwAhv9^yh}8zU=L4oM
zIhVJ;?M-aHVgt3u_OrL+Xo_SOk1Aiw<~P5c+U6HIR2&{z76aM>%@jbchBMY};vH|g
zl}pY$i=G49={QzO@tqEZ_|OzM_v+X2jyGJ#nBl|hX*onh9T(ql8}n;B+1uHV%IvSQ
z$erAgbR9E&G3(Es%l`fCL?pgcJApU6<yQKex6?gX6`*{4X_RvQ<a|;&I|%^vbm+ZH
zOxP8XV1&)Vfumh@wZ32g(2{e7t?e(`Byl0Z#sRbg)A+mq+yqrkdX;=y266h3Na%_K
zXFoee@{hN-4zmZ!k#@OF#rl)|r4D7r5=Em+HDR)Lbi4_V*Vj+Y-aaaOJ)oH|ZIZ0@
zXC-upyJr1IK$!d+u;Ud{R!hQ<5Pe*sKEgabHKrPokBqL~=M*+e-mU%273a&2hA)SI
zP56m*DdD$SvfHbnI|SpT`udW|ShrWLj+{)sPXuH#<Pi0iy(aDD{LQT$`td2Sz@AHQ
z1#rKT;s>mf@g4Bt;RbRt>-)d;aV6F_gZMV>41)N4j+95G!hHTfF=y4)T(h*Et@}Dt
zc~7faE_>bk`Ow>5&4uePX5EUp?08{2{iP}=I%F~FpU<=8oXdIVTW;gBb*mX@+evHp
zV2W4epwAxphH>2Xt~YV;nP=#ixEL~?D{%e0KgD})x`K7<&Sy$ZKQHcWBjUt|_4V3u
zyzX5e;HIn2<?znu=^Pk7QC9%!N6+Ka|N1!ww%$)?X*wVO$n`w&*z;5~a$#!htU4pU
z;H`2OjE3wi0OOqT;k{^6Bpj4+oM2Ei6q=Y;)5VAW`AZCxBPzp#P}{`J3H2Q8I70on
zX*5O0I6R=4K6N}Q-#}BL%z@@MhN~Ktt6|!#*^F(-aj318o`HzDix#r^+Uux0+{~`l
zRt~fsrqDEr(a~Xc?(f1nt;#cb`Ya}lD$v>1%F%%mb)zRRp|OUBF=Htm?WFz4G0d$4
z8|+%6M$hF#@4kuOee26??W{6N^@7SXe$o^sPZ&d~uZQ-I!;~Y9%GEM$=1j&m<T-q>
zjqbsSX>;Z>G<620@l`t8n&}-ZL$1Ko8M7GIkfWomm1D(WJL?5gWoxD^UC&#uUd}hZ
z`c=C1DCjQ(ic#aIFk|vq`no#k?Cyn{My5_2O`&!aHPHYq?VUvVdS)&<o7Y}-4p08_
zcDC$pCnCqV$x|pD*-z)8ehVPELLD<^&7x2qq!d-@I&_GJ2~$!1Jsd5;w5by~dbpEe
zRmbEBW6=3}#y1u?*w#XSCC{|!(-~c#r{{17hkFKS8aJ7h7i{3l<#n`o^wHXSfTP7K
zQ>IU*b=OXg7UKZdT2tFFhFLSG5e*%qt^E*X@=TgMiJE*3qZ{(Hx3<z>sszc^R2`-W
z(B64%Y$p<r14jW*3IUjooP}LA0^m6Z5i3l=q2w)MGp=$;O}iq^FCra)ONcVj81C1z
zD<NXlT6v#*iU@uzP19NwcUiX#%ndeIIQhId9geieNZwvc7jP*ZAFqvPTReUZ4<i#k
z&bVRgaAi}4S~+?0(n2Fq$=<JLHI_QE3qG%kLR3K>2NK_%{#+4~_5l^p@b#V<k3o^+
znmF6KeN)+6BGQMiWXDUWhm3;IqvG95kkV(i^B!VKD5f{uhh8r=oiVPI*@~6c!SVOl
z`PclBwfGw!^`pi!Yx-oOQXg$?9aL+Ym^`+Q&dx3x#!aR{_jBaf5RGFeF>}gTCe579
ztcD&S$JmKem@;t;m0~Y#2RkW6iiuOE@aNzC7e9HVg`BQZ9IP;I@-%c`7rm7nQzwt7
zyR(DZv6C5Hn`hM6(H!e+<Iu4pC`HqliOiTbk#c_ztp^X$G-nm>zWIFq^UMEA$8ZCM
zQVEr>Ve+);Oc-5Hf6rkK9_}Fm@-+?2m_3umT$T2=RtED0=FXqT^l7shTd!zsZlPF?
zP=z{X&YDeQzD#RN3&l!~sWT>W-|hdx!$&9a4}bSoc5ZutZATPSCyk{{JrhRdX+3a&
zVmV^W#OX|*IEwDhRt_C4Qa@%Ai&tIFmCG9V>bHJMYws`$s2??f*)t|H+;@bQjxHh%
zV<t|cR+kw!X%gjQog5l&Wa;8%l&@IFmfQESaKT)13RIz<*>h%7#}F;eZ46Z+#!Z<<
z9g1<|M{&6I0Nn$_pmI!{I*Z9;>p0xjOjmzN<h$7Z`3Vyyek!h#Rg!Rw!y5Qe*5gfL
zQ1%H*g;}X{x6D8_Oy{+mR`SC3eT*7CnxSLeRHm-tbMM-~gZDkoY1e;{O|!aqyrYq?
zeEIW?Dt5Etikp}<xQDGRy{x_A9lY(TSJ5zT8nee#d2wG4XPmQ!b620uVAm0<!@V@O
zAEAEyOs;tKU$JuhK^}Up8LcDcopS>pee)Gen6`v#Hk`&Yk3Ge-b+_>^@4Aehff{ap
z-PP=T^f8W>A}{Qef*Ge@%)04E`Tj2+<XB%XNBc`eHDkE^s;fC|{w!8)xPocbcD5hr
zVeR#A;mw=YFnatnW{in=X<r9tU;JuT%^T0GS6$3S3-dg>Wf#jXxs~_acp>AbpUTEH
zr?B<87Z@x@9uVjV&bj*4G_*X%e?PLF{@$bX4OM8Ix{Qy1>^;n$IEkySy@Jl|&(Jew
z8DII_N5~ZiSbzO%DYWlke=nT5>O9U}v5@|*F1inQaPUYUljbbtrd!`YQ`d7myRQe8
z%d`6GH}cj^tC_lB6>q!wA|8L>KF)c=hgm%MGTV<e^1nXv4qBdlg#OWs`21%-#oTcP
zCQP2jP)`d34X1G3<?EO|X91UOynv1!&r=>fi*wF9hb1${($m*Z&%suDikbz>&*e?8
z-^`w;ALL-~FsgPOfAzkPaKWk5S+(IxCYRgU(NX5TpZq+lrsSA%#)Yhz4Nq^~=YHuz
zf}WXSnVGgJE)pG@_Y1jSOt6hSyfrI2CIn0}bAIj;p<Fz&3sHieyAz~raphe%<0WyG
zqyX!0Hhys149p5a%%kJd9Ihl#Rwpts*xO+C7LSw96<o>R)JmUNWsnKW6uesKNgUqF
zB+4(W41Ynv<0Voh3jnM>y#CfEIyIi8XMIH)$yMv#p;OJBtkx$4VQ)XIF(SzxSewt?
zYLxN8tUbW~-eCDzIi8Hq36-by<4Sj1ln6cBkDg7iIyHN$+`dSsUvF=i^-)7t7BVhJ
z$ZKA`V&hJwXn8g;dbRp>dnlzmRzJ#REBnXjNXAVllUU)Bel4HYx2h9ynOabm`7+qw
z=J<7E`iEOp_M{W`vF8K38#~G3&0$`A)K8eldp`UDmd_Z^?1l5_Kd_t1>{Wc=br<mH
zgIic}%{#bqP9Ix3YIxsAKg`mp<5{`(Jcjo_!^1Civ2NogRxF;wnQJd)VM7nk?>)@v
z=U&WI4socMr#@fh=s=lk-uWSBckf_NWhx*2&|7%v(fc{=)gR@<o0rmGn9fa`SMkK3
zo}y;TGCuY<?`QUyMiwkvK*#p&%)VeVGy9+8=l4EGe}6B1#bF9#XK?MN%b7oW7V9=%
zPX6Ey_8o<5-uwY>IA;N4rp{qpwVS<1hPZU|O`I~mma{hAz>=DFp4oMni(mIXZoPOJ
zQ|6z|`HM&K^z(Z;>!Qo(-f}P750zQFel2?*zmKDh^ZD`@{)zE56(&rX$>8C=R41?G
zA3y#U#x#xP>P?rh=jq289zBb5&O48@=8mQP$Wb~D>|>yI1|R<0kF#{@XjWZ(CG|&k
zva6$yOW*QoK6uS(lyyCGC)cul{}7k0nL=lN278~}%0(Mb=eNK46YJmbQEoYZ9<$H7
zl(n<UJofBvHh=KT+<MknN)t}wmP;4#z=K<uarS0D^}g3o-!z5mu3pQQKRwEzb}^Bx
z#96ONRt&_lznO@KQ*CU#$Ww8@BuHw<PGjx)=Tp`d9z4=apN=S3;vcV%sue0#4QP%X
z-pj9lb_a#lQM_Z>d<xGVW7EY;`Ra$>&F=mxpvdJF5B}!Yti5PGci(j<dyWo*Qj|M(
z@xY^7xn@?pFd9`0Y`F9s{`iA0^UEz=eCV^EW5v9&9MBbZKJfrQ`^AGaF8&+~r_`~(
zuk7b{4Gj&WC=d19ml7*9xyk?s+K+I`*^3xGZU(DXEatA~df0gWLcaZvf5Wz}Ary*y
zZ9P#mz#o3{9q#R$!x!JTfu@=<T({{Q?*HMx@yz}rZ+hRmSTu7SU3<Eu+KF_Y(e-r<
z9vyT`yjrp7Rp&9d<M({=TlaA8O&?|bng#59N^@xc3*7a~JD6LZ$=Z2Sx%Y)F+;`92
zoOjj??)mkv*wIrWms2$F*us-r*E2ebWz5%(X6>2<{P1i4$a90UnfvjZ(15O1h$4;B
zno6ZYqyeqzKib02zyCe<9vem}MZ?%yI(jQCnKP9sbC+@2DP#EQ6I=P?19O<Mrh#Al
z;x|-t@mu}2KKc+lFF8A*1><HeVae!2eC8j%LhYg}`SA5C`OTK!Fxc0@1HbwOJDOJV
zk?R-HRJf16$oNy3{3N(LzjQ^7pR~M{0qqKsM7`b%QL|>hGO@^_!Xy`OAwux$hBu(<
z3NDl2Wn!}O$YLX7Rl(JyLVmB%2$NP=Ac$L?+EY<l22o|XRFb71(_V$<@%xB#+OKB3
z>xO3BBxt7)D?C!yX5Wm^pIxFzS2S`F$&<(yt3rQ94cd+Ebul!wu(_urK}U&#K+Z3u
ztz|KkeAWpFE1UF%%@tm;==1BV!e?9#n4A*$?fJKgi%TW+6WTLtb;#-z75HIbn!sj$
zftxLdzpCZiD&|}4A=J(Q#!uv`wk&+J#!lJCv=n|~Cux%3z)xL2AB$x1Mlha$8|Q1r
z=C-%&pWsVWWx=WoX=;0vfBg4fP$q}U!IVjneJg`b^6<2|XEM3?EFb>L_c{BubzE5s
zsxnMVTL-7kIgK%6CNck2r}3lvc0jJcg5@i@a8wV6_dZW+$DgTIDpaG`Q7h##8b!4_
z$TRo;j-Nh!h*SRd!^|A5Xu9Asicj6a7k~U{{A6aXhItJQ3=I}-p3s0EJjQ{}URE!i
z!`Ml4S+#r?PxO|!aA^ade*dTFit?ZowWB99Ts*?h{_E>>&b^XPUcHF=`;TzVMf3Q<
zKm9XprLnyCy*D#$>;tHp@!a&+AK{`(9RoY=XIpC@`RSV8qs{#AKfXm5c~Es+^WM$0
zJp4<3^~eF%T>nlsoIQ*Gyl*GJzUNWaj-SmhfBxT;RF1{#E@Dn)C!hK0ubFoGrM&L4
zwcPd0Pl@y}&))q@zVrJX<j{<ocskYLKJMPq%a!My5?d-amdzJT=X39RFRjrWzWvp=
zG2y58qIHo6@4Ay8?Wtwe=ikZr+8VBW^;JCm>#y;<7kaqztsmurWfQpb`5rrGQa}OY
z$QDo%=fqzacFvT{_hiB65XurBXy3uNzWEIX%5hK>W^3YKUVxgK+V}_OQB*1=s!@&_
ztx@?LDwiiunR2AaB?W<t9>J$MryQ@E95p!@jv{hExl*N2$O93j;j*1c<?;@wjPq-t
z>qt8d<4>chpePN;mCyM3=W*L*i}}(QzCpu^n|b4Mh05i~Q>7e1Uh)_XQ7mb4Xd<OR
z)ljIZW%-)5OkO%n`_@Nj?H&>`8PO`VA3n;8$y2CNP)@ED@&ziD3M!YQI$S2#SOY{<
zDkY*Q{yi|2vqTawaw|D<oH*a-s8O0qq)^o=!%-Z}k%n9j$mgla=PkXV-X4mT_*E^s
z3Trpr!jk+pzW%j4x&9p=qfp2>In1~Q{N?fikjvGet7RgUgGzjxSt&&n4O6Z-d-^kS
zPfZSBMG|%^5z0%>vhs0YZT!ejYX07XfPhRaCTk{iZF`%=O=aL_`Cc(5S3<}*lI&<l
zPk->*IYo?cq>_k}&8tPk>Z4#bo2C`t*uv)_mCC(@W=Ojt#1=Ne-dkzU7>^9!;cYlE
zPFMuURXCMmDWeD%G6bt`{>qBrQ`$7;d+xJJxkFHyWL8>|ujAR-R$8v8u(UPazGiSm
zi<a$Nf#BT^l{}c@(i=O_W8g+%3?uy}^kMteIgKn(l#nHBbvHdBA=@5-FZyz(ec4@?
z{*<i#OZ|=DXWn7QcC^wyoh_5f@MTeD<CoG<Q<G=7Tn3d(a;Ymk6_Br~MGZqPm#11O
zqjEVal`^_kGius0-ul{e`L}=i5|vXn^8O9=G3`o;hkx@UzW3mM@;QYn)W<nB2P&7N
zCZCU8L01_Xu972}0OS-kHF<^x%j9yog!PEdLr0jfd|I5h6DL}*@(MPe(ZFZE^6#9!
z>D^pdlP8zULsTZxd9N;rszru|bEqhSTn?pbsi|*b^@ST4C~LMpd_TvAKv$0PtM7b`
z-)wJTaHvc^mt!<A)Z0Zd%9AVP(d4MBt7pdIGr6pBA^PZcw(RdsY@J|#Vvw5JIz~;N
z!=;yBMoz&WAKpeGr=T*-k=}v$b!j$7XkNJg58U#JcTiAAK@}+I3KbN&YL%+Wlgla4
zWlH6^#?dOSe`;%MnRCV(u9#IOdifEycgF9=l|Cyf5ZpzPNgaNeCnple6G>W&{^q1o
zaj~Nkg_pWI$W2+sMH?>Wvhz=KKcy1*Zh4@a$9K{_>~#PDAOJ~3K~xTK+Z(QB)#~%O
zX!TNZfFk1XK#r9wPGipW35g#ox(+onb@6IeE?>l``U+1zdw`8sZQ|l9u4eg!A}<~|
zit?OWMsysYI$@-H*OR<hJ%u;id<~aebpsn$&Lb)piE78NaOqiWx^xX=n;NMMbg^Ym
zk(+M0mNn<B<-*mc#lK(X0gfU^HxE6so$^38FTAjmLp?=G<&tfulOiOSQ@rr-z0@wf
zn47M-j4Q8w4J+r5W$(^y%vf;=m#$yWRTs|Z`K|k%3@Q*O<Flf|p(8z<dgi$-Uoewg
zrJt>Pj&ar1m$G5w#mt*H8i3B$qpaL;5tm<b38zdQlctw2S{xi=^u!sQea?lPzhWMB
zb#(v?^!GD*+I-GfI-hY3HN;Qv6zM#sSiR;P=1rf#vCcj8)Gg%NjTf=`+V!-*_#(yO
zD1aSV0>p-~Prlm3t_@~^jVotUyI(Bn3&H*tk+gN9NBPQ=xJi&;As9ObVAI}wFi!#6
z?;3ABk7iPA#Jp<DkDU)8ue+GC4U`x|kaluo_2kv3BCDEWC1y8ns0HxYzsd;W__c+v
zEtcFhxf$2p)|Iv~Pt1yQ_Pf)iT=~*wT!TM;{`6`BvkK#_GI_R3{%WaXC$fE>SAp+O
zQY+f>WWIT{Wq;wr@g<$S>O|;3g}M-<Smv~)XK34}w5`twTPxetvyHdCJk)_`>uRaM
zpKKfFb5dSwn}i{mH%vUn@gpP+bmqxrZL6}!2>7&p1)qra)>cwZ;o>HSr0(tVgs`P6
z?Sx*9eWguJOa{8M2C$#}_Esmt1#EvCZ;nK`>bUp0KQm_8IyPN;KI<>tz=DaR=<n{P
zFnJLhFS~?`&pi#LBM!B{L^Ne3S6#G@O&eCCP(+myW!1=n#iz6RvI`hDswplslDN#}
za^zGF9aTBpIl!8YmvZTb4a}L`Xyvg}MHPR3>8U4}zy3NdS-YBxuegL6O$GKm^#Fsj
z)^h6&S8(~}*KppF$qWt^DU6!T@)c{i^1?HzudAo`Pzx^|9m5-LzMON`uH&3#b0m}k
zaiLWnYT@Ch4=~)*&bA%9>Fyn5xLO7@L&X6~!&M4qp~C3QsElEreBc2pImM1`+i2|?
zpg7RywKcKy^0vol>8aAbXFI!_57FP(pDf%+gBiYHxOXpG`$jXbE{Cpm^TfV9w_bY*
zSKa(thW9?t(PA~Ig%mke<iS5aN-kev>+{<<c(lY|v7dNy<Ki|y!FxJYd4%lpywj9m
zOxsDGX&ZmgsHj3u_aXK*cO);gQ>fu?w(jj_-mEeF>7IMo-O|G0o__kekI>TEL1}o1
z?!yOZKitEMTb^Wa)J#s9+QjZ1JLu|<SFc{$zLQx?mNT)o#QxSpL>dMT9iV-vfhDI*
zrlq-=m$z@>U}ZFOCo1mz(NEadJw$bQkRyi<($U?=VE-}NTU+Q&etT7!;#zBR4D-xm
zkE6!TVoF1q7q-8|!PW!pI#^`>oQXW~hhMY3wTq5}o$P$}8H)8&m^-D3y)W(LaPJ_+
z-eVkWZK1zZ;#kiSn%fVtd+QeZ3X@nkX9|a#_prbH2$cvoZh~!<fg?P*Wd{>xokCsd
zFxy_-M|bA|w!YlM{CP9@^F6=d(Vgu?)iTGr57W|qn4zHo4tKV3=x7mi#K5s0THD$g
zszg?{j{SQWZkop2@dfrBXlBogJIIZnO=DDM^cZ;bp~rZ6&r1x}O-6U^;^C*Cr@8eY
zL&IfyySr#<@1&wNxg4~={2~Lj6Pb|f;kUoNhlAZmIdXJ>!JZCU`*JK>dJ4zdo9P}H
zMyV=$U);@vxr-SS9b;c}GtWNv5|ii7=h*JY_|<*SP^v~0dymrA)<*x(5XZU>(b{nY
zTiPWvAR~*I2;ufc;hfVHbn@1dGJEKh@MCR#7<+3&uJR_q=)IuO`MAHLqzy{BYsnaP
zAC8DG7e`_fvW-1AX>zx&l=V-P$zzG^34Oa-gz)EtZzW<=hOqZpnPlY0zt>7BrwS>n
zk;w>YXRYp(`Jf~zt4o#ot|j=37_~L1T?J8s7r|VDO>Qtb8TDh&!#(Flsq;>c+Ve>j
z(zFRx_>^+jl^Dax;%hs#8Wdo%=^*v#@oj&#LECsRNiH=$#?O5Jwk$}wJUxJUqS?f%
z>+5_kloh4ZI<sw@T+RkQFJ3<-?iYpr73x=!jICTOYZrTRFQDg>p6@9a!Zr?_FU6-_
zA8Rk%T>yJmz!DMXz~j%h!P(Kqy7W)mEU|Ucztr2omKP2%Yr#Tt{fBw!<rW5d53#G|
z7}KXT@c4cAvAd;>gKe#B-PO)1r%dC|58lhpea*DAHM3)17Yk0A#<LInhUfNm(sA$*
z{e8!1Yi*&Ye>f32;^6+>lpCf{?%2UYPd!gtdnW?}19Wz@ainjMfn(jYwYAZGxP>h*
zcCv8M0)`Jav#Yt2(qIoyJoy}BW}L#P=qTHE?4kX@URwJL%$;1rAMd)8eO>*ux3#l%
z%NA;<&S&Z<&CVS!(Kk3uUr#r!t!)fdO7tA*q~+jYUf8mQ!lb#(oid7+y)V*s_!tAd
z$7pRnK<`k+g;P|ar|SqUZ5`Hry$5%*<6xC#OXe}$)542;+bC5vx?1AckwY|hbc0fq
z`#X7RYcmU$E~SZncI@0uU#U!~*hgo3E8WF109se*?mo<c18wZvx0i#>FZ1I5cD6qG
zIQ7#PGHU2$zW0;gGf;t{{yq-2HgjyKMBmXanp@gw*|U`c1C1<OG@HJIdwFSpCsk|m
z#Qzp7TI?g7o7;MSQ;)ie1cd+qNkh)qCB|max0R~}!T{MsW$QhKV}j7k(v_6Ke8!Do
z8IhMl*@Hu5uFNMKwbMt)qniq=G|gmecdJjbA?eBS61|Jf)FW_h^>65Ct(bGhh1_<-
z#Wd99*n00zxa0n9$%6#i>0SB#JvzZ)NqX>s$xeH53X%nkti7ym<&+u$st6rAnmReJ
zlqrW&it_pzk4Ya=iB3;3T`8)Vd7IUxfNy!rR-=R4E0Q0nOSdxNJPZNUflj?xR2hC_
z6(|YVi{Wi$`7~af6r_!XTwVZ+C4ody#$Mr5fVPk@TnOE{)eO<sz=0iIVHILoih1!~
z)@zenQUs>3M(9%dlRWR3%slO3h|r6}9xX0Q&k3;rY0i=P2>n&~i@`d%VwQ3QV;Apo
zh=aZ_`oo5C7s#apn+vS-_pIq)>};L-ewLwciM>eN@q^XViNv5(u1Flm%NgQtDm57d
z3HBdae+&3Q2V+Y^NJkd~d7$oM*1=~6)@t!YYRX>0Nm1a(mqJ{!2;{~KVp<8?XXBYU
zmEmVuaUtntX0s&UA?akumdd-KGJ-bv`x^Z^CA+VaN!ZQv1c~D!FUcGj=p_|LLLccI
zoH8zI!Q#ad9Y;KzVMP%Y0Q8A^JO0ysT!02(nvsY`{u{QFv`4<49w(!nXEb?T%9J*y
z3&0WdG=ejP!ILT131~}aHT2M+b^NnpW<?=8aGYL3I*xiUwrvmozat8st&@?i+ky-n
zq@E0oEWM4?UnVqP-ZX&vd}c~)ll6&sC4+XV!3gn?YM4GRZK)Bj&&cw%<AE)x6Uk!p
zlkXvEe;(=lNdBK5#7*de$P7R)<*UaTBkk|m@}_l0CQY9nKFz?U{@0OlZX_PP0xwgq
zj6hTB==k_LB3ybrpXmDWF#32n567pU!C<K4VB}Yp3r;5IjDRbtm`wf|aS<WgN9;BE
zThAxNvhY$CmXN{Y@i9jyZ8GzSC|uG-{RlE=x{n1Evh}4!+gM+**jaohdh^Ce>pc?{
z<WuO!@uHKji$-o6mjym%r+k|DTuuXK&y6IYHA%vEx-;L6Z<xlxBAtbUTrL;iM^6k8
zw{z<uW*n7^cdcn13x)ZsRPy_%+8f7>7mtz8<rDeLK7w4_FQFv?G#*0<27DK=V?b<d
z*+zVi|5Ml}qa=UEk&K>k<7qjbjZx)Y)AkW!p(`Cbk+enJLhOqQY+?2fq0Rm*Q@ELp
zuFBW9=&KW_@oc6IA2^#zoheB#c#=<hf1Ira-O$ETcB|K9TqS5|N!0u?`n2+kYZefc
zZR7iv^5rr<V{D?8$FJ}I${*h{tAcTSg~aMs9<NBAJYIDAyr(lMud|nx-L4cke&o-E
zA#A6G65Iq*dDpZM%;2`sMG}_S6+m$9OsgP<Jj@~cyTlQXpgAcLhBk2;`j?F5Ko{|M
z<(>EXBzkhVXnfNJg=sHWIz2x#AT4xf7p~1PT6^(p+eyrjF(BAdgmyH2+-Lu<!7XJ8
zNxU>ZZ(`5UO~+i;el8hvP9`Ixqfdt3jXpzLN$d;x34O`*SN6Pie3|Pi?fdawwO~Ay
zcf!RE7AwRlXB44Fe-CXaFW87tq2HMCMX+VXWGdDjQwFOw#($Kh75bCpIg}eMizQ{{
z)m~rU7bQ(IzU?Sdvma+VQI+%!<?(LwqcYzG1EQ^#3QzPb_aIt-6LPxPbo(JyF8;mu
zoI>pJkADj_Cj!~GNlZ7cjgq#x*p~4aIs3cthW%VFpZvb6qk)lAYt(|pr<!^xI0B8@
zB1u{Q@s~FRXyzKZ<^3Pyyg5zioaQHA`ZSO3>kS70pvq|8|H+T>_;)|UGtGnIHjAcd
z@#TExnsI#e3qPe|myC@OBTl{GM&5nnS}Mbu?mZ9jg>T$RF^ZV=s@L%5bB6fL*MCbS
z5}G{xClX_a@Cx1JUlr)!M9aQ0Z{k*`PFLEgnYz`#p(&PmL*rx&E6kJqdv+rpAu;zo
zOS%(LIR47X@yE`bZ3OK{SaY%ZKHdz-*HN&FksSadPze5>R9`aMjcly+N{-Yqfewyi
zDFZ}m1gm(u2)hQiL2>#!Srcmr79dZ)^D7wUxV`=|Iwwngq;|(!K|el~(90}EFIkQ>
zR<=)O$K@;4mBGs6Xuu4A{8q>BY`>mlT;cnsHv1nlkRq#(TYn5Sev-Bpaq`OW?=Lx|
zeK|?6|DQ#`D_K=K>Cs1ENhdYM$-iXVp`ppuC*w9F-I_cZ`<cS7e(F;G<^ykIY<-@(
z=rCXW?7z^$G(Ph2|Hb5*G7Y&tzVzA8vb|$4xMQtBjpLnv_i+}FEs!5Ih52eXx4i2g
zs7_kW-+cT%jHSf>r+>}AeD{w`S$-*>df%%V=p90LJ;&ew({~wCdBF_vLzOBSL5tC>
zFt$KXoLwctD&<7>elSfjZ{1CtU$>JFedG>?RUP%Y5<1enYVC!T28KB6%q28Gdp{3t
zJ;=&SE~0c`8=bustKiYo7IEc8tLPqTB$pFUhX@5n)dp_5VJ+YO=TEV<wU>M$M^!`P
zg!x>sVj_Qj{*grC5e<_Tal@tyX~-+8!^gPi&R@~mUtz|ntJtu53GF+c<l$#`Q_;e^
z%&bFM?fX24V%F~85-;k^x`C4=hiNYqX!Y#5m0>L`;G{zec8!I-P39xwq#&MrR9PMT
zfn9N6Kc^8~t}bM*>dU|h_Ez`}4PYskPm)Q5-J)P^69lFN1*PrlEG$pKo`dXiQTm18
zH5okIT9XDp(O&|vH~%rb`V$t9N~j(I<Y^r%q0)@o8AhLmF}b$}e}9jhY-jC5au)H6
z3O@=m9l(>v0G)m1NWyEX$j@{r<>@g!;>d}5<l7p)9ptbc3!?f;P1;Vj9+7;@n#s`2
ztng&8B^|9lr#0Zos?+XlNT&4RJDsB|d68+rU{Kg6ltIP=GR&2eC&R8DUvf~9i<IeO
zzlbpnc4YbHi1y0xAA=czjYh7sPEa&x(ZV^W0V*et7UU$|Kt3;y<>;9Sfsk46e5Nil
zI?19cvy@KuB6_+p>}r*)Y6)iw@ZR@*gB2FFTwg0AMt~$=q;c7-7G~~G3|L1gN{3$J
z(|`M6qN-;7TRzRTYZvlQzum<b{_cYemWDa|#*gxbD^~K6?>tOi3j$gJ$N0uSejL!e
z>behcS^Xm%9gMj7);IF(?Vsh2r(4PAV*RYT{7Sa}>??frkwbjuOaID}iQnP5E?*Bh
z^Mm(xY0e6hV&5v$+<H;orDSPT!Rcq7$qQSaWT+Aml?N%7B8aN2x$a%O`O<|v_1Kg2
z3`OKr#EVaFVf?alnL8nVT}8fO5+C^3`x$I`iD`?M)0FqCPG`#!JKCze<DGBk>Wfw}
zDzAw&lzQ9Qy1U5vYnLbQPf-l_wsZd<|G*>L+Bs*<ndE?R3)b<;zrKe3JGQax>bG$D
znKL}&TO9?3on*tv?t)4S-!mX794fV*9Q>7+bnNIS1b?&CE|<5#76a~_P7ULdy5K_z
zIpu8uL*5c`z)BDkTC_KDo_eUjz71X_*G<O&RI>h>W@5=&2|l!0ik7ws_?C&$`oB4E
zZzs6{t@f0Z#DTUl&HbPdw??vz9G(zc3M5dQwo<3&oHFf$kPVWglH}4$&Ym1#{4Ts@
ziL;eG=Watt84@Z9Fl%3z$?Y107qWmzEpnS8l6caXvZGXtrIB$<%nT;G*jq$GCN0=a
z$tniXg4Ylij13J9C4AvHcD2{c5SFEkM5}}TTC}yg4r9x-lRRkayS8vJJPZ07XE$$K
ziB&FXor`m>a0z(FwKX|ZI|srh)DVZ9Muc_*UM79){y;A_jZFj<;iA?x!uXV1M2x;X
zJ;*VFm=t=@k~eQqM(pGHl4;}xyhC`2HF5=4lN2FhOlLyv{7T5KLcEh>X800z43i(y
zSn85Ynh%TXDKHa!2A4c+HFww&dF)n-<eH7m6i<Qyja$$P+F2dzh{nkaxb^MtWAjqD
z=a~bj>M#R?gGA&xW#&Zo>}zu|B}CF%qoar@s^^Lgi@5v#ZA7^-EIwr_tFM16U;fI!
z^5NHSpayvA_rGD@Wv}PG?|d8iefO}lONpEx)63b&LE9B1^9~ySS7){3LYcsKlw!Td
zp9_UN)k>A7IcM?3Z{5KsZd!>3^7<&h`_(<{+WQhO>}dxD{XJdu4wdW}jbj!tgT4Iz
z(Py~p_xDq(n%DOWohr!XG{5-L-|*$1J;IdJFXMlI=7UTdm8V)M(RHLJeuH$vSarBa
zN7pb{UVR}y`S*XNxv$FUtJhK}^)Y+lVwA37)vFfAMV|;nQ<%EKM*Dmu@|*ujR5vqW
zZwH_R4(nt-keCljW(e=1k1iRp9JEa&(V)b;s0_$?_tFFVN?A18v$wOm8{J${PIFQ_
zF4(lTK4-_ZSw!1F)N<?q<t!Jr_EUB|w0X)X(Xr%5Yp*snV=Kd}c;6YK7_jGrq&f_4
z6}HhrwdN@^5nGP1?W4kT+Si@+Gc9P?3Q~fTyk?XZ0-&@f&IT6qw6LY~bv$1Dgs8lB
z*3G<(r;@@=>6m_2ESS9~F1ZL^gk4DzhD<^lXMgXN4#_$5zwmV?hK52Xw>^A5T#U>1
zMhBL2<f|;C-IN$hDBa824ojR5$W_p+%0VY%nvvpS#^+P&&9^0P3=xNJpQEv>NJ2O|
z(jS%JIre2&#Cvh!#iUtXlKcsMm^e(@dhgS7m9~?~`UY_<OB}F#_<rUIsGSX^p0uYs
zyx7x1Cd&ivyOmp$V3QJbl*Jt%CxgEBuZ(lNzNx|#AIEJvSISZ`G3BlFN`KaIESQ+G
zI+G6JVT|eTeAkRA*ut%diTJT%1vn+{diXvj=}4720sP;@Ht{WrI!i{xoZuzh&V3#_
zc8I5+c#P)$dX~(cko?+Gjx#U0h1G@Uxc#9$<V>7sXI)bmk6v^hbNXIj$Dw{8&*%yD
zJo=+=@cxf}j>+d-%c4m&EI9p4hFW*<=o5dYY4+(%ADwe}mWY&dpOcxcS%L-3lBbz{
zSr^jMFmm&KOzou?cd=;cQU>-v!?%9^5ULRWx|c2wGgygSoSLA}Mo_3$i$uA4@>)|@
z-;g}mkdc2-C|zRv(+}})|MEpzbJLhMp)t9pT_K<~s9HAN_I7$7`!Nq}?*uf1Lq*zO
z-ochX|C!(a@T>gWFQ1N$ApF5NgYnOJb;|{`O5`_BdB%p4dc-=Q4LwUwEOX0XFgk{0
z^Qj9%=Se|WD?vB;9v3S~B2n&^qotKxle088cEVFa)5zx)Y+muEB>S;#gq%)y%GZHJ
zY%_zh#g6jFRY7xOBfPbeByl(z)&vMy?70wN5`VUOaS6fDC86Q`!;R};(U6LR$Ltw4
z{sKu1Dc^SXF>~X0O6u9SSIk$!k2dX8;AbL1d3twkd^;&jso|1ta$XxfDZA3?$s_)=
zw$?%xj4!J&wU9ajyPI}aS2kHCyo%pCvT<VgwMo-mOZl=G*T$34ZQzGW=*GxzH)tC@
z3Yk1#^z^Sqdl&B#OFSNJVIXWG>}GMu#I6`aDd)46Z-Gx<X`Wz*^Fd{Ki|yi)kcn->
zl7Vv1e*@WL9P)kK#)=oOQchR+TWr;dOfo+xKSm|qNLy+nvj759zj)waK}T7=$ZO6I
zgkE)kS)R=V&I-PjK{1Ot%IVA$v6e4Cer!>a=)_^Z!*To2f<nk)J={3F&z~)*lz#!?
z(_TBjj?0Sc_`J3}#5iO0;3m+qANdn+qYJ~2p(Up-D7^1>k|6v^j^oFo?<>}Jq98Z8
zsFQkSu&;}4&pgT>9^S|5)k~;WV9~kPaP#UqKKs9KPk!Sx2Kt)%Ch7{proe@lU&*ug
z|AB#uB1b<hEnV~t6d4{Iq@)%3d?Tw?E#Z-Se$P{nKFEuG<2Y^Zm;}}|W&)X?Fk#}S
zP{B58jpu|mQ3++l{-*6``@jRxbzm>2Z+<gZp1XpTXD#LBXYS|Edk%Bn71z+Q<$juv
z4kq!hfHE~xPUDu%7cqu@p5M8P@yjmfwHwajtg}`zuF}kJAKvEZo7r`g<A!(t9XDTg
zE*q}6lCixz`0sn4Vz2_`YK04~zKOHvPocAU7d4Bo;_q)ekLIol%a<?Y@XNc{*V4)A
z%dTPZ>}f1HV<l0?HufDZI!6`GXGAr{fKf*1rYbn6grnGC6STb<g!fS-9}#Q@#OyM>
zDM*6x{Puzm?enNpeJ%C)y3sXekC;ndQYYE;+`|K@J%ErpXXqM$cQg%80=P9+FAiJW
zJ~pE}bJO1fy#4-vczf?SIjSq~`+K{4@{A^FW;7a&az-L3Admz?5=bB<@{)rwU>h3@
z_8PDKEcUYAUFS76UTiQXnQV;7U@$od0Tw9dobyPdi5==be^lMNp{hq>pZD`NA2i)n
z_nv#=@0=UD+C4L+<rblyu&zl%4r%zkBY~k0Gnp9u4JC;@ai<FrtqT>n_IVC<Mn#v9
z{>RV*qJ6?yoPAA6sb}+cF`<FmPvj<jm2kA}4ZtvcME$iUFPc{jzY0mHe}(N&b`hcD
z#g0*}3q{gC!=!i;NXrL=WpBem4h_SPgzf90Mh#ywZKk(x?Uk7jBb+fz8(Oez)D<eZ
zXk1YG6k#Ff(loJ1wKd20)oJQN7+7fgn9MVV&9^G>wfVvbk+%KJGoo5^$_6S$+X!YC
z=XPA%aj%in)~9Nt@KcQ<Y@L`kQdbRN65t;@Qnoh+F)`M}XrV3`eq>GpOv#k5yMCm6
z!sJg~40WN}g}$pgY55{}qi{iJ8s^G$SPwY%mPWi<nh)EV_@Vqldj+Nsg?wwj(RoSu
zEBnI4JdHnAZgTBt$gT2+YNzsz8|QtGIgRgs`>RZydKza<8Ngp3eTscWgZag;Zl`<4
z4n|I#Ok=Wz<(u~*Qbl~_j(a$7P&;paw!!DUxQW}oaXJs(`y>Z59zuBRZY|>auUx{o
zNmJ-`U=h!}wtz#)MsB#~0!AD=k+HSyJoe|m(UwbCeEHu_MI8>razwviw+*8Y#j5I9
z1(Yy^heSaM4RyVdnRfQ>Z6PZqy=rPnA3jKTCWkZ>LZXm{J`E(&t?b-;h(uv24Rtki
zwYLFTS`M`ZRxm(SLP2RcwN;g*$g*$WUOLmcP`I+n8ftr$uz$}^x)Y`J>0M1KA&@f5
zo*g^s%z6}*R8U`Ah0Juae}4<<oM+LuG2Fcnd=8i6MR-lObUue&qO)wvsj=m>4xk#J
z;|c>eko4z>K=#XJ(&($lF*Alq&Yx{4*-XH($;QcM@Mt8V7{)9;#T7I$-!8DgCn~Q|
z;^zCNSDe~how~Go)N@QcfyHOk6!A2zxOLg>$tQZ?$LR=|N!yVzmTtZ{iNBnWaofFQ
zm<*%7s7l^^@zleZarZ2rH0VZxm>z9oFcZxpN82XCQ<R2UoA6(8Hu-}5_B5EwuhXYT
zSF$de{JMPQ@vWYGn0z)jF*=|v>;l~nU3V;lZztA6w+Lz35Z0rkTOuAozXrc?nB23>
zs>>1LFv@%sf{j%$cS7XHW&Bh}2AFEXuVZ<KzH43?z_NeQ{g!)ZK|`0CM_XTEAsx{4
z3Az(pY3a<(t>JclV4Fm$fSTG`N(+;;x3;kVP&>(jV(J^}DGYv_z4OpM|JRg&y5Xnu
zwV4I{=RL3ZvL=hDt0`gMzC(e|C6WYHb@lWr671c*o6d{}B0+WU1}cgZ9NfEy)~-yb
z)3Bf(GJJ$Ig|;A32EqguP)nh1;m~t(4f;u?yDtu|J%5urPm&04#|OWkJPpEba>eqQ
z7+f=TX3yFqfe=U2;RPJs90_ZC;<HD-o!ge&(wcQROopI^OU?p;s4ldMV~W*0m>nx?
zo;FqvwCEjKiFp&g*@hUP@3HYwHj4q&p7@Ebszv*|A9|WFL|BbYUOmfol;ZyHUq@4O
z&~uyr9gqM22%LA0^}ibDXq1(&a5%d0dc>QfdHsK)za9#bFFKH_c~mOM+s#h$_N|yg
zFRr%gsq0bS=z-=J(QVv>Tw*<P=!kM0DfXQER&Fp0z@RXaBASm};%=c9o5y?l9K)cs
z4igq?82Zt<uc!P?(BEy7NR_upUJ3vJAOJ~3K~!?VRadh3g@;+Yznj=sQ0IhX;sy)b
zNaLIOGAQDX(qYWy)5uMuGL9TfgmC#aZW}Givo!Q5q6yFJXBI$tS6q4)EpLY&WHC6!
zUM@C#H>R4izQ_>w5_z_idIA@N->_)IxGeRBNM8Pei92r5Ahz&0g5$?Yfm?7KRo@u-
zd!UdPck)IkQ3E)uG%`@6xlMFru47`s$W5_4x-?9E^$;JfznaM?+QpUgC<<KXvq`t-
zBG~|9cjA|)`20fQ9AU0!d91va#}|((vE>;vj>b{RlLyd4Us1Yo3c~yag7GFFBY9Uy
z;smkrx+ek5gqJTDSn(C3H0!)()Z;WAVXe32!L#Ub10wMf;HK2sAhP0P^mTKYJ9du?
z<EmM#Pnmk`iN;Z2(NP1lmPxrwH0JZ<^*DX!Z5xe~Sc2|zNpNh1Q5fdQN%>w!Qi&C7
zdD_O|t=h)+jtHkZj&vPh*9#i&HL+w(ZSOn$3RuKA4G{t&xTVe#`|pJX9_6)t88u=k
zeH(fsyV_{)&fpgiF~x>4PRO)KP}ekoVy~Ufbj}w@m<5w^Qb}2_DpE2_CZ~Q%>gb%<
ztzA^`TLCQ~QCP~*;iDMXzmdw46s@g?$pt@PuU4(hLed5pO;4RCN*0wdeAH+LG&R!D
z&_GjD6Qv?ece0G$<ps2NbcJ6;z>re|l7Brcrlc_j#-U>+50EOUq;KyE+S)t7C^)UH
z@(^nb=ZOK@aCIvjU!R)`m(Nl6Y`-vduilyG5iAEa^ywy34JLX!F<dc@v=fG|^r~_s
zcUf4hlOm`*rN+Uzr_ZK1wVlnYQy{1SP{}|#zAL1@X3At*!}V2{|B(4ev1gtLGG7`7
z<9yqtZBAg+7$IFlKkgQ&ju6e8JC1eUNbzFu9G#f>XR4IQu3pUZoQ}xE6+CXcAcsea
zJQ>^!-8_#SEXa$2K~oQQ@v42-1kKRACa(&@1-O_L+CllTh}zWj`#4<F*8*P+6HS8N
zjr}MU-E?$kwO+zN*F{iiMI{9yM>gw4z}fx6h?2>T?Ot_#sVvCS-kn7x3#qCsCEcC&
z+og(W>RU&;y`7wA;-U5v70CL$zrt8;d#i3z*Agij8XFPm4$@hB_ete9d#hFkkK?lA
zFT?|7K*I*HeiZsHs4p10m=f|KL@1AKLlbR76+|&03Nos><#XoP76zMHD&|4r`F3e^
z0hfQFM>}{J1rgesH`9jF79gLf#x*x4v<~!pjCNWKQ$Ag<XuLOj+b=00RZ_*^0d=&t
z916pyyrvH$hYz8=AV*7UhcAaQPC{oREm#NwW)Udtj}hsOHpk4mmfOBOmkB4I!7qRP
zpNyz20VJN6Bb`o@$>qYJk-@cGHcK{}Lm<fIBx9zW#puQotz7Am&16WY(|De*piFlc
z=U#scV;aiHWOF|EQsQN^emm*mf15;-%>ax*bH8TE1=n+4&@ShH&gOCv1E^O?{RsZ`
zhqrOcxba+h^X=Sv$*ClT^!dqV$Ye4)v3ogkIge~MOE#Ou)R;>4iNaDwjy{&t&bpEx
z|L~ifG;utG8><;~@;t7cb0S_kO*WhJ{Z2wQo5k~TWYTGJxj-Jz*F`4l1;&@;a#=E&
zOsEIn0X;IAG-U%%<fd!pQX)KUE*ZWvY-oarhy;mgY+WNLrxtmG-1HrF222EfOrO%2
zubnh5nd)(LP)7sWEC}piixhCUzO8}MzheNr(Z(t?RC`CIabO!3erAHPdMWyvw|u<D
zy|+Nc{VeiBq@FW6_B9I;t0*x5Ew0*hfL*Er8e5NACpN%4O>Dk{_PUGBw?CfI9Gkvz
zBU04od&L!brYtUX*A5YS2K2hct2?IYGjU=Vq8ROMnqj<1n;#)G&ZN_6a?<mCPI?BW
z1TX24&7=c7^>IAs!RSdd8PmU))}Miop7a1oCY|Q2tG~_({i;Z3vq6rL<gyvk>5Pxd
zLE_YSoxxK_m8C?adhw-i+{Ey@Ql!jL+~*i>ylO6~Tn0}{61|$Z@f+9DP$0-;vv@K{
zq+X6}HW$c~^%JX<<T6>(nM{!9{b0>xe7;o?2$0QW=%$>@ZulBw8cH4iG_lXLxvy_)
za<9WX#KkawU^TG{$gr>#rcXHbH)Gqhy-;WyeW`DWnx5}sivA|4?h{*N<|){$m@-zD
z{E4a>UDMD!*`3XAtA><AdeSaf!!~GTqY7@ntuS~A`vw-V3pi=f*ZR=k4{)`owt2(t
z-Y^M>u&=2PDJ7zylIwr)Ywr7xTc{LSq=d@m3H;ZOf56PClR0zx1QMRK#+N|KUVt-J
zMvPg7Td}H{qwiBu@Rm7`<sZJvy^k)Sqwk-YJiL;XA9XW+=0%)y>R8%#EaOj)J<Fc9
zEW;<w<?=b>*}Z8enf6uO|L6IfaQcPJ9@or^pJ~!n&Bt-=<@2a4$no9_kMQQ=gIxN}
zTRDIF$+Yw<Val@kJn*L%=qRe^;>#~*Oj9|F-+hVa-dTh!uIAb+|A|9wIYtla&D+o1
z&-}G*oOZ$GOdH#e_H9eK@9`Ho)R_%sHy!7|wv{~b;C-YAY~mL;kEi76_bDGVkt;5m
zMQOH$7oL2K#amivIOa6wO=v{)>PPR~4(|QaUufybTEUq)yqD*me31H)XVbs9nfw2E
zA026rQD;r2X5a~Y=k`f>yFcf?$6uvG7V%G4U4`iGV))QzmcIEUFMYCwW2epKoYRh@
zbLT1^f9iR5v?rK(_61BCJCKfjn|SV-=UBhDouLzEbLm+p($<|KB{zf)Bm!oX9o^Le
z*Vfi&V<xT|#LgU@gzSQga_W$b%-=`aoFDZ|2LGtXBDh4Yq?||IyxV-ns*=+pSb;RS
zcG$N1NbsM1mA}Ap&(6j1oo|GEtI@9QO<Jyvg>fyE$03~0GLiB$4=<4d$ly^QOt$YU
zUftq6(k2gokyUeJT(hq#Z(Uk4Y%c@5L`Ml%c;f79xMXy-J)TZJJkD20S~Sor1J+21
z0xr4kt7O|d89lt2#jiic%m3KQansM^+-b+sv2!JlJ^4I44|Owa;#pieXCeo-?WOb3
z3Lg3E2aKJ5E@z%Nl-Jj2{Z%!O<*G~0p{6LuN3TA~>!0o8!mr)TMQ2Q<eONUoF8zpy
z9(kU_1+`pw$)z0EzmlaNyvp-$e@3pTmdh_bk8D~pY(Q^5c;OM=UA_|#^gC`EH7$!+
zxM4qy!zXjYSFdLB&}u5{2Jz~Xf8v9k5?RoTbFaCXy1E2UKl~s|HXmU4$#a-GteWE5
zW{MB3=Kd#MMAQuDvdhk=Pf41$UwE4LmTsYO)MPH5H;uAHnor(*k+&AEB~{&@%dfnY
znu09-stcK~>ju<B#q`iPABn6atM|@Ik7d<3e+oOByG=oC>Sf#b35)a6dBE7&F@0V6
zq$)x~e>J!6brIqd6$V~uT(j`q5lwwLM>Vn9tAZ?`D-8vju@mapl-(+>e9spq2pPd1
zDaM>YwN~DZt}>9Bkbk$W4ss!bbAYM{bB1XcHId}DPg!44@89<%ljoktis$d+)fFwY
z?Ak^);s2hzQGb$9%8(&PhEbGVq;X&RP%bdsR;ubySHD&D%^WlC6voxJvTSP`)kCLo
z<GkTKc-JpjaIha&pFN&r<v_l6)l8nd?{3y625{1l288ff^44>FyaW1F7KJK1{n9Vf
z@%gjddG`aX*nWtF=-`>(-^rT`m-FcF@8qtBUgS`><cur7&cKeP{QS=Q=|BAvCJv}T
zBnucjbvDUO@9?YN+{5x6hbgEV!<93ddE}nEc>L7`bOp=f=2nq<6_i#oWaJpmJbOB=
zTQ=jB^x<naUdCt7-OmFrt>Ch2E~dOdP+Z-}=_iln?LXhgJx{(yTX(Q37U<B|oj@d#
zBoYaPNRUhfKXxk%s4DK}>Bk<WVAKr8_v?ie2?mcpji&C^-1Y0<GJn-Bst2FM^>dHm
zse69I{5`c?cGiic$g*k0KX~r1&#)oe%%yWDQ(o1dYc8F^8&5sRlARrt6cwu2H|%I8
z9vRL;!k3z@g#QQpn-5S7gwy{K&OD@4GzdSN&L2)j&<QR~no<LUP_%-^rURRJ>VuK~
zU-`2{*c~EOR6cp7Gk`nEF+n-tK<ZBz@55Of5q$7rQw3%iYkHO%G94;V%Vw}{=}BX%
z3`9{pDLWWjxz@OWjm=?E;3_r#IZIb2J}BF|YSeZ&BnulGJec&*@n~uz)Md;PnkA26
zeWQtgJ*e8;6;Jg^I#=9RIBO@m%BT7UdFTj9Rkzc8*qmVSD2!G*ZcS<~+nP^eco8HR
zI`MRxI+ydCJMZG7HG8NUJcX~!9mV6n|1}@&ujlf!CQ?{Eh%aAu8m~QaAFI3uCJb#N
znebTh&dV&=)I~#0X`u5Yr=D{etbLu|{O$pkY-vG=4qkZRe|T-d5}vs4H~i+o=Qz-n
zW5%U7FeJN%U*35?4JVz$NkeOqi2}x)JcGg=3;6Y2_po@&L7&f5DYMU*$a}B7O{WLD
z*L}?0e|VhFKKeU9{*RyW-pcKOq^zupwI4sv-`7j#PM=5t9u<v4nL4%~FFo>m9(w*=
zx<v(--f$hQ3t!;35C5H+7hgehWs3Iw+j!&E7kTl$m7I0i6*N~AGV|iADckiCPrm#S
zg%y?I3Ql+sT|G?I$d}O=H4S2eaEMh><40(BBPgUbfO&LB<4dHi<u4{xY2CGsU#L6w
z(zEa9YSkyGP0VfiaFP=Gsvlh8cCxf-N}-TO1)|oEm6yU9im+lytumUs1uBbJJgM6u
zNs}hVORw$==bnl8maf9kl)5Gi3pi;N`QeXOgheCz*M)#xOJ3)9Prrv3QY>b`pnjZm
z`q|7q_Y!{j)9+E2^1`7NA1S@1C3Qt<C@chUu-rllr3_Pnx=sI46FH}MDP=o8W%;f)
zhMYc@+PX%r`sz&-RyNUV&n7B+rzmLMz{+iVss3akhvp;^fM+EzpQDdoe}!*cbuIsT
zYAY{3b|1SBv?Ha5l;C+Do+ru4a>gDrfc{;Ao0})n+^3fH_0_Pio6ZAk`DoE*Iy$p-
z0Ew=(yuBvNH@@?IcCPw}`yP9Vbk;IwICe=@6Vql)W8<bb`N`WKkm@yoVFT*nf@_!}
zifOFNQCgS+DOtVbV>a(;C1V(0DaTyybzeecSi52c+ji_^>+TLJ%2Pm^T>ExD{B#xV
z9fxTLj+ro$K8;OWa@~y-Ry0tyZy%+Vja+c~)g<<BBwJlaRcjd)m3=Ah+Ro}NJ4lu+
zWmcd1h-2CnMJ!X%Yi>77S7ui?N(AN=<yY=XQRqp+=;5qTMBWEQiIoXMMjhgDRg`8r
zMo1a*<9<y#P6yG|AR8RjY*Gil39(8UK_fN$jjZ4(KrsQ=O4CMRqRu0^x-{&fUDTM7
zyFA9Pg=%2apW?m7XfL9CYkgVj3iQ&`#=h}E=~kHc%`>3U-VrPhg*=KedyuqeE#%`O
z{@T%S61kJl$Md|rj%~dE@oL&T+Gq#HOdLghLlc)>|8)v_^`T<VE^6u|1+80Hy>&Nb
z3m4OJMq%JHo>fpFbA0~Zn@qau623p-5U)M`FnjhNCg*v6p%UP=C(Aiz<NyXHO87?K
z6KL*N&(69k*xp9RzV&>(Xanu-T_M@}5tA91TgQ{@_WCg(rJpN|zaVJcw~<wAw&IoS
zVA|MXk_kx2Zk8=t!S;QvcnLvK#ZX2MZ=x%C9)qR``qfoaSy9fQDd#bJcoFM%cG0V%
znv$Y22Gp0ZV*VO7ZOXEC%N{G=sW+C53<C&N?N~9aSF0>rInT*+q1i@I46qR~HYXn6
zoiQxB&Mt%5O1E}mT-B8dlhl|du1kgI#?7cS2L6~hEcDMz8#HYwwg;7=el=yEH(>#R
z#V<DT$m+k7r?C14+zT7uHIx{>U`TAy69Op{BogM7Op;7wdHd-{_~ZOt-2CIam^ie8
zzpZZ9G65-DxfqotAydXj=e^KIblNO=_gQ}W_-EYk<GVTggxC3S-wxI-+QskheVJ^c
z5V9R~l?_3bHBeVtKxK13$`U)nWR_0ns4Xoek??<JV$Z7g_&>M(o#rW5@ZAef=N~_Q
zj1C|R6cwZpLXZ%hY}>M(jUPYBE1zwlps<jxwnI={kId%6Uu5vlNObVmdw#|fqKtp}
z<<B_b&G&hC)BdQX5VY=G!GrhQ!>){<Zi)-{v1#=R9{b}x>}=0bkdPee$j}UUIsFVg
zbD0{nbLRhss-De-S8^VnvDMPDe-~>P&F2sIJx?b|M7ERC{?n-KTE&m=eT=ae{xcUh
zrReV5Po|)n+OkrL`t+fsFcDc(u)YA|8o0-o{cR;vrnrNh)|hH(TxdtO4qg-gZAD{f
z!gB`JHJg2X#v#zTe=$l|>i*UY&<Fw}uj{d`_|9_}*8~gWns6JaPe!ooV#lEE=82f$
z+{5&N*x|T6>DI1!ozf=;#G<s~Z5{fZ=`UgHo(?i=o~;#v2;bT!mJj$$go(*5hE2PK
zvWRG3GxfOR#}~Fvaqj5Oow&r=B+{mbSjzWuxB24wgmIJe$f0rTz=55tU38dx|M()^
zBq7^HciCXP(%#gT6;j&Nm-6HRwZ=duo1>(p#Q))LVE3B&{P25k)9<8<`1bj;_+;4w
zv<Vxph3I7a*6r+E_8c#Lyq;8HAzkgQh~j#rm%~Hj9*JU3pFNcYum6FzY@iz<ke)}f
zpn#NtDb^}eXkhTXT<GJu>>)O<U(bTaA7IJ$HVRS++S=ge(TzO&@Gn@E9>9bNqev#Y
z+1Hw;p?594cBZMXtyEwU6aluOV*MfJfWPf$N}jx?jM~i|Tc|iOEFMEI7ONB;r9Y#4
zsL7y>E~t#y@Gpe%6Y<?R`OR@nnbf~-P5^}QGoX4U3!THliE_ZJ5&I@BHCp(E40Yed
z00ZmY$DGMPL24`!6$e$oD;>psSt8T}0wEHlQvNPoDwQOW>t@BO?VM0oLs4-nm8B^<
zx-y|<)R@WY+S)rpr6|jp4khycFOB!Yc;7I50{y*>ELgLXbz3^P{?e(u^z_pd4xh;t
z7o5eZr%WZae>F>2Zy-}MmK&}<pAk)cp?y8C&tC;Xa<E-;`E@rip{a-ki`H<`d0*ih
zS6{%?W1D#6xo23hy#;{oWCK@Rc0Qw<OIWdN6|2_mV%Ei1a`x$`F=^~DHY{Jr!Aub+
zopd~(z4rlaS&0ymRPT{|@3vc*bNY0O_blQ0w?3gG<Dmjy1!F;Z9VZ_@kOl93Oo!*|
zDbv})-dr7Dy804MJ7p?^dS&^1#d=B`hcT+An~xT)$3rj8+R4Iq#*&(WOdL_lyB{tg
z<9XB!IEF#Rt$eb4D<j5DqI2UvSie8Z36mzV>GO}+)1F2EhxcxyWbhQOxae$7ojRGK
z)=hlAY9oVBK8H)ro=$yX4!m~Wd+$Tq%Z79PH5W0ou{W7RTX_HDWn|3~B4Ez{qZ}-n
ztlgLsffuXQ7ac(MS-HqLG}F(puh}y&V<pL08gh$a%~wofGWf9yN2z2D!yvR%7>q>0
z1a&<GD)TH$yu!?HoY{OvX)9p#c?i~~3}kncLoBE(Isve$p5|7yo@k3GEv;&+2oZ`w
z_Z1K|{l-3{roNd4mQ6$b61<C^y^YXu?O^FE|A|R!Wx*-HLmDBkwr^YUVEBGSUvc1#
zZExF$i0CU$b~{HK#bkKh@NZuTV<lBv(bkPxnbpKaBFVTZlUe`C`|Rt?fE2Xu-a^^1
zQ@Q-YIh=aRR0<ESXW6QCWNVJ$+AGgzWdDAMwhg@X(F$^S9PUbT!IfX)!~vx&T(*{R
zXI;ZhU-~DejBV!qSDs_(mi<VeJK38{E;*ku1Ik&oY$dBU>|y%3moRVE3{D(7jIFB{
zvA?^B3FF7G;*$j&?DDrqYKKndyyJ^`{P}n2_H;~jcXl&!@>!fSdlH!)8`ykUFlpRS
zmVERHUhkods_ACIqBYbGK9)wYpM@*8fJoqFJJ@zWa>-R!GIQEg#xz&5Z0YB;B$~MP
zs*5>cP(5B!uy)A;{;|4+b1%P!Y2!yzQJ7)j2OqHWaK?`}2aDpv#Eet(EL#w>PIVCb
ztxh4Lz7i9|QQuQ_kMe!<*oxv;Sd0r}^bmoRAFCe1{%Zn%R3~;Dofl$s=h6+{Yd(b;
z6q;@8y~b2sc-kwFdXlXE+I*{De=xDaj3gf?oIVcznJh<i<`kL|NyErf_`!F-#;C!A
zId(*UmM{94RVz0${i++eVD>b;^>6a{YfHn)mcm{!WcYBu4m6f6!<WQjrswN2Ea5$w
z0}_aY0O>ECClU#e(qB>+BKVwO&@p2u>^#JXnU~Xd=PTU%^5PKWyj%_`1j$4Kq!;|C
zy&#!PSW3$U&-_Rx{Od9pQ>bz9w_N@A8u&af_!ej)K|&>YLqrAGVP9ng=|iO7Uyw+M
z&@diEXt^)-%_z@<g!=W`fTl<YWX^*`f<(Z#2*wVRj-y8b{uYIl!OCSoH{e|r1O7QY
zT}Vcjt`&DN*YhRG{4%=jopp?(BACCC@3ZP3_3A<AR-%pP&UNuG@?{GBjjd?=o(kim
zd-eRq`~U4=|L=SqaRihW=}Cekt!Df`Flu)FqCWhEbm`E}w@UVfc{qw*5jKxvb;zaf
z<!qes@yNd}5hRj6UJo2Onxae_BTl=B#=Y<G#}_{he&boKbon?e4eUvVbc3zWoTqUe
zja{*LEd)qG-(km6=j~we`aO{eh{nDoLJ;H|KQVg}62i|P3Sa#d5{2RZR<NG~5`c%7
z(~mz90lq5P1Xy|GNIr7J$W_Vwh&DQUA3Mt4s7FAVUu2R5=aC7TOTPe;!9}wK47VKB
za&AzV*(>nIWJ3C%!-6!Rfiwy@GfhZ64#7CWpctZy(Dp=#FhA>J$zU<~NBwMDL_Rg~
zlty7ELk2^h!bECnSJ>Y|7J(A=Sw>q1#cP-nRV@<`P<4*$-_}bA#Ngq>rP@E%i803R
z(w4y~-t)G1FI%)c7<k-dPCa252e&Tg@9%y}XO7STEK*W$hvW+kW5OL@75DTq=;w_n
zHA|A(Xz`@twnu!$#V)5BO3BN!RDbj;fIi@AB&z=n?MPEc0gf@^cyRgjg^d#UV)-}#
zng~6Immmn;pP=f2Y?SYO*L!HIf>VSMD<Zl=`JoD!068N5W9l`AVQGPi9z(~1M9;$X
zduv;{C*Abq^Qcp6h73K@GL5XD*4ZlWp}JB@MLClN`e+QWnK8i8?}VBuYpiwYDm*i3
zssv$d!<r<OjU$dObpaIUKsqwGI+Av8V{9Vw*t}Yvhx+1!h&w#wz6=}*3lT#zk-mn?
zq=W%uCvnQS5wvb!$(!%}gZ7LUB%^?;fuU+eDJ&8KVe^NSu?=k|Y$G`;8wl0IC`9Ze
zYNdH6ml%A6<}kL-;@T@inONrLA=f3{*2Z%)gOqFN+u>D(a8I#j<}5`@@s%eB=o}Ws
zgUC1UjhyAK92wxI{}}P&?s|mzM-Ao{M}bnbr$myuGQQ>G@y7$4wid5Okr^TB@+!35
z14r=lQJ(m+cthiHo_LIbN%Kw2%W<&aj~G0BxB+8P8tz%Ah!L%8fzd(_HHUXVJdi?S
zzv5OiqGCmZ^Jrd2%st`#Xo4^US2=`RY#t5IiiWjuXX?+=k4q%~DjAAXhc6qDWW1fj
z*`V6iiWbR7F1~D8-17$^yciE;b;YxbQHcH7tbEQMfD;kURnE1OmfJuIZCAY-ksnCS
z!1w7YpN!L8{^B{#_R`=XdVV4zc8C+|jiwg|QjTBCASnu^C#MflKe9%)fY{;5;LQ?0
z?naES+Zi=3^G29*m1_<BMexFn(Jw|L;@PpiOcXmZ#vK{ljVOzWf_(htvvAya#<#?S
zmvuZKj||bViD{W<V(wh;5&83~C~CuKo>v%{k3<BVG+oC>ZTIfkWAew9zL5`m<BlOj
zi00ufw)s(pm^-cn<MA0mBbUw*^M@LZuvfk#BR#@Kn4#k_&{|1zN6&LBu6BvjxeWTo
z$r^iX!tm_~pLqsxvnoiEHMO;O_zrCxa7T9z&l0Ilq4xajCeUY65P|YUf??yQbMBeb
z8PcbWt(&)#EpOtCNkiDWWfwWcNI*LrLqJ?0M{~9?StM6b#p%<=v3J{6x^uPyy+k2X
zXPw8uk~X&NX=lRpb2+9i%f{{dBc5P7oRVFQXP!9_V5#y*XS9M=`XH(lGz>Y8bLO7K
z=;kVRZres@#tU>1U3t<@q3$FYI(`}#oIQg?%XW6Qv?E0^lTMvRLC0<mwr64s%((Ls
zfuR1MFiMOWx{_P?@5^iZ(%id*mP75K$0kd9GyT*F9N4i{KdQyDo^$?7MLu>=C`}dj
zYGCGBb2)Y57z#42?A&_@grK(nahy1;oXtC1P$&`3ItAe`hq&^ER<v{&4yVIj5b~~K
zAq=d5u1`lpn0q!v!~t*^OiFBBq{SBRhD{W0Y1#+HOAv#`&jQ7r(NNAEIg1h;YK;zS
zoPAN(BLHvGGG?wu5c#eY8e12r7C##COxA3lk%Y5a+o9uir<gQn-H!fZ`(c?bz%>yW
z@@R^3`;d8D!ca7{qu4ZU{pU+;jv{>Dv=td6n>~&@u}0WD+sV_wY_QtKBey&8)H+qM
zp!8rK)OXK#DK7keF<BzecWLbqrcD^Y)@{2esOZZX(@*5kp8bqDaUz+0J84Vj!Z^2h
zHZZV$zz`}#H*H-Rix*2DA-)Pz{Wg9(_-68KuX0(8Sbq%G9C_6}cdL@X?*kH+S>tz_
ztfAFvnX4beHfjr1;i+mLj%SFV7IorxmyD{(cyFCyHpI1X+;mD5eZP_lb&BoRLWtnk
zkm6%kU~KjJ;APNczIs~#03ZNKL_t(G@hc9yB#DAjCQUn&Su>{4T$N<g<{jh`1&kPX
zDre80&fwmKY}~Y+tmpW)lt@{`^b0TL>}ivkFn$6jjA&-#+BJ-xaW3c2n#P3j6BvKo
zX!fjMN^9`*tLofdB21nk@L5{6{s_>|J2O2ba31xk&$#os?c(EE`q@G{ih5C5kRnmp
zpR;F7Amw@CIRtf{As1Xz3DrN-=HcaXVO#Leu;o1C>+I6MmMy4a_N;02Ds+q^K_-hi
zWzGeB>D<#PuWaPPYro8_6Ni%kr1Z#nx$x`hYFx#e7pxTed`g2Cka)o_Lg+r`{g94I
zU_qrWrAOVU8QlJj3)#DFDSNsKsVXn<X)2kGaeX>!3|}m-VgT1&Je{TQzrpIA2ZO%B
zuR3^&KN;x93;$R8#-gp?dr}7dr6rG&L*}v!J#iLioiq|Jcv`!}KQGdq&fpn|UP}Gw
z7X>IvmYnDL{0Y6)<ztT|QCPucH{Qyy;sY#Ry_L$k-V_A<<gyvk!PDavPoC$I%lW^Q
z5oisN<Z?NGY&3v{iO_P9pr8`U_hb;nQlXGh$cYfVt@LUY2Ktt=1R}%(8_*0!+yYK<
z8%Z{bWWckTM9dX9^K1rzFQ~#IBx5}HO`1)e6saa??kbE7KvN|z6THI-iIMmfk0w@V
z<;l1|VS#P(>Dwrf6I-8Bsy_BTq3jTJGVhr_u5@6oSW8Fn5WuA#$GjK%3c}Kv0b81v
zh#p-3v|l|HkKtdpFjYQn@L<t2<05eOm}a(rDILe?N5_#Fds?0_8B1*cj+wY<xs1Xm
zP60s@FRvTQSu@5{Qr^Jzx7<u^_ihek3Yk1(I(<r#<U+orlUuwH^QWDA1!J2kEzFgk
z7ye?7Rlqs=35!!~xkE3vijHW(rgRkd-umq3YN2VWa8y2~;y!d)v*>hb2XPyDxz{W_
zb%NHVZ4^IYQ{|_MZtz~kPQ>rQ@p0A7EXquJ8gotdBa+3$pg(>fn}zN60!4(!R3XKp
zo5i0kreVsJ+&u3%5~&i#j2yt4WlO1<bUFY0PbXT%U^v+mS=N013Gcr59-po|$mEko
zlkUv0WyNCNeeXTyf3}vBr%$50En|)dKuUAso_mqD<c9H`B2?dyzR>l}18Cj0hQ*5)
z(f#>i5(z=7R#Mgfc>ekJ|6<s{5`OW+J6N<Y$yL|gz}S9OBoY~(yZ>(HuiMKB=iJPf
zPO4x}dy;}fYxvo{f1&UAdE9!%X=Jh<i(h@1$KO~?|C27@*2|`HpjFab)!}%*3>~fe
z@E*41Dw#TYDs3AVQJj?|ifZ}lx4+HEniRQ?UHtaW-_d8r*SP5T8hX`NvSUvthnBv~
zUC(^VnOEP$?Bg0q(#a!t-^GHpyBUAZja+$hJ$u_B*|M77KK?#8fA6b2@$;Xu;cyAx
z|IxR2_SZjW^`5q%u<;$h%Sxuqp3c(Op5e{;p9eohBq^yK%FVZag}$XJ+IBADSHFD}
zG3<2y<A&2&w`o7k^$C9ai{G%fcmUU4cLT?d9zgQ)%PD%|5#C$An<?jB!B;Pw!b3k>
z$(H?{AQOx`<1((DJD$S_)0DD>AN~9R=G^!lw*BSzeAw2&?Khsm?|%6kGW{m=!>`O^
z$KF<YB|CZap+9n{dOTmbcqYY#DOxss!o3f@%%ExKaqU0NqF=I=%H~l#{qQ5KYc1r0
zYj0%U*b;uY^ivL}C8R32=(?MjI;;kfZQ&1h{SPbKtGMI#Tj|)gmEKMDyz$uYcx}-Z
z^wz8YN~X%_+q;C1Us}SaOLqi^h!fQIKbCLYauY>s|H6XBn*gX8GKm{5pG|p5FGdb5
z;YYWAhn3yE`RYwKGOV_M!@JjU@9!UEYilNS35?lRP;Ka>f(VN`zoj$V)YCtt@h~6}
zWPUSYg|l~KhOhiU6=CMKgkF&_eb-ER>a8k%gEXRiOM$nsvJrCbCJAGeBg9f{kTMtU
z%@k~|<{FtPEdFe4(ENwzVZsEcdGX~?#e<O^gMX}5+<=CrsqCS2Bx3m1tc13gnvUPi
zv77PU%0Gr(Wsnu!_9lGBoX9eCWa!e63#m{1^~9Gorc;oHy0(f2pNCLup)6W1TL#_N
zT7?#>7%*0LT=|tp291?(gt7(WnyalYUn&C3)lF-qQ_(ba2qX%txay{F(*4<AdFq{I
z<cfP!SkaejzjZrbt|;Zx7aru#^LKOeZC~MU_xv|2TZ*~$d*9@}2k)Wp^z)f_>Uh$F
zOBlWMWB&BW^Q3D>bIVO%qIa=i--b{5{r!L8K$jQ#s@jnX?d#x#$)js?x5x$CZ%wNi
z>eP25#fe!-Ijc@W$6K7<T^@<KCPN<VVpdIOgn2F(9n+?7(Zd*yCn%=F6*RR<W=B<Y
zyA#Ei4~o`(vvvtB6~~jF?P<(5Cm!a6M}XILh<D%mJ0e*~#qkH2US3M3`!LTu_Amfj
zi;v;<lbS-wwBZCoc<k7?mhE1aDJNXO%6DGjkWA3NeIt9f<`{I^Bw9Xwg&mnBsig8d
zpH4ypfI!MX=zwvfzOx(wsq4DXp^P@m-*|!z6;rtHp+E7nf4PR*;uKPXcX$_n_|*@2
z@Z-ZwJ*F9vZsnaf-{8|lOW4s>&ADfcM@UJcxR}J=Rs8qQf5MOd@K@6G;)ZKyanFzb
z58uA+r;M6)0X3zSTyg1i9{u%C_~re7p)KoVDie5Q7ynqYkE<^~k<UK!e|~D@sprvr
za6aF??Ylhsk2Ld69YH}slF$D3Bx`oJ@%-~IQ#oJ=jRTKo=Ewv;{lWM7&2wuxf8MDS
zCM1dCQslnX{N@)w=4XHWE3K`2_-Iu(r%f10?@=dFv3~_y4zvePWY$%;M-hFii`jFi
zmEb2_@jRGt#@XanyuxkY{~>D_#mwUyA(0~6vWffe{uOU-En?!(I@)%w=6~*afW@EB
z=hr{|CGW4=0f`(RKL2MvT(yHl($_;lMIX*OZ4?jw?BBTi*$*iy@PGXxRZvJ$fDj1^
z3;l<6ibR5v!YqHj_gDP;FYadT?soQVT*T||&S%MrjSQMFlQGT3eE!z6{OQ>@dGFPy
z_{o3Y%i8^&$jm`rc;s=`wsfJsXj?yQJSX-!#2x?V`#ki13+K%mPck7$(ao!m{f>KH
zTFQj+V<=SLkVX4irgJZUdFJn&dE>wH@O{7K;^|{aA~~>i36DPUH@cIl&<_u6_=KPR
z_@}(Pawi|W@Ml);>15KZb1B*S7PsDV8?UcVF#D9zc%Dj<_S{Kr3xoly2&?3bk-&pQ
zq^|qGK*+fagIpy$+a0Ajg0MkD6)bMTvY=UNw2%f>C5MzU`lTe*&-iQ6GaEWD(<b3+
zvYX6Rx(YEwNomj&Mqg82Ll+?^`&El^-vCR4s@9_j+X-PO!-!N?GBa0H)jhY^A#9_Q
zRrJ2Qf*N@nLl4|-Z<`<YvuVSSAJ?9$wbFy*L#7SGL}`xU4`Apz0Qrzj5gAwSO2qlC
z48FB0E&YZBje;!DNRa!Z$<*~b8Pbtf8zBHM$H4KknKQmWyLRp(lM7ns(!Bn}UHtGD
zPjKw?*;FN3_{XYNPC0P|wL^}lu6r|UckJSoNB+Po^FQY=58lmP4?M^IF2U@JE@JyT
zk8ta^zRwzO5L3oBIdQIVLKT{Xe|6zxh;G<8EKZSzKI8tXwx(VT>r1J1uc`{Oz%^^T
zz$RfKtZ&5noox%}jWKD^69PMS6s^#%D&RxzoiRejjrNL0a9ubZ!&*z}!Bn|LN9zBL
zqS$u10RL4ig%ei4pPA1I@Jkhfr8R^2?tlD(>rbuYg?E<vKZ%7z6xDF`d6RhR`FAY6
zSU4*sUO@wMCN=Qths#L@PneL2GUm*h$g8it?_V=S7eGoUF(s!;Vk)_W9EM3u0j)|t
zVgJA>vTZy0-A{hNgC!-*`Radg>1hkNXBo7%?4qqB&EeKA3W}4I_dSm9eC<M>ee?--
zXZvu$z(Su`F3ZNvn@MN=hchM%t7+`phpWE%UD7#8LHm9@D(F?%#ooj19L((Ja7R~Q
z2=@$zFT?I-@9@BbYuS()#N<AL`aXRbGT|6*|F`iZi;7wM9*9zpOgck%cZPH(gGeSR
zFR#UG+s%QlG@0!?D3~#TWI`akG+VZ8r8AuklheoVyu+-kPvfk%Mm~M#kN!7p168Y4
zodjuGI&<_YDG9+;$|N<F<?P+Chpx^}_Uvn=xuzF#n@8)R7P`|py1H`I6r}<u)mx$#
zz)M{y6cm+FCOX-BxRb+T4+ql&to&}52ykH6X7(S>ko7#ImtfYHzRDQZ^3sBZ3_WHf
zg@uVQU?u+cvJ4$c7cu~4<<+DQ?cz{(hW*=jQ97cUgb;Lh9ip`@L#``LqM*Pp`b^*v
z0%VTW3tr)SpS?*<(-^*U(`9_VbQSCNWsNc@=q4q@PM*uu`hEQ7?honq3aG2^!*RzC
z;ok;Mrm(1h#cyW)Fp<FujRC4Uk-0NDVcl0rS8q#5jbXvR<_yyWcm<dmfQ11b(g6u0
z{uFiVTVyh%Va@1uE#pff31tcyRVsE8inYe*%(hLw)XOTARH2J>KuJSPICJ&~CyWJ?
zya9shLb&~Bj%i>|2%v%AXdpHclQe8Bqkjz}MeImSo1lg9FBVPrA)~f^qbmV%Nz#(n
z(Yspti)&*(Dc`nPobL+V&Mgd$`}P<{L1@SqSw+N<%fKe#$ZzS?$w6iz7Z+EmpY0c_
zc1f~j;q%=4{6@ZV@#QR8|0_19A>DnD-Fw^V?A%8>Swcwxe7@i#W?p>?XEc?tV*WF<
z<$NxDHK^k-#q{aZz;VMaXW--xiVNZO1sJ@!HHipqYx#hwLdPH6;#gskZT~nu+AGBd
z0ZZ>;+hBYV6gATH2h#_f0wc~J&C190r)WWB`WA+7Iu2{{tb{Ny%bx`L*A?!Vy0r1c
zFi3EYDb!Jb=hD!HUC1f?P*|lg(kxCRu^jN+abP3A{J}rdJnlkn`|{a*`tMJXEG*}m
zZ+({?uiVYMYg<Arl%9tq5f)}3>3hs%stzq><&I8B_$6RP<0ww3-N!>KS`bNN^EBWt
z70!;ODk!v`jm-)Z>Vuz_km16HZD>i|kYgEG+R3KfZB$nl)76zWtrxZ{Dyya_bAUBl
z_Hz2{5mb2FEMqDM@N&CYvV1EmUwndvTiR%-EvL0}7aR7MFk$3Cio~%rR#zAaPCHNV
zj!fqsUVmc?%~J;gl65PWvb}#T4?OS!qPzhPZlGz#44XwEIJkEc$@*!G9nhDmDHCbk
z`dK);3waE(N&B`%Y)o9u+>v{^<-T3Es+FFTIX-%KA>TRYOcrizAzRj)L)%ucVapz_
zJZ=L0S9Ej2s6Kr5$sR<iKhc+S5+f|AGqk4#eH$7nU3-wOwgc?TRWfeGK(;j;M`Kwr
zp6Aitkzm-!f$Z-dL7&P}&7-G&lT9E=(z~XT-HSKUndrs9ehqw_On`vybdIX3YRZZV
z=m?5znd>Ai%4w>vWoP?7_V3wBS^s&AYM#%C$;Y#I)63ymy@)g>5Fkkwlrm=A@pSCm
zh}Wx<!lXww>pNw4R~zNE1F0@A;ZS=gWlbk=^_<Z>eedsR_Yx!%Y1Xb@#kvBx|B2Tq
z=v7Dd;1>URg~l>fBmo}by4%m59~cQyMCqs^(ez7Kc4b4?rpC}lk(7^y@kbakxw06v
zBddXW_#Ve}$9DDS%CCrMo}zkmiw1jarN213(rIrRCqHkowDbkzkJHcmH>_WT)6Xe_
z@{Apu94$6%J>@Iz;>O7L{;2d2Dd3J$Z%2^nXn4revFG-oZG4=_rn}hq`5U}>!UV3m
zU<N;b>QhQ8ni)TC80`lKQqr}LL+xn}A6~|W%oUtBzMbzru^E-kyK;j1nkotlcGI2i
zVEM8&6cw!IiN7tRqNa|6J9n#EAjB2N3_a@WXmn}aJAw}%4W3#48}KfK2;ZwJ$jsvw
zR;X$qthr+1%_-o7QQ=sVKa000QW?W2=8e+IID!(=iQ<C^pysxIL|=y8q#;8H>8jQ-
z3Qaw6v<vj&ir$PG+J{}c4lweVVaSffh*UA>eeGXp*!~6&F5XSA@)8cUbs{B9KL6{C
z&VI^+FD>*7YELlj?3pZl{Wo-r1W5s2PIBz&bJ+IYE3}FvDNE=6-(*c~?>oZH0?R2(
zl?mrQ!QQtB46UHnZ<j2sW#;TToHq4DI@Y|!Q-A-QbfT2Lm9Ti(dJ3!hkZj$|;uWh&
zSB>D@nG@N*cs>Wx85S>HM@e-(sl!`Yv%Mu4Kg-gk>zFii4yR2zo>b>vR&CnGij|w0
zGJ78ViaXi5V>62vuMD0ur)`8tQs1YJtt*z$((O@DUPUE`_~^qA*qiLfyt8L9qPd36
zYnIcQswBH_GhGE`Y~8q#(n47F**{pb7tWk_Hg)L@Jo=Z{IM|t^w5pEe;T^2qzCUzI
z&&yEQa17<U-{-YYH-y!>TAC%$<-r|m*^#MY&Yal{tuJELij{0wyM|<+G0d4ck@X+G
zz-ynbLlpO-rm&mktG7^E-9UQpIyUcZLnce;TV2G86>I3q1;HjfcJ4aFq%-C+rcSbS
z#VXcpI>6*vXVO$wL~%ho@4mZ$tve4famH+_GW*%Gb34nHuO^!+r@F*r`Kk@%07Q;$
z+xIhJ`k9QV&aq_GM)vO9!nS>F96qq0#xYYk<CHP%UbBk*9T~h_7kdvUm@{uKRhgYE
zUcQPQ?WN3}JDb9Ni+SRu_i6W1G}M-{cKJ#&MV0gtt*l(P-T1Lz<&p{d4Ijr@vrp&P
z!PWfznI~AX<p2^v$Dw@`_C1by(~f7`x@8pf9m}LK0~tDc921Wn$ktUWShsEs>0X1G
zd)7<__bF%d+EwgpONT)i2EQ|E_%bJqG0))(?+e}ET(Jw510rPgY|fl{+UHS17L4&C
zEhN_BoOM4yp~!Eox}CxL;p#{$dvdI4$25<-?CMJ=jo9I$L>w8B!qB<fK5D;65;(FL
zd9EL+xag^WPXZss_|mka$loq<U9TVz(>9h*M2{*x8jj=WYR@#eQ|G44l^B&fB5q*!
z4-#san9Z09be$^dMNMgjrOVf{e%&SpA3KiLZR^M;QxsJWV8Vz>UikCVY}nU<%w;L+
zJ(TLUMZEaIYE+c&+k1$KGv_d2cnxb;tYPh%Rg?`J$DA`xp>IWkb!*mexGNXt$&dw0
z0BOr)vTJHQ+A-8g*eY&$%K{?%?48ppSbcW_I#ivzYD|P5YgOEM7p|ydV2Qp>cn;FU
zN~`eE#g+N13XwJeeN&^p*otTM*~I3+2GZzj$mbSddVKf1pq5R`ABY<0NiF82N~)MK
z=RD3hWfB#gYx(`dFVLN=WajK?)YkN2;)y3Ru&j%Pt2QHn#-Ybklikjeb^d~nS6Iu`
zF}->9wYO<Y2R@K2<KzjWc=vBFv#;GlSVxe8mDa(-g6G7=0*)D)J%_1v-C1T=cJ!Z~
z?e{YSJXCb(tNOmVJ!8{U&MU)lMOai~w|9cTf)sZ)Z8Wf?kE`OXag&Unc_CNN8^?n`
z{ZBsMaX54clP68f+!`}9DzRly^sVJ~!Y7=Y#e)oK`<feZ;&djDYNA)40kp4pjfY<P
zgk0!!!jMNhYxqeL)vXpl^JN5^D{FY>LTLNj1$Gb==Jo=n9C}>6ByR?mG}U6y_RRi5
z^!(6F3>S%J)6hS)U&NMPR!8f+Ps|pUskeCF%%8BM3BkA#KIAthH$=$9$FB*vuJ7t$
zj4jasB#}1AEAS310Y)1WY{P9->k0BEa68+pVUc2Nhs%HmL9nafY?DZHGv4qg&6$-5
zf<}I#z(}jH{i55nVay6cM^YL*MXemW1?Si#|7Y`#X;(XWx?8+)JcRtmP>%3mXU%wi
z9C_m847c&z)gYgn{4^DI5R?_t4KJe|+Fl}A!11S^%a!xS^YmRmXTheHKyCsWt{$Sf
z!qt&<c(?6LG}iOQm#gQPesTJX=vWC9^Kx+p8TDTsmqf;~w{oS0u}89R7{SJPp+04p
zjTC40c#2m;Hti*5EfGB`qw-CJFJt_CUy^WTFm<KniqDzW*0>NP!sh;H2}6d+7P|oH
zNrW(eZAsZJ(53sZUrRv90~2aM(?^cfI=+ePY%w|1NLYF)Rk$?rB*M7+?)d)jrbF1t
z%_=z^$JVq3bxi{)&KzXho))9<#4)%UL=lM=GAgj+K*8^+3)|4~)sn9f1f{)d=-a0^
zvimUGw(q1ns|}{kiA9nuQD@dK5$0y?&#F&MLXRXQGodK`N6(Ph@(0{1urO_91S;m(
zVFfNjT^UmbyZDF!Lep06T7vB_27H)RQyxInv91an!eb0BKuKc^{Y}_F9wk6fv4j>4
zT{GXf@c^d;41WSgM1kD}V|BL9EpAl8)G#azTL+-y+8aVUn|5%0Cc?kt6PjmD3qw_k
zp2$Zp*2YFsi79Zb<P?`A%&O3d>+oJ+XZI=kZcJd&vN7AdA1Nv={<QCFn`j&Aag`1O
z{i}pw${p$t$Htmo=p`l&8rjHQGt{ds!8L_vrf*6tdxv_(j`uih!l2!|Z+}X?!))8N
zpPU}T@{72nZTbpEAEb*VI__+pL@`M5>0+JcUr$QZvmur1SiCs?Z|XmQi;$?YY)^T+
zr@O8Wj*$Jo0Ok1G`SbV}AHAnuJ=F_GZl~iVx(@xtUvYh1{1$_UkB}j-`7N}Q2EZQ6
zfLS;=gUVc%OxEAmOr;8f-xAHwW;T;0kxYfZBN{go*UZN!U`@>_xPs91a#^xDk3=F#
zDwPE3k@X;zOhf=JHmN%inK$t(nQ|VfWXeg1zJfBD3?i8#nGj^N8AKxG|4v#=(hs_#
ztl%cVe5*auMGwt+ghno#(Z5TZ3V5-A(CC8!9s$@yc=2*%GFiV}pl^XBlgS_v6ciMY
zP>wF_F)TZTV@dz{c_f10VbsYs4xl5m$vp41?xK1%;mMlRSb5Yw6yDPqVKTCO$4P#H
z{d7Lo#Y%WB%rtScaGh$SGiE(mvSzhPMs&>kxl<=z?SnGU=OF5&)(27skIylQ#uQFr
zQE1dN!2s&<J4W_0>ya3JI{Gq{Y{j<Gz}AnjVka(<NBZQ!fiH6Zf79px<-YH`#QLt~
zBd)*16mBjC#LMQ}EtkzeB87rF*B0hWdY=C~)Cr+iS;4^{8DfFP2gqeTlBuN5X-Kk}
zG@cZs3JOT-kPKdStcTbe6iU*|;Ymm)lL*Tt%}M5wbrYFz8vg3)P;+J{U^CXu<gEMI
zIBMG2#UMAP{XP&rGR2A=JBJ&?`83oACs%3=FfrSujX6Cqg%54x=4Nvo6LYlinjNh|
zd*{Z&Tcbnxo3Oz+8gnsZ6SnRmARSy5E+Yz|Z9V#;2HWD;y2<4-<UB~F3W6^|*sH?H
zn%des)C!IePT{1-%ws&wj@fMNtZTUATi0;vl#?jy-p%^$2k;DL&EhX7%eeaM-@x0k
zm_3KH+E9Tat*4I*f^U-;pABYozOX@|prQ|#U-xycxZrFijcH`{l9eO}p3HS;3}wm6
z&EdH$&7Y<z6hBzjSN0G24S5!VM9pw+y6iMoEnh|2v($%IQqMo%{%<s=x3OxU<i>xw
zgAoOLS+Q{!QY0+dlzz?e)mU+XrS69tN5bGusl^dRhy-Wd@UPr@^?A&kF^$IJPS$SN
zPS(?Q&@v0(c42bEejoTz&7c$bw}1a3bEcollySq^xnT{56IERG)o*d_xl`G+_#f==
z%<4Wu^KRN->sc^j>P5^RB3ZV1pVMAgeHAa}HG~ctvv_e6kJZQEL&=3O0My_;+}t;9
zAvAER?zS!sH&wA`7{`hMp$j@yOqj_+`-gT)Q=VvIKm(Poh-}_Wy;@@id<d-rm5`;O
z4<)Z}A-$!pb)$U>RX`bf4S`wM{iUgY3m24r?MaG>9TsgxOK6@<z3cbF$R)ZgurWx-
zm*q2xRoAzi`;lwdz7==heww`5*6;;Ko=^wsn#-p)jAqTWfk9LGq^T>D*T~qwX_sA3
zbLV!pwzP-$wVx<o6iDgOXZUy~460zu_5<pNMFRZtITOY>c#;#&yqrmO-E25q%C%R|
zW9Q0Mv}OG-YbA<$ao#mIbK_NWXj%Ok+gsC7eJFmU>4OrGG><!*^C#4>V*U2;f##-t
zEG%@fA;e<`t4%u_cx~Colu^2Tnmk7R)?VGSc~kKc<_4qq5ylU}tAfREXBPtMw)<R4
z|EISCI|i|1iDO{0i7N_ELPHxgL)91xG|&BNOm?s?(4{_8Xy$3H16$GBFGawo%E2O(
zN8z_=P&CiMpYWWWl2i3{x~QVf1_b|=EUD$n8^6UB=blbU`&L$OYYF2(ji119LM^+n
zz$H=~BYg!(-A)*O*sFdZdltUH?;iXsr(Sm(6Z=&FlG^$PdR6yk_=u78sV+lGXlU-s
z?)A&r-|iQ1$-)ZyHdF_ooTRa-k)!}glD_?iFnmaVijqD^CyOemtEr|>^B{%~YNj|D
z%$&?wkq0Nuy@CPhW!!bw-8}o&Lb`L3R9Ov!`!_LQ&`_Eissn>1sqNE`5yMB&)Vne`
zzXui7y{WFMWyr7*^lzvpAtWS=XdXC(p#z&JNcdwWQYG{sG?adQn;1H%iB!VoMF@dR
z6w{~Q0H&Tah5>^|aqO|f=vz|`63VOV89sa%b(N(c1r^ox3>-9=etr8ec*tNXOAC-v
zQczmOu;C+Us;~6JSW0@;)KODY%b=md=-*IHK~Xu4^|k(EJVcWE#zu-$NzIFLXas%w
z)wAHaN4WF8XPGeXa*i8Vg+x%<yDuY#4W*{M*mr_Nin`u))YLUHV)!s>%8J5dY0jj;
zIYjyZ03ZNKL_t&v%Bm>OZsYF%{uLkYuI9>lCn4MS@%W$q#9moOc~LT0xdD+XrvJcU
z3>nbZ*JCh+sIKeB$PvS6s4hbwDDT~ue)ScQD4?;aA0?@T6?Eo%w=i@8L<52WB357-
zg|C}LoiX%%H`o=ZshKdw{a}`A29=SNHHe6C)?^h`;Y%9tbb%EC7lSX8*I2%VrfdPO
zt3w&XQ>>0eSlmiWHx5mKr8isuDsyTeRQ<!^LV>iyr=fxH(F($(B}GWX95XCf&Hla+
zsNy`zukAB#;bZB=7$<T~y_b3VU?&mfTju-lzBKw6v~l#|7F>jUX+8+Y)IJ%RR9o0$
z#F~WO{RYs~*pFeuhEP>nh?EKH`ZQ8jm_$lRS#=#X6{S?yH8JJ%Im|j`5~D{Bp`s`S
z5{k-e7&?47eS240<0s1dF=xs!Hf-L7OqDQT&;Xj72Qj#RBZWzUmp;skPd&uS{etS!
z0?k*VkiN}>89JyR#VK8E^{VN^@Damls;eRalvMR$K%Z&^2^#t~QBhoA`$k^-8NMUT
z_rlV-=>x8Bg!VK1C*n_Wej8r5``YmW(c;I!4iS0d-1q&#>MeTg5$UU1hYMVo$Y!>+
z%MwtNI(<dsn2m83DTO}~oOIjDC?E0M1<!+ZNX63B3t?dvaPyb4o%A^FtZO(fv4#hq
zev>(0xs`rJIh=Bhl#J)nXk`y#uA*6I5QNo<?v7S=ZeGr7pR_P#d^7VmF6QD}e!`%x
zC49bqAD!D0Jp0Znsv8D!{-u}FwSED+TC)_CH}cQ_avA^m-5=3edp!U8%~QDT_PaRw
zf}1&KWRgv-#q>I~kUJm#8zqBJ;?CR7=lyp+B%MvuerPY-_IE}VOoIBxW~xe;uw~r_
zI<p?t2nHWFnbQyTV%($={Lc@5$l}&A&X{#Nsa%@=!-w<X6Zi7=vaL+J^xMoC*v^NG
zHj-^w&ogf<X3m#xWm0`7d$PUAtbdz7Jo^!+Uv&#p>O0t<>Q7z8PGfxkU!}Ls-5WTV
z=*yYY8ri&NHEB;$Gh`CqzVQNfZQ4S8LoXix{qGq#_iIcnKS19xBU!v;3q`v>;(uP=
zz%AeXCUVba>IV(r)%$<VTT8bw^`E}UyrCK9f4-jdfepO6a4*;2d>Mbf^XF_U9LX(T
zpT*rjyOYjr%r><2Xgk=#?(JFHGJ@j56wSxa=9`yIVat|%G*sod|95|6Tdta$Z~rf{
zn?7d4?!#<a+R6J%wmClPe<&y_r%zKe0~#yXz4`#2^vI^uWOJS#Hd(^Ft8eDSh79e6
zy*aq_RsQgo4;V6eE;pPzfpu#)({*?&fBDPneuqRMr<`{!C)e!d50Ab;S613fz~K_R
z20685U^KF4U*WQh(Z{fr+uQalld4n;M0QXb4ej<0O<`Yj$CH6DZoz7e69-DpW!vud
zR<Zdx9{`&()W|q-?Tu|YbL%6YPGbi|Z=prTGHf5pmWOz_`()ap2PWJ?(2AQp{j43G
z!1qKCgSgmWGmxLR9?$KFnD-)&&-L&!cVzMnU~Y5zpK<(DxAo}7S?A8BZbz2>CA<0g
zul~f83$Lea`7=EF&IT^J?j`_<Kz6_Ozx^PKJ9^XH*qhpdEW?KlWdHW9WO@zbJKws2
z&g~oM)!2vEAO0WauiKBrW6ac9q}P4O+CA-1C8=&ahO^IZ#j6|0_79%o;a3-u$z;fw
z*po;x<=pEy_t*+{b(c}N=M#SY;LB8xn#L{HoWbU`8_1^j^VcVz3-q1fgflMTj1dJq
z@X%8n?mP+wnxRJM#c_((#fv0=yyod+UGB**%#PBu9aikv{cXI6#~3xiF;|oveqy#d
z?Y+bpJd2T0C_ES1q4Tn3Nu4ha;}xmci^qs4I*0z>6ArnLqcPfqO^t0?PZl!nxPdHM
zu$)V04rR%n5=QqgXUF<3YxI-|STGf$K7{vFu_g!)RQg<qFmcH|!gd^Pqr9Tr|5OC?
zpL&XC7jO5g%>=Am@IK4toe_5KY}w4`yGxlkq>6pVoWi<~-l1F8a>?9dc>E_nVQX74
z|MLAWQ}^uKv`I<puGKv8*kc^b#)p(hz=G!=pkm&6eEF8|QJ&t$FMslD4kFpL`coc$
z;32k`{*;kT6)fF%kgfYVm@vFAHH{;fIJz(IEZ>SKfMxHz!ecM{&vPlOKbBb&YI*2a
zcXJ@wms_rzLG9aHnQ?qC?)t&6IMDkfe*WcgjwUpB3EEk@u9Nd-RPo~Hd+9F=n0)#)
z+E#zSbAMaHiRWCysS}5?G271P@4rENMI~>2_z4$J?!&-QwbZsQ;*Pr?XYkCgaM6@8
zytjNCA|+Y=-rsoag^%%sz>_Kdv96OzCk&^ecp`h2EnwH-3?h+0S`G&!dvW0nw=lUo
zNp{1>EMDKn1-H#$<Hs-Y;>TN<d({n07~RCy#T{fb2l?C6kMr@?!@+FdaAm2TxxBWC
zlO|85p}vIWpLK<^KNJ2-s+yTNw21qEdMEqJkLO=+oJQT7i#T=0WZr%3*SxS~SFn<p
zK#*X<-0K;(Wg*}D(eoVa_E1{{?u^U4rkASKs7_+W4Al5)%f+D3rbbZ`COc`2ujq&<
zn5lsEw;w_$34lWfayUy4#<0)Sm<5A#KCp+xpan2M)2))?UM8I*Rbi%_&gy~9k94{?
zU>E|P9?&}f2`URk`j~tpLJZBD=;F@Dsq2b=`@E*}Ug$RAs-_fC+D73VdTC^0rFe~r
zF=O=wSEe3h6skXJ;;rq0u#;TmY@NdY7(RUeG;>D$OC&P9|NP@T_`z<z|C8Gp*1wz&
z-uj4J&K^(SjYSM9J;2{qZDZ%*^}PGZ@yOEEJp01CAd-xmHiN3x<vjMnTMRt;d}f?-
z93QQH3ySJFbMgS*{NrCqdy*v3abPol{?i}XT|JraUpbNTw?3yOXIYw5Q4P~3_UG~6
z{uj#**Yoc`{1Q#oZ*anyXR!2zhj{G6HGw=HHBvJ2w96Q}Zxwg^=)c*~nvUopoIhIj
z)v+2|#HhDcKO2M0vvgJw;`js@IAjmuS|Plz0mHm)$C<_|j*Jg6@`V#NYYeA-D2O9h
zT;XL@RqjMAUkqXDHt!+-pt(^*MJF)%N<nQejgL;=apT`9c*B;a4oOi~l;B{lfX4bt
zHg}~dDJcRvm0*A%74~qFf?dc#Sk8r@s@^C^HO3C!d7ei@Q$2eY@AaE@9cIs=E(3)0
zOh5*Z_<wnO?>IZkGtc+?);Xt>LaSRXsg-j=0Z<lE0uclPM7E6$&Ny+MW^K=|ckG?r
zyED7@uIGN{&dl!4c*fagz!)$lCxL_l5<&v0m9x~Xj_TZfQoZ+&sy9{DX$gDx79Z4g
zs^0MA=l6!H68<2Yw|u~7)?QEdlu5ktHw{Q{9A)L@EM9dDvkN}E-gr&Fr>DE4m7K58
zttDb5R0PF=cAo#uKk@wIWjyfB|IGDECh%-K^!N4QOF^OFlS;#+E7x<^;!6JJpZ=NI
zw^y?=9kx{%q_wj*XkAEBsS3&}hqL6WYZ)xU_O~`OSV-Z?V6>OZWhq)O&yDLm#fe?d
z@t=1W8GF@(OpzYt<rRz>Glw-d)F66~@!pXZMvo4bN{dAwe2MTp$}(jXasw2A+`u5|
zvN8k$Q5dAHvnQlc;E`wN)}1`|;4Mt_s(Ae;Pf!%X+QVSc=J)dIZ-2=j-akrDUq6Ln
z73JmSOq{)l8;4J2@XTHgob5!2a&o<$boLhzUfh`N#M*jt4?p>z|3KNS>-cw{UB?TX
z5741*hC#<mWk|^a*`iM|m&Nx|q*H0iGJ^hrd|(yYwgl}b_i#>3V#VC?yz)V#36M#2
zm>=#vWu11SjofvdO$Vd68eLEd$`OojB=np_TO?4JGS=R(u$&!O;!O$k5UaEZOi!0E
zv*bqPC_5@BCJmWO3oa(}XaFQhuOfag)@2DMg!{KtxtL!hwCm0mlsI=7Dc!as&e#Y+
zT~DU5RQs3AqeE|$nq;0!(L0p9oa?g4z+eIC=gAi(sZ@sM!@FqS_!tk|H<_jn-k`NF
zkMbaufh2<Rib_U~9m92N)_~~XoxLXk$;=fuk~zDRhI8GT!a^ZSF7H#wXOUhS&!Jz%
zlr$Ny$Uwe`pU+X09;sA{in0{F16e&73(Ql{)3~3L1J$g$d={tQYDmzl`K_NCbbK{#
z6ZUD$KBDcHcF<&dD_L6Au_1}nd+hXJ2%*c!NSuv4dya*c2!m8f5IN-7FP=~mi|Mhk
zPW4jLuFE8VY7lxQ(H=ml3PK16-&hzBk-(EXfuIT|<6Q-2)eXI;Pq!OBv#dco2(q+v
z=cy{3;aAVLu=0T`IMLZ_^&hn1DS};@35z*(UxX#CB&EuVz|o9w1><Kd<d%&Ovtf20
zFYjs$N_z36DMXfYt%Iz*>Kf)u9)l2q#=V=VUj7N@$zwFM=a7XaHf=wN-`&K4gU4vQ
z&_k|R0EB*`)#k&Ug^<j<>NXy_=XS1NyN+cuhjaRDS5Vqhe0vB4`P?9x;iH*8=StSC
zp3jJ3Rhl1LHa#6j*wxU&K>Jw^9yv+-g&qb58rgrolDlrXfjigV#OM*_BpyQGc^-v<
z3miMujE^9d%Cmd>4tleBjvqeA+2&4qdU_#zH;g9&84Dq3Jiedu=~uCS&Gl?te<cS$
zJW9crVOd3FU<hp|_s~``pXr>Vp|M-@qGV)me(me-q`S8tUkW_Y$F_IgBby!I$iYK2
zwsq6j-wy&u-a61(zm@fj7(Iz)t5$RC&1=ZEoF(f^$PLhu&9L;Ul}xLzWw7fkCwoV7
z_w6@x+g-PiJ9m`U3*EfG=LEOhe?M2RTEz`(R!}7*2%nQ5Y~`PR{yaB*;t{6TmRns)
zt!w*`QsR=QKx918MC(QmeLqr6Iury0Azjp}Vl-pZYeJvuLpiWL$z->s&TZDDR9mnk
zqd)tvG-Yj?r2T)CH+z$cRZjho5PrHRUQar;BKMJQTeeAAjX658=#R?RT$j#u8LOMb
zNSyxo{Wu-!-1!-)7j1J$6T6iD68nr9<d8>HW=Yo_c_p<Ml|iy=#N;l;aMs**3pd_&
zCsWcFIMdXLm+#=M-7T!UY6d%Z?WYiefUfQyCd`=6g1OTeRwg*KX9q341<oEj#L31E
zy1KfN>1x)mUBH{KZx7;2c+}L-<K|m$V8ezRXghY0zH9*sIXb#>EMC2uIa9_VbM5Rq
zUEuCJ*K_k-8>s3&MRVr>yS9J8wRhdiwX0XMZrwGEN_z-T&~)T|e*7>0%Hs78vSjM0
zL|r<wv=L`Xj4!d+WZ7@TUrW`EsVM3{iS`q{X!hp;*10hEB5eq1&q?XWeXJIvM|>b~
zvBQpyy5od4G?27-S93)1*=ToBTgk`<Z-y&%V)W{g6Mc}v!c8S4pn0-lqz%&z*aIPa
zw!OKXt8Q6G+31<fPaWm`Gg+%%aF(j6tNUSeDqYivjS6Eo^?jR2fPA)}@)5P9bM5@*
zXTRW7#~=v7;6Oj;PM@SVTd+nRl2eBcF>>-;MyLBYdbX89zK`biZa&<xnNuy@NGWM}
z|2@jbPG{=4Y8p=*;cQDM#X_F$j#f@Lwd030=@Eu74{Qt$WT_c9k^0&Sw!QEK+YUG5
z7jtyCw{W_-gWO<0&5fryb>aw(0~JgkSIL_%Jjbb)4jP*~$z=y<K6jdq-ob#&;voC>
z?x%X<3?|f8aOU_C8rwQJu&;rdNi*m^v6r169^lm377B_mr6^fsptqA#ryI!@CB=M}
zw)5vW*szD=?L}tKo`&pe<M8oQbochsdj1?uZ5KFq`V<`<ZJauGh&_j!nL2YSr*^)=
zEAJknAR(U}q`C179X$gYRD;L#`3pI-^Lch1Ymbb$rMA1l{yrK{pQLjjN8rDL^Cu6`
zoULH?^ob01oa5NZb7b>H`g^)KbM_pA1wX+m0xR;11&X4Ay1G$xpV`B|{(ciZ*&;&b
zICkt5W2ercGT+JRbB#3YKgjU<siZC(;i>0dr#oAu@#ub<b5+cmGM1K;M>*5dMLsu3
zd-FMt9X-V9E}yb|7p-0WCfLN5f7C7ONnS)Ofa&q0Fpu0v`4Yk!<*E1X7-Pdsk7Jcn
z&6o`dXuEVBM!lR?yR%ROI~k*^)Fh)?N{R8H-`!z`U}*9q2+ST-sx$j(HJKC%N{3~`
zw-%MNTwy=EZ{`r=p<B&vl<L7DJdq^fM9|>RP-rJAh~$7g6i~I7USsV#jU;oeX0=Q6
zU7}4ZW~-`-Uwic<xxQ7`vNy;6EQ(ycsIn0ox4OyowQ=}l2laJjy!6b|9B=DIc!J8>
z=}b<a;px}kBku!3(0%?iJ?UEJOdCtf>65fHpJM;XPG-)TNqMoGBS(%?HS=m#jcMoE
zH{K`jt7Si**2d#p$W@W*KEksvze8`%M^NPSi4#=SPp3AM<K&sM9B$YTH4_=-xA5Di
z{y=MAp7zs+IMF4THEklD=Z<sod<Xg5AQxJjIC1PS$C~@85?PwtyOEB3+!$hHW5A0}
z&dN_MKR5N2oL$JGWNT|osv8iGKZ=IvD=sj7C4n4m2{Jhj1)<Q2hY>5%y;w`JPRU5@
zGt2w2CgZ$Xa*pE<p=IlImRQ!rAq7}94btAX8+~Zc$OfV0V+Y~HQ-^gW4<STMW^Uc!
z-_FLvoa`xQdaciIe*QQu14SbPRdVK>IRQ;cYsL0RLEV-Kza#Zx!7C&<{;n7G$4w;m
zySSotOC2g<CmqSbR9l4Qg*ZIN@5h}Xu}zmM%K;-nFacCt^oS<j$TbxRyz-IU{m55Y
zIi`aj{XhTA`M!e5q}aZ1k6tXZl5uWa&H@7nxH7NkETt=5-no|~v&7C^=Z`qEZHRX8
zaJL5-epNDyE(SzOq8QYpwB&-3b`x8%db9hB@3D}P+Cwd)#P%%(PT+Q&RIl`{1YVSe
zDIqKSiu+1fw`{5XB$hNBeu8@rcyz;tO>lBDiX@B}k5DpZXd)7I8W=^XszbJI(Uhcz
z2&+z;L6bw+gXOf&@|WckrRY_4f}+Nk^!Oy!uPS5365GaAytAssXAOPkUUM&Bx^E@F
z|MB0lX@4VT(B&S<Vb4PdiSt?J#JV<3`YM+m!xQp@9b*(&a6CKsR}W5xh!2T=uCbR}
zPql>*=2adJrKR^aI*o^>60;MFZy`c|mf<p_G{DBZP#MccbVHghhbG(hH88XdC~Aim
z524I8AX7YQ^6g9%*?TIZG{^U+s{VszPOAq6jzg50xVH`TtB;&z*qDqWaaD#j)I9}$
zRBh)4by_Q-{IoTMATD%LM}=0@R@+{5o7P{oDciXhk*!UOaBfDAi=~&LkCMt6qU=Y#
zre^n&j3F^$Cn&HKoyphjfErD9@j!}IE1gb*Uo=mI>d{weSw#Cb8cFUaDwCmj8A>m1
zqA~k0qJ26e-J$5V5>H7vJHRRuq+)eSCo7jH8%)}=E1iU+ajcY&L^n+*r)3i<W68tn
zXh>Zo1j0BSn$=`t6KEdlerbp&O|rBAUNSz?nJ>|g!NfT?Ek=Enu&iyz*0nGIUy6MX
zEkoBXN)0*;-AMxhHy%X&#_<=2?nsinRJt_{;fqNl2|ST6o7RZ$c&Rig0fk~Q){b2t
z>FVQB=uYyN=&V;#Ojh9Y(tUo2eiP$hRL>EMjK+f@Xi1{M9#UUw{4GIBoS;%=;>5)3
zsFN@cg(!)HtQlO30a^EYj3C!3BK@e#rDPCiDmnuhXYmr`S@MYH?g+5Dq`E=>o=&|g
zNZCn5B_a!esj|UesV}N~!3*?)O`@q(u%}CeggUsy5`_M1zv&ohqG?!1M(PAad1m8k
zB$?{AV=_>V9MI``p8l=B8WF2y?Q75hUAc-wwMO5O!d5?S3Xy>4g<m58yi`i_WCH+I
zLj|^IAJP>P#X=!`z#^D7R*z=~?J2pNpzav0=aEXK!>>+wVPcHPURuF67KDWUtB!O!
z9oU6u-tp$8QlvwE1Cw(Bm<nJf_~OuL>)zN>X#f{3tFMJdED~%<B|_aEuDq1qmG))O
zpkoI@MB6m|>-WYfaFk?ysxN8Hc3Hk-{3Y~6sr=4b9!ucc-HT)FJDQBOS!vn0`(SL}
zwCz%jOf9q2<(&j)0V!ppM(hGRfd*6?K5V)L2JkpO8wgqxq59WVWgsveR1@-#21Vh!
z(QNu-^0F0g@g4*7)_o`EMd%LVqiIKl=P3Ayw<Wb-34~SF$j}C<G5e5~%(d;SB_~Wy
z*4Ltm&`>2jFZfc6mQC=b1slK_`IuN7G%V2o)S^?98|DEa+<nrjTq+gB9u+T4jB@p%
z@|1e0L0wD<_lr72<z)k`5&%y5fz79*H?2=M-UTt=XiS4q>9%7AYsx>Rv4e=M64P_o
zj*!|f-2J{r6-4Ma{%855<3GwVF@~vGHhInoGGNKeE#r8<6L0Lp45S@?ReX*5sfy{4
zXcL;Ei0ov-NsJt6D302#$#0=Esi)a(I=ld<2z^~q9F$Gf)YknF5sW+(c+&=93*3^U
zF|33Uw1AcOe}^Ca*~eIZ&5c|>PV(OV;}m=gO*?@rk~H-n;gr*0(oTl5$by@`$bAco
z>^#_{ql!v0agb$>vYM%U^Lu~J##`62{>EjTc>jI6YZvn$zI+oqx9ue>%|=5_kud)|
zPNVy=Hbv@N!HGX?8h`PfPjh1TPP(#ECv^e%nQFfIfBtvwm{DN!hmCyoKm8+j&n@uQ
zj)VACpSFO6c2Jv7VdUq~jfz@UTdslHAVc~^Zh7=C`QfAYu=?5?xpCPv4t($d-Gjax
zq!kYl@N58)xi)P4W&G&x{+4wsmvGB%H&fAlnj<ZP-2A{7`Sw>oMc<+A9BUuYf!IDY
zaI<Vp;4|&2kMrn_^=#jJ3}5kyTR%2ZvTSImrON0LwNpo4sJ$P?-poFah^<6y*afUc
z89Mf&{8G4o6%kmxM08{OvhK%f2i<p+HxcX4wLVKGiXZEmAtP|wM88Oosktu_WMa4P
z>Q2x0*zG7SI5NUHw{2IoX?Le&CTzOxuxQtf*+)F~*fs-%@=qSkN9h(0FQsLeG+;s7
z$51k~BqwnCip5jAuH9y&U)vU>l-&8Xf6I*C;~Z`6)->62)wb&U%v`aao92(8;b<dx
z5&EUuzVzYdPk)mo!`nI3J(BNy>tRmrdXMfxI8#wJoLe3Y;!JPDR*rXM@hrL{l2=j)
z^L>#S*M5@EUR%ez`;MEh`{+Mmth4GR=}_y-k+;hiB3>tvCUw6=d12FL6y)fI2;b2-
zrF0w}h&vt@I))Qp%v~M3qH$IalpO3}t!5?Ixhf-tZEGf{se&e6+Vn*8XF_}l`xB9C
z?jf825X3*Jb>ZeGyPDBl$$mA_F3(kUbNF|E`4`;2?nV~W_OtE7Q~H5SRVa;`z0pY}
z5KO6QWxMAPCu+edFzIE=*!BF6`O!0n`08K(E$epeV%xxEW>z%w?tv!Ct0%B(UM*Yq
zoMHL0d3fns##Qz6`WtW4nH5Z)dj%^N%_P@(j<?_5PLHTz&6?%ZRtlQh3yhR4yuNiW
zRrL#4JfTR#?%)+BNbvEPzF-+E7tNsk<cDm2w}Gr52@(kAUv(ShCtl?Hzutn#WGD^}
zA||FNuNuYG>o$-nwDQL0o%G~A=3c&>#d9Xn({z+~cI=@yTV&kK#Y`FwbyMe%={n9U
z@9d$NsphKdu48;vmMxp!qNyuOs<MXb)?9~I6jWE0CmM)EP%QLOGhr$-W-Vk~b&kTR
zK(^y%T+TI1=g@j|4_o&hVf3^`Tse0dBG-kisHSQEcJ`iVVPyR*uDj|=3hl?)yloG?
z*#hHcEM!u3n%YS-DepPSn>$Z%`IWQzaMvz+^B#*<tfc+GZW_A>wfvM}l$A*~{q|q@
z$?M1X(!c$8Tyw=#e*M;QW-M67ipwT*YX44lHJrlt%2;^Cl~9n(oil|Kd$#c2k@GRf
z6(U7{O9OxVmw%17;4U7!aV4+rJj4r6{DiUf-z1$1K$?`08CJ{cYp!8z*#KKNZ=tbs
z5aFenwP-m@=S|}L(LHQ?|1ctzrYxO8WU5%cVmVFw-{nllzz|Mo_>*A*jkufEh!d_v
z>{3|D1t`(FBJ3!U9tEmPQ)-hNwpD`Mq3VakyFuCj?Zm_K_tM=(&|CDbG=r(&cR~!g
z80~I@B^!t?g%5Rn6cK(@dWY^eVF^s=e@fR=F)K;bABDalbR7}2m5?u8S{Gefl90}n
zF>cOM?wz-c{X5>~z}X(ISh<|GqkA~h*3Z-hOBvDENOsf=?z{I6W@lQ+)J$a4ORsRY
zCr|y1MO?FVE^Wv5^3MB*$x9GYQa<)FuAV0N=ci8~Q&T7(Hj?Gn-O3WW*t~fg9euq#
z|F1t~?D%hm--Qn74dTqTj4kWuowqmB)RhG<&FsZ1STb)SO^4rS`}>FSQW?rJDZI>Z
zR;*Y`<NlpG&RnX5Vl%LzLWg*5nO!;27>DdToUtl3h&6SMzU>$hOkznZ?j!&K-AM8l
zl)!>qyLV?EBg{)PFGr+3ssAx`%<QcduM~gIB#R+f*p)*H11w`^<h13>h>Y!rL@Y4o
z%(`3stNXUF6uuHtn%9ijomeu(m#p0Q5QDp(;_(lAdH8EzV&3y_aj?fjgJ`f;?Nog=
z^0P)!)?A^p1fhiPEUbie{9=)Ou9x@rG_h#m6#9!*JoJgX8JQ}w=!OSaKDmlQMJ=Cx
z^s7|&9wj&BT0VK>Vx;h>9a+WcBM0aiv6!!aY8{!XiQISdA_{fOSvT8b(Ykw?T2n<|
z#~B(2Mzi|zDWTwtOj&*x-*{*(XOA4BIHHc>Y1>K+Kdt9ZGw1sI_}HB{xlam$001BW
zNkl<ZF?m!41>XlG6K7mTX0V+(H-DU4FP{qOaz<5o96#8=uzBnF)cR!z=`-nyb$t7a
z_tACwAWfYEc<FNP{qlEMHC}SAubRic@=2=8Qrz*`Z?L!~k5@gBiDRlAS=zFd0~gre
z1ov*do-_OQG3X0wX0GPDU%8jgv!}T1mQQi}k||7Gyq0;Rd{*Ca59R4{Hf*?o5j9hI
z?CW2oqNj=aWq0xE4J+|vk;#|ez&AgCKmF$pbFQNwNr5?6-@(<h#*nF<&4U}RMfiC`
ziEwF?zE8Sh7&Rj*$PNrLZP{%+_VKkepKapmyFbmE%f^9M#@hS8#DmvP<G}ud4Enfj
z7-M00-E=lS_z;iWy^;fa_A^+Ju00_jRl)6_`YNj@2s$%U_{P^BraCQ|ef2$j<(`$C
zI(igQT}M^8A}cb(xZ~k3vvjPattV?q)y&C9r_R;VS(HXI?d~o+N(f)2A+`FPB|ts#
zZ~q(eW;s~21lBT@-EN$Ra5`Sd5a`<cM3ytc|DEMV9Xx_IZ7oNw5nS02ZClGpxP4g5
zDy6P#UZm9g+q~=cRV$Y5I8t$%OCJ3?ax|m|al&p_N~e37P~ZHH$XnHSmX_Q$bSn`%
zbo;fJTN3EE=uV=~&>&0D9a3)ftIvh2BQrxB@{&-;Zcm-tv?kF9;Cr=PaoHG}E_i(L
zu}7IQQZi!ZD(<*y0bZt>JMX@Wy7D}YCl0Xp;0aD0*~jLsyJ#QCQ#)fN-}%b@be%iJ
z+;yMej^)!(7PxZVEu7u?M)*xs$>?!&sH^OwZ0gl~@xHZ`c|KD5j^0G7oI5`K7%S@~
zotbHT>#LunIs@~sdw@spT*|2<$4FJzQJD&DD>IBcANdkjjKPUBA{rN*SP+-1k(0@p
z@#RgaLW7-IW3j=4H%@)!>k44Vu$W!n_(Af@g&Kc3pOaFE36tw>7@Ihw92?9FjTaPH
zv3XSe*nX`zb}>nWR+}+(FVu^O-c+ZtpF+z;*k#oJN)VBlZf3UK{u5_Y)!@h;z()pA
z>W@!3^QPBw;=%|%f6p?GHW!#aX{4U{Q->$@hYjPXUh3=C-T{pOqbNVTAIS>EVv%$v
zP2b6PIVNT>chWR&Twc#>Tla(T=x#mAuH6UN^WjPACyvDv9`&=Y<j%V`vUb^J%$hNk
zln0&d&73-Sp4P^bG!ICsGimYzJ)Cd7fO;9XEJ&7Jvz*;8J<B`qz0aoC-lik3HenkE
z1<>@t%Y64Q|DLW)JzxLfUvblddO&dg_#QTI*}~iJonhSA5v08o(-y8|<HoyKwdgWt
zO|2&-B%Ua+ck7$HyJtVUKRigmtLOS<6Y2J=sIN{le%4~@swc2yW;rjv`4+Fd_9|!E
zdbP$(Kn#`Z`5gZ65H;1350AAVNHPD)m5dMrjH;iAM;XhP%*7*5^XU_uYi#Az>C;eA
z#rTPH8PRu`m)_dK^UuG-r1^8H%y<Y<;KLnnvwio6?AdpS{{CLx+4dnzu3pKaB`djb
zU>i+61(YKRHADk^S;5U8f0Q45_lun0@eHr*Zei()Wu*NaqsNXVQ$C6-7S2RUNio~X
z>o2{*{)YWD9B(!j;f%}#xxNb=*!KZ@8%{HE#&pVK09%r&7|msq%lN~KukzN*uh27m
z5)<o&apj7|Y<vD`cI<wix8B@FS3!agOE-Lm2QIJT53g=#Aa8xeA@;q;$X_eFY6^wf
zZxbC+s)0@D<$AOl)EB6C2I&WV>^Ox949cfW<3iJH@1Y4Q*)*upu^PSEGhI4zphv1I
z38;aV5c*rkZfJ>sGpzlb&Jb6R%Z(g0eKHob>={6{Two^+SLPwH!q69VtzwhA9Fi2s
z?L+I&PTJP!SdElrC^Dhsq&mjzB{Ja6+S{k^Mfw*?ToKT!g>GgSTp8F&)v^Oy7^aH|
z-$uW-W|P3PURqUl6rsUM^0u$&oMC|pO=BP#b?zFzOM^!bn_hmMEw8=AnOrqfCe-l$
z=C>Jt`Eur7wv;jXlk7d!L0@MZ&8-*cYH#NBnMMZkF!%BmjPwSm9ygH`<t$q=4}l;(
zY6ffPSMcWc28y9_I$I9$#_L;n^`%YJ&X~invNX{nT|SzHlQTT`(rdi=;!E_8n##DE
zN|vr#$UA>{mL0oyvuV?Ix(hzimn^^YbKJYAf)`%5;*1Qzlx4%VoRo|c2WH_fC1dH>
zTX@gPTMC#dcNMlhDh0-DLs4O_YuK%A*Y<s@e>*oRikx_mP{*+3w&R+i0|rf6Hf`H0
z21;f&2GI!DK6m<YZBX5J>{Ht3hF8-<Xycj(Y3SCVTLuG(2!K|;Y?G;Cu?D9x;BK=Z
zLwgXUJ;)bx2t10uAIv06x8HO?mx%IU@U@nA1RbmCU>Q-hWsJq5WcIv?9REWjo|osv
z*Z1+oM;}FYe88cmUZ@7?7YHW7B*KHr>Y3cWWElVc`+rT@+ztHS9~<WgNJ{jFv;lly
z;t7E-B?w7>wn%+t2I>3w2)vXq2CIk_;0a0Nv3>mR_<ouSW4UJjMBaK=P$=Z_CHOuF
zA*q|bf-7s=_^bc&3#P353ZI#s3W@O<7#!5_2brU%vxCD28))t=(6Hwn&UAE>^F1=c
z!}C(4(<w`<mi>f+>pruSfB(bHj9an+kmPb%&Yy0gp`n5O2O8*UYhvOx0dSJO3ickw
zVh)ihCnY7RvT`UC!a(aYFgRd6f-gC>XB*<yukzq*pWpq%vjG6M0FMeXo*3k{Cw|M%
zUVWc@u}Golk<AWp?&J{;95}*(gAH70Ifv&7WIjiKu7G)@*(6-sz_a}w?Ax=O&g1=D
z^}PoeQ~4}CeaeT_$iF~Qq$y7c{8XBZ5EP0<26IJfD$0TfH6(Z*AOr{Ac${q)>bZZz
z)%?G|d6T@I027{`BMiLmu*uk$0aQ+04BJB|LQ~cRAq@`INH_w*?6&M>0%6_P{v^#^
zJNRheAI3NZ8l1<`9omiGN{d?ikTy`T>gs-V;<t0O8R6TBN`Xad&M3T&wl8Z(i%C*g
zigI1V@I^%K%JtM;3wX$|(<rwJc-g#RiOw!#oYRVVR)R2z#q0@k+Sj#(WlyfGv^=e(
zW%>^7Hwob5mXFTBxrx#wOe0N`^y2m1Wi?CNh98Hfpgr3+l#SZpP>Ku{Wgb2r84vP>
zBHgF=ain)WpL^ss_B{Vnx(5A_Gef%!V`1}|7848Gn~+j+<(gZ$@WET0ypYv0NvFz5
zr-IowiuoWngb7txai&0;a!)`iO-2M04ud&KRYeBh_mN?o5uRZGtG{A*-z4t6?JEBM
z@l6yI&kk@lHYhFq=?p1^=11#W`<K}ckT{#F{K5kK3b?D-gk?A8brP1&9pEG#dSiRF
z+>uh)ycHOPUB}Sr#<?h<wAc`Fxw$g4X^1bani%5FfT{BOuYIay6y;AE>lTg-FmhEz
zRZNjmV<0{FQ@BLN`HuG=AzH-;snC=}kmY3a0F%5Xe)QAheC9hV_^`QK*HulWmC}=D
zADkZ+I#x<BMi)4TN;sb9vGk^o@%=>)GA8>j|M7!nKyYZ=YmEBZMqc|bKW4xW+DN5?
zr!Me<rxbJjEu1S(<SSqN0%F7@GQ)dFdnr<03eQWCk|KCgBV?rU_+G|te1NYpjwhda
zjvdcG%k_^v!k^7PPOjL+lmGG?&S%|EU`Z*Mc*RY8{`RGG_GXzmtDfKd=p>T4q*DP%
z^HM2NsTAF9Cn?n3#-oppVEFjCR1Wl!@&v+51y3x60xD)(c=nb3+;;1&oNph%>u==e
zXP@Df%_n%|%a3ukr;-VyQbwdoe&O#_I*pJ;`uYZtB1I-$WdGJzxO&MWtiO3Q#Z)C9
zzV>VSyx?itR630(f+t8j&+OxH-%Wh;&;AV+b>rFf%s<heCy-Z4(HQ6|*W1Lqhx_^X
z{B|17T!;V>lYqQb3j6~3d;#AB&nvLwjn}y1kqz8>(_~1c+4jP(XztC1Pt$_akhWJf
zqadiAw2;q#^)XU)Q|LMT7A^gGM6t;6Q?1<l$uCj)+RME7_WNvp_avYC+PAr7u$;kT
zJ2>4wNcSr*^NlZlfp0H9&fq{RPyGBTgeS=McJT6VU+2qz_DybEc7osUJQ+!NR#33$
ziv=6YK4Si*!JGQ+?g5GZMMeUP1va8VT0~B)c%UyP%{E2rBnBMSj^@j4#|`4~hNJp3
z-6ViPVFy+Vh}Zz{B6ZB|2tW=4qn+fOB?VjGVZZiocZLvRfX4#W?#!An{D!g5mO^kp
zo}^T2ks*IJXpi>c?90^hF#QGgt7PQPP!N^3D_X5H8#ssJvf2?2|4|whh>Y<`0zVOX
zt9sfLWSZ;ld6+TRN@}|ga`apm9(lHGKg@<JSMlS$NA(QE`7>vk_NiO>OllObJpT#}
z+h1eNm7n6an`<bfD%t<W<23dS=jNq#JpK23!w2Vry2EOx@QE*c4X<tz4X^)_-mH(1
z0~|Zq%H0otiK<s$;l<7G^VYkkc;vBf)0V5G=g=0;UC7e+;!Awvb6?<FSDs>^|2$9r
z>S=`Mk?rf?g(u$PD}VNN)-OB7b30GO<Rv4&&AAefu}j;A_3UiK-=jHM#}M2@>DnHI
zsy(kJ2CDol%{yVEeqcAQbZK9<2R#W#q77PYCem)lX~m<Cqo8qE)E~61g;yg$H<>^c
zh*extRTB25IZWWvqHfqZBK#+aw{AbN!-`WeAGKxWNwz-sEI<6#XQ^J=!-!Kav$rJ}
zS=hEAX3m)_%`$`iOo^gfYH*Jhb$~+M;ABPvmDP2OudBw-57OFvp5AN`Av}iFU(R2B
z_b&d+_x~ejdkTnDIpfBRr0skwqG}Yi6$P3*`WRL{nsK8?(9_XDs-m3MwjSzg%IO*u
zRLOq)%4)Km?F{CNNReXnxbf6f2+p2sWH9eDqIL{pMh$1M_X5qWofOr`Ll|XwWfhDW
zQ^&CK4E^2hv~+YMQ<aRZDW|pN0%arXsFb~Q^kf-1dK`5l%jvq%MLOfr+SWn&h+3)`
zq`kXe8<j|vF=pI2hLxx2?dqWILJy?MsUKgD$PZAIMLOEr$rS@d>yf-jF}iLv1D$R3
z=Y7hmMlz}bT06R_7*Wf(x>3mdAg!$}P(G4Op`TRM2>Sba88NJa3+<hFl_RMiTZ_#0
zaK5FTd{I(4vW6;=qob=&>o;97l6$`JEh_f?hM&E0FakcLw!IOf$CA!=(;0rHLM5`{
z)pd-i9YJBRkJi>UvIR*^T^)m+?eyo1v4F0Ysl0Rr<0np_GA+nw2WV|+2|kAdq|1j<
zUq2R+?WVE09nuwy9XF1ORDqWB&Gcmp2;niZb_}DdtLX3UptZf5R8=*j%8ImJ=w(<<
z9hI`5w$45WESVW7+s92nuxEC3@^$19rg8OhSM8tfsM(QY#8;gt5OHjqMyEYRu;cv3
z50fqh0U9Tu4}C<I(QUr0#O<^@gp6!?CdpUWHh}>BGEuAqWzFQEmO<^9XU%FkBWO#8
z7LDqN+PN%o9jhM|(`?9;d~OAFX+c~E;JfOK1An*g<oiQhS2UHpuVfNv)K2ma01jij
zh9o8AWdv&pf@Ip&Owq{EW2o>_3>#j~g_iSl^=0wW8J6Dmd2Xpc&42yz^JJwJOY7?y
zQ4Z(No}(}CQ#pJT<LYXVg+ba{S{Ock6*tZ*@Z<|S$jd<XB3(vZ?I_aea>~3s&F5Rf
zuge5+X8icEFnEzTGkPqeM^=V$ri)Zn4b^3N+Aj1lyta;Vzn}KbKI_9RN&XhG|5AF^
zdQ+lK)}P775k0He7W${LuTak#5b7u%)HnYG7OeeAB-Aqlh(vo)byU2F-YYrSm9clv
z+ThKsrkGj(z-%KcEf^Y6^a{6MgmB}JEqmn~N|x4DHzyG00k!g>{2c8Qs*H{LjEh+P
zM0_U6UV@t*HDkt8J1j*@QzJct1xtoutr@fDNP84vBT|!KEX9ZS?ce$f3z}Ms!jQ<a
zx|w|Sa}RR(&Bu9RNBBwzWTfOrJE8$uU5k#iqa(v;9m9D7H6<>)MU<v4ou8z@A1@@7
zH-7ZPuB#j*RmPf)pXSP%c7Fb=XK5QK;ySE*9-|W(BgaW*CE7{SNwj=&i>6%E@Lm4Q
zc@l6*z&`FxOpwy&#CeHTjRvJKuq1V+0|`<2Sh3eeG_iivk4W+zinj!24M0g`C=MKP
z<&oT~O@sM$X+ZMghXYhEK_3ncxPe-gQ@Xg+e*css$sc0p8p2vhr4*e}!Xm$o8i(M(
zx|Jl;Qa<47!kCNOhLx28+a4l<VeZu%xo6dQp8n0RIoQ-aWURPYTo3Y9JX{BfHoj#1
zr^J~66c~3eDX&X7cSM3EUoLKcZu~Gm;`9VOCv9hN=tznIHXd+*Vl@7_Wo_FnH8_of
z)za<8X5vfGVGpcL-@*ba;gJ{vOpJ`q?Arq=4P2viVChZKq~z^<F~I;<4bP*ww}j2d
zW-4dQo+E9BZB1wt?%<8&WOqr21$3Nx8hKwzu#P86XEsCq6~?L6s6Zz$Z4ksej`o%S
zY!ox9dLj@gAty)CNyZ!zh_qjFSqi8mI9Y5a1=~^FWFANVhC_QVSdwwY+8~NU4DgGN
zIJ5vGpaw;+V<HArCOPepf3=W6`HI88bzRwp5rjpj#a1kk*oh=2tFTYwu|}pCevNue
zut<`x%L7K12|9J_THwIeW2|mS(+~mCjr++#RmoPnNK$YvL96a75@?fk8%caxenvhf
z9>ixo)ltfZqWqWg#iZm@vX6_<VcHA0v&)ofBT7rO{H5@E$$wRB0EZ5{Z<S}1l1v-Q
z#u5^(BNJ_yVFj~C(12VV+f^|$9w#iHF98T*@jVu1liDo-Jgh$AZ79Ep+_U%zNw8wC
z?dvu;LI>PP_K4-0OB(=v)I-Iz61n;#LDC`muz=@BY2{J}Y)G?G7mdTo6kTLsTSA_f
zO23;zqjcC&RgkK!8~wu|UkdA{&f*9+mm0za&J%!921bsV%>3EYsjf`X-QA1tOYq95
zsjebBkTXl0PJJa9!Km6%6b1*eKeJvvb_&($EPdIc=}S3^2p!aVd1mlCaDdU{r!jZd
zREA|F-MxKyRW(c+T}4mtfWi@pLbrjH2>S^BSA<C|BwaRB#nkbm>Fw={BpCsza%Rk$
zMW#4FUsf`C`b>s-IeG_j0j*IP>tG^WzHBu|oH#IAlaMuCnit`Kq%o6cGk4}R>g#IA
z5A-mQEttL{{o1ssbF5$`fK1g$=FXeXl!^5WFB5e2^g*VIIrA4Xam)yMIxmnf26Y6k
z+?9-pusFP?p1LaN8OR$NaOGa|uP!ZOb@hEC8iy_+bZMM<FXiys3`|)i4?{<2KU$W;
z0aaReBJ3kVzm~5uHDO)T<&yQ`%E8d%=oDf4v}B+XNVKhJJc;^^svB$>Fg?fYSGxYH
z?rE8Z@^nI$u=$f(H(<p&OYU|sb>akuH&<8jzlpMpTu-VyWFHry!_XZsQ>qQyVUo(*
z@heVm9GJvpozS-(b5&iZ4~)Ikj-O10pJgy#)Oykp&!SICNv2{LBPvtmas{i+P^PM7
z%Qq`)>KRid=qm^&P8dtJx0j;oKV8POISZIEp_cxx3k((vIJQBlD<eHJrlNWb<3^_G
z>CftZ?RcCdOJepRF;7`X<1<>mh7NNcmA&0+s0us!>$7AVi5~{!h9Ef!yF?}mbRE^M
zHE=SvWc8_mo(q(d27C%o>A}6dbt^WYaQ!?w@DWZsdgP!71vuoHxZ$3-poxgQjSM4$
zyU1X?<jf_~?4MQiYdkNgn>d3xQ^(QQ*+I5w_Z^fEKD(vmYAtX|g6q=oZuA+JQZYvA
z8%PNwrmf;X{P|Z{vE)kby8kXF4J*f&Masrs!IwXFBYrN2?+2|E3k7ny9Qi^atnX9w
zD*3`!A7z*<P$&eSWbumyCNI2}%coX5l=xx&pj?<3>=M4uq$}6+=im4Y%a<<Wu6u7|
zY<ZdyGgtHF2i73+dGg_&38bV@$cOY4wL*%;0>z?FK9|?mrS!??bL8@c;OQp<+Vi;_
z{>WK8_St(GkqM=$g$2cmu{`z{|CKLoSi!KmX?*)X{Wm_bW)VV4ip2u?TrT_!P53*X
zC!a5p&*v$IU%l}CA~}l|5ctL56I=Pby6#gb1k0sTN(u!-r%JHF;c?sNzR82@R<mO5
zo&4a>A7e^QS<rtdi(=77hK5=w6t&KZ`mtAIyi!VPrd`3ezx^eaEL_aTzVvM#yna3@
zFU81lGx+SIUt~^gxt53YT{+5--eMt7E}x@N^pObWTyq~EUpo(^Pa$74yg1enx7GL|
zY+tnMMk)%Mr=(WYEQASy5SX9+Zn$_2E69elgfU!)2Zt^h1Xro3)H;j+h)ByxbjwKT
z=+)?58I_SFw0*0Z9gFN1MFKC*xiDp<TgJ%4;!0X{S^P<<qftEZ5J_BiKT<mXmn1Y=
zFLF|{4OQl%_Grr5?Ib4LBr=_A$xD&;{fpg;_iH7LlC%g7=IuBTOwvewf6d1~&(+gL
zn^=aVSSSVqe9$qhn9nhN#d_|!ZUMP$F7$PYUo3{P*$-_npv|Y8jgNeRE2q`sjhM*i
zKK}{oQj&ZaCxq}AK6)bef9^}Hm|CsnCZ$gyALJh2%6mb4%Lh3HpGiw@=aCI7NE74-
zKk_cjM0uz_>}$3uO6kOnOVLeop9#UtDI-dnpgSXP56%(;o~2`DC)N;3fz1fu)aGab
zH3J-M(xo{SYS3r;j)KA@z-147>={ursI+uok>Iow2qq#YTL{GN*#OX{&85lsho(o<
zAwmGB2r+~Fy1Cc$7yssSEWc(0Kls)s$@ta;R@heBR#=eRBk0s(&`b}Dq^&!es98&{
zrRRee_{V?VgiL2h;Zr~NT0ZlcPjL0DD!yMmjh7$)1v}0RaO2%~vvkgQ1}~iD_rH6Z
z=G<5w{>me)S-q0s-}@dPY<r2P-`>ZhE7!5{`pek;^y`{huVOSC@4ugU<41Dlz&kwk
z{8k2v%HGt}x5wfYD>%9J8UE)}JMqfO@O+<Y1l8l`^2M)zm*Iue{P<_ja6yh><J}vX
zH>sBHro%k_?8`KF53*|g$C*CDXY_<A^c~;EuU^<r-K-Vdf5-Jy6uNl%>EH6f=?m1)
zS<T1qyq4ZR$?%Gt7NXP!9>}h%`3%D+O=sy9b!0mmDfkkvY$UhdbstM-)p53A2T#4Y
zmHBHP;Oe>ccnpxEl0$Dl%^M${XW6=YxN+Gm`pzHZiKkwqrF)R&H$T9v8jn%+lNdO$
zgJ*WMaOcenc<#5)(VME}!TWAz%QL?XUx?|Ve&G+W^`&Qd`okXn;{W;qvnJNkm@9I}
zJ@+tg;s_4!e1qpVy-%UMmiso|0fiz{Ce-lmD^Ia`e<Pmfj%X2r&NKV?^-q7ooV&ik
zy>q7^-#WmKEpKz()e8gT4rMcO{%Y>M?OG}d7kKfR-|^w84k~IUu;K2F%pOz8vAvsl
z_LZI52&*S9;GP>7^5WA^)7(>t8MI@#35WKRNTH;uad%{gE^jXvsd6zI5<8dCXkj&y
zR9xzSraRQZKMwXZ`|5rqX(ZDoVrK?O*^L@rr6oGO1f*o6?c5}A^R@$1(V;}dPn{hT
z)>22Dd_kn?M1D&x*=in=%3)a3vrJk>X7(l~TO~JD&lsH}Q>UJlB%?<uU>8;aIYgo;
zA(N;nmbz#6E!1yeC48METsoBCR4fjTG)vaq%VirXIJotBp5Jnmn?LpdExTUiy<;7$
zT>l`$n|9MS@-iNI=q{$DIv71;5l{c>agKFKR^M_bYc8M8h12_Z@|l<E7$_hl)XZGQ
zyvk<&w&5(Yayn%r>$(54k5N`J$RD13l7@4g?A^YZC978$ei778Uda9T+(J!ykWGL1
z9Xk#+k*ciawvG3&Xi5!Dhj;SSbDK~gw6bm*53Ik29WOn@(U!g-h6>00V~6w9q!O0E
zwK0PqLo8(1vGZ<{%?ND>?!I1EA9@x@<;8?HEwi=cq~N={q1wTGbwDwpzx!!ofE{Zc
z%xMpdgpq+0SCpnKx}8y!ZR2Y25YiR(EA<*BvjP*D&9k^w`AOM((62=Qu>qUqWa?gY
z2GG`p(T^s|sL`IiLn3{_RqNMtWYZrwenBwy-bYwG?iZ*JRH?SSfQ&#GI$$XRZAX=8
zA)2SjTmVR@%cJjYV${k{@SlG8=iIyYa>_i9w&U;d^Cw^8@ZK%__rL!!d(U1#$O4VW
z8`!e>ZQ3j6^QqfclIw5eUw`^@4jenoPyg{Bd13oOB+%Hfm959KOd2yR=+F1L?!hlJ
z-#f+6fA$OBJ=o{~Xk$c@4|Z&0-YuW!FaQ0&;ntONNqa$wnvvza^}<u+reDv>X|>?z
zIC<y;HgDcSYUWxttegeXXWZPCtejfLGmrm*S9c!58#bQLeeQk^zWx-yd+Q_*eBy3K
zR#o$<&pyC`H=kwCnJy~I)ij*8ePft?O&n|+#oaf}<<P+s6nt28!+l&kInOVC`fHee
z9qTTi#F)u5xo~h7b<=0EXa7l7ufCGe)0XnkO>=qiiC@z?aw+$%S%`;Z+}!1?m{!3b
zp7<rN?mR?qcN<lcm#}1d4P)o5V8IxV&c18}M1%$=(oC2(lU3KRWm08<miAt*z58Ki
zr_b@zpFYL-<+rh7)@Wp^oGY%rfngW+@$+9k!NJDP5Ld*3#rS#G@Vy`WkS}eR%RAfm
zQH%l-B&lIz`Pf4nIlkpNUOza<Cq8}`Bg)gPz4u|J7mo3>U;KvM$C~wwP|c);Jo=ft
zIsV=|wDcBql5_@+p<Sw38e8tlS)CcSgjtlN*$h1c&ns>X001BWNkl<Zg$>%=`zj#V
zvk~^W9W<l$1Bs}pZFmxHfkd#WYX_a90+h{rwC^|r)}>G`P9hhU=?IG_J9a?uzAKA%
zAyvRK`-$w%Vl!EaUVBDI1vf1NbO)|JrRLj7g8HwSwNvNL%n>$lblWx}a{-^zp8DV7
zO$TvZeyBEubKkbPXmX3r94of%B^&2s<~j+5bZH+kGPV;ZR$C@Xg^4J_YxLY|BGgAn
zyf=cff#W>>yG>kk*T<MM*5iC%6*pdWIprh9v1au`x;k2E*!~Jnzq*~BZ$HmJ{`2FU
z>KtV9<u`EOs(POM#m_ifK97yp&I|K%nzgsA;lsDzq$7M#Ygkn^C*FUR%}3#5_pGBb
zMa+@B%rG{7@*(>6y~)#?Px8R0KEaquk7c)il4YYV@T*_^imm%k>si36x*2@#a}Uvd
zco!$z`i)MM%VB)S){&NpzykKRY*e|Rvb6m-jAhP@7N)*IMYesYc5NRGZOgK0VG&|x
zyyI~X1;8|4VcAgMLN{uGJR7CS;EEc|I3P2ODdu^DNDNS*N5u||T$R1r`!dZS%gQoJ
zM$tC3ZJTA`5J5XFXwau7uP}Zh!#HDNhbvDkYMj|O<wpugD<EtgflSJoI&K)v12x=x
z%VN&84>Eqt2uDT$Y0ac&AOXSZtEyaBQU)Ut3)~`+^qktocfWg_*>mQy@snSptnUXr
zzT+%?{e$FlS$cYU$pK~6^=!ENZU!5VAcqg5E?o_>NMBzcg+hV8-W~>W!LGPMKFdHh
zZ<OE*7B85<mVfvkoIBqJ=h2EcvnK-K!Pz~p@V)QvWzM_>-2d^fL-+UDf#l55{Tw@a
zlBuV=88tG)u)68ob?5E0o<7cqsu7H+twI2v-^;FdchlJ10s=-(Sk1f{^%QQ{z#>sa
z{aBCT!|It()y>Zi9jD`bHJvL~X;~{;v}jXsVDBEP{3?zQPG^BEW8VC^)KATF@BKB5
zoiv71<HvBin5DC$osRY{+B>^QRn${IVLH8MKj6U0GxY5|$fuW2BqeqsioNXK`99~)
zx9I%@@9a9tEvv3(blH49-1a2B1+~}11a1M~l{5E>Wn@pD<6r*nZ`t1{`Pzb+Oq3No
zFnk8n#*d|`ZWNsBqyNGw-raqO*7kuIoY1xew4L0;&wl(f>X)zQrt4O*>BFb!i<GY%
zK8`xCgI_irp{G2Jzh(oYsz)$uY7N_e@-F9^E`)aFAw-%hZ@7)7L%aCpsTO?8kR9h%
z(ZLCB(WOxXCfs=F&gR+o)b5?3m%A+gi3Uh}^ri-o_J~$dE#hrQX;V}0YX6)KnoKY>
zdv~H!`nrTL$wCF{AmpO=Xz0kVS#%S<>evM=7WlBFXLR5OOY^z|EP1HlZDgsO$xbE`
z15Py0IynkUCr-8qiMIH#lB=OjSZzBTTLMu|ZP}iZm$dB5PIi_YZC<p|+3Hs_91{Im
zJbS?4SgHMTX<N4r5-h<>1Vkt4)AVWimm*l0t{mPRSL|W;&V8Ibbe_h6d#S6dX8ZO{
zY<z43D_7J|-nyTI=X)sRd<F)y<h%iT`udR~#nf5zsh?QS9rxYOh%w`lq8`CMhD}_|
zlF{A#{hkx}Kng*3`w2eSf0Xu7Be{FUDk{rT^yOW#rpiV!WmJJ*?rq>i&w0A<TTSiA
zD(23e$gbyp$C<`P&7brI3$DJ2p0f?SbmA-pUjZ8v3$$Md<Da(312sqN&w>@UJ{{nu
z?9Y~c_`+f>0DC}$zgij7rlCvUcH<vbU1v$uu}AdEoVC%)Y2QJVQni7`2?4oEHc?ww
zD2|9hx<l1a+NQ~nDK}0^4XX6)n2iHSsB8JBSxK^+wBxz)KeyXRjKUFLq^*RTN;j5%
zjhG~bqQozBai+18)L1Xr+NAwI=sPVO0lGL=O>;^kMtxr(t@q@h@wY%wHL8|!e~<%v
zcT&IP4(5*y(4+4c$&{Cq_5^v~XVmz4O!iLky`TMtB@a$x(RlSW1c~pZC{L#ldA%q@
zE|;Tf*l<#wKnlsZmH}qZsAv1xF3Kv(80hT}HnUn%mQpaXb~IkTn-6zwXYN(EGNyJI
zbb<7Zl7x`d&ssux>wEnCm#?z^>nph|9eym?_bC+3BNn;A4$huD!K=?bO=EA7GEtzt
z7rG0>7&mel>H2zxg)?#5@FKyq?bufSZs!2Y9(W8P3N)W@;$X)?o_Ki=sj_nVI@?)!
z@3o<xjdAsMw=-<?45};3sh>Q7{tNF0`#F4{LNVOR8+LkR_g3y(|83S5Pw`iep9x1c
z#(H#+E%x!|vrq8*y{AK`Pt(-cME|KbdEwpTq%#@1+t1^r$KV$V;d}RtbK8cgSS;iQ
z>1b~ycczJ&)w3w~1bu#yqDWIwk--y!e71|hR5cSujo^Isc*^qq^!E?Y)|qF<wDD{|
z+ecY>hJn5Tq%5-eU;myR#VdIDz8m<PpTB}y7`FYv%pO^eDFWM7vxe%e?XjgWJu)+P
zE~Rcv>rRKj$m>2-ATzQ_K8!YH7$l=(H@7EY8Ip!1As{e3i&*_=ul9f<09DrXzHnR6
z?O1I~iw<i%=g=1f?Ih;yrO2Q|J1DE!Q!O81M;immw44+MQ*B|#sfg|)K-8il`I<pq
zL4BK@2pG2ci}vB#l_?fE(Ot%tOC15n`>^#M?;yIg>CzuRM8a7_x6WyDJDMgtEQGmU
ziYg{guA|~`o>AqJ-o62{ZEd{YSi$EXzKP%c!(Y=Mf~}%2DJv@@B?JW|9j(nAZ)oJH
zC*QzNm6Pvl$H(L9n{VRC)<4kFU(h~SHGB-?>V{LS9gpANMXun3loWkIWmS3bg`Ywn
z-T4Y8j~PWv`8bA)9Q}h?np^vrJ#7LN2U|#$WytpRB0PBig`e_j;}{-%a05U3A5YSq
zb3ueN(_+a%PbfqIzEwtfm)eTrN?6r#V~}!-WOYV-RO>`WbeT|DXl1EHudgdI;s%1V
z-62{8Js1dagUV5kypnvR1f2<VOIx-=B>I!7EA@H6lR_9tWerWDg#HLyG|S>L-kQyC
zM1C4O34<GhKUo@%wQ=>BBLDRCPQLSl>)3Lx8zE8}=mVr`N7wyO6Co@$sF&G^Fd(A#
zfMBwSR2%SIx<=2rivQ#D4{`nVYneZ)i(ftY3Ks?n<OZ@Vyly>fuUbt1nIjx)YGvli
z+gN|?GR9^Kc-{bSZvBv=Uu4*%%h`DIYHCCm2TuiGeCX~O<kovX&hnYHe9&;1qesqi
z{XHM!hO1UGXJQ2n`;L(FZ2<?tf7<d}`0~T|vu4d2>ittZ@%y*PjhM`p69n(PcbJI_
zR*`CYpWVlrS$gB$tXXv>!@UBsuZeAYk27u23TSTNP~!yv3WHsA6l=Nj_H``1;tIx<
zXW4t;0Bwb_eDc9Nm_2Dc8NZFUw(KUWpP2}R?q^1@a>*R_zq^Bufdb=ZEoNMyiI+CL
z$DGycSbNoS7R{eU>xq4o)z3vXA7#YEX&gOvmYMYxY<go0Bj#Si-Ro~;{@5J9{p|~!
z?;K#-qUCsPhdFen9W}bi4fHW~{%R^3xAMaF!>&<Ep@YV}<*VsCyo+OPJ)!J;&YkOE
z`Py5!X5|tV%$&rTBm3ycmT|@9^EkNsT`pvcp#zvPS};W{sH~m9Z5!@n$>K$<ykZ(#
zp8FjioN7lFvxrgCxM$;SjPg6!f9NQ^se10a?@m@Np2F4_pJvaoW|~{Ox&E&ES-W}}
z)9O+jXgJ2GX$u)wIKk5|Z)4$2_b{yeASYUT!x5>7<<KZVbMvW5#P%MAaDqGZt<bTp
zGG@R6YP@HjuCzvi0;lijQnS5WU=tk+e%2j>|Lr|;!95FB2C?0mmC|4pwSP<bxp2%u
z(<P$eC-xkcxuRzTEIu4wUAi5zv4F1-`hPvpumPq@Zd!*rbA<88N^UlIv3OLrnWX#Z
zeSNOMe4I~5ry&T9&7kTLKVc5F^d2cAELkPZ#M)PME;bVmI#1hW?6&kQYobkx*!>s}
zm-stq#mt)6L``3~oCULIbM=~Q={m59SGFFcAPe*qM{~pFBlz`io~JwS2eGS=;+k7-
zXT_qaoIiY!<EPFrX8u*&eEljeU$lV1rh_z#DLi!ZES`Gm1-b@<7mbUGk*r*?j2W{R
zaqZHnyz%VQ96Z+vevv|1EqC6#k;%gfG#orcOJ4=|Ja`Y+EWeC>n|{x>hO;!CJI}K9
z_i@uTE15TC1PAvWqP%_{)2ljo_PN&?zx-CFdd(a--4+8A_8?NpM(aSt?I9+^(4V!B
zhC0@^Wls!UOfR;7A)J_?X~o5L_GD4XwzbLFwqwl<g>(zM;jl;B|Liy+Rp;8@Y=NR@
zA{jz{vACsX7!%`b0?leROqiK4S%O9r>fBhTaFULhRvXf~ua}!6Im#%-=)d)BBdaz(
z#F`bCbLO2Vcyaex4JdW&m^o)oIMXX!>Di;=D1_9P+;wJq(_p47L&_5r!lxzGk<OHn
z$)qS`v*e3DsdR?2bc%dFkMIPALIEkkOQk6*%ODGRa`{5YFl5TgNP94t%^{@>+DoS>
z=5yqWMf5U^u%^4c6q!sq7}e!-<cmJSOOZ}Np-{w2We~DJp$O?rhIC3$$Oj|4e4#)p
zok7SVg`#iT6<#<}gnT}Df>T|Sei4ZbpKh{@Dom!Cbc$m5w5XR(lR}a&6iKBsWYTG5
z__-^QN`qfSq*D0BBB_)Y(3MJ4mPz9m3*<w2DH_$2q#~7O!i+2U%p)6l^C$m}Eyph4
zm2k{dI)f}0@THk;lv0vPmr<5U;TH?!@&zQ2Nv8waLC>JL#)+1LC&2d$<Z=a*B)k;m
zWo5{s(yK?NtPBsITrL-W=T(r-WXPmE3WYp{ViDn`N#T<(O46A$!Y@+reM{dsv}=co
zfgd*l==fmi$npXm=xo!s>)K!5NM2$!m1K#)LL{JAi>nqubHQ4a7S(>hs!NX(NHjck
z+lj_pdjoV7$dyVUiHSr31QC^8LeRsOwKTy@CImdEeKVwYcuor9;Xe+zjDRawcHk_7
z*%>c196JU(AUA<-yNqpn!fnz?6iJE0mYV@=aoVF}2UW&C;&vO%(&{!feGxlKpx0`_
zwKXY8r89VfAdcm76nx3Bn(^HB;Aa@$_7?x})H@U~`p#s^NT&q3Y?h)g@w^}>AdBSk
zc}7jVjCr;FZ2#aSmIaVbrx3zJ;FHVeQOoy2c$AfwA^alQe4Zdrm4~q>ub+5VIjL9-
za+mPZq(l(Uy>teT;K_gc3qY}iod6V){3H(0OZkPWOKD%S%S1q2^l<DaN<(6eNc{+}
zNj0N(6YatN*J?Nh{;=DRBjp&F*41$DMQt)ljZBg^xU0Y=jwGGN<E{PM0_#yWE#4#{
zW=aXN*_?jmnlnK$bIu$o-E~oCl&9sU<Whnk8Tl>j6B#d?B=`S6k3cyk3?RAm5baRf
z&V~|Ni5@O$Gfsgp;3~(KfvI?9Rb0DnJ>?w@y!mbedHXXVm*S<DUd+%DNye^dhi*;R
zwvFwQBg;kfW1TZ36_L6YNHf>0fEkUZL$-DC8H{)kw1AtHfYoT!nf)0;UP0YR(6~hH
z>OdQUYuoNFy?*KPf23MJQn8O_$A4V?i<#^HP8yR_XQ?tDy(iaiO1B@IDT$!G7^w7p
zre1zMt7la3#%r5s>CGh%gc3Q6&rV;897;DHMbsoJQ1kG=!f+Fd+m0Kd<x>5j6s}34
zJ)uo!cIuC?mBbpCnrkn@Ybii@(f*wpZcHDVkc(Z5-Us(}EyQ@&t(4$ZN(<bUl7)56
z`C`Km?is0<Jkoa@tK<=sE7jkn2{=xp8IAtYZX9Vb5g30YJ1o^uOo>Tt{4w~_N!-$E
zBw^|75*xi_I{tSYQR60ATz{O+{0Zbp*DKuR#AG6)OKA>3OmbooKevBevMuVNy8&YV
zEA&}%=Ft?mbi;`beEwG+e?kz51MiQ-Qix+NBB%ci$DiV}e=0WlsQgN4_G$`nfz+in
zU_fR#s~URRMQ0`rFpJP!8i|wyj+ZhMpWNafp9B9Cz!IBHbz+Xxvef-V0#f3jRKxs$
zvOtv*{f*NQoJ3z26pxMZ!KoUEMmg52Euk`;6*z@S?9ps+r}T`pG!L36!-_xBap{MM
z2-hA^TZPQi_zASQ?Ws0QyJ;gUvwWhLQ8XAd)4N(Ewj#A$gZ(6{Z@r&6V=GW1>}@*2
z^E*$m`m#E<Z2f?oqRbYUI!b41Yk=Y!W-&90wX5c`d)vG8<c%>V8+d8cw+$|-L^LQw
z+f*B<9TQDJ<Ra-FZrdd%Ornv<5uK4spqD=}l9vp~k<rgZ0{6x5lRUbO1eH?2H|pE*
zWh579CGBT&^x`;OxS_z-t7^&)FeQlz%d4QEY(T|Gf&NU8Zo*ct3WUV><`LTK#pnAK
z|5iwd2`MVWQb|eCpAc}J9xYru1gHpcW$Y3Aj|#p@_ECbT(&s^xvw2k4odFVI!a(Y+
zDE7GsD@BuEl(*Q@Epd4U?c&<x5D9Ncw~vev?D*@>{FdmWWLqIm;v%vYR;<MAft0q-
zDLTx^yd+It#YkpNuHpEx(-b67F^a2}&0+tJ_vk7V9lGsl1Do{Na$p#X?3r#UB{EgX
z(&bmuw0{>(J$a*N40wpisYJ{v{f%US7|QT1!bQZ%V!1S-n`j?q@2?4Fx((Hj)1bQM
zPQtiywu76}xru?oqGQP1U?EIr(#M%K4U}adHM*=t4N^kw<jc5v!DwE4<!uIif$%cS
zSh$=O3#Zb4d=IZ~KcM>y2|yy4IBz94u9%011bjYt`wyHbOkn*D%P9+jIw#(LgB@p5
z+;QhxhK2mL9e$VRwjB(%F@?Sz93E3ITh8)@)A4)G@y4cYbcV0T88z`T)?7Y@mtWk(
z00Kmc8H-l1YS9cjj=#^#TN`3=(bJ4dV;DB7*4M);!F!;jX-5(>x~7wd+27F29rxV8
zx#LGTa=HmdPUEhdS5YoJJkQg@_B_w}YdMxD%pNY4I6M(Ea^ZPUUdM*pZ)9Ydh_oBA
ziI9_^Nu%cATt);<<i6@qB~?q;m<{9lQ)FQ&-FAeM=qNzZtuE`jp-q{tjIr8HL<4Zx
zLaYyqsc;nRjtW#TwS#IvuS2P#K+~=0kr?@?63!By0!<N_>gyIA(LG+eY_v=wsJ01k
z>zVeX23RVv0LU;oD{zNpb1_>maYoBhC1aajDW&r@hv-tMwEi=s2Q)^*y3~lr-aqGR
z+vGnRj98$=lA%K<?y_slPwb>-Csn&|i+%;tqN7TuTMGr-^w>6F`%KcvSOWn?Tf~+e
z9%G5a1_n`HoT#Do<}RtrXj6tLTp8+CBDSsi7RH!#S@n=bOgEAK)O+EAK3(~^@dIt!
z?miylr;g7WwByDzTUe@yde}|6a+X@hrY#Ybm80*ls1*~FfMNCIMLe`|EtTmM0`MwF
zbMq~0sVz_A3D52T;dv&9Xq;m5k)lm_Cf6w0cwP!J!?<?c%}g0pj<erZvmjjClvsKW
z&$YV>WA@V6l~onH11**?{+rwo#%9YV?K&n0Ib?-@?bzu8x1mypD(+HiB`&L-fF~;e
z>+9(II$K3$6$6tI8vj4$-aAaL<GK_5Rrkyw6Olw92$BF1NH8N1nP8?QT9R!=TasnT
zwk#*fmUq40WWD!3@Av$?_O4f62g$Y^WLeTDOSUX&B{2aY00xlAfykLilk-f6dVkzH
zsj7NrV7<3?^CPD3y;Ucl-#JzHcDLgYfsRuJC*ipGp1)uI&Hsdt{rD4@<ceqK=imP%
zKJ}qTaP`7@R7x!<dre;G^>$pwkt2t3_{~$eXY&)7-sCXY(~hG@j^NOdW0?EE!?=BF
z3#vmsIDX^^4j(>><^?Np-~7oi0&%{TanJpCqvz^HOj`Cde&eT~LfO~QGWAyc%76IJ
z_}TY8g$b@`>gL}2G(Pi@$8q`GSyaj`DCsx}f130Itn^0Ss>l7LRwT!T%JMMh=_7k^
z3iHt2^Az^%c^$326*MoHi>dRM<CDMon^<`3c>Isw{v8~As|!zk>_1@b>;Vj50tU~%
zia-3?Hr)Hz`|*=cEJvkM#g1=&5&vuVSv>f}2k`cX7NS%t<IoFV#W!C%f(5I71V8^{
zYthl`F>gu@@E()rF2^r@;{BN5tGIS#EB@r`TTxZXLY~*F016QTuSTl`1|n_@is#V^
z@GVZJ4>JbZ+#FG0OTjU+Fc?ZR#mT8wABl{dL-v`i0_dX^w(_$lyLczxLlu0?yHF7Q
z*bbzl(}kv>!}fAygUoHiTx==em*N)Vio}`G+Y1rnLx}FAs<_np*xJ*SZ4x<YZe-O*
zo+gb8W=tE(Ck{rY^wxpajXv4Lju2fUnfXfGTzM6rdI3hfeXx=QMAXR1*kret&934V
zF0#7FMKZI>)CjR-tX(`cG`eW8odRpRrV4}jNYV5#Ul&4+6Cz2S8Z52}S>ZjPo-Z_c
z2zFWY2{#lt0hsaPD=D~S2l_%?$}>}X;{vPfAM=pEI<HgN1)G#zsbJ7`MD)}I8}}j@
z-Q6($*VlA-u-}olFWar4aO3SO@lzjtFCJVv9iRTpbUgQsuj19K9^<FY$4`9XS226R
z4E)nyd=5X@eFpbE^%1;(-F#f@t)tv^3V;6Pf5g<KoAER6e+;GSFplp0HvaB=ujA$i
zp1@DMZ55izWxRE83%>M^+c4?2)%eAaz6<rC3KlMyhL_ndDn@*Sz1m(7`+)7H?GP=}
z0^*2yQ%q;vBzvLQt<|y3UNCl3^&R3Diz|$HN(dWgc1|>xYg8DbuvsN3A9AguAaGnb
z82fbTFtI_jOx$UJD6UPOIt9Ca@DG@=acSV+j8*H`;LY7Tal?%1IJWx;szFw}s|a*m
zIg4Fa&SK*H)%bAh0qj24fq`M*ja`?~G<^}?i);A1y{Azf0QT;B4W$V)@%RTz`0kcH
zEMMVrlBbIAe&sJvuX{|pHU*D-cp+L!z{FWI@yfTqhWT&1ztEyy!rDhR;?S-wn0Dh#
z9DDr;YMkSO-KC;1k<&A)WdfIANE+o`xOHF>mMrZQb}EAmwByU4|6P3JkjKWm=L6nj
z{EXRf7x&_S{I}o7pZ((wwA`=&AN{cv`26quHh$-GU&rI``7umrayWV5RqWbx07uXF
z;+c299TS@-;sYOi0{{5M&*4wM_I(Wc65wl?cjp@1G<Y0;`ak~wU;OqfsFBdxD->+1
z(23?c_!Us8_|QdHic!MAu8IJY5BjCSmCreuZ@}D%jC!XoX%-wcWj-s590WV)FDq5W
z*2^#>i+)kz5KxiZ?mQ2GhX9%sy<mN{eDX%#Xm#$L(L0J*nqgKe7SNPkmFymB9MOkv
ztUc?P?*0|Jkly4^Rn`QA2%IS2w5}Sk8R3+b2|S}_Og9Cd0F*^5tPZuK4r-Fw?oO%W
zn5HfgRH$?b(0%|!b{Knw-CmPJw!kp9sAfaQ5}ovFj@Zm|N;sLOn(ShXH0#RO${}Xw
zPRuNNk{7u+*=EKx^K2J;K5pQ=qGw#qMi)bvaN|1Yn2(gns7X{N7i`eubfHz_T`a50
zbDdNChkWdmd`wNnMaUNYJ4=r6OFw3y>=nxzm|i>m2L9r&zk&U)Z^Iw_{^#(@u}cNT
z>L8x|`k&wn-#>~~4=hKE^Jtzt4HJg0;;;Vr5AZ+#_S@*gG`#0MPh!j8d=9_;dw-63
z4?TrjXSU$#=>yogYcJkBbrq{0eLLpgFaeK0^ERB=@)i8S7yb@c21pI@3+FuNt*9oA
zSGpLF5t(;nRV}ddOu6L-hlx4a)zVH7OZ$NH3g?8oh(ce&OY?oWN8rh+p3if<>wE=F
zZs<8|Ers_wr>JOX8&m9B1CYJrd{Xu!z5uSB+>0%599JyIVeW!kuzcZMT<xsmXMX+P
zV$l@GIU+bO24A`Pah!Q=3p$3RzAS$5VfZsI<8+stn|0etEEqb7eOIb{j#7h7T=zhE
zB7XeckK+07eh&ln653Dg!SlOM2!^2ra~Ir#d+wNrx2_H0=YR7*V&P<qFK`YeU3BPQ
z+A1z=>NwwY(Qs<#h5(eDscffS4xMe6(AnLG_KrS`n-IPXx7vrJr_N)jGK|5YAvBM_
z5jW4DhYx<@SMbXpe<!NlJt&tZ;bXt_i@2fEj>CtKquw+grP5@~7(a~59bLF~^)kBp
zhTvQor(gXRww!Io&;IJa$FF_#ZJ1Dk1ylt-dN;+vaZu>nPsKG#4Hv28)LUnC=i;vb
zR|=s5R2pKO>{%Z9U)xlG&p{Pn4enYl2Ng`w?dVunr(|WhMK~GLXs+l_dBnHk9hkQy
zrP48x!72F*5OR)Y6HQ4HO^1O?eczbYnf=N_D_zxbJcr*iNEGsTJ;#w3$teNQl7tpr
zRfi(>N&M-vvgu=8<U}4dHhLS=p;##zFN{8yP3RifE3#WGkLna%%*)gjdwWY_)bV(|
z$4I=q_YwmkjY)PLixhi|VTM*T-uOVtt>RCvOM@TVfp=9xUa(%o*Gqm&pYJ%o0M9#q
zdJ$*zhfjFhaS$DJ&Lfr;#E50m*g|6@$8DZdV@@tJxz0K%-|DzgJ~sA-N3C8%y<SJH
zR_7nFALwnx)oVTI>gYvjTr)}~z}1Iw?(7+K^$lTYXb7%674vVIiznarQGEK7AHuZy
z0DNg8-u@FG$MRXgv7<-PT`8lbc|2xJX~ETNSJBzohHD+&oWNQ>4uRx+#xE+S;&_Vj
z3}&9o%**7Mp0DB^&dKwe3crXBH!UjOI7Xc4sAcjJz`U4%v+_uTFFfI>suoR&)6J!3
ztl}7ow!md{CxvSx)pt9dU%rZ=zAo%|;jonsd;kC-07*naR5|R}wgrb;$6?8Wsm#Yi
znq~*UHBZMAYv$tl?ZtC*g{>yy(MRsZ*5_UU%E4%^8S9?hj5mJp9BL(r)1pDASn%*@
z#yfucQ<!@CJNWwclRzn5c)`~&JkW#f&pnGB+g`-JYc05Q!BkLbAiYaQQg)FfbTL|Y
z+VtMbI32*LxwKT-t92IMVX*f+-aLL0FMjK*_~KvxExz>4=P*zohj}xbaQMyRXr4YB
zGbT+$xzvf%ZR2qN;@fc7^7}A*+9Y^*jGH(PhqiwkU-;6uaMPXlV|udy1gS#~*R@e}
z&%G#7RN&^uI&r=_VYPRGp$Jl3)2Copbn+}rIQHr`BoH}Y6>tNjBfnz5!~vD7yXgrV
zOEFhYT6Hn*A!KZ<hYB>L4)BpJj0<vhD$vI4XeBb}-7TX7)DODDxKnsj!IS2~5zRov
z$fIoL?Wr)HR)@+8jdoWdF~vK5L0($z^Y!dZRScL{WSfi+a>=b*SxrZKHzvb2P&RSN
zjP&I)*K~6d=Nw64cTb|C5hf-8i%gC{+gfvIG*2QU7l&TmA9D)8^x}z{DkB7Pha=&#
zmZg!b5xt!I)sBq3DHbrY@#fMwn$QShYTh)jTB%^%<f)iCX*`<3vu*h*19)HOE7jp^
zIDF&`-q`wG{N-Q$HU8%7-^S&xD(2jH1J0Z{hJH5@vu932Q+W`lFZJTCWp`rHqGh;a
z{*7reG<aEikmwY1ZWMLpZbl&1fX7GzWXXnlXfN_k7!7mU_b!pc>v2$zNn}~t^Srns
z%G8WfNz-yLo;$`tYgaCcq4e0iEiQR9wT#D%=~K}%ZUUxHpN6K=5cVH9iDmcQi`h3X
zz`~ox;Yw>C;2g%!T!;_6_i0Qi6Mh3X-MtYrTKD7V)n2yK4Y%HlyC${awPTmLQ5-jY
zJ|4bvGM;<&I4`u|n(*Kw@51^!=LXxAvElu{g!Plp;_qKLgh^8-p#+a|%LL4rJ`FA7
z#$)D;>1b)HVE68$SpMLBxM|)kxMTKsTx#tF;RX4pJYz=jiw)c$ke|J_r8CGH`dj%_
zU1;jOa+i0_xc#=7c;nT*7^>D$YMG4rQ-Qq)PGH=Od1&rFgQFKZFn#uX3|u^lb8UTa
z&Y@oE#gS9(*!1L6Sar`*)H*NX*x7Tq*fkE%JiQs?I!@wT&oGW0IDq|cUc~w*p2qx%
z6<oS<0eklzLeq^)@$Pp$gVlG=!;bHK6Z_6ygV|j(T|<LzLtDZF@(~}O1baLsca2u~
z8hpJuCb6UgGCKiqMr=-kXeF0)<61P$FM7T$n|w|rA-d)cRF1A4uDOFgMd{)JWaii&
zy8L+i>=GphQ`2^LY13+5PQHlRT}J>bTI-!AeLGHMkh!+?vW*E9_goBSV@Zv*%?wpT
z$6zWeeQ8OY8Y{a+LQBUF=c@)wc8z*kNA*HncVpQTJ4L%|kZYqYEQD9|+zT?{g*1K|
zjU(N){5(%DK%Cem-euIZVrIz>Y1CSpTm7nYk@yyj(e~z@6nJa@#wl+XG}?X!+an%X
z9V!3wF|2JA@BCQ|h0;^gkNM^O8Mtx7005P~ZZzGr1W!G-7PYHqaG|q;xi?P6v4clY
z89yCUT^A0YxPbA~ZbEba6&yQvwa}$Dj8msB;Le9OWBsaoF|pc%V<%7G+|@yBdi-(B
zZR*Cc3)gVr)KTm^coGX&K8$-8OvR;(=WzJILA3W*VLxfBWd|?#O>?TSiLyJ*g9gi+
z2=m3}mcI27xR_gvPM^bLd@yyz`bwQF7^*mRNafQFds-(P^OAT}8dF@W_#=I^-tyRq
zQ-LiTOvLfVg)8up_dShq<!0P*>s%Z^vJWRuoW%5H>+$&7yKwNiuVd?>i|}w5H)A1w
z;Zq;Sxvk&9#s1<;{O^2V4X*Bc4u>vuvmUoBTZ!_eUD$c#GU&50)92rVTl@)pf7dBE
z+KDTb@xbQy;$8QT$M<#~hI13}<U8Mi8Iz~r?gv(2>Fn{?d+<$6UGN}2{u6IU6PmDi
z;mtU9ct1`aJ%*`wZN$^-@5RAqzk(MJTq=GyoUq%W_?|chwz!z<TI(eVIRI4P*y1Sw
zPWRKPTohwVh55aQ3-7f9Jmiz9_IV~3I&LXhQqU2qi!*_gg24!=H^xz;UJDC_Y<*S)
zVp}(wW9Bk%Rm`0enKHr^?n#lGWJ#=}o7NG^ZOp3BUHIthBU;@mmZz(v8Hw&la}nt>
z+T2*gMq_33CGy1>XkCvC4X=B3O;W=Q98EVpJUONu$_dWA)w4f*LD|BoF$~@rx*1(F
z>gc|}4vO`SY!K_&*!H<2IX5=HNn3qAaTlAJQa6s_k@Sf<k41O79&IM(b<e~W-yv|h
zS?Me1R};&S0kKRnTs4hemSNt=t2}UBbq4lam|oPb;;*hP*X`F>_eS%4#@+<~W8@kC
zO~z&hjIC1U6i91Lc36}dS=q)&3%R<(C9>!<j19dk2An<yFlF{){LD{2hQI#9KSk@X
zN8#sE5?>C~$4GuspHo+2yHYb&KK)@V9omU6J^v=W2TCQYQ{C60Z@-Twqqdb>xMZoP
z0k%7i0G6XJb#2<jA!?o|p#|vSA)wD~wMsBgi4#|N86z*_oqQ!z`X#IS5duYDN`pg9
z^Dw%^I}-WjuM+*AlQG7l#XWat0F-u02?2$fUU}POlDXESF~0_SFnwnhQDhS$;G&CK
zM0(E;_3C02H|0b#Baih@<oAe<InMbdL(>PkRJe4L5-*72d@?&<u=Y#sL<JJXvnASA
zkoYx)D6&Z|ytJH+cRo_N3fBow_N9Cr+4wvs3-k_~7u%)j>QfsT`kAy+cr@}dWDG9_
zq+_2lwp~FRbN1o*I3M+2%B{F~XcrVS^SaAJ=MIi>Lnh~9HNolzy0UMhoe`Tv$og_5
z`^AQ|JG6}nj)oV>f?^cgX)>ek4XB#Xu)an_i!Z|UN=yM8AAA%yBRRRf@&U)MS9M{e
zT9i2*<4i*xMxoc}71!1rjija~=W}E2vE40<%3^p;oqIcG*4uFEN_Rrb$eEd&Q{0uB
zv1Hj|oIQFNgEckT{M2cFzF90Z=u_mkQ{y5T@$s+$WS;%d0q2X~0WE&Nu9#V=6vv<d
zRs4+jk&HIqC;;s-%M1X{p;oP+S}(fVQn?A`QrPKpmS@A{%^Z7Q{8k>ELsPj44s}$j
z_2M@?y}XZCb-F3wXM-F)<l7(-;=om7t*wHCs0c#^R|HU&R#Y5y5g6r_+s;o1Me>ii
zpvAsV6KB*yIFej3?=h_Ku9r5`&vgD{0h(fX<JGA?hq3q~RVdN9k)5IV`tWbMn=!I3
zW0h@GpG8fvmT$C}Fs_L65~H8b-F|I6Pl_b?_d4@9S6pkXMZP(ubGo5}k|FSqVuP=1
zY%qA9qH2M?b|=pzI$I3R*(kQx^)$or9H|_!-l+xc#)7q1yjW|DYf+7fu(VP9$rvB-
z_MfYBRM1ClEE`~^3Z}7QD4bJGTQSqv4Yr{R6SE7EM)u|W3Ug^em&=<*^&!2Lo#rI@
z+%*1L)@VWCLmxx3>3jej|7JM<{&~3x<#Mr8SFcr3tNY@&*_+CMucKP2@^k<6JML10
zfKY@{uUAm17Qc>KDwomRRD$<_uZQ1lFO|{UR8C}XYE=AzBEiik!JRl5N>Q5bJ>beS
z@aw<&qxk$E{w4aWb=2$A@Tt$d2Y>g4zeIa~@gwRmPWFMfV<YLH@!s?IYLOlFs$H*i
ze65Q0@B0*f?7`XS9UQ{B1KaV{Z*N0?m7FH>>P8g-(2P@=cN11T`c6ExW*JJA9z6FC
zU&D#=UHHrgR^ao$_utVqXD$BiPp`+H{onr`CwqoW0LB2y&69JS90s7s4$2=vT0<+@
z#nDk76|7vrFhG_`3LU&MfOtb_#Udv_7t>TeAEBY<4}gfnIq~usCiGH(pp_}9AHauZ
zsAH(c7Ln{ZTgA4Qoo4YjNNf{V!ByD?G2Y2bVb`gDV>xr6COZLfsn+8ZNJjpMW01J>
zj$zVlG;V(NBGD{tG*2KrLTPi71auMIw9qSCshrdHiS5L3m-x!kOJdbrC;1%vJ$^|}
z=-kZ2Y}S4XA47vhX<;xs&Qs>Cj<{y}U}x=Xp|5xIRSU)XnwX#>sPtBKnHxr?GeU9K
zzM)tV%y#ToZTWZuLhEI*M<z*pA+cTZ8<^T3!N*+Sb9mv?@v%<}UQwe#c8chd^>Vnx
z^9huRy=g@9&04HviaZfd^|*~`Hg)`jQ*pkeI1tsFC}vrv3@v2zvA7J|JLqeUq9Mk2
zry4p{hJsu^%$YdrHhl2?KaQJcOvJSlui<aL{sRn;n}r|!;D@ke_GI*3J&M2jn{S}U
z&A~5x`sXma*~2wYf<L<h|L33o6Y9Ef0`R_uRX_U6_=z=h(KA%X(QRMDm!3O>rk2Tg
z^1UCzn#Hqm?bvqw@t2=Ry+r*(A$ya)-Nwn~d}!VTr1|G$xR_G26;!A9z8|xfK8RV(
zUqxyDG@QFO8TTxljW2s(*4zb{HFW}d+Aia*Yn^DGGy^jyRB)-a2c_~j%)4nSE?v3;
zG>ylCTNj|E-j6e9FQ8HfP;SO83vR{aW{*o3&!Mxw0tcI@^}5I0g}39}YyW_6?CZiW
z{Msk6{={MY;J`IZnK=jZZ<>nsOXqR5s~^slaO0etFkAuV+&B$wZ=FSJ&!7N9_;bVj
z`|-gimf}l)@n!UvCt$d90ORkt2}_nefR%UOhW0yFV(H>LFm+rR=qB`tiYGhD7FS*1
z1u#`{RLy}6=+IztgCd2=DVqA2Z^4j{)iu-HNf#J#4KY9z6o|&fDs4Cws7dc(in#u(
zY6R0Bv6)gJZ|xoAD*}a+nPt<wi<8A14@--hD&nt`F9Wh#cSBdYBrYk5{S;4jO-_HQ
zw%Nuc22BI7rhyK&iJCTTcf*vLYHcG0*h05K!>4Q{ap#R}QX&9TN>rm_Le4dgkH*IY
z;bI3Wg7Lg!w6?RQ7mwF*pKJ2s7}24T!wnmY;J?vRTi_gaIzQEWO(%)jOv9SXDP(jO
zAjx2Xh-csDV#O!(Sq>$iLoUy4PAZc`4N%MzY<%75DUq~upnYhc@6@KSv8eMTw~+8&
zqCI!41O+njnT<6f!F5LYS_2E|6=NElG=&i_z3`K!%d16~P>*p|V$${xrjhiq*)9GW
z9OfE{(8DOd0Ma}w9c8hk%uKaW*If2#w21<sgenfa{2ZQd@58%3@oRYZ={NC}13fss
zZzo>6dJPY~=acyOQz!8MeCrUt{>3k#**V<%%qOt2_e#;g>6{CC%)b3Ly!q^x@Qs64
zQ6K1ra~?|`ejgs0+l9}4;ag~Kc0hQR&Uxw<fF`r*_ux;QdPhMg2Gz;oj4=gpogc=z
z&Kl<4{viJ2?|lsa>EHb=hOS*ee`yBZ{piCeSBG)SlEv8ZcYlln^*izDpInB|{$GEJ
z8<staUw(QD{=@J81K#n`Ph-XO9$c$UL-pjd__J^A!UOO47@k?|ak{;ZOZ$I-XLp}V
zg0Fbh(AjzgXHQ>3$AHJ=2`!j@`)d69C*Fw*r_N&TylME-=RSuMT^=9(%zr`Cg;#Lu
zN<Xen=*NqD&+FRANjG8YOw5`(5qpoHL0d-;<}U`$9XWvc4{XN#adjNI*lB|*ay-%D
zs9-ZX-K!<53-9Bku}R7exb}rgIx3#h9L1|d$9^^6Fb`+uATvLvtNoy5F<{1jZ89c0
zIyHvv3SG}Fe|6i(u2MC)yGVbp07ynUfWKf~K+FQSwkti06gLo}pD)^y7}OPAB}OEF
zkSs@E+g#bp@>a*teop=4BOWdo!u+l)A!RO3ln!}0<3`Qeg&;|#kTh%Dtb4;l8gK&Y
za8B9Mgn*f&?I-?_uyLtP)W06u=C<l+T4r2SNJK{a++CwxDU%zGXsP+SnT!`8W`O}8
zJs0f3&&Q!rp6g_UvAIwDrF@*jDI=%VpYxx6pnRTLv}l^jU2440<zG(mAs&#KZ;H1x
z2C@Hyw=5MU&&3`lrr7VQmxyr7-jX!{$YvDTv~ML(<p=1~+7#QDUm2gqFFnhv-wYN%
z8>ztg#WHGjXwj;%Lha0J!8kwlE?xuwMaQ|=@*WcZksm$F-~d-_oWMEswq3w$t>;lk
zGg^Cw&{T35?rX<uue}EE9Tv3pVPV<9*N1WS(pxAsO~OZ(&&GFudymMkFwr}pGE~Qk
zr+yr>R<z;!U;i?$bqry}s{7F0w;%8O;0JMT@AEi&_F`z{B%Vvg844gAmo&NV)PEF!
zIv7ap^B4N?z`DC|Z2v{9Ua<%rmoA_>)Q2nG!<asECT85c9BY=}jJ7ie(Kg`@ESWt8
zD>tmf!B=;odD=p}bJKi$_uJpcbI)zZswdut3C(59xN$aoe>?WQ@*-Y7bP*~9IS~P%
z?#JWt_k9e%^`HL@W_2IJ%ZIOF-P7+t<@9TK_PG~uw0i~~S$Qkqfu;#1?EI&{$5+4l
zx7f1pJbBv$Z})|L`262Kj|ZQ6AAakzpT#@Z+yUntdaj+u(9~sU?mCIifod8o;k|QC
z{SuX|7;KQK^-cstPGz{=OR;>QuPDGa#XZ|-Q4v%Hjqrhk4dxmH5iM36NkZCKea-4$
zEd;bI8enwOAiOQ{SZDpxfNF$_8V(J5%E;DcVW-yv^1HZ7|4;!&csZy5P-jCDGWKZS
zsDX6dtK*nfhJIslG$7mJnF!zzek)m2ks+#p8)3scUip)ZbUwoek<AUzTVR#*8TQ5u
zVlb~_M#rKaC;Ww9TH6}P3&(KmLqf><LY-IG5a*T2?Pwg!nDdc7jB_JuLhX2^r?GL+
zlS3eu1tF&c3)QH}Zj(B%V^cSs&hm^gAg3oi7UAQ7q(&yjNl!H^94cDg(cKRuUuqrm
zCkz|eM=<_mS-jLyxLlsu8k?*-%wdGZYND=zF0zY2yI6y3OXj8FC?0QSP^0EO`<(4W
z4Kv|D!~7Te0N~<goMMyxF1bb_@94ZDuJ5Awm@(l!_b#%eHj(9u2$6+h=zgfN^bSkb
zJcE0ij^o?A&(S6sW-ncbXO`9QrDykp7P?bzU5z`24q*Sair7}yBv-~8-}(Z6_fNiv
zAMEeICx7K<F|Ay}jJdNgdFV2}_RSw)<$FGfdv7Y6pK1nxQhbv|-}d2<iS+m0hlG%^
z0*1i(3s<ml?IQfsw_d@6>sI6Hg>#s5-_v;J?g{wLH@}AGcb`G?xH3wWHf-O24eK{;
z!phs4v2))Ul$$1E{J3$rYsG4;zGptRy|5kCdL1u*<qz@P(H?C4v7f~!-?IVD(D)#Q
z)VX2o-ugX!@$;X<XMguk@m5bAlO|5U4L99_wd)>2Q|lq@JKI&n#XtwHbPb4KwU5ar
zzJk3keFy*bzx*HgZ(rVpN1oh(ab*D2ethf8e}b=kZx^Z>9P|puh?cocV;YzIZ5&NZ
zZ}IXa21aD(tDh~5E@VZ4#xv*A!O>y`w7SfQ%TskDYJOYQ+X5R4mprea@UW4oVtuSo
zX<--M4w2`=HQ9ZmEazKFE*@`vs8uF_OF%5bQGpc^+|z%0C(0C&M14y$blwiyv3lj3
zAJ6<8QrU^Sv8y~YqE*zd_?Ad|y9UsCAtyyoz7>*@oBr1}jAh8);>;7w($DeLIpZ=i
z)6YoJDH@}1B^vp>=W><HZ7qwrBVqzOj|s@oW4;Z>$dB^`*|Z;hCRddV`fa>pCVZuq
zG|FwhMHb_&vc1=fDwobpWKqK!VoF1q^;}J#;(?Zbk?*qk!7YiEISF3*%k^552!9P$
zL^6AQ%>M&g)Fdi>iWmA8v54S!ERWZ5#JvT~d6L)5KqY~Xdj24@+DOp5a&{-!{*_YX
z;9b=CDH96*QA~O(XC1hC`NR0|Q+MFc{^*Nn9jpT$m^S}j{M?5h$5;R8b2!^oEl|jn
zvHs~N@%pphN0r6*D7TD9OH+yQ>e~6^ICk<hUfHo7l}Wc?`h+2LwzcEn-hDWI;wVnE
z)^YPqlej4*ThdotCxBpYBkAkZbWmR^?xWX*3m36)S_=;B-H&k#7U1%Qc2sK>v`n0e
zISZCy?R~dn^2CWKl}b4H@^(D<&W~f#<-IuDUB^JzNxXXS3I^LQ;P|O?xO}A*RSzs)
zelJ|#H5@;84O4HJ)Ua-qN)BDESMk>6Rtyf;P;wQ#vGZm0S8F(Z;y5l|X-9i!SD{_0
zgg9v=0SnD1&bkfn`@s9LY5h8ET6-7TE?!2>JCsYnm9xiip|u~SQdtEhcU-2x#+Y6V
zwXhQ3LdgO!(^H8j6q5N=1axr1L;cvjO;=vPXrA8A)ud>pm+eng4#DJ7RD6iED!`Uj
zTEMIzTIXO3!bGohq~Bfj+SWoNUg3D{K;j$X-)!?Mam;RyF51z`H}Ndl3)XKM+x6-{
zjwfR$?I!|R6<fMoFrO%#d?c^<Spq~svw&wYIkB(Zsgk+z^G(j#xIoC0oK1DTW@=8J
z<#9w%+a~i;SZ(4#AwI`?iWixtLg}aN728DTj;QA&f5E;tV?;KN!vq;!Vsj6PQ-c+3
z1em6b3zxSy$5?JB#8t$Ds&dT3#*k;bN|UE?NyoYrcH-Nx$sxIeO=5k?AJ(QRjuB_5
zaihPS<g;_2pIaiEIpq^0dUzHtzGN2<D!zP!*yEhBG&F7j3KuArRCud?kOtfJnx1ct
z!&u#{IhRBwOpME;DalA$V5_>IcaFIqae9h+)(px<6lY2{(jnPp)brGPikT_%@4>(O
z%tvwY-~rsY<bEugJq0Z{%*Uty{jXv8#9p+_y9;;SItz6;G)<h1x2>9i@9jKk=s))1
z-@vCoun}c=c;A9Y-~B$k{fS5Mp$~l!l?!j+YQM)TTVBJ;x4jDwKk^vvyJ;9l&-EmB
zFVC1Y>$CP*H*F|VAQZ3M_nHVAhr-}7*w>5pOJ}kF=$mM5>%gIXM{w!lDYW@%xNY`C
z?0ok7IN#cibC=pN*w=~v$`D?EX&c_^=!dTj;?Tikn0@<V%$`00m(HHTTdmy~H)R%<
zE?I=}{paxa-+mEYgH>f@F>tL?!Ikr;akYDZ9n*Q~6fX8PWAUO};RZW!;?!C6SL&z?
z4dVQn(-^4KOyFr$s#R;4IPFH<a`PPYT|9tqes4Rv2Zqtxei`SlbixCu3=QD?nKS6C
zss@m37lJ7+;?8teQ7Ph8L*~4muiohT;$Vv~v+#PneU8E@I`In7B!Nj53IJ8KLC;D-
zfM3hlKo1(9UUO^@ExX;JOL>su{M-wKz7FG<zqB`0!&4iX_gqjZ`pLGL0;BnX;9BkS
zX<6VRd!;cEnI-X)?nBfp#GgEjW2c~FZyyCSD;^}d=b|Ck?5>}S?JSl;yn~}<)z|nL
zpCB)lvUsOf(l4Jo5<Aw<+NNv>^*5GZ(`>{i_cx|Nem1cl(IcP4W1NzoVkj2L*goQ~
z^C+<+AN!0h;JArnPvoF3YXn>9+(b+#V=s<Vm*^SK=l7AnTx5(y{v6$$o;tCjW=dqG
z=PD)7<2ceErf@vo>X7pjV@5`Qsx3JzrF^uQV6%>d)A+=4W_V_I3=PHnb)CbEW6okO
zFE*q2EclYBy1^4WL~GzIe-|AS4ev6Ir+Q7eVFo5Q)ln^v!^|1e(SP+UuJo5Nb4oJ?
zYh}!sIRn1uGEQIaMAL*BD77EOt~W0hF+qe&WprISg|k<>00&H-H4nGXpNneSaeVnJ
z-$v)KN5{qEX!X-^$K1(y;hSH<!MA!+j99p2sRz6?XdVSx(<fv?r670OpV}ggJ<kol
zu7tYo{Hdta3pyooT1m#5A`n&{R0uhw>cwr`l5@uKyt;4>zFr5Ri+nF}F_*qYnP}EI
z_*4KR1V&Iq?-1!^;$0|srjza<=m4iyS#+d8V`oHxqJx==rL3l+ht83%bh+?(Nfc?i
ztc(DRzw86GEV9^#G~z_Ea`3=74#>*h(xK;5k|k;YdTWa?Cw}mr{Q_eH+I6H|AHBnn
z1(%pcql#hXMEWU31@KkllCziMMJq{{ld}OTaq41pxnr6>=cg#n7!<W#D27zLq<$o9
zNo{XpkK&-gxOj{qPrDt??vM%(o#QU?uh5esr)9SOP!<m{9@EL$7V%tFylI?nY|Je?
zmVx3};F{>sxZn!#p^ihQtIHehex>oI@4x`DzpM!ui*vHNlrG~8EKX>h3l>FTF<-@@
z@*jZuVRwr?{8GN7utnr6IyX8FM)NEi5LU<d$97ZLr|E@2HvW(CT#=O+)-gyp>O}6m
z-Sdr2{uLuu27&_kG#(fA6eF|C<68r~9xDxvU(##HxOhrqeuaPQ^*Wz%rBXq=F2B#~
zTnVL;>;SuxlUhc7g>xuTjjBhz1od7%#C=JAVI)Wa2ZyHOC>Yl}-noc(Fi1laNCc=Z
z#0)iD&NDOTN}PlxCu3L^M@^l&&0`uOGOYv3bpIDp5ENP!ik3n==`O()2GCCO2nHs=
zAQ)stQGmS%TuJx_=aO>(XMRG11A$LP&su2^p^*3VjG@pk8pJr+)EFvUl)ugb4JT@_
zgHB%EfSdEmFBEKC5ICkpA|8WCe+`O6PZd+$H-|H<9nI(G|GbM0Ds!TO$BTuekP5IQ
zEA`Oy==VIwH0)^bvS7)!VI39XQo+;>1>Jh|>NW}oCE3d|PYR1#@I;k8C#~cTi?4v)
z6$GS7(Eire<R=5kX<Wce#Ok=#Hs;t9!SrqeFF5F&jyZ2>dm#X60t<dxH#IEC;H<ea
zP9}ShA3f_So@DrpWYT%7cQTwh2Tz4qYboM*5H106p88NYi+RS7{8s1)mPivL<Gqp+
zP))09v~&Xz<D655WI9x%6aWAq07*naRGU*H=fM7@R7i5u=q9?5JS;QIs&K*Ucp^Hw
z0df}BS#}jaj2}*94d(U6pW434Z%&WXyTz3LYN4RM57jxXi(q+t+g9(u6>Outv|V8#
zrgn~Ue!L*kFTQ0Sa@hxbiZdfVrXE1V(8ylQ;71(HM1~J#t9Vdqk47&8wVH`8I3p>-
zGgM7*@vy?b<#IWh=gMWBEo@I$(v}D~pzP}4fRaNQ<O9_QBnIJoDDVJEj)61~)^Qe+
zoB=&_g$V!Cj$HBmFe-}S3eCM-=Wq+t^<QJn#96BG;A;VRC@~p8yihX5!)r=hM3hKc
zf;3>vvAi#m-d-b7^n%a<MKcu#s6u2)PF>8>peJ8Y@$iO+fP|9U6|Ocb4-?4>2rN`d
zQvg{xC`n*|(o67xWMMyhQ&5P34pu7-%=nnD<JF0ma|Cj#NXYXqK|aV1FYBNS3Inp(
zpHABlAbrDC)hy_llqwvnftp=;8+AdCLOb%Z7v5&KC=?kk99myGj%=&#C}!5HO|qv`
zzAqH<A@(@EgIyd1<Y^427B|5<FJTs69+JmoT9(B$sh+Rr{Oc7jZWe-k6n`<UQ%+LO
zIV}KPC!P%I-q6s0bu8O_hVWqwReZT|keK1vA-OYjLr|-XEk<zbL(PEVE&ABTK<B>3
zQ7-Dym@HOuVN$z4D%5p>HojB#v$+@UrYIc{VTQ(f-gQ#4Q4=okCpJ)G^Gh0PQa92U
z${&mul3TVM2!ECjAoS*Xqyfg{j+6JMW~$ssb_2bd6Yx&Qhf|4Q?`+t4JVF^q<86G7
ztfbRd)8$v(kT@vcq?ff^x#lirki-`XJYyN<A6o~KSMZzTcSJ15Hq&G}_P=*I`jHBX
zu3b<!nj`^HVltd>;a}(Z(|5c=p+7D%K!{!m5TfV#x~J#rDAL1Ux%CrNkAspm1K9Zb
zW9!_>#Q*@+bSh&7_6l1>XYZlgZXe(<y$vC>O`qWP&YorJ$!;deR1z7)262|cNoi74
zCer}>P~;dit#W{B9H?mM+a!{lVpbDARI~&0f_IRO0yU|UQfx_oW9>LuRo5HWNnJe6
zZUk)OLgP6;bP_lz5`0#DeTD}CvF8fPJB<|6pm>*D(2TWSq%QBnIt><9ZoTV4g<W_F
zZ^==f82L$qvS)qCPmD{E|D`xOLq>YW&{8G<8*-bCC<A0i&m)-_>tGr%O6jm;4|yM{
zXb-eKbw2B}nOM=4Hy#gVC;B8C*-m7&ezeWBG{3w{%si3d3HEnfJf$d;8@0a0M9xPO
zUykwCM>Jz~ZIf?z@A>wnxAV1JIej!7$vnYPvsh48BOPQXRdywquNQj=6KUFYGI9<?
zvhXaI<|OB=j!&Y`K&<D38z-ZSS#U_G^1=h^PK>U9Ueh3-TY6F-Jl9F5VwLlg{G<Xw
zFNC>?$TVpA21W8zcaS=MG+lK@Xd0YXbE7#XmW^DBs891-YDTaRG{&giuBfS|jRIX?
zS&pcwFUCX|28w&9=qtUWzxu8rFY=6!)!z`;IuG2UB}+{8H7h8RhNKOZ$rNrjwD?p0
zI%{bc@qnVu%tiqG(Z-E7eWVx9rIbiEnuYkN;>H)os#orKq`B#}WQ%-+BHD<uOo<gO
z{x<esqnBKl_!_Oa$0SF0E`xu@docw84VS$h)8(RL;f5)&g$PI?sUuMBlJku`tt<v=
z1@p3{a{{h_A1;qZOQ=guH=@e2YJZJ6?ngKu<=cq9WjUj9W61Y3R!1Bi(ZAjpz257M
zZ0eOW(<UdG&@flR-x1~TmPexVC^S4Ew+G{0U6;}yK^U7M*Ml#O{5-~Z%=$TJ-x1`v
z?pPqZjuc}#KkD3++)?PP{>||tlA&>=1th@lwJlk^Mslk20Hi=$zq}ZY&J_@&^{}|3
zYXU#cW$J{CO&SdrLv}qqax~;GBe;h>&zOk}jl%YH9#D(3AQ!i9kv&CiY4DtRBl26@
zi|rc8;k`$R!oci=aKi@BcNG?0WxP%9?3WqBg#eP>9`Bsm)gu;BczLM#ag6xPIIyAP
z?fg++hKSD$AccK2%ocmYPY;d87MGp59&=3=I&fK==oLQw+h~;pFG-c-VRk4pGHKqi
zEP7{5uc{lp%=(O07}3ht>ZbuJBX=~G^ldOD(uR_Q1KZZj>^%Ey2Sq&nuX_x=v7j@f
zSXp~=ntf~`lFz;`WOUl6O6S}h2L%#3Wa#k*bt>;W!(D-JM$iWH^vmab<Mf-x@guAK
z41HB&8%mUkvxdB&b}d1CN0@8+fOMR17{x4Hn2Sg?2j1yvsG}~NsUGIIQQx6g9I;Fh
zpI8pNIiheOo(C84Z|o2Kgiw6N%AA>-$Q{iyb4S9fbd1lP^Y(AU-j4EKCpyLQXkp2z
zuE&ZaipP{<tGRe`_#5$~oo=<u&8kqodA%d7nkfC!jT6%fc|*N|i~XwWn>W5oVu12M
z$;fq0?`A0;az{baKr{zZgLrq%i*FJd;A=_Eqh3g>s;@bbnP)0o)AH-4kYqN^D5%~9
zEO!n~M3oiSpvk|X>cV@ct|>LVwNq)zJiOzPRcJ0bzzt&O_n*h5`V>6z$U00J*95o<
z_P(?gCpzocuxT^q+|UBJI*#w&iT&p~asS#!u;`{q06Z=p+l|)_pU2_{AI9<p)8PQL
zT{w*GufK`8cdo^%rE}o`^t7JAwjH}MX~BKic>f}l9Wc~)1zTTu8MT><@c5d$(Ohz<
zRlBkM*{$dtHyclFT8Z&ZB>)3><@qgmt1=0XJ-ilECo}`Df_>Xx!ikO=)@|5`xzol2
zu8tFXUcvrz9k_S(CfqS+3gA309@~puht6T){Tp!aZ8HG?Z5NJU$Lnum_OjJjz3gU|
z@ui)6Fme7p*l_<MG?g3%d#_;omRC@naXTJaeGkSpIn;)`u=B;2(9wJ&9^0@AlUhpf
zegM0+Y{SK&@z}g+9i~q#WIV8QJC3zgux9;6+&p~(;A%LrcNg}bZNoh)H(<%Useto%
z>%@M%cHj(dyLTP#Uw9(`p#805*zwvS%v!n<Ywo%Q4nS}FIlQ!eH^$$*92+0J15M6h
zu=gr<Y<mTjskdVDL-%5QlS6g53$Je7j<)hlJhE;Tri?Gc*9WlsrI&DSpaq*YtjEmB
z%>XJm^y*F=y*7l^>o#EijER7&;ne=uvG;T<makZkrSqo&@VIpH0AAgH8Vl}Tg9jJS
z2E0ed<&)U{%0bLn^dQ#Vb1NKxzRnAHY1`{)o_8lUuULYza~SHoh8<g9#?TE5@W`6`
zf{fjGZQFLVy6Jd${c21dUq-z?fPFi*<4j)@Hmu)(8>bX99@+IWj$9eQs<j)jVCF=?
z)p7dZ>)3PRD(+gb0n6r31Dwa@Q-|=%zLU6R`9oN-WDejxI<KC>OFQ;s+QR#>@&4NZ
z4-9m_g{?2Xit?Ogcx?5ZC^=wwunpUv-+_TC^YPfa2Qa?Lp;qa^t6N{fHB80k^=mM7
zLK*e?AolOvj?+C3>(+0=>>HZleHF)czl=j~^<u@^hjHtyi2&+2d-x5!ar_dNt=NRS
zZkYynkE>@6W9OdZxOw?ntXet;@Id#q)7ZLw52oC9KQ=$O5b(fY-xa+0>`u7ZOYqd%
z<!}J1l}>E?{#Nu&z8Oz#T!EIdL%r6AS6<kPD}D+d+q4chj4#9cVeH?r4JW%i)@|I3
zIX5%|s1-SUp&Jjbdla|NngGD#!ke$-wKw0w(v_RBeEw7bz_oLSu;Y!Rn0MDYtX@{g
z*wc0fTeiM|$+ta#M^+Rv4i8+#^WWQ!`b~@Rjt%z|pjYp~3*UJj-4o~GM;=*)CWSL^
z)hFYrM>b;0xDw#1*t_k;0%tZqirG_|iyVGq2lk%t#DnV|$HE&Y007=P_BviUbRJ7q
zK8m|<DP(Lre+VzV{wC($xgHNKodW>qZ99W4+up#W+wRBa2NuDhknzPIY)5U@9e8@(
zJt#S#UhBrz@4bkw@pJI@&8yH-E&*;3FaO{Lyj7coCpNFg4da^tSH-^VTXDRjhV>gC
z#@wmn0?xdQeP=sx|C-HMG<z}tkGGER!K;VP;f@D3;qF^!1S~v)9j_n3>}6}QdYNG1
zOFQ;p;(~h>7GA}+7hgtqg;w{{;5JR2hbK3#!i44$fI+<a!WLW{o`6R;t;4i{Gy8XJ
z$MN<G)~?@(dDABpGVXm9`_HuFp4A(%c<v1a&K%!|*AAY=?f0$6eYejlaOUDs?AUb}
zH!fX`)py>E5`dnzv)I04H^$Gu8yoMxgRyYO)>lxOb{peNt<r^;U)+xNrkQwj{VGfz
zSBCckczx?jxG>m)hc~Xrj7f!z2VdTSW39t@Xx#?f9B}63zFpXNx()ZNT#qF;7dd?C
z!~wi|;52T%XDuFBbQ1uNj!P%7{ndk*xp)QEE?-b!VaIuFf9Vah+`Jr{9$1W~lEYB%
zRqWXIDmsR0b|ck$v`n3Yhu5tx@}oY0J=?eAY=09rZdi|5lUo2(aCq0tcr)P4Ei(fa
z9(V(LPX?U1rNF|=rw-zkeWw)80G(G(V*AVcF>TQU*l^En0D%6ki`cex7n<fQ!y~Jf
z7FgKdik(|_VsOeW*u3^WOlT^hI^2U@+g`#oHw~NCJ%ni!%JB6;?A^H?XL?FlzhMJz
zno`L4=Ibxx@a2B2ShK0X8CS=dgRf)Hi7QyPas%$1KOKO_l{1I%@}3izzkCf=E}aW_
zpzGQxY}>vMQy1QcP4@?!>A8e0FT8@%?4@{o&7CO0p)%Bt?aysT-{hO|`1%JhzNv&-
zwHL2#*@mnB20XlBZIE#Y`(J(ur@DZ(8#ZCi4K47#isO4;#=(m{cyR4z+!nC#+>tl%
z`mqQL&mP9k-N!KRt~FS_tdOy%^$fPWv>Q`yy%(DwTm*PvsQ)Tn{J{?Rn-=5gb$7!7
zs8zeL^?O^;Gie^)wrM3=$_~EXk5``Gg3I;Ec<kZzm^!`;U>N(hZ^MZ$kM#-*Pwd%=
zedoLIz}m+G&Ujor`UYM-asf+LJi<8Bdj2rB?>d6H%h%(ffQ7wnXYu0J*D>kV`|;?C
zLdMGAH9YsfUc!JY$*uzsKAj2)M0u#L$aMP1J|L6$`3Ua!qWiwJVYIcip{eBH+%Sgg
z9;jE)(cX@smNJ~HVz5#JyhnFuJDMs5xb_WJ0C@Cucc8UA2*9JKe+V8xe{UDs+KP|#
zcJ~ax!(phu2d!-_Z~*$ddr|j5Wv~zJZEbK4s0{R=<~{1e188q+L%HNoulAu*_wdyr
zw70hv;EEv(SL=n09qp*}Hw781fUl#ws{_ro7C2W&?_dSqd-Qd8qP2Orkg;zN9v%a|
zU1)79Y|!0101pHi+k%XJg^WY}Xlrjn**Odkbfe}yYQuwQZ*N0$*`Z$TMWyE9-7q@Z
z+c3~v0(>E(uUF97(T>_cQ2+*p3mJR5I?!704|W^|yhnde7h2n@06e<;2H^n=^mU`P
zt#}l&r>74d!0<pX%Q(>8i@Nuy4)vj}P03jIs8t4o%^Yf#UJTbfydOqKM>~dEO7Qg|
z3|8wvy^7AxcKD$t09Eu4R{>uuWb^}otD|>t81QxU^>m@NwPs`-?CVBrYg1v{?jYk}
zFWTA);|})rpymNL)Q`5dHk6!0Ww0AnlCiA~Elm!!N*{)6b$B<7&W?6eNXAMHr~_S{
zZE(ZQLB@*6hyeg~^bQOa@z~pm);5x{_=S(bzHYR&HN$(4-tPV&V;@@E+6qwU>p=|w
zhWpXl)`pS;s>9u=dXKuYW4+psO05o$3OYO6QRy$i*M~7sslnGBy1Uv@t~3F@hW?>S
z;p^@Wv^piDFY<%z*wr@xz+<SNWCZ%Ud*R{OuigWdfo{|RxZwfzf4$m+st4-TA+)u(
zp}FMX>w~D&>VU7Jv!fluEhWHL3K=<vn*h|%H&g+<M{jorT3tWqFyPVO(;4DX=SMf%
zTAPd5>>?R^S;p#6A!EHV$hP(MKGZyb+A!x4FpS|^9j;zQXGa@`noDr5R`?o)jOF2`
zB0q*IaD+3Z{@~Z4qA2#pG7`=-13|_*9IAu;9FLX3Zq&R(y)wYQ_Vs>L>j4Yf+tEx|
zSPS{l(T?GkGRx?E9X*{LXs$Mg{3x)nx4RRqO+%a?05H(og|;>yu&_VK*v(itps=tl
z$Ws~UE^J2lt?{}#jP~|6jB6^ea7g8Fb)dk)0fmJv^?o?Q8Pw6&(}~uWip9ddF0{6}
zfHVE52P|wOoariLtPTnmRt9=dtpl~nAUYTeLk^?BnZbaCL)995y^5}mcGL$23%#$Q
zyQ>4^0?rgNdi3>nnH*-EX)7x%bOs9tdkUPX4rv*S90t`$)ydVX!<-)l781_114994
zD2EAWTpfJ@XZm|P1q*u%Uk~;7ptZG#z25FV)Rl|?Fx1<Fx(8~*c`WQ`Z%1Finc<p;
z$1u7&+EMM-SXf6_XFE!Sg+qm}dkxME!*flkvy2|VV1EzV+FFXb*j3<lMPcD^e-G*a
z{O|zUTHE0qP#f+>#Zw(_4~xZt!q>j2aaF>?;Tn7$=<e(&aK_hyj2^vRB%=>l7;vVw
zR^&&(>mio0=oLst#=^m_0<S9y3v0codH@;={UC;Fb@+gVm6j5KYVlpOb&Z8}^bJ-8
z3riXcfdPXvlEZ}81Kqu-1E?w)Yr_Q=7QyFvH_t6xvV=c78aCf#>yWn)X<`_qe;k<~
zB4<d@2lC3DZZPy;{p*L3JGz(uFaFBO^Fzw`!<pxLo?-g`_VvF-?(4Aa50f%uXOp4$
z|AYUt=dz)f+VOuMO#BN1(wH5a8#i{j8$B=j&wu6OXUyXK3%<rEWHuLyx&M+_^{?eM
zW{es$rsVzd!{FY(ws8ER<neOrnrM}r{*<An&i8S{drc@bbM$>D>60HGdWL+8&0eA3
zlq_WPJd=;r$}z|vj-oj{mnPWr(AOKx@!_2Q#EyJ|OY_%!^nCqESar_GYClt!dtye*
zkw2&Zimej4S)TZsngj4XcO#vXkEa_ug2qKMX?_U3zp<g`@mil)9-gc%l*nM;vrG{`
zcyHv?-bj4UZi{0gj!DZui8~Q6@X@El4o1#MCz6sMYR&2#5s!2vy9l2V=tR$(6sKIG
zJJZs~(UF!d=ety2pUl-ok<Q9mQpZw0zmy2(z0Q&J|Hw|-H!7Z(f1~*jKEP-^^Z9D+
zmgc?B%Ao9#ovX3XC9>+d3C~E6_~m_s6~?QP%@D~O&OzB;kF}AZVmry-$XBEI+|tdx
zRGjID_W5u0Twi2E@o9vA8JT13GILE1ll?TtrG8Ikjp#&UV_Vo9Z{+hdpLN0++~SjC
zcs4cIwxMju=m@K)ne9a$iXNL2U>j=c#s(RY8F#!r6U#_E;+Tu=6SGMCqmP~XPC_5r
zvqejm7Lj2d43&>k^QTa~23TK!f%&%l-~bQMrzhn8tNp&5R249<>?`(dkk^Br)5}jA
zl}^J!g?wZ@y0x6hIubn!1P7#_9fC4BfkJ)-5pf20!_J0+*Z%QHzGYvHY`hZpp1+J)
z5+buIQohpkV;_qBV%tS@b9Ur#va#1#{szv7Y$f#2fRdsUk~_D83}HUDT@lJQ+VPI<
zqWslJoHPY=PPypZ2=a|$rksBW<V=1VGsSq3Pm8hUxQVIow_%JSj&BIwL>CLZ0Mtjr
zwDmx_RSZcDSB!}8?mG4KAr^|%kFkqnlH7|^-E%B~$K=HsSw6v~bg1{TK#wMhFi}kF
z%Z+d|W;C)d8%7bxZ}d;{iCA{!2so|qrIAk?+d&N*rKaR^V&o}lTpDbT*^iV{Mvg3u
zS^8X8bFkKkfLB80cPFzl=Hiue6Ok*aVRA{zq-4UGvEW{#u4*KI(&a47kAh8O^3|A{
z6!R*1@6C=FPxo>}pMv4j;gT~SI|_-VzCoP6bBme@`6d=>6LeEYY>RVdWf41@9qpvh
zXDB6S)F*`E?$}eDz$u2Lw=U}7q?DCmByTYY#V3I1`-4J&do{{afwbU9@;Mn6W;me@
znHwiL0kpz<;e+@)hU7h5^bOL8zOzl~uUKW9H9Ds6v(e|EWFR8q2QTMB>=+te>#gmj
z<unYfOc2|-cp_`02E>hv)*HE(EdnBg#8CWQeL9yCny~{F4bD;JTcVNFPpwl#GR5cE
zkBu7%;%fkk30vMtgRieU9ivb>Sra5NrsARzAHg=KSjNRF*sfvorDI2zSfvjglW${O
z2!^KIL~JhON5@iW8qN{6I>S53ADv@lYHw+XOk^jqgm;#nk7($aHu$T2<c+-q1GO$<
zn<VN?e8e>5*8wyZI#D3dw|3STT_~g**%%7=JM3aPoPBS?Tpih*(1TKc3BJ*Hiz&M}
zXZ;89dBeqmVdr`GCXs=@*t<~JOtw2hHdwtmF94B~C&q;sKS*x_GRBqRn(ocwenpt2
zpT^GdBQ4_C2>woRIOZ4Oawwl8vTW3v*&R(wBZjwJ(b2yq1>W=x5yOpz?j|%qQ-Es}
z(#Np7I?gmP^3C1=l3=$RO@&-0QPp4zu0o%#7l4{Cxd<Td1aeFOd8ZOiRSXFxgc!45
zKFO<T0vleDWvWwXXialOO^gm)=;ZcdEJ(337El42$qxxwvM+MCO?*N_kJDS(cB?v1
z1JM}o_4&r2WMIR2Q+zc2*pBIKh6u=&-}oFkEdnzFez_%~?0{7F_`CExB!)zONX9Fq
zj<?i}PLAk>g^<Zp*nCuS>D_kiPyTKsWq;#W0Qw8I^~c?Lp0YpLF45I0B-i<z65ENK
zUZ&Xbpm91jATbv5l5-P2gDck74wB3Inm%VDH1?CG;k=!v?54ld%4%d`o8nn=RLM&?
zZ98h7NAk<>$qMZ_zI5lzKcme}NWoj<m$@y+kQU*Op#j0bBqw38g|B7eGM0m4-#M#u
zY)@*S$%@<<5rgQZ7a3Y-^BrEnpW3#uFC@0TnWt^T@_Qe88K+5zEj0}wd?BsXnXoiP
zQJ*ddn6So+!jJHX^4$6?jweJhX>K<%mT*hP_O;EE)*(K&cx&jICdle3e)mQfT8Jfi
zp!Fi`(fk_47FjN`m&p;8$NIV~Bt#}{F{=y224uQLOO_O52sk|@ln3834bMGrLenwm
zI^=g#wJ^m6$k&sIk?4C)8n>q-TpR60bFYJj0h=F&p7?W&jIV>X9v`3f0!*gC96^?t
z6a`d>w^8Nbn8?T^aWpo$&5-N1q37S_9K)Rj3gKuukhz_y<d9vIEV-nqZzN~Dvo|JN
z$Fvn4u~|yHFbMKY5(njTAR+4;`#eX3=8tTa{2ZBVg~-twF@MLvv%#Q|oip0?QH8+u
zVchkM|7VS*B5<8@F`bM(G_8Ln`X;{rVf{E(KFI+5aM=ID)JP(c<X&jCVtk3l7A4+3
zMq%uj$d)u1hGxzuOhWJEMPqrfmWx|24Vm;VNN8|lmeGMXV{(2pW0f<xJBGbxM9kPD
zqaP22-FXci7J%Y6;X?yQ%4&gN4CBKkUM;8s;UES>(4pA!_KHT)$@0!w<~PO6PE~`~
zXtz(tLL+|V9;k+);~UfSR`2Udtm{m|c&pe3sm9A7Z}Fq2W^5zY<kOMy%!?e{+%z61
z#B)9)8pU|0&gq6fL(Lo)(|vfCj!)tx20~rD;&C)K)4*kWP1S|^XmE0?E@SJ=!Z4LP
zvr?B;o|fGuOe8m=W-WBUi3yywpWJ$h=qbMO&RAT>6SXn{XLgmN>$z}CK@YrQ&Dk`4
zMwc1KWT1CmBO-cYB0r+5ymP7*$@)99Qx?WI(kGE&#NLxu@)QeEB#(+o-q=`px-@`Y
zgZGityyL5F6#aDJh!UBa8Elj@luzR=TTM82MrJ<Jb-6n;vOh<&cM*SSy)?U)*?B3Q
zLgrLPx?a=($yU>UYrBQUNjG8Kp5%1O{S=~lC66eD9dt9EYMjPDr@ro(dm*cjxxJrj
zl9;!$l-5Dn!`LwC8K@ysc3>XzN8759N9d)EKh<%ExXDGZf2lBT(V0m-bCRdbi=Nb=
zpOhf_PTteELaUC}!|}@9hsw`e=svfd**K!*c1ETEjgV^^j2qFU>$>3R-<X5aQ<Kro
z=F2gFdx0n(=i?-l;D~QGWMUzEtha$x1?1m-jMeu%Ty2c=$%qV3aK;bu?Kf61(%-a~
znlQyZ(pd1`o8N1+w7of$gtyI@*{)V{I%t_`j`C5&(;2&X&aIrx3Qe*y%Q$^ZdRX2R
zCq5nsRg6W8hSyBa={VuJlGhXuM8iA)AHDQ5hUsyvf1_s3+Pu-CH)BSnt9q{+Z|BrP
zH@1nEAXtSQU{>lPeNvEB3>ja<ITu#H$x^@}@lyevP91xG&-NkwFs_V2KCd^)C&AVr
zhqEyhFn|~R&NN=$Wj@&)=MJ-wT;y5>dCW$!Gc;3s+1rQwVFYTTluFb%XMIKV)%n<u
z)*tcW!P<`svfe#0G`!_4>0)&)aMlW!+pRH>MK)2o%HvB?jm%D1qw12uI_IOg&hev1
z5)&CxF(#_>PM@H$nN&;axXBN(i;MivqC_AoyLlmg3nid$pHuL-u<MXwRMc>?ZG@JN
zD{TketELN#FgH>{+r1Rd6aqsByqCaH%JKuR)NoSW<oAr`{6_?c@A)^_%FVozgh3P#
z=2}E0fO0M%q`_n;6WY!ui;sRt+AC)=6q3_o;0^!!z?^|t%b{%IRg%O%(9K}#3n#OE
zLVyui=@qFClsF(rJ_8IiUICWa*tA7#2UY9LDwJ1QRLB(2C(oE@o2i|lr1+D4Sa|WD
zS8~TOlRTru$rBRB$50X-Q0>zNNC}qE*YlL!om#QQFS|$#U>|}dS7ag)(raA3NED?s
zU;@HXPI7`O^yB~lAOJ~3K~(XOF|s42UVN;Ri=O#Wu^Q{Bs}9?aNM>R|YhbX_Cw?Tc
zT#(n(abz#UPwS$RCE4kYi!uQ<ki;5Q2EnelA@fF8Lqo+7e~5wJdm^3Dn`rCY(alKY
z3;k<MHkN#ZsS>lEm6t-x_yK7awEYWrM>Ls+;BaCG{-f|QB4C=Z5U0+>bf{<ggr=bR
zhGQ4|Ro4P1kDPcF|NM@cf(Q%frU>W0z^U}o0mn&RxeX{-S6p*Z48i6xH7X2{a-MTZ
z)zr8S$1fYF*6!px$9YeWODPAd^Pc6BnkASc{*XFB{PY}!Es-1|w6cS-0#uErXB0^X
zZ*8PBmc^oDne4*K1T^#HM%=Ool+UfrRnr#Y!Db@u6c%So^v%CIu(%;$M`_NAcib2$
zQ+cnPs4S8<WE>>6(a?Zw4M#G9&ztZQ!3zL1R<udahVxnmACg7^J<(PEqL@^6l?0}w
z_W~q3`SnBPvKo}YjrInWY?A0?gU}-h7&rxeB(D<O%Bp~r3${*FsB_|b0VVc)Mpgw#
z1VXBjGew)OdGfbM@j(VllXh4b7}PDH_{ZAq>Q95N=hOU-{1G+IQSeBS<iA2o>rKOp
z1(4*DCp&xL%`xC5)(z4_`6y>&)%X}ta75o5n=50oOnN2CSQRRaZkCvjWul6WT@h=Z
zg?vQcEB>N$Y<FoO#ehZ!Z7<4Osufi1Sa$(r@rU{VL=rRBro1?bz&Ew6Uf_5W|01&T
z8I%sH3G-C_y$^Az=0~_ec_#5C|7RBtdYq!mjf9h(03M_43+5Ny?g^x&@ssLcY^Hsh
z!T1!npiz(MFrGNV87sf_mAVD!y<jH5{wjnQ+>yno_`>R=@r&k&?7DF&CW@}kJFWx8
z*K-v>&I=I!-*MbYlV}&kvS^XKaxSu~Q+g^nwJkL*Q$$$T7=*lFyAYx)Lf4rYP@WZv
z5!9MsQ|Bd~?dC(wAyw3?IjY{nheuvw?4>fUdx)g1(#7A9Xt?<#4TV{}6|U3OiN<=R
zlGwy8TC!B!LW!R004{(Iko()QzD4kf(-U1e;3K{G8uK^K&TaMP=u$H4hQj5<7;%2g
zlUEke7jNkX!hF3$7?$v*^C7{U;-klsL-chkN7l5O1;1Cq+LhBtwt%?Sguoc)Dt6NB
zIVk^)?2Wh?G0pIZGe?S_+`I;Igk~%1FB-}y)FMBrCejv&bPNwsyc<avMW7~bnjvY-
zGx7r|57m$vo3l+Vu|~9G9&ui2?gUcicCE4%<;Y0P7?zksz?TMc=5`cv=rU5Qr+LvR
zP9omO`7^=}5lhvssBkj;W}-ULR~@rYtsv5bM2o3NF5^?qk<8r`W#6L7NcB(co55DK
zxNu5uV-0SuYz@oh$w>UoqCpi|+WFKQ1!KoRRg)sqTfe|GOD@SZoeP0Rn)YegR&@wI
z@7vLIN^95{gFT|nI%LMBH7k|RU=fYWurxVjaxdRQh&T<AH8VKGQzM(I<~PsN#+tEx
z;F0uX%v0-JoGY0fCS@RcRF3Gvkyb7QhFV<+0q{y99O_1b9hp4jF1tPeihJwCoBt8H
zi^ZFzp^X*4b3W#0>52MM1hw!kavY7}+Zik?-`o`Lg|G^VL^5symCUq4a>f8^Qdm{x
ziUF8kte<RS$Gaky0kMg8IRN7yO*<Y(6!n6Jx6r-P)LS03>N6`0Wp@*1kz6$2tj404
zvGQ3tV%cSUY6p@l+VgGrxeep<ST}kv1B{N6t3ExK5|^Tp8{2>e?@%~;e$26a23Rw3
zlkTW!e#9$+F?z;PL5JQ+5_O!h3+H{Lj?Oot@6>>}fz$83SL34>H3G>zEePqMn<Vqn
z{A{5r@&RUtNE)Q#KsN!h$fu{qoZ)Bfq30^QI=K~V<O=j{>^Va-#vG0@>PV6+fr?2C
z>t96vRFP#GI8D~TsFF$K$zgs02o_^4eU5#?IgMY+aVdVn9vgp3^ixb`?dIfms0$b4
zNOI;)ip_M%>K)+=ld#Vj%dVsli_Ff9bk#6YB>vPGu4%>3IHz$HKZDCypqqM?cV>sx
zGLyQJ;62rwTm`gx$9Dzzs`7<x;2h^^_C;6=vb~ZaT7XbYdh=WLR>~4JWZFAL6A{VE
z@_G*DXhAEWrW+tKf~KpK<AzBYJ?3qi2O8?4TOScHG3~hX=O!#1G9R0q&WgzgNusM^
z$+JeHfht7yHj`}{l&)$F+XW6#w|%U1>Nc3>Okm`qgi-;?O))oCHi`HpRV{m=Q88kE
zdUeSoEg9`gF0_#>mc4*QuHh2GLX&bYwpXk#B^rg1De5}Lior@Q3sl<2F~sIX_ECe&
z^*X1XZqBGNjEc4AbKc0V=Oy})&7#I#@8(i$<yP{Jnh+P3HgUn?CdD!Tj7nxf*Z>lR
zx2;SGy*U0Ipr4*l<Gry311TuS)pI>AjsXq~(<np`Cb{qIZCrhuk1UYrnbDC=khw;;
z=4eDFrP5r0@*!Y+B#Wy;8yh#8rU^p9D4A*tH4`!=)D&xKcGAM?jZXS$9@eiBJV5KI
z$)$O0?cvQaz4Y!3fsMZ1#<Ebp>Kzl(GZ9dWrJr}yT0^%nDST_JhyYbDm<zEpfJM!e
zNAc_!D--@I+r_)Af=#gHg>%L(iwQeS{4;GNv10*G=>@5~1{+n2#UFy!d8cV)`Uj>!
zA`)h(+!(3Rk6%)gGQJO(DZP}U<2_0&2scOJy0QlsA?M&h&jp0RszQ05R|IXnUPrwy
zAY6R!oJs~CnvYOX*$+{oQh}kQ(JM>PhYjNJrMbrbULAXw=2Zcf_a1fKAhN1n&n(_4
z@Uz8N#xlpWGh~|+FAWr0fAca)B4a6`<M|9YFjJV9Q?baGDQF~0jorPr0uy$L{PF;<
z)yHm^OJOj@0=lV<3=k&9V9N54KZ!Q^F90AvyJCu)0J+d}Y*+n>toS7bCaHzWjtb|Z
zWHq*Naii{_xB4jMs5xWlRs7hd!I}5=`57PVsBJ+!6<CvSK9VESm2;HhO4);VKq;Yh
zlc|87F<Hy5iUc>Atb?|Nw{noTqa5P#oaa`3qc=oc4dL27L^HnquXh<!43ELyOI(^J
zG|C^=Db02MswilEyp1RFkD|u>6~3}Q(Ejux_EbY+8d8ZxqCxIfrW1Q;Ty>Hcl8cHD
z(NpK?n!z@0PmV?V+^pElb8P82kY~cRf55oH3z^tI@p$70jkh9;xib)$YJJVlKlv@n
z)wu2;!rt61J1b;RmYXc6?BdNB^HnDi9cf&(@(u8k@D$9-=xiQVWX(iBV+I}H+|!8<
z>3ngl9iq;|$=5)U-c%fknF1$Xq&V2`$qj_#jRoGk(HW<)*TKw5Hzj28T;yBu?ZU-N
zJU_%re_fHJ>`S&?lvtonSBhgXr;I!9U5&eMzX|>A7qRo@-RK#v0V?PnTUWs&y>(4k
zw0I#d96yf1x~zn6S-u8Sx)0&-g`Pw?`ch#i=dqIj*uV<^=oM3_NV6B-jn(%oMAwC*
zcx~S?3|4hP)Ria9Oq{3KD4)lws)$Vhn;+Cy&lfi0kurq}K*&#{<p{(@h~LJ7<`c0I
zZ~MM6)pR;NS0L|z$V4NGVmVlG^3Ps1R^U2bhOL!-Y}7~R`@@XLx`j)J8)e*Ryi;3W
zpWI`S^*U&AV-UmtSf7>6&1BaGn8_5uG1))DkNl7CMspiQlaehb4o12V)38`$sAuvx
z=08$KoAdvy^lD&%QE)(zWTfcK5%i$v`0|l368lWVW`5gy?D-T)pC#WoK1Sme=Ne-2
z+)PDr<f-nFd<sc8Y*Z8J8lenSqtxIz;*9)yA?<@a>7zcjHs_+())=d6j&8hwPZso9
z>`7(U_Mm!C^9UER&(cLvO|q8sZNY*TJv`^hM4YJlnVI6e<1xHK0C3~oPvTP_+=zjm
z9?ZDqR!nVaD#p7Knwy)^+*B5!Q9`+?8O_bjD3u(562?!NiJ$tJpTf+E<BPHIaOE;?
zm^mBMCN>+z%1upZZf=%cka82s<uaO^n^C4!gDa$MYAUmyFnR#aIW*6>1E2oI4`X7z
zg3TZJ1fIO_77kRFF+{3&K)JaYEzQj+1zM$YIoQCVTrR`8;#^Zx6PlZwP@*3>D>*bN
zor_>7p<FJbrKJVsa*0KB&IOw_g>xm8OX9F{xx@~R7d^b|l4b$VvE${P_qo4edh$jH
zc=bM|8lQpMT_AZO3kVud2|WrHi#UZKRN#_V@`Ibj+hP@NcSDnLF;6dyxL=NprS>oz
z$eb?+Og|ctr{w~#R6b`%@w<i%9IuWo?}mJ{8mK-6sUM7WRSf70P7lt6agLx5yzE#d
zqt6|uV`I-a`bQ~ZW5hKKlQaVq!AvM>M~+mWSvFPh?0`&Md2jw_OC&=B>azo*!mG9M
zk?s))$Zkna%AC=xU0rUROHYYx%4*?Q@80GoD$<n4F+;tpl94*?)<SXCJEpL?mCE4E
zdu}atV`JnPF)K#A8aFlZSp8^^4R!ORpTb%%exa{wqK%tOo2Yj)<430STfH;PXN|w2
zXOyW?iu`4Ft}|sZ=tkqyM?~zMCOg@)uSdem>|Gq{T>|^WrvwKa!r~VID9@NN^RpgK
z97By9q+z2%PSK>Fi=k_>4d}3F%{y=>-o#&j<9Y1aa}YhlHTd#G{K$ttfe$?M7}l@8
z2bYf@Mr%(McWwL;{QO7WgLP|Gpt<WJF7!;shkov-v3d0h%(>|%Om$s2dhRN2zIzjX
z>V5A*<@~ESc)lCn*D+_=M*Q;6e+Z90ycu(vx^U#=1w8PsPvB!uug3CK4`cnZY1p^_
zI4bozrY_om|N2`$jYHeEqjR{<x5&si&H=Zrd<Lthp1~je)i-ekci`>!HDk-JQ&_g)
z8T{Obe;gauu0V6oWt@7e0}JkX7(f3rAHv49D>19wgJb8~@Sb1#RkWPmk1Nf$;@|w-
zGdQ$+4>teQZ{YopJb-&wt;3R;z?&z|WBQ_p@UaiS7Y}b(hoy7J<Jg-gF>lSg@#&v@
z8jF{&#N!X$fg}45W5AEcqd)o)eCXXzVfCuJ&~fr8nif8ck3O*wdk&n$%sV&Z<L|f&
z`}ZG5g;tMVnr-VN>i7eUIP1qW+9asF@wp1J{4}I%XCLxPf4W9c$<R#V%gY!S(^Su8
zLcu-lLPBouq#qebI$rQY<BC^Q63olkF03M$vdRYrs?1yhXeq?P(IfAhM!+oNwQU>8
zp4NC>T<n5P{1?kFw{FQ^dIgTi55hyoLL3+T44PEpgm*zm`QOp`|ChP<j<V~x?>s;C
z-s>FD3FzF&83agx01-$41eir>C^|^iNV4S_d1T9HJ?WfXPdLZBJ2S^)AJ2H4#+G*N
z@j8uUicuuRq`)AN6a$z5f&>vdM>HB8(BZvT`^UXi^{cw~H9*OG&R&V=ckiwG)h~X3
zzp7jBKIOO9F-J<s776I_F8$1VSD>+Dn7BNAbdS93U*XUGYQ0NDI<9hMJ!0e4`cp85
zPR6aHBlI4lxYkY|^6`(<djtnbhj3c}lzaE0j1B8%#?O6Q`1|Ob&xPMum3B<GiAp&)
z{_bp&(24K^BZJ+SCw#)#S=uFfEr)V?6*`Pu&#XLY@msShutSI($`%5WZ^OH3?{Fi9
zmWMecb&|l%U<?t9t~yXQ=J4zwFi<>UaF2vVG#I=vyegMliE%aL$uyU;A>zkjq;h#7
zvXC7+#GoW5JHIo2q?GU*?eN;=9&G-TN0NVswT6b3N!~Zbe5_3oO2#X2uRKfohFq~X
zG+o`I$e72@E-bV&FxJM7q$u1R<F-UZp*j7+ql~rO!hihj-{xO^`F@(Ja<uL^&RrXe
z{P+L%UwQ291%BqP8_Cpn@ryrun8*J75BM*C_%&WTJId1B0Dtwh|BI6!zR#ci;UDnG
zi|?U<{&%0}sW<wmtuBxFDrWhaFZ>*Defw|t&wu>)?7ioHdKz+6HnpKg-s8W2{r{n6
z%OTdR$N`Fli@iMk%nM8|7A>zvg+_bz4b5DczDOR$%%zJ|H#AY+xSG%Xqk}y4*MG`?
z`NOaA<1@oh)yU6%_EWt1$Y1b#|Lsrs?i*(*sthZdo2krZAX`C8O9L4N^{t&uyz@Q&
z=uiHJo&$HXp|ytT(eu3a#tGgz-OsKg_p-UQl3Z;g)e9s1)nEKMV|82D)mca5#+&%W
zu6q9OKmL9G=<9#S`N=B`eRPtBO*hfqSk1n}2N^bxoUM5x`xptGgBwv@a<IjQoX5D1
zI1iVz4+j=zptKo>nX#BXF)rAWt>qlhOH^Jm4$@#yz<GN@WLAU{JO;Bj!>{?Dp(kct
zT9**kQdF@lW(713xRjZjzoP9YOq&<oz=4dez1h<yS5@V3rPcsdcwM`FJ`%|B>}O0a
z6PoZht1}$<O1@(PCSfS68~LBJgG^fHniS421l*$rZ=a+ER#~KoeJ9cgqG&Y(xPnT6
zIAFdd?usTHP-~-k<BLAtSdqoMh)jZshV^|Q5f8#1W?W?lSN5f=R!3eo3S(B=Ld0(+
z3dopMo9H!8ZgV}}$|7UL?A>$vkNLC0X?HFRArq7=?yQcDBRXq1zO5fS@vIC+M+(bR
z#Jlkqne{~R7iV?nGvaZWQg|=tv}}bS6l+B5*zYBC$+)&#gAo28D4Hh*=DyXt%>AO6
z3n%MVI-v`%C={`49=Cfr#v+wJnUOJ;nD!{;@!)X6>CA?OXyZ-JVjb93F(Hw)dw|n2
z!@1#={p0z99TZ_6(^nQ_uBp8zWODbF9Lo1rQH=XsDbFWtW|2iHE;e%ZcT=S!f@c6b
z;}T9H;@M*q1&bHY@?ZYrZ_(7;!l(Y}Z*u>^6Z|N*nw4Ga`Qn$qLawTTrSl(AQ{O~&
zew>f`Cz;cVD@2b!3xxt&Yl?*e#n>Q<=*tFHH@b?Bx&qG)jxjZ;xTGtot*Icx0)vBN
z%+Ft8ZYe`$dBoV_%s8*VxyWq3C@WIW77O!NC@Zf-0c90c<QJx>sB5G$Kf&3731%0g
zBBZRoiRS7>PM;m&%G@GXG^kvK=TPPmE5%v*1}0dXon$s&PEA!chi?5Wd+J7c@!b(-
zma<e;WSAz;*vJTTa|_JO6{)Pu(Ad<<?8teBFV3(OJBnWD=Z9w&ICSU;YufVs)i*Ct
zjBO}qJ9x<l_gB8e*2{tdK)i3jYL_(d9a`Qup`v0?i~J*S8VL-tvJ|{{{EH9X3XgR_
zgUF(#4I&4Wt_W~lO*G2YE9w0~rt@aFHa5IjuE(^yXcKITj5uD=Qh&1a@+Vc67kek}
zXMRpgBt5&LWm*|t&`y75Rw?vo^<rpydNUa5gTC`OQ*4nqtp2o{Nn6!eojU$yfh1?3
z;p(EomqXf0u7>)onqtbWeafUDSFjUi_08d2TpMdR!Hin$DwyzRL&*9q{>qur12)$D
z6(MuoV6oAL^L<+qp(T@H8&M?trH5OYF#LMOqtG+pe!<ZJijW=aZ_fS06;c;>@?Frv
zD#4fn4+5KcD^g-St9RuWIZkAUkKX-I*7#g$saq^hp5Ckv2)*0*Gwe8<f_62WhqqFV
za@92+S2-N;R$h&d$RbpOnM5R3cV%vng@zE*k3gsYYq=(FCNFt@E$kiR5sB+~=Z;YP
zSNr;vlciFt+ev&;sh?A22=(ZV8#5-(jZ6;L9NH_vqo;cdl2MRkEZ`^|If0q|h-g;A
z5tZfJtT}|&p|N`t+t;;2af!u39#9ONf1myfXL#rvf6L$e?cea=b8j#+Il^S7mAyMw
z)6&{O=gJlFH&%))Wh!V{QA<TGXRGU(>1kHBby8nbMK&|XM-z%0x2>aV>vgoK8742!
zTf=#W)~r~6h~N27zscIVGVi{!N>TKmd58Knd)U<7#s2-<IP=~aE>8?Ion6JA?Q3Xh
zYp0{7fu*@A`Y)BSf6peGn_KDWTt!9~nVC_n+t9_Pty^iTu84-ttYG(z+vr%gh1SYN
zCMM_U>RieB({Ix|K1V}CJ-J*a!Lw2@+INB4p6zU2-Aa4UYMN^*0L`n<zsSv>`6UYH
zUZ?-k0x0D>ni7xj;&^m6Q|`Vfh?EljR6xf#oW?(w?L`e>e9_XB*M<LBpE3yxEPwXr
z*+L$?AX63uoh`A0D_gXLvXr8wJbrOaJ-{%@-cNSg>e;M9MLPDu(5()Mu?4T%o-3aL
z@OZ(F5%(+cc-+ckd_;SMoCLBEh!rVqAP?tX!k3KAy=!i!FD&jH(t=ZRRaA~4nM_X#
zN&cK(r9YE??y)6(jX)NKN&eojN+eE?59#w^j3xEt;xN>K>>n4{D#B&QN;(}`^g7kq
zDl8dSo~-7c^I3Clc+w%C_%ags!*X`8I#tRRFQG0?LJRfjw@bq)<<W8YBl$CW6?mEG
z@9Y8iaTn)NzpdzN=*G4~Tt{4w@Xyzq76d&1K*)vfpVA-9#M<+BfrBA#SbZk0M2B3-
z4xd_t>GxwDf_Hl^CeG1);lDUcm8-Q<02R`-Lxp3xduD+m(z`3%CGJZX4U2n&<6+K7
zlfl?p${*?@ab576ZE9}$En`&!@@AsoXS4W^&6plCyS5rCn|iqS6L)j)roH4w-{6~1
zzRJasUM^-jxZ}2?Y~R?!WbbKuhekNlKh2T5?qTnB>zL|4L*K*<x{zm<2JX4*C{1dL
z4=#+N6-<tevhC1a9N4sycTb(>)cd{cz4Z<b?(F9KkA9Q4&W%yu)&pbbI5Rp!S5F7M
zC*S1ayk==;oHyS*LEq>k1v3+q6*-E9sbMCwZQOOoQOd^N<eN|ah}qdG&JNCS=(aoA
zcinoX2hY(rGS1oaLtKC4P7dwfN^x?4v;AX?PtJ4r&f96oPctz!${Qz6vhk+FtSql$
z`>svA`P?JC_CY^G7w0*6^fop&WjH@D%JBIQm?+j!pTEe-b0e(m>|*M|NzPvAWn{6Q
zJMTQk)-@~XJ$;(VxkcutuW-|mz5L*z2RJ)6;~YQ)V)$qbQXDy@pDCr1`^Zf42(D>E
zQ#+@X{($f&;eD>3q2u#wRwY8;m6Js2b6zoq1l5#Tjg$w$Gk%d}mjn!1JGOh6;J7vZ
zlp<4bB(GVAjCva$WP6?LC*xFtxk;Q}EGL*S<HYUd<XgPA%KDR(Oq@tPry;&ZBwfLa
zlua4N<?K3zWm|&^UQ4!3%(7!{mH|W0UK+6)<o3`=Ke7p0=)hcaE7p#2YYXkT4!^vZ
zw&Lsh5&MOVAE9`+@Z#<n)7XN+FTi6}rNo)Pdf>;8^6cdqB2f@%L0<_Th2L0tY!(W1
zsH~!vjVY0f%<6O;P(geKaca*(z2dcV?SqvR(@GRoss0wzHGIgYA+8`xDX0uBB0RLk
zK#DFzNZZd^IsKQ67bA-m-SbMfLhKw$cxS9LS(LI&Y?5L}ZA+%4eU(c1i_xVlnk05e
z`u1AzH{i%#m9{hs9a_YT!!F69v!h#3)d{~u#E|nzAJ0O&CRQf{j@9~2o7|eeRj>pN
zDW;uEqwxhY!D@^f2C*R~i;U&2MS^93qFXu#GBN#PToq?T--Kf*1-j@jKjRk$zR?l<
zxW^5~IF84ZYcl!A(NioG`P{$#&rCh?M||hhp!Ya9jh<u3C_mXYSGSV8KlNGmw@>lA
zfAH5#F1S5@%gw0dEJb-h;TLWG(z4}1+_(6R11aq}WVkG!PFpTJlkl_D_CaAOd2vAy
zBtI*AG}AVGCLTAM{%Jg79#y*0Lw%U>q!hNb_JQ9#1_>ib@+@^B3pnpFkMbq8k68mg
z)B1<H4tX$~g$X_lWRmtWeO%lo`v|a4_@Y0y?C(G3P4_AA6M;87F2QT6uLXre>R2&@
zDIvFvegaua@M8O#*C*K&WRCPf{bE$cAlNT$l;pQ`>h<j!9?G^cbUX|wflF5(^J+Rv
zPF&sg0z{l8gz)-!JXoCtepmt<Qhfwxt<cZY@T{bdS$|I!M>0;WRl+z6FfuKsqP{+E
zBJO*Sd2#to)&Nph_DuP*8wS#2s5CxUr&21RPn*vY@&x!L?4G2dyfIwS1m17UYw^55
z<mdo}6PCB1bR30B*e@jy%ki$pvr@{uCoWJ>;2qBJ?uL@y;71AG>V>#&i>WXfrz8?f
z3Rhf(3B@)ts!L|Tq~}W|%j}H_BuIN_lHuKJE4(xfq_ozwt=&L=te=UQd0~(e@}?bu
z4B6^>wryI+c<%=cP0l68Lm_6MX$evSM|#q5Nh!$Qp>B;VLBhDI9D?6)wd|Tyl9H%P
zjg=@uLeTe6-sd7w@W_Hq$1MXrm+}KpR@YJSl`gTA&G!?34^tX{sRGB>G@-t(RpZOG
zOG{PD^}BkqHlIQ2!1VN!OxkPKyyDB;PQ`%~S!CRl<TOQ=pmA#7QUv{Bm+u?@^^?W5
z48pa?=En)dtB$_BV%$EbK&G@VVNS9i!<cn(>BUc2@my7cpF$DKQdlMi(tMXd2P)wX
zNzRuuwNQ}NpAL1NvP4sYW)g!^yuic8h;F#*ohC@yYFZj2$r3~YbxJxf!FyU<rL`d?
z|0MOFbhvOKC7t=`qQtxOW}+5bw{ep;4EdF<_KE>XUZnwdiD(O#)}@-<%3e}w$wFrt
z6tpy#54@|*24$hPeB02M1f(Pqzeq@Knpz^!<F)mQFHvy$G?uGeNzr2odD3j*Dz%b<
zTB@K53zt$liOyr&m6%z}A(MSmX5xv!lQYPaez<}l&0ixRNY02QZGR@4a!mqQoV8L{
zOw;ToW?mYxl57L16v8@Y(`=BcD-nMokKt_IS5Z>tD5jLnIMS5nQ<`szNwL+5<0U21
zweuaMeo~H7{uLIEhS5^1dy<~@b0JguN|BITjFavwLA=DL2^}w!vD0GuT6El|!Z-i`
zAOJ~3K~xcoE|SvnSh(B~;$H$Mgf~b|6BDLX_x62sxq>3OitqbMseF_0DKQ~R!<n?W
zUUqdj)lamGv#6ucRlwykHVm~@S{KQ9_UYp;1{GxJi{4X!QaDVaO7I6w<38|#RNc$c
zGw2}4E0sg8#>LfhsYHMhcdzEx$!gHuDq&x>E`FRUm-B<TN)k`!y+lHhD{!WnOODr2
zUz9!)8MQQ^U45{y_(6R0C>_9M<Fx%^ALnpY`Z8`}b4xN=1+hi)jvL1m;b~xGKL@iJ
zKd^<LETisHIw|!CPo-U0Zm^UwWRYu;5QTgkbuCFPjj?6j>uT}^1yFjimK5<S1Nnt#
ziT=sL^D1GJ;!IwMi%#~7?H`kM$vgP?6Pnxy{D6kI;TK{C%s&QWKPetvOTVk(=W+(R
zR(jGOQ0X*+Bw?dPl`i(K#)rg6?Jfp~NF)l(S6RJ@obYAiW2G>4xq`);h*$?oFyvxJ
zvX`KNnBe_n>1DYUeQ#okA9nI@q|@{XikA1%U^1;pSte&&$i&y0i+xjErpqfBD^U{!
zvO7>r$FC9-PBTWy+?zV<6pRImmU7Fn6FK=>{2^e$`&?p%BHc7yob)GG8Lxp9$iAsN
z{>LDqNRkc$G-ej!wX+OG5=XyBDE69_boyo0;y&83%(<9FC%OoL3CN&zFs8=Jtdy5*
zjI`dNpj07YKkMW@c^++AHETT{bKbbdVO7AB!WcQAZGPi0rmKuRf@g14ApY#Wt*9|G
zE;pDltQ$uQzU_)`u*zX62yZog9i|D|!nC4=rw?CGMmI`S0mg)}4|;`qx}#~d;3N9%
zW~5SP^I7za)!_Spz$lqy4G$t7oV-4dQfhmMJ$|;o4t0VBIxB~z>GWe{GIZVjxQ+0w
zC@hq1t1K=!Uae0UTYLAl#TPlYQvR6kcR=H@WB!e|Me+*|5piK*(N=QF*h9<y((XyW
zFuJq)l;<iq4=6}^{c(gs<stn{@a_48*T#=W6XQ;1VV4Ac65~$F5%`khIT_~`OEfmo
zSWJuUqD<RFdEjW;WDOpEcy`h{sWWlU@?tZywwNF>&*~tNq4DH#dZMI%_^~JACzv1`
zI!2#b*vaHZ1)hJ{<qzd7sBJ7n!`V1W@kPO+>#JN_$(zj+@tIMNAN%ft=bvI+Pfw|p
zjuyXDDr{k>3V!UQH;qn8p7A9n@eV|J+xe0<slLQQfZ;~t_Q|=csk*@B47a*!I{Gmy
zb?v_>4>!Id)z9KDHYlqkokGL0D#9=JZl$FdaqwynD(v{zaxLu$A4QhBR&o^a;>6|0
z32>|AIH|o8W2g2_8zi-N`o&ZUuTejCgZ@>HYv=n~EqLv^y=Hx<5=>(!oyQVpNVZGS
z*){9?nte3Ydx+i|e|5gES+^-8U$dW-uz#9IrH^xsv6bfYsiwX*yI?WvCyS3#v5>^L
zbbF;8KUEB0vmQ&>OAa0u??1^n*NA)9j-%<vwIfYR-=6||O52}AA$hHOOy}_@kJoFZ
zS2A9&5mVB~`G?nSYJR*{9#iq-C+Fwm)EPgGZa)q`^3XtzxV~|-m-v<`=xkGEyP1~S
zmZSFjAyKJUQOEk8HnK{A$}@280yD*O*7kH#p39(Ak&(V$Cg-86yPKMd==V`4M+O+Z
zyg+MbPy8FHn(2!p3{6ba)Yikw`bq$<Opnt)e381A4%(V(QNZGrDf;?HD63yZcWVO*
zC@js=dtm@w)ySHTm1Gp?;sSl=`&h`<u(rF6Tt)$T1~2q7Qz&CiPbU?*45%Wb{e4W%
zYdX8SsIACFJdO@BIyFygM>h>M<$z{pa+JaG%QUR&X4Q(Sn8yhQh9{_Q?x4Ln;Bm!D
zx>hxkQBYW%rLS+0V#NwpcdR6vQRu<~{k{DxWU5%*)lOMffzC73*T-}|$Lj75D$BA!
zk+H#kE?z0p(b+{^We${Pa%70HsVl7N=%TSUdbixnr7?!arf68%#j1vC0On^d(mym#
zRbv|+E%gzP^Ox!GA0}7VLRV{3#N*N|{k?+}DjF=0#lixEef`X<DtfxwDbFgj&NI^A
z#{j87R=?$?EZyCmRK+}w5A}0#u0Tge7j;#UT`!FdF?#6=E89D1s;l&PT(Po~*2bEM
z$Jt5x2gj&vXr*iA3ILWCrs?Y)CR^7+cU#P3evbYN0~9Li>FHQSF6MElub(TblJ3q9
zDsmBzqXYd+Eh@UZx~MLXc$^p>U}7duTW1&bF^^N@!;DPM($dyVb6w<nvr}UXj$ER?
zrIWU%h{uJwOZ4@RQqj;#_o|4;{NfC~7Y51HHnX~;83h#cSLnOYPriHwt2-keb#alw
z-ah82V0Bk#g2%3|ZfeS-Z)sl~9$<WCiMFn8R>VABo)~8M;w&xgU6#kWsWApdE>YLg
zL3>lJ$7A`5RrItqfQJ0iEWPIkP&G}g>1>I3EY8z+u8*a%de(NgMlr0H7`V{OtS)Cw
zcNgV31r!+V@8i+}banSQ9!CZkn_i@?V>K(PqnMeV7-s0=49#slwA4rMPrPz@oW9|U
z)VFlf9_w-O$`rkQ!<4URwLBK`b6hyvkFIWHeb>qeWAp;OAN8_W7U4|B;LO<z%;<8~
zuI{2dt5B-Qa9=N%<~3bCt0NwjW^#Cd(aZC+b*+tggz1aJ42@r=sckhY4Uf~~^bbu?
z+tNix8jr0FK*VG3xqft2BWt@>#<TfFde2^9F;~O7p7sP58k~u+Fv6Lxo)TDC6Jz1z
z2t(u3G_`dLES#W!c!JszIMdzQXtA*G!XOJp5-YEnidxoow<oZ0b$2I~Wd;lTnY>b<
zv%8zxN*@bXwF{hyv9PhVi&ZNO&P>ohG(mN9JMGPNF^`w&9~dTAyOOTf#sFso78Vy6
zxX{nMz(TF_4EOaho%eBOY%s=|&J>*K=%OjW!WApKXl*n+PSQU#Mpcu;!ln7k^!1NK
zIAgJJj{d$u=JWFDL6i-%6*cs9c2J%(SlG|yr3~F&ofc=thX%Mfm#3}6;moD+VMZtC
zXld_^V^}kLX`I2aOVqb?&=z80V}ONy!(?ij=xJ+?vG7U~3yTX3^$u`FRa_$$`Z!~-
zu(_>EVBt7}Ba_s(w9^*gOhv;gx>q%LSg2~7Skn>XOktkB^L^yY>sj5|DzLDhIm+pY
zv5=U@OAFA|)k95%!I?qErx#PPu&KR^7K4SC#~Fxg1%riA43BtNSXi3n!ubKHX=Ghz
zj5Earde22Tv#vX?XLO!{3m2Hx<*e@Rig=8%aMHuVIEJSeXzg50gTxtI&p4dv8=9c5
zrIQZB<NOr8y~C8PXk~TGV=+I+`Lq4xO$`~Zpy$j7=ZpXfvm_U-zKIQ(@yymP6WirM
zz_hMe$<bSnQJzs`GIKos*Mm$KR&d~!qcl|I$Yhsz{^19i7%Ota{#)p7tVCssy!E}u
z8J!tt%MAzF-dP7qbM}qzGdwZF>TP>DuqjgR@Q1H5G(17;x?LRKy&8pLYUn*48W^U&
zV<X3pT!%t2f4P^3`Uc52b#v>ngX9zxmL_;~V1V)JmE3aVR;tS~C}#Q2H-?#6s^_Ld
zhiR@UCzD;`g-0Le;%J`h_ub5zmTFK1-ul5)j7^WQY1e*su5Li1Id|fDhR0{<*|LX2
z+uA_G$Vaa+GCaYmwL3U|<GP5);gdWvI6`g5MvficNmfNXKJu+0mYTaca_d1VvWmjO
z1dl&3%xL*a4jn#9T}2l3EYCjpFf&)G*>~^=E9=TpnMHp1_+wlgUS!wagREOs3#!1|
zFFeiI^avYw?&JEkEkKcTZ@<XM*fd?6Z{YBbPPB&6b8j*-G|tL3+c~~(BMQakk<&af
zFhX_v25vpFi(Dqd{N+9#8yaSzxr<wl9-=CjA-^!rQxA?XoNM9WEk{{VnMD_8`Tir1
zGBaDwjr$MNwxR;17J2!pZ*y^Ap6hNr#QL^clq&G<OV2WPd5{g)?PK@)l|Yg6@BEOF
zv1vLtUC*t%dMIkm*!i~@862azXDi1KZUGI`<7arJf0**Nb=-RF268IH!ps1V4Gwdq
zv6I6`Z=ouu$S+*v>4!!c%rtZ0mZLOQWzmIMo`3u?rl)f3-hYJ7hH_w$*PeZXiTN3}
z?>WS#n8%YZKga0Q0_%6|<HijUj~7n9%<$+G9UFIY{Dw6YwPvF4U55KdY3ka{ZHKp`
zi<;TVb3D{HNN&{{jvd=e23VXM=F!1HW*XW#eDp9?xh%z{NuK)F2>mp%|CXaPRb|n|
zIbMAHQ7%tr*uDQ2x*95gCEon*6O5mqV%zRRY-z6rRpf(LzsvCCdDd><$G(j%Xkg&f
z%M6WNqJ8}?Zr>B}I63eh!+j$(bZ+MMqdO=THCHb8@~!g&(A>lEWBXA+esP?K`ue$C
z-^!7r$Ee6Eba9&Rd~=8^#YXnWdW3mieC%N^O~4KN57XUH4&-_3`%f@7JHeJchuGF$
z186>a{W*p%&a!65KK5^HMxz)y{W1eX6SS`18S@BJLnj&R9i+Zv6L%cDjzU4RFx$^J
z&-PMm>fyHA4x)=1U6|y7^SxZGS;f&?k5Lxm%(sUIm@d?F@RnOyQJINw=Aj3;IIh`!
z;4ob)%43}QHY3wxY}tK?Z5<JhXW#fPL*tiOvpwPw6eAzL%;4ZSt?PDj+n&{Et++gL
zlHtCH$J>rxht{w#)5inn`^YzSbNtxBsE8{}@?dWt<29=|a^%*yV4LH~zZ+s^sh$Hj
zAEmK6!on9Gd60{v1+Ks8W>z;<L8idl-+z+PnK3qBe}L<H>LZ+a^9PKKe?ZUHJsjNJ
z3c$#jA2U2WPV2hsIKF3X6vM+Od1zpm+Kx>eJ8~Tv6!X)4JUlSO!lIdMM~707tzF5@
zhmTTIo&{!k`kN!nEY@<<p~JM)mXpaW@#421VRCqpU3(9)c2!M;GtWQG*z_=)cI{`^
znkJyg*|%O`WPF<L&AYjIdq;$YXJ2P{Xo6L1c5r;}`WR<U@yO6HH61a|#8~*~&@hWl
zT^v4oh{~KIzc|5@4-7L>7U4{NWrT&#J@ha$b5-m;aG2Km2xoruoo_QaFwf4t2U*`*
z2dcn3FFwQQ<smj+x0f5%MKOHtotI)P+{KaWdeB-kcK!q-Lu0h8-p28pHla{VkAA=-
zgCkV7t>@OGH;~IH=BNAl_JbqL=kqRS#z{3-+Y;xv2n&Dk*rUu`E@#hy!*r~uK(WZH
z&pgh>zBzX6ImpKLI+QB%-VdK;Y-)fFJNB|?!>R~p-un^5W0&dNu#00itfr_n;}_mx
zWN?h8?kyZYv=ubWOnk({{lk>6TFb4+_C#1XJIJ?(hL~;W;Fcq|P+gXxusF%n4?W6&
zhcmOh@c3g)PiDDc|KS*C7J2=-Cz+VP%=X;}*&Oru{wvQhI@Qa%?R(j~aV4PXJM{{~
zBbR93u#?+%ucfFp7yI91xPO#}&duC*WCyyanY(nJ2YUy}tX#vfWBbSeOIJpCbYOtX
z^=%wJdX%bMhGKq-r@!?uy}FV8Hy@>`I*Tq|;iV@YWom-BR*10h#P^?Md~TAhHymPX
zM}&nR{`h;0OrB-!wtd{RDZ;|R)2}cze37>GJGp&N#N(yGQw;SEv!Y`Yx8Hgl#iC|@
zx{n9W_oJJ7IClI1N=LQAL%n@ms&!b1p5e)F3^G?}usKDk1zvdcAtuK)H{5iX?#6PU
zz}r7~oY4=)*|Pf(+a=CSOtWVDUiNQliE!qFS7Mym#qBpnSa^B(6hjvVsc+xJ9mgXq
zT$~$V_-rrJIwMx;6l(p(&Gx~7ioG$KpegZuHY)184a}PO8+*|ew<k`;&&}PkpWJYK
z^=lebMm#EZk7`|0^Xxs7a!vU(i_z56AE%gX7mt&<1|FkF&jF87XR*RA^msKMQ~4>P
zQP;#zcnxiy-8&w=Jr-^%9zUa(dQ8(<DZa1Wu4c8@<Hz`mm_;P`F|u5v&VCvm<BKUe
z3%{Kf@{{I|A7|{JmhV!ra1HVCG4=D4Tyr{0v#ZZzu_*R6{)6e~>bABxlekvO*8i~a
zk$x==3$MklrLoZacC$GC|AB=c2OmF<?@=PIe-bP-{6zA^Y}=1NvG0XCv-~KZ$AtZV
zdQ2&0>!i*~#qd8Q7XAa{%*VpQ|8H<6)gP~+o+;{hQ7pyxV(tORHnueXmIF;|63i<f
z(8|4MMHmmI;#ZKU2y0>*-a9F^<wx`t7tCj^Elu0Uu`N2w;gzST=p<s)T#MEbV^$<x
z|A+Z}!U0V!u9?+H+14J)Z;KWWm_2?{5Y#-h)_&nY95{}y(MQO(9M^SKmhV`#I%RXH
za!>MQVI@Vh1v<fXLDxGM^tA1uJQpzE{8cfp0#<!~lp++{t_o}I$sYQI(4?nZX@}6(
zvaK1zxTT2><A2)rkK+Wf=@|E_iO^0?=TdKB+gM4VJb^zF$DEE6WtuH4kAk0YuSKYv
z(C;I*1N|m+?viyvuM*^?4deKabe4<_jgB!Too2|3++=J=(~d{au9h3$m$f&B9|!Wu
zgv1pOhSL0O8NMTp1=j-Lvaw))mssjT1#yS9SICROb<cKje0vyY<qGtxwUF6dGx~`S
z6=IgtrK2h1$nj`$iPX7yac?PGFQ-?h-*j6WzDxD@@iEDx(`O16#seF@I=*da8)$U7
zD!?wR&fP@V%A8_@Qo4}XVFtD~(dlt5`$ew~abO){v%@`87o^ieJDid=hx7}paRZUo
zx3wK#*3Y(Y!VX54el9hUU};J0k}+WfQ_9iu;=rLyY^)M8<78J35o~|MlM=MFwwiEQ
zqpjQ)FO)*9+q5|z#Q;H2Z<%v(A{|tC1!+H%WN5qckl^Ke-YCgKI7^8ZB@fd_3x@4W
zic9qo0AbxJCM3%c13=5siDoj%wyDLGFRqZ==`F2qs22w^5xCngph()HuROdW3*R!y
zacoKFap?5s)3kj8Cep2c?beRd!LscQAe1tfUtE7YPb*!R%S1qGo8|o=EPQ+l%eiry
zkHl;wcn^IGmiKGFf6`~txm~WO&u^MPFGowvYm<B|YxAU!r;UF#e<-Dck9&CdNEjz+
zlw|})|1PidtBvlmqH-Uv8e1i0u=scFJcL={<LWgHd!&CXM`yWMOuPT_ctU9A+WVJk
zb4`5{zVEClk7Qnvg^NF$DL9Q@t*(Q@KP>>m37;uu5ua1owI@D8i`;BuMzlyNczu3c
z!Df6_^57;irD9Ngmc^`4K$1>$-SZ<!M*M4JvxR1&*z)y}F&|77D4TjJQ0{_K;Y~>+
zaW(qCQnsJZj*5yrI|K3EOq*GwmVIOcZ7+ta3o5$kv!Awt7t=I`mornWvE__WmM@Z(
zxV^`d&$U<31v5dpI-rHWDA6|@&p3<aC2)S(KXP7e`%1mYxO4hLd<!qxFS_JrHDdB(
zX-Xa06%`?aiU;-Wk-qvyK+oyR{HIgfDojzUlk~I+7OOF*Ex|-Mt~>DqP~Nbs(_Zik
zDa8yAt;M+B_rS66FHJ0h-&0*mGBP?AO`_{zXN3o}Ld2x=A$`V=PovM!hO$K%fBs;C
zu4pU5DxemxL<I4qWE@DDw6?#ANX)xg$@G2Ex4HYD<WK6r_k?r_O97c5wvu11rEi<-
zF0m@3gA&8}?vdul%IZWWO376z|JUMNY2|!P;Jgm{q=}!jsT)fP`+2KNS_<cQcbH;)
zJUNOkZH6Sy;>U#s=kHgMEBYN`xq9m`$zRPO>01fBcORs`hhv)B!>r=S1`#*f!+eZ<
zn&7I`s>=rm`NQ%ndi`J0cjScBd{$;$kx_j?u{_z>(gt7TDz{G9*k4VyLRu#6Q$t*F
ztt8)0i9#?JdkKHlW_?fS&#sCYt&5`0sVA1j&qFB)o;WwFdzr}5%|+2z0KEhyyTpoY
z8VeQ&jLPPyuCAu0x|)izY}6=|rMkMBs)};k!?GhP&BWX-R%WieoNUGjEafrFiV=lO
zwv6&zCKgZ$9p<PgFB9Dy2pTXp?o?4x5$_Rj&X&z)ow4I_z2vMxsSFiWRa943Q<lr3
zG8w9>s>o$BXfjk(R>t41(!L%wHsM=`)Q(U2dxvtF+|C|kMmI$<%F#?i_n?@T1_||R
z3W&tgz2`2<^5=EeV(D2Wx`|=@;=#FuIT!w+{XTe!%*_hzOePq^eD-EV_K&lwA8*ON
zmSzh08NLE3(<HZn?!l$<0n|24N&Ie3pxsG0iqw@5(6^UtBaNFvSAc?2LB=+uVw;z$
z*F18|Bvln$^zww3IBCEg&rh}R7yH;n2^2E6w;}H~4VzH3#kh8>+L3I+5A5R;@fb=}
z-j-$%57CGYkCZfO%%6LF;_JyXOQ2}mJ}iP0?aVmJ<u744hoycj2JBd#!dbpc)~1as
z1WZm*QZI?a;GgqX`&k(bH)ATw%FE(!EE^qVsHm)>rmC8(O6tK|Ah5-MY)7rhWOJ0|
zqHhf=(>^`SEVmZly_~MBzZ#iC3^sken6ib5qa(HH#>8u9iWPHFo8m7BN;@%-z!x6I
zmjikpo&<<x_t0ynYtOva-gaOcJ6g&v@W+j%oc~F@GF!Z?QVc8zd5ItCsz7cbzI_BM
z$hvXewi{1Z5|fNAC2(oWY@)cfbh9}sE6QzK4P<g<R99D1QJxbzGcqf(P0cO8Wd&Bg
zLt}u>TT;Xl5Kor@x6;;g;Ai=rfAx!O-LZ?K2X}G)-4o2!bn&?_{t92XV<#`a^a@uB
z$|*MiXvWbE9%Q)dOTS6a^qZWWSa5m5Bw|x}=mMK=`(=J%M}}A49~5Assa~~<U;Bmq
zy#D4X3fecSD+-{HY2qut`i~j<;2kc{N8doo)^+fi`;Kt_{SR0uN(G9bS6Q~-avxv*
z{CynQzn`v}E1aCD;dlS*|4Fv@1m_EF{HNdjKPc;chYyA?c{Ge3(uxe1U}WLojBWs4
z*&C&c77qZC{2Rloq|=+k7$+&^IZzx4CT_IvD_SIY31Ai^35w8AZV{aJv4j$K7s2Y5
z(xvQTS=}UWH)=vhu1NK5EPdV3@%-P|4?|YD05|P}G34RTRHZ*)6=wQ5kqscbKF(Gc
zZt<0_Jz9nz8~DcEl$%V*#35rQw1Hd|RZbC@6~*w8iJX}miCnm*!~)JHX*#qt1#sio
z@c9pX%i)KYqcHIxj3RjP`HLhGs|<2{9ggAqp0Txw7w=(795)F2CdV~CjUNjM*^)l*
znmRi<N5I7{A~uwx>$)q40r9vcZ6?&G!g!W=6ZALZCjHm7$*$<(*S`ES^qzQ&%lRS-
z$W+vD>p%KD|Lhk(&E?ZSruWjkzmY$w1~DPhtlsx&K66teCr(|UAiv#bbgCWB_$$Ep
zc1^_<vyCamt`?6F?X{QqWx<h*4ej)a61Fw?<xH(YeMlQw`}#3w?PdCy7`Jg1(+hG>
zES&QhztY7kQsXfkO89*Z&RS$w!M1`hjLa&W((z|we<6#_=@!uhZpGt;7!+eq0;hvE
zwptRoOIs(&ap?YE<KKMY5YIpN0`odUd0iV{{K~KM$zwNjT~|4;y!m1L1sE$3IZ;w3
zenQ7nT>MGf0GE$^qyuQms#kFF{pa|-uRY7XzwvwAenUHd_S`wX{+AE%hyV6Ia@LNT
z6>V$Ty?p~qQ-i#I;uKf%MY5GE*m=V)R@P)V^X>`Gj$Ed;sfERgEENr%T)(cKH{UwV
zLP0og9NgtK4K&qN3B75u<qhn<el-`)^;3*vplj1k)~;$Kzc@`<bsZPpeT@s_i|pLH
zlS};<xUv*|3#4jA2e;gL4|m_Oi~Q^Y@4fv7CohZ%`jHMY<qiDYr*Gl$Kl*J>jxA7K
z4uwn)SyjoNJzE)Qs;9oThT6(9ub&BUAN)nY%VGc@C?XODNlmZ|_!I?)wUfnB8Q8+g
zW)Gc7CK59ws2cl=Xm{Tii5=PWwDq9`U&oFI%0@Q^M74l7C4<Q&&IAf8q1U*#_5x3$
zlRI1vih?{$JYK<R?#aShJ_9$v8p~(;$|1c8h*gRqCV<h7g)m_xr@tC(t1w}f7}#8O
z6%<HjXeB<oVnx>FX%h>}lS%Z*M5b)wm3oo;X<U&P(~R|Od__Kfl8Ef;n)JkGvXjLq
z?bC_p`!4QuoD9ufZE@Lwp>><kSFJfn6s6K=wl44!t~VKV!x&>YuDm$W?taWe!ai|J
z5s4o7Cnkp8lNd3{_KH(cP)UCj^|AFc%eQD_d__8zUo#@rXl0W+aN75Z0HqXU%V=(G
zXV>0CY%0$3_KCN-I5)#n5B?n;J^wO(7aDP{Xkyp(*HNEc<h{2}Ffw%oGUcq@yo2?f
zO-u}&;oVc`D66Wcv2L1dc`aMFZD#nx_ZXdC5KQ~{>}n_Jb1^atT+sqUtRG9i2orwn
z=YFxMe4Qr>K#5b%zipoIa))*pD}2b0SK%V~b_*w8vQ606+TQccQioqi`S|FMlX#Nn
z2K3}|j+?Avd>N8*OMNFs2@1|oM+rTM>&+VuaL<7a)YM!eg9cq>=k32h_vP34um9s!
z6a{p~K6thKXB@IPx$Hgy0SH<fV8vphY39xy5UpCXonyD%$)2tvr_NsjG)4Q>?5LBf
zZs)7N@l|^2%h`R`7x~A>w}UF<&M$nGPwei2Y&GlGc37b+8@l-Auly5QD~c4t8L18K
z<e`gst}N7W`|Ssap6+JYefwv)zO#uhedSBEbnW2h@7zNc&Gh5~2X8x0OKnta74wT+
zx^$WO`72CboMd)>Db|N5Z8YfOBBL`|Zasc0JJz=|KQl`anCm~uWX)D?+tbPEv;7o3
z&Xfo`LsKSLWdTCR#fPnJgu%>X2&JO`Y~dzAItaJuuNHD>e@~mdUHUr!03ZNKL_t(w
z(FuuiKb0c*jfxa4l7L%@@@AMGsTdvv*l59k{UnPn(;v4A<!EZmDilIT=06iChF2lB
zkkw1iR!(=S?O2w!isiI*n<)Tn#_|%im!JfYm<MmZ`jw|+ryCJ~mR>xLv5Sq{*e=3P
z+;tLqG$$rmMtnOTRn`yv$3>A&!$<eVFC3rRD9zxDDBujM(TVbO<v>^1#N3f%YVVIB
zWqCC|pa}fRPIR>MajO~S4H@aztuC6x?D=boe_4PxHH=7LC?k8?c9ppBmBqJ|&BU7V
zL40|Qj<wd&&wqK@Eb&m!P5QKx(bK7NI*@+g<d;~cy%3fw{C1yTaITcHab;n}7KY9i
zTE>9TR>P_dY+BRI#+&Zpi=R9|8K8C1!$Os<;I_~FQ|`RJhvxPB`06j;Pi>iE{l0to
zm7hOOWhP74+BH<?A{nyfwcPeIU*O2rHWu>*%Zrgo7Ej(|XDxWeO`KgZYHSzV#o{4C
zKiU}-6Ng4ze??vvlNRHoz7%MkW_wdOCXaA(;TNX{PhvSdO9*~Kq9ULq9rMx36y!t;
z8v)Id#hc92!C!eOzo~yO#mB;rQdW<*fM}>gsfU@NcliB3`D>=45`&^H=ep}QQrWtN
zU;XN@^My~{OnE#Zmt->%V-0bex0JH`<;=e!NSl8n{85&=E*8n>m$)=NPjhp9ii$N&
zU0Z1{oZ)XDe3ZX?;0IiPa5v@EjU3+H#$WyU-}2N`Pw@1QPR4~#j{E-EZ?bOYb)J0n
z1M=d{<I&J5MvNW5ur$l5cTO?0RB*QnbA0gD>+}wc^7=dPQ`^`~S*F0blP4LOnYZm0
zXUBQ_?URg+4)OYrU*r79rFeN*jH49A{0x8j`@h3mV;PS9qhI2G{b%=6SFXq~%X{bN
zsn7S*H*qD&qa%@=Ws952wuxkt&fto@=$<Jg<m}#(l7GdRQ4!z=zuow$uwRXK*Zd`8
z^H|Z)Hj;R&eRj4Or9~w`#1uN-c|tc19NkYGx%S~~RC#pVs)wZ+ew$kRI#H%@Q+RS%
zd4(msdrIDkXxMm47$Me$UFNse5Hf@bJd#x|$y)(J8ydA}-^U_-i5OCXgxO1D+iFY8
z@)UTMv5C{4wne3-tBfp$x7btckD>cEbE}COIhe@l5osk<Y7$}m<B+CUGFWEqV~PqP
zgOykKl`XiGSdCB!+Rn%QRU=uw`FgU}m3#|}D|uBZJGPbA5_OOB-7tQ6Dp#H*o(t{y
z#aTRn@iAjZ?U_Icx+>{ILYH372n=U)i*j-8t1MY882_>n7{)I!GyV}zJo*@a|KNA&
z*s>#j%fBykuA+(En^y4P1K;9X-*^bxx6sy5$=+M`^4jBn&tp$L#iNgZpG!+cbP;wO
z{|ukm*UV#2JjbOgdF$gQuASI1U&eQp^f|vB7SfFGD_rb|IKs7c7~wEgc~yoSL&Q?t
z&-#mqOHa$*;)I`vl0}Z3Oec#q;~}ny^?JrPWff%Zh>4y3p#8$vW5^VeT734^p|y^-
z0%^n-7+gbd%wRRc)srd3n=-|@kUJ={WIj*Rp#g)1nM+)pnHRLO)Hc>~`Si>D-hcZ#
zwR`SoPgl9qT}(IQzwSeTzO}BeC=t6hkRt5zXIvv^-{G039_Jg+oZ`^IUE~0Y8ZBO?
zsEb9&<j5*THdlr&6wtazv8X7|Wn2Kn<3IoRf5yz3Te)#<vmMj&9TYy&&!eM=_u|aB
zw=Txck%{lAXhp#<JX|7oj^_i8La}gpn5Q24TYmd@{yWv{chXo>1}ejQ&;K32_w~n^
z&6g<k4Mz!wHhh*2ESc;*b6L<^W=uJpbas&uCA`XZQNLtDu*?Vpcr)(clT92+D)zGj
z0bxBMtbL?q+xg?kqSi<w77Bb`bZ{Z29r7gkmTPEcEUO1%J$Wk-uAhZIW#o+SnYfID
z%HNxsrYU0&|B;%F3@;!Ya3#DWh$}~2r?hh{n0;a@(1G@RK;geJoRrl8cg@6{F{!tw
zEy1@)^|l)HRTkDS;o7H(BpviqvH6@{lyD87sMud0@D*s=<)#vc+*{?xk&isu(~HqR
zK}#U;3&WmGvz8&3BpeNy;KF>7zA|E@0JAEsrGv+f%q1D0LIVjt!tj)It!rg+<gytu
z*(|zP@N#`7W9SsoIzw4DOD3BmgGLuM`FxSGvYePyMmp+!;~AbkQ{=9rdnmK9uL6Bb
zxwLoP!*Kgg9QQii4?@iIV#c;f(}@{F$M$sNSuC~w8MU#KpP<k*cpBL;Y$F22$V?!!
z(4j<S`S{qJV=zQH3NEMk6L8|b#tv?!I%p%$!`Ku{nijV#FTUQb{6f0umceJMpqLjY
zr>kJ34jt{Nofe->!M(`fz$n8bqs&fUVthJJT}_3kiQ;{rO-;?e<rv2zotzEY1fX!U
zSy}yQzkT!i8~OB+?Ub+R;MT)?IrYM~IdT3Xda1y5NA6-{djnT44sl^<oGpj$VfVT=
zZaTb=cb|KV6K94ftk}T)x9?_U$2!(FXE@V4%C<vyaOub2;NcVVeB~E!=dD*>XL_OF
zIh58#R&BYNPaWD!O-mQMc5X$@j50Dgi$=5S@I9<+u3`4#AT68sQaJxMo!4E@+o$^3
zy{3{EUp+;>xJcXfTiCa$13fpv!1y$Ia_l>NjLwD%mS!dypPCcs73(Ei&8NTgtK7VE
zBR3wpg__GB^6Z<#9KCTZFMRI>#-<n8wEtEr2H)kq{s{-}A~(CBF*9}dSkmPLvj<3d
zMYG(;WLH_Nlep*l8r(amls~?qFX<`y*)-PigL5y~;x!2^d5VXL0mQZsvZNd%fGrny
zi2}x~e8j=5<f@-|>})YW;RBQd2P1*FY0{(hYfneso!DB|PHvSYw5_?|_M|9-MOIcr
z&nrF*aK?I%{_j~dp+`v%VjI|k-o?0lk!}d!R<}a`hK%$xs{>DkV%16s5H~AY#;0Os
zi3vA$<wxp8fPG>QrLc&n1LITn9AO)OFPR$~XA=qX0ytd`Fpnv*TuAE*wh^9zVBE>=
z*~=6?A^%cOVueidFKrd{4|U<?sX(vEcoVk31hq=o%X+1ZeUW_4L|8zOP2k~}25)?f
z@@2C7^J08bRZZM`?<eVOs$=I(dl^3QeO^7)PqA2}sb>d=_icgsS^5VD$h2(b6Nh)Q
z{<_^%Oq}GIm)>P!F3Y|5-9`J#R<>>FrvJl_s9t>o8_N3m9}oP1TkiXL%EnG}estOb
zjMS$WX2P#kY*vGtI*0=wAEnM*@uTqV9r<(GN~wuE5xdG2cD|nd1q&b9+*m3EG=pWr
z@JiBh)CKOz_}13$E@8wxXn_j?+e9OK#lv8E+oNmvcCDR~1U=<NXewTNc`C?ZAzt{T
zg8XcGb2zr5eKYrc`jgzeXDivIc`o#R#NdSH{?FXYs*d$s-&y2=Z#~0o!MBfEzj2c(
z83{&}lLc|ajMySBDk$Ve<m$!_c5LWEp_#cj%m?QN$QO!epl)S1o7c26dhR5><1<t@
zw6bYk7y0QiJ~-RQVnLHBt77ej4K!9|=s$OckxO%|?B0OBc%HG@BI~wn;^O%;OwBJL
zyc5vWwDhoTZF_99B7+~Drhj}IjiRw_HS4=t7(9ED#j2GQCr7AjZDV9?iuQ&wdI!cR
zYE4D`N;Yp;&HUIoKIor7YejR%8rF6;Gv0fK3nP~T^wmIy#<p&{+nUK1<~je-877y?
zS>M%2?}dIA3!3JxwdAHp7@eB4*(6M!3BwE9c)*1NL_ZF6iC{4^Fc)}Xz}f^Ie#1wM
zoyud71nrC~CFzKs@xL-D##c;`SP*x!8UhLu_ryQ;2nzz)PyUS&76Ycs*(G_n7>Fsj
zc(_J7-YS!QycotLVK7Ppo}VND=~~8j6^TtuW}lXuP0B(gz7oGwT9NcOFX2iV(%Pkt
zWBl3aBw;^M(1dv9(X=ZLDkX{fx{yU|BxABTO7`&v_MFb|;}>Zqbr90L8o$PlcIeoK
zg!0Hf+Fq8EFyD45V~g1Iu36EQ%^_(;W2iS-_$Kx5>%uD-<*)B31-U9#cehejR!wzw
zfioYSV|F3hMpIthz{ZX1U}lul=lY?nmUSD}Q=47n?3s_aG`EOSIaYP8p=(t=Q=|Qy
z?;oYSp`BHg3tSkQq^WZ?HH9hqh9`v|nn#*x_SVvkJah_+87FQ~q+$mWaim=gl!~{&
z7rgtnnATn_g*G*qj2Oq<h*U2Y9*oJX>uJoy$&XnxwxcbLt0W;UVnT^{wmB@K;e7$n
z_?|5soz}gD0v&{@^fw!a$xkvS44*D`%#*yBB-nSYs(`k$ZA7PPMJwAkuOX*EFV6Db
z$&<`1DLU6|qNAyj!E+xnFfrr#01*9ls&x_}nA9uomUk%cnilp@CEJI=oNz49>cTk%
zI)^}(2L5uW#Ee$Tlu{sg#er8ar6p?-pf=D*fSh$`8JGqFM4@4q(9Lj`AlAV{o_(hb
ziWr-4;|<#)R~tXHwvt7ZoIRoKPv4mP2}Mb}Xq$Q+A7S#f$-?Q++1K<DHc$@a-FQAf
zNxQknz$%4Luej4h`y1r9@)WV$$uZJa4($eHCAIEyjMc5Mm(+p4nnW?O>=6U1<ULss
zh{PA_A?C#;8s}e9_eq}-PH)=B*A#9*Pk$wS*u@8yBF^WnzMPHRB^PT>kutW@&g~_y
zSaH7R`1R9};lZ^FAE(I#nJ}#Isbp0mun$wh%5nUdkjY<UPT{{HO-WVwxlFG@RA09#
za$8*RlTj$MoNT(u4Q?Ri76oh!p%Ut`_E2&nniBKE&plfsbnn^H_hB<82|dO7Q%-Kr
z)^_Y*a_q&vuR|aA3=iNfjCeUHnUfsSh3XITr1s+>sFM@zQV$oH;wPlialF*wn)r>w
zt`SYqx&&VdOamY6O7Rrv-u6plM$nI%sYD;2kEC8hY)zBN(nNW0d%)ARPa|5169C`|
zEi#b&;($#+kWf-sa5?6MK_q=iFiwE3+LnU}<q~K}hYsZ@F)x9A9B-2%Sh=EpzL8=e
zQ0~+xT|!wO>-hNn(t1cIk?fRmz|>W+fbi8;n@X*u8B(P1nMNbwS0HR07WO4%_p3H*
zPkG<1Gz;ZOCQF*aUzG~`d5Hmhy^EOeiyj}3<h5lFEoaI>xEP(45<|2}#GZ_ESulyJ
z+`P^u4rjkGN=sTYu0#TKd^>Ci<#&Aex=s_Wlut;2ObUuExKdy8YUNC>PNv0>k<#eO
z7N4e=@d7K<H<5Nc1>jE}Q{^yzS<-ET<We$&`8EJw-*{$9Ny^ShCH?m*&Yf6kOyRYZ
zP3_vQ^v7k#2$Go4Wf+rbG?vq4Vhjlc_S@vqd8sE5rS6+Hq{yM=2a^H<Z1u1vrjuO!
z5V2W83t_(X<`v3e_SG_*oREZh$WFX*<0SkckRh?CBPJ>;z<e*QYZtrWcL`ib2iT2c
zC&cnJ9P@P&2Dhm_<bszXqj4Z>-(YMGlT=~=Q^qJ(PVL&+E{gKHCN^!}%+}4D=xC`2
zrO8&-vT^fP)_1p&&A8d4jyDkKc%PJG(9zc@R;+BNz9JJdVeUIVjWbr(x6syD8yU@(
zE1FDM4P9-`0w089Yi<8(a<q4}QIU&yi5w_PeSIC77)0HyUFa&4qhrl_wr<(Xy4CHJ
zXO!hHDx2cjr}y(6LPo90l-JPS)=WmZYi>p>`eky|)m01UlI>-3v=Vx}YI)N*o;161
zv_ktzp3Rtc^-bynUq7P6_U=VEpv4`8%<ig7j=|WrNC}zr+Zn;McP<ANz=8zPxSh1O
z@;C50J|l3q?+ddahxM754I4kcJ4Su{)uPar;fXJ!l+F5oSWH+TFnZE~ula?POwggo
zo-g4r$0c(WhpZCBTq5>XlM;it0RqoT%5U{;KwmZsh>IAJ?8_p<T3CrT0`s)dSunXx
zu5$Ul>5?zb7h@fSxJN+4LbqAb!TC^#M-i48da+*$8GSs6VmMp{4e811%&a(BK9sgw
zU1e;`NHJq5smBmAl+%k&cM8l(o+*C4Xv~(=+P0E%i(wI7cC6mWmW^wvEzb!WI@XiZ
zgBZjn|MHqfRyEdPV;o%k1V=(=sD!Sy(1nxF#GiCrzwi)-!74aKLxwkHj4rQhlR_Q;
zUc0~^$#bh7(;EbK_&5@<90Q+~TljGzMukKwQIN+3EyihEJ7|d&!Vgt&&B`tM%8JYU
zqKk|OrHB`^#Pt9my;%1LcDXj@!xX{B9}vy$H(XVQm0fGuym37Z)#WbVnm7S6%0+i<
zU}bL_p9*K7+O(H5VBF|k`|jg^|L4EN;loGy=l|y4v9D(ZnTk5r?>frA`Bz_}(fs<A
z28x<|K2Nb&u!_`LQz+yq6bclJ(cs5F|1UYRz7DO66bgkPh|Dr|f%QlJF~4%p^|8Gi
z(B<lS_|#noD9c1fC>9Ik^U)ZELV;r3Hm_^A>l1g<T${5@X6n26<uBe(wJK697OgU5
zvdxya@Gt)TzvtfDZ{@T9^f&nOrw>vd>#11Clh5ZJ*k~QSf~8QPSSZG0Ys%|a^NHIJ
zP?>cGE$Slqe4czPGg?#Gwv8|S-0hSVjef9t$rlS&wYpfOP%Ki&7bq0ndy<UoMf1(v
zIGA**gU7xa0E+=Y@6oGLHb#;wV@7r(o1r0#CzmW_inWiP<QTS}@Jj(CA&*9%?yr3O
zCjB=6L`&NRnvP#zVdOIREf2OzbBQl)kD-ly@i!4pDU*$4(cyS?a;FH9a&9Opp>yro
z8|wp!LP7X~QJnXZVUcJ7@lHL1F%Hb#<3hjSvDg$AMS<O&Ke^AYh?cAJV&zObZAu{3
zF%BvVg66M|aXZFJCFL_?6OWPZgJE2_;?`-^?)ee&dCAc@ik93<tB4f_6T)FZnNoq+
zHrjm-eaFZqhV%X=VlPD&**=_nnf6iQ4t!bsO*|QhX=m!X`0}rOo>k?FLc9-KWpcEy
z+rdBi_5X!^J@vLoga``@`2xNV6)EKN6pKYPMY{Lg%a=ZR6FGF0E6|?$N@1SG9g!!b
zK8!Eg1q_@|+P_k#_&5~4A2GxOPiKY?dqI5GUL(H`$#{#%ddBek3%oUXB*k2DaTTvD
z$9fT^rQl7wdLy7AF;A(8ma8vC0WAF9_c>*o%2$sV-a~}-R>3i{D!X?$?XajV>?mn?
zKTBr%#JIQiz^D1Qzy5h{Ira(u-EV%Crfd<5HF0s9vpvike$CRPHL~&pUM6>^1EbF9
zRwhgD+u!HwUwexC{`DVm!^UP_dG;)iJ@FiO?YZA}DFPe!9OsS$TUnYK;L(R4XJlrH
z+Li10)V;USRH=FUrKfrEoj$U;G71V-Z92g1TgrLx$rrh@Bvv<KC1o;Ma@kDGYQ$?<
zbu0JXe+LzdAH<wyxZ(JH?Cx$N!xai;wY>lQBmC&}6t~^~DcWnxV!zhZt=i1bf8nzn
z-M^9YY!$D3_eoxO=R)Lc+KmAUb!Cjd|C_&LylEeQ@T*5y@x)6^mv`}rd+(&9KFeD#
zKFv$-oJUo)aPKGYrM-NSISoI4?jg=j)p75qj#It#5!Rg9N^ZLKZf@Mv#+A{Jc<}M>
zQr5bI&wT!~9NpGLu6iZkfBX?%KRZUxj)UBJ^Yy5?5uSeHDb9^ev+jl?>|R|@d3`Ho
zm(TL8Cx1Y$c?0*}eUyf>0&l<c6feHp@9b*9#rUR~`AZiNiPt!tjW<y+rGS~|FF|2o
zlWI_KS=)MtD2@`oNh%Mv5RE~`16;v_@qW`^`=pbfQ6=S;D;I8s#@CCMp;Ss|Dc3A`
zA;GdlUt3R#1fG^eOW01BWqJE5qY?MnGU)D*BAUG}n#(~w9^6=1Ka}*n&-O858sABB
z-gPODPWJK7m1I}k_d9qzTDfGB42Y<t_opQxdEI#Jay}9A;k8K{GgW`d&$^U<m{cNV
z4i7OIG@{9Ply)?99W58zRV&)~#Lxc{neus_{r0!{aQHH>KK~3mcJGW|F%r>e=-9-)
zcOPX%c7Y#0`vfP>j*_cf!QngaVOv)N!yldC$!C6u%4ErA6_t%$+<R;vZ+`zN&X3O-
zmJ+d%s_)d8l5vytM;i-iSQqF+xuA1&wT}@AJ|qwXbOo>w)-AG7cdNJYihGip>4Y?s
zIsC#`ta3{FW@7J6$m72n_fw3qA`(1!11ig;l*^x_6Y%O|q{p-IS)nphJz0AMd_-;h
ztrKaus<meQuHE#$_&@pDb3^>j@Bc0xb(+a3x6-dv{EH2)XjAbUy`y35j7sRk)yiq+
za&0&5yPMzm7yp{OH|6=!dm|*fXv&*5@GD=qled5HG{Y6!`Q`ieL$->~{K~IUIr277
zKJz>muecZVwr)PiS3YwK@4bDJg}h(gL#a9#^79vY@dt0ScH0J``cBcaWj71Gr`WOo
zM&7?L&GB2Vr!2F?kH7mOwLR;pugn38xyfF>^PTT;?(F+K@~ww?<D=mO-rCc$N_tkW
zWzWtnj0~LT%3_93eDTY)&Y$Cn=icP#{hwh?V>#E~{&VbXoaM#0d)a^V4mwwqvowE^
zAHDP*U7OZXo{5C1XxYp!+_RfEpMRED-#tgZ2vb8J@YJ&}arT3^c<_Nod4FJ%isp^{
z^5^d6?2o_4yAw71+^3FGsfskOyPkUvZRedIKFdpQoncYuxarQHp{sC#r=EL}!AtX2
zZ(h!I7+}Q=b!&r-bbt>J&xg#6$2y`Zu>9rgQiq+{dNbN^&J@sy5k}-(k49nzDEOR+
z1Gzk)q=v%3j);`a;g+f5LYV`3w4Ax?xG+rXmprtX37h_@MPh1Cma$2StXC(B_Imn#
z(@`aPy_9dJ+1<Mm+)iD+NE<7Ni4;;PS9J1HfPvP(R0{7AgIDD*d6bUBpZHMWc8PR<
z4c9KxSSra~3H(UdUaVTksMYbRO>J!plf7?pda99M`1Gw*W<!oOnX+m=`MJ+iG4dWi
zIz7$jzW7<1t1?`F$7eaxqj>hIXZT?7l5Jnr*u&?4>2qYp&oVeUk6%tE{o-nVW$nHU
z4Tn9;k_rx^8}0D5WUGji@J0D4bxUxl;BUI=JhoMu#Im1G7}|T>N$_adJa3ebg^_xd
z5{hwC3OK`u^bf#p^%AWtUs`mO`$<iG<ZR~9PLyYU0WIH%ZtsK2DBk?;cj!9)dH%P5
z{#6PeKEs(yuAk9IMv887i48|nok}m#MhR&Gx>?lVhi~xsw;$oT_og_odp+Shvq96a
zY8_RVPxHff-sh$7y-L^CO;l8`U|V;V?>zGgADubPyC0gLsw(IBeV?Uyp_kMB6ZY|E
zQpG7(b)LzKlPnd5nTqp_4Gu6iI?2H380FQq<do*}<VEHd^H!b3rFkYMCb@EDj<L~E
zrsozCFAX*FWU89E`Pd!&%$>V=<%O4+$*<tXU2AFV+Qi`l*U{3vipKhSHgD|U-8Ww6
z?Gq<Bb$%$?<5FB=YI2grf&-}h<snX-o95`<pJd;THIyk>T9{+v;w7%k&M-PU#_Yl(
z&F!05*V)LX8}_qveFvRgom7-%QF@V+Cw|Py56|(@`97AG@|=C|1m!(@_~czjSXEbH
z^=2L?giFe)hm8k6L+Vn#5yWRWQv?;ge=r3cQWKkLVP-`!FMjcrlJ;$k#Igp}s>ioY
z6fR(tz3v`C+mUq0yOhBcp5l7^>n3S3r=2Sk*p`?0T>MH(D6WfS2YzWaqf<*VroGZu
zhdMHUQ+QY|^JtxrT5f$vtH1rqPb4PuS^8uskJiY<A5wQu_#})wek6|`zk!VQg~g`e
zPOClz-!kRM$&klgFkwm7P9YCEdAXD=!oJH+3q%4H=OL+-5K3hsZH#cQtxsQ`i+m;S
z8!VZ+c$U{*e}~szevR7p)%N#NW&3Pd1M6Ghg;!qV#LF);TfK_r`YN{VSjTJ6zrg!v
z&hYkIr<g5hbP={5ypscKYkBYF85Rr0)QPSU)Wqm=F%WKz2$kYvS<=_yp;C@0PUSD_
z_(P<OD+@n-|FIKB?c!1e{S3B-F=6tG*VN!<=to*&tsF0wC#pz;(#&nyNXt3WDQ)Ch
zy{FVK_Fn8?DKZ%vN-h8h+7{P52NAL3#eFQ3#f`W?)A8b2F~ju-4lwY>b3FXmlN8%;
zV0}xu@RLaHOqeEZvXVoI3fS#kk${mDbZ%;#^XJa;?wK(*tnZGBxO^U+tDw9*dL#Dy
z+$B_PD@|1uG<SBeI5kCnX`ZQt3Rbt*lPfEyrn)lbBhMp$`X4x1v4>CIw1JFCio%b~
zk;!H$E6b6~<)SYmXwbza$W>CF%Q~gYsg6~8fdV-y%gZ9Ul|t(xnOu&tZ2T({9ymE?
zEL<Mo@BZRz{Pv$b#Lf5IO=IN}BO_yc@a`MD@Z$6Q=^y_orv@%EIlaiLmL{s}>S$h3
zXXzD-c~q`~^7xJ6g~dxe{8wM+FTU{v-M4&_-Cd0+6h*Dcm6egpM5W7>xk*Nc26+AD
zAM)IH9_Ek#@)2eii=Z`2OG|OxuRv+~-g|*R{*$ls!wUs|_Ws+c783&N3nl?c`OLN0
z2klHx_<`MoFP55hOjQT8jQYKe;>fXlpVIiAYI-F=N`i)#t^EzP?y^cGT5k`9w6#4o
zk3mgb_;Oep0`$U}UySq~i{5@S*FBDV<BNWxjkf10Z(O$@Ozu&P?PBWg*oFx`>+rA8
zrxHbl(TCQEErP-q6kY-l_w2r)n2|_4SJXy)2|Z|S{mrk%wbh~5$5XOl$2<lxZK7UE
zR)AXmr#v6BI*o0e=68~|5|0I?{QfFxVDlicxcziOrVw{cacvWz2xy(um)BPX*Hf@m
z+R(_9kVhU~t_XA^ecbc8nD@X}J<RYvCw1mk*UDS~03ZNKL_t(mM>JK{O*A*wu%dYt
z#VgY+6(TGw6cv?K(Hrk|@e0$03fh`hP}9^xjap)UVUh8Rb98mJQc+e$b!`nfrBE5g
z$?yJ8{>S$Rxc}34P*;|*>?@&r>pEeK3f<Zj{lHAp`4Yvf)P=NlfHgYSo!xsHVuHt%
zFqs5E9c-V7|50WJDT_2)s7As>Ht;7=)?eIwCC){n$c<%`TObTpVdGBQ-0MAVk9|@W
z(MG0(s1k&QADb5bb`vdfLy}kfpXdKZN&a~9$x~v<Qu3}qev4dF#`=w`>HFX$AD(`n
zORAZ!=1MVc)HmDI)bd-(8&^BRG6-=2tdwKc$*nBFtmxjxr*1pQ+D+ToyQPKiJp3@{
z#%5vh3Qb!MaND8lDU6-t!wdb?^z7lDqx-pG>nfgn;6cs~Pck-J#;5Q91lR4@!OH9m
zr!S1MZr4F(Pd&|JFJ0j0zi=OC-+Yb9`6a)oMANkP2JXG>CN^xjjzb4-L{Ik7H+Gpq
zVV<^ahdFlRHs(e?qP$}(ix=LddBY~&d%vG8Z8=_k^Fs<OkZsz`J+~gDHb2IP{S)Mu
z3as9FfP*(|CNnci-{@3g??)zE!+}FPc>Vbo8R)w}$G%U{aOrJcIC+VKhi+oSy7jDH
zRmYhRPH|yqmLqrH&CX35*s!jhw_bdT^An2{3-h#XKESbkJD49o$0)7*;^%&r>$h#9
z^2#vZd*M|s%`cJ97umk=2sd80j`^{E&iC~~!#eiw-p+;%8>w0x=k)nOR<7N_N<GG#
zr~4tJKxNr=%O|=2j)Sc0XySv{U*fGZL-B9Hs@RIk?J>|USw+DjebnuN2=^%@9Mn2$
z{48Hww{&C>n`Gh9_UX%g6w(pBeQx%0TEa~hZ-ze^Sm9$<f4`1@UB*sGKa5>}R4gFL
z_L9EY$`S6Jur|P5PfWVR_`Xh+)wS#E^BeBlG1p9C=~gIJjBBBd(!p2q;dCHmv%kRM
z%j614oNQt&!;h5N*);}t0p<z4nRW{C!y>+p5VFX>J}<t${$JMKJKnOZy7T<*bHmH8
zUcFqea?Yw!Nve`miYTj85+Wmv1lR<7Xq(2N+uc2%>6vlQ=i}+mOi$Yw+hAxg1_X$t
zi~^ECLIR~y<(%`&Iox;eo<Gjnd#}CEc_r}7)kpR2J!kK=SNyHD!#!7yA&WqXrS3U@
z1aha(QaU}Qb!oGf^hGISIIg`XbR`Vq=|78})Q5I`WWg(Ck~F;cp>BljTmgwK^qqT&
zSg3B~>Z`Y~VEz&|U%rH2Kl228PWGa88CBcC?RVZxYoWx!!$%mHp2hZiZsUrLt2yz;
zbG)+i1Q#w2u;q?>*s^gwD;73!WZxmGI#;o<>LNdS`~{X=dl&7cGaNe8hc>(+#`k<x
z=q6E&@lTU4eL3SIXr{AL#@@;q-Pu_T>2SkgsOKNb_cKI$?=d%`2vHdW*p+Ez0!Dn<
z(Oq(VO@JV;8z#g}NnbGP#k58qZ?lgzk^&hWTZ~eIxY4awXgT?{e6M^yj40?}GN%%M
z7jkGByHXcM2}XZL9${KVnSrr9+uys5%P!kU)A(V2_|&f$ulO;emabT-{X=$I@M?(r
z=irP05)`ddYE7=Hmgc7TksOm_!wd|KVN{c=s-tCg1LK2z42@3_6{=~TJ&VfZ2>pYj
z$t)$Jv1v9ng^1z7e#WNC)YLViCx@9RX&RcED2)s<QLfnMPnslJsIF)Bta{=vcxpxl
z`xzde0-#t^&+J)sj1KfMovTJqj!~?sWqe|anqtJr*hFlYd=<^jO_axn7#JBxYpAMi
zpsBHriQ#^RMkgI`T4hBvHr6pZJj`@OQ&rbUp)$_M_%wBmP1IKxm>3^ncw~%xZ3`PV
ztfp9O<klP4@|`dJ4TpNC08Ozv;c=*s(Me6itXb3)bBqlSGBP%qI2_c}H!`cfnvsD%
zMkh*;E7H_7i$YXkWN3)7$r8otI*MwV(eX(e1jVXaW;NDBd6I$tK}u$ll&q*IVPj?e
zOrA$@H;niqDw12l`2`b9B(q5E>?H%r%`pAhi(K*YV?cf(!X*&53gE9AyK9C&jeU4?
z$h(&FVat4?3r8mgUoAc(+B$IcRxBjo5!^TTa0$&89YTJ2T1{FkoEKWKNod=Nt$1{a
znbR3tTYG3w%HV<fY-LpfH5}o%j}^?=Hqo8oQTyXq(M=~U^eTPEEI9~$NI;csjh>8_
zJlh)3w}~#^)(Gtt+Q|8a0%;$qOOf~hs6bc0yj3V`E2nc~NvjKi#f}HTzq{b`6nsu&
zx=GxAqD|?<(n&NU;@F!Ucm@j$D1646TKqx=*1JK*ny*WF)(Ffmb{Iu_7-8Z}Yl!mH
z)m4+rRZ&nS`Ui(8C&y@_T!E(9vr*II^bZcBRi4JCCW=v+f&KwX$?@yzx&|8StC<`f
zW_Wahs8~ZyuFS~TB*of#ifWpX(Fu|Kvuy5c;Nn+XCkg#N%jcY~(#5d7pTt~D6x-ez
z(Khz%YprJUc%oP{mWR2{a|IMxdswXUhIA`%)|O!r<Do)|T`#t+j-^heu7bFc*8{D1
zf#@BH5mf9rE;_yIj+7Sq_p_&$uPoLCag>0AF=Zl@z`2Nqre^AjilKpi#wMrz+^H05
z$?_H232YLfU;@Kq2-?<Xe9d^4F!v>VnQ=S`z5C(I1FA4%OC6C0xeRIu3(h!f2#y&I
z9Pn7jN)PQ4y&J#S(7|nY+)8JC#J)Fv$!oihQ#SlCL*f8t420B^2c2*!@Kx&FWqTL-
zLm;sQQKnvn^JXdAw9NpSu$@*xf}VlLtm`uQt5lnr96=}Jiq#!NfQus1B|O*4_fIWu
zXW1ZaubFt$=+ti)Um|=$<p_{amkyP8Cx(pqOnU@)U_mSku`_tpWb=4Q|HQhKU-Xj!
zSAi{mJ7v!7(=+<HpgVJ$8F>92Th5fw-i<_xuMBAU?Rd{>lg8_JWh;~S88nk&>`dN+
z2Yjb_|1GR<hq?rvRB`PW#WM~#lX(AMea42WSe^HUk<7aa%IhOy1t7$}FsALW#t?pS
z?~Ntdv~|(UMX|#Wuy`9jpj}B(68(v4NsEUB`!z<w1b9xAjt?L6rNn5luqFD2tBawL
zwCO8(@?>#&%)Hdn6)R=2=~vIbEkyz_icXBFogjvWS1DSVb#h60%$a*xvl)}zsZ<hL
z%2`=Bm`fxT%#yU6c^aNlOUuF_+UTI-44SoKl;w=_oQQ2F`4)vTT2m>P(LfaCP?4L7
zfE(MNZD`!@N5+iKw0a|X3JV_3flT4#%@o6gkWPG(H|Hz9(|XaEnbTbGm?Aej$<AD>
z$km|i0>U5O^OZzzQs*-j;VR_Q^g(6(&a70@4C71vi6SLj>G4hFd_n3uYtH1~m;GF;
zbP=Ayq?up8!~N83GxRU*{Mi>G`6$5L^~vH>%%W$k2^}bgKe=hfmOs5FZ#oRb+FZ`!
zvtmuYH*ufBJ^ME6UC4ic$H7=(;*$E6$stA8Q=`N2tg+Mb30puaW>aH?7!t;>PAr-7
zlZ3pImd@x0R05or`6#{~Gdh!RX03*5(>IhWL;vYlHV)3HQ?fl_M!&|@F4?h{jgP5<
zE|YbG)A2|$R<r#;Vr#~IDVFdV+sOD{SlorNqeb+|_MZICH?2)&5oGYt^B=dWD1p_n
zd<mI|4Uxv7#Qr4MA;-6DXsZ&Lt7HE&=Y17X6f7R7<V>8QX<n{?5!b+-1H<^Fa!OY&
zX*AAV$sIRsWySKPwAV*m?CB$_>)_^_Zem4OBWKQ>W7_WPIv_MO6bL5cF&D4f!mLsc
zgOg=2iuQ^@1-e4>!u70ai#XpmVgnDYDb{y#^@fF<JKv3#)wOXB^Es+z^VJ&}?Y+q4
zbR{06P{WdiU5pM5;U0K`Z)gQIvpU&w%{8oDv4mn}l-_|+7G8D*4W%K5#-}V`DHR(d
zigF1sm;zFHpeAjR*wE<(Xj{CN1@&cmhsTq?+S03QTgr`F)-iDY47zbXH*LL~kqhS-
zO=QB_Q)3IU#IQ0JoR9h-V05npZ<1fTnyZ|B-2_26uZ#i#l5V?CjA`GwO)`JXaK%i<
zp`HP$ln-P9(0IU?C`OoEjN{Lo4D_d0L1XWg5~X8L*dSD}XlF4`=G~xBl9Lf3F*e!D
zge_=gy(}on`?hdMuk!kZs!HAWbt;sa{I%QM9++yctpiGblf#QmFcF5G(OZBt{5kpU
zy#za~{zRfs0pIUdv^f`59xvA2gi9Ed4RxhFdHrHbu6Cpox5J(6m0qE}fIm650iM)#
zV+$o@wsJX4lKe_pEly+jN0d>{=1RzFQl<1?r4*{Fk;^x%WTd-?sYItyzM2hJ-^k|m
zOBv}t%kV_mVX0xn@}n@^npfkT<*e?ip{H*MjW|jW#G1_^?yV>uoIWhC!uS${9EJrk
zFC9tf7fDM=ohyg41iX6kT4o9TN`js7B{Oc?H%%^eF=1sAL)#b*u*6JU4M91EAM-vg
zIP6wltsLU2tyw$(59Bj`A!5^ZQE8c_E=>YSkMHat^+Ujh24>A!&h?v?a^~b2rZu2)
zEL?p#TQ_f_v%bQqbA2dV$i=;*6f^Rr)0eD+X$Op6I|UtUZ|1`{uc1;_-1*5r<<?bg
zRLYZF9IxiXAKFG^K53{ms2s&&6@^0HP)2J-zEGgLx=55b<K}H2=Zg8YfTECmdlOvo
zuC<0rnfX`T&HJxgX$@zzS<~3Y@+DpIw_1THm#0`PQY;iG7UOY1k*}&}+0rG{6inZU
zhRzjy;^X(zQeQ(M7rEfI$wd?8YWTn(e4fqA+Nf@5Va2lf<j`!s`vY7tzm2M@s<?_q
zgNi5=s;ElFHOVxe&ymmNsj4cHkCF!#M~QxNIr{|pnD;!(ufB`-T(;2qk1}6P)|%PN
zHuEQc^2c1Wyn|KO-o?NDlRseT>>4zXFBB;j3&~?&6oq_|VzEdeU!ba(4`T}-Qqvw`
z{|geMy=y0D1~QcEC-3PrRcm9%3Nk5$$<&P;$e?{bjs0w~6c#TMT(sd`#GkRh05%CU
zN+dfsneh7?9J51a1h+PmIOXJ(lPnbqYFttn(Qh9P8oUk7EN!CF*V1E1kW*I9l#IxU
zLWXY%oy)PpM3m)~GB)Lz7!}yb_+tu;MIMuhOC~38h7gXeMPE&(j?xKQdDmy!26Z-&
z@nwweYa~mrDVkdJ7j3e}mcE|#a{9_D9MaCCw8H_9icV`M%tZ=cw^qk>?cD$VZFJVx
z#NYBqqjiPB;c+&-=N{J2srR}S3Pq}l1&jYW!Pn}lDvG%XMTO4Q*K^+u>&Pj{=L_Vb
zU~Kc!9xIMd`8m*`Kauq$PQPaGw@|ZLIFjj0A<Ab0fX~m21>uZiWYAK+J@t%@T`b79
z9xm8WSE5JA?99B#W*M-S;wPgdnvtG&&7Tv-jl?Ri)=+4i%cuYJKl10Fen0i93}`x5
z-^3R_bsx2bJnfxrMBd^~%w^tLC#Y_<NZeYUS8}u5nZQ@;3Kx#=<rgo#MAPMW)81S|
zdG|%$+Odn_?aOV`iYju$_6N9WT^D+Km>+-Zo9sC|%-l8C^1!_}lCMm%=anaU{8tCi
zx=e)}Yi_uo8@q=1#v{)&R;r{NM_0=BAgkfMP}jkIAGnXzg`>m|?2fqZeV<{=oFWZP
zMS4dgj=%hUe)8&RZr=W0u3p{5t54^E5)JJu_}K4#kjpnN;SWBi_|-Fy^1_azzM&LE
zxq4PDYUHQ?&-1)>tea9f)>FA0vGL~Zthv9Po}Ewg&7ZzZ!{SYRXxmomia9PGeuE!A
z@k=JETKM#*Kgr}kA5F7rdHtCm@XFB%wtwiOte@A&_`qqt|J_G8GgifWA9#?}Z3U|4
zEaSqn&t*E2Qqvqibd&`fx6s~D%b^n&0L8pDTlwhr8z@p@@2{Tb$=7@M%pZS}IzUYg
zL!|l9zkGv(7e+*I%naP=WTWJAfqgG$4qE=J5~oRg%AupbO5$(Vnqptf=<K$U^FzbE
zq2c(IlTI1oSzCVDD+?+95}EuJ1j&O}!HE?czd*KU_b|!SQ?@Hkg4w%)1CLg?DuUTA
za0T1U+bl@~<zJ60L01@=)m2|$x0a!``Rt8gxwiTim2~Q8MM5YmNrXoOW4^@K8N8`^
z1`Env(Bjbso=2;olh_8#e0iONbs0MW4D_Mo%0p=PfM!6~>PHM7kCV&*QZp1;S<WOx
z`X!Irr0>N%8GVZ|omq&$x6BHgX>N<&j1{Q(hV~r5#6VT!Tt4v7XDQa_`1y~%!z=sF
zaCF};j$gQ)iX|VhVEy%cWZPD%RGA$w{*)(Q-A`4^LO%52_tVvwr~AZCzW;-#(OOfk
zXj+$C&b_y6;MpI2pCjF4c5Lg-UM`hl`&j5|QZ4*bY~kAXGP<+lm>^c19;Du#KDB-C
z$G*QK<?Gc<g=EI}@h;0j)7*Rf3VGAvnT+P((vFu#67Bhlrz1Iu4)o?{EQe=lUCWH6
z1CE37#62zkH4{bS3f9M!gCGse>YT^%*Pr9$bt~e+uA;d5)>}F9n`gMtU*g>11C-6t
zD=}Ua7hT$JmHOKbSOvv-(txbxPl#&y0$}Ae_wZ+b@$a}}$rNuK>`iF;ed;>b@S*oC
z=38I>?>xD?%*Wq%JyEfN2R``#yPx<bfAi(9^4fv3RuyY*_z(|W-^Pz0e~GbD1*uH8
zG#<Q^qBM4aAAR>J3au@15eGE3w6gyfPm*t%#Z$j{hYcH7Qx#3|%=aFlx7bK^Azr5%
zzObKf{`2?Pz3UDB{%`({pTBd=j^ob3LAlh=W6!?BNB;27`CtC>i`=wfQTz>ARmI@R
z*ZJ}{e!_~YZ=}68&shIicD(%-Z|^z56}R5U%JynddFHQJP4_#`^5uW{C*C^R%hD@v
z=Y|E-eElE4!aIZQY`=arb5>u^HFL-K>OX#+qXQ+PumG^Hh~6`YsGhf(u3{f&`p3vs
zHSxg*?&t7}kMPxhd5TT9-^bkMI+|)q{PMMZRF_V%v#-d?d97X<m3{@lAZAK&4vKxm
zCjG!WLacMEbK*u2C`xujf|e`*{na+tNlt&T=L=ktFt`2HM6La8+c*eikSasWNv3_R
znShg-#xI=G;2ai>z7mYM;()QIM;G8K(5WecWuGvSTH4{*I2<5|f3qs0jDECO|B8Xb
zqhs*HJr~R|lPW58+KjPXoDT|XIN#CQ)-Itec6HO2&dC<M<m$31N{t-_o9>Npw@m*D
z`)c2}QsakdSzBps!L5=MXQeSpQ-U2cknj`BvX=TZZME0e#8&7R2p+sFpUJP#jpZlt
zIfY#Xw7N+8#ybqIzolvI>xM2w5wHF1JAD78Q*68EW@-ySpxRuafqUP#jlDnnE?@b<
z%iMI|`{=CCbM-wRrLBJlfA^&?^OK*y$wVa{v}N%XeD1+*?Eb~i;sOq_?JV_xv$2b1
zZFg^6)VqSh!w2!s%IoPedsoD+@)#a0Kgug|>~H>>PFXJzQ=$wB+9ZXR;5hUl<7INw
z+D<$T6P~|04QNm1Fmt%3shx%7o?WRGQQ^yA<a4@@oBP<S#I+d8(irb(I~`-=MYa)f
z6fA-%1w98}<3}&-On%$|G*!%9FqgG!SJTw8kT3lEKVyDPqBI@vSOP@$+=-DwI*r0T
zj9v1r`~&n}f=aF&eC=`mumAej{L>2;xc9cp$q~>%Q`gjq?my1SzG05<JxIfxITWgD
z>8zh*$NqDSjtq0LXDD&F0xMRppeC=Gnyy&EQwqnw7XJmUE5XeY6Jz6y42>~9F-fji
zl{^@@;<2f<MkhZMU#XOR^+*OP)4cZR*ZH6S>aThD`Gb7yQy-_TzJOws!v{~(cd?tX
z=^TYz#8tPykDE3wLsh1jri$9?2#sQ*|0H{koM&QkoYC<zbLK6eW5F^$`p`pcS=Jms
zj-|Pi!Sl!I9qi-akyDgK!4YnQl*jsb<&~HC<tuM6R#xN-%`BSV&ZhU=%R?W&lUhAZ
zzEGf4nq*>PoUxH{CZ@|2^M%B!%4Qhtp{Mf9p1q77o;$J%wJqf8nC$6S7Bc`&RuN>9
zPAVZe+BQj~I2NR2aVL_EACT#DQ*l?3T=D(VTeV7Pn>y64L<H*cYal-ndfO&KR`ZKA
zQU__k_H|@rasaG+`Eh>@0LgD$#Z8^w;;nr@xoXF_O}=vbP$q0Mjy6B{<f;i|Wn$Ey
z(woGnpfI$BrOX>iYg^c8QygetcShG9_Bi<@KE%zmu!T8Jkr6WiFP5H2+D=C4Z;mgk
zi-0k?jjRQ2=i5$3uedb@XF^4ztv=*|>kuyu9b0N=<$pUyMotVSE;i|-t-NL>$@z*=
zi(+K(3@1<ba`wbgqK0OQ`5fUZDm2hqJ;8xf=jb_kjA2zvLv0oF=Qgo#&q0PpN9exL
z!&D_+p`EvMIc*J9l%`6CNsDh<=s8_*`zwrLL844QS<Ik>NsPWzA_Nbe$4;Py>Jr_E
zf>;TgC%lIRlPNLfB+>DPjVI^VzUN8|_F~iM%4;9k+F)T|Uo*ZoD$xl&qX+Hon*x@k
z*s)c<=__QCf8`^pBp#d9zSwr=8>E5GJPeR4oJus3R>70adV-f9DwIp({Oak)`SA~b
zz|O&D)-7sGNV!4taUq*HP5@BmTZ=NVvktw=!<z;$qFj;c>T2>)j@p{4_-bi_>0$$|
zElmt|4>R0<20i;aR?KN-_T_6CIDLSri4jf>6xgzHF)tk%q_$Au?8O1lQ~dNF{|i&s
z{2?FTwwW*g_*JIL_(7fUQ&rnQOG`5v7#|sCe6mD&dW=f3iPok@&JT<Q`V^q3rx>3u
z(Av^WO@E1rscFj7B~-D7*^Tv#431Jt9w_7;yv!9@wrT}qJr@}0?PEeK;$v-o^i_18
z#S1&w|LS+xb+(TCYHG>FKj@0CRGfiSg)=9Pa%BHG9(njVD#d!r!{;c^X`pfIa=O}g
zvv$QIa<BOXmGp^7RoL;1Cz&cXv3?s&mwGvH;3y}4{seEG7^JScz}Y@#d>KITp7E5P
z_#|yz5=r0|2Xu~J=B|kC=w?Jpi|{tVX=ZV_;zkyN-YQw#AyXz<Xu0umkJ7RHo8%yb
z4wHJ)NNxcJH$6Gy?!L{eY_>p=z-B%N)gMw;sb`_L4Be#Zy8Gc41i?Gg)`Cp}aJD#*
zg_!jK4^onx<PwL@7|ENVCJyIT-(<z=2piCMe2OupZk6OQZm1=A2%e1TrOlPOFB6+A
z<mI=OrJTY>-ZT2-*JO3zcS$QPdDc!IFOiyY<u_+7o*U;5c#(QEwzPu_U)F|Or(EZ+
zp}!k<b=lDs`c5{3C9bWT7C0iF#@*_37-uGIk`MJw^I5jEi-y*fO!Zx0vQ&mjnaL?l
zOGhiUCq|em4RL<7iuKDEGda{rLwT6N(Fu+o?`G@e>-hDVF^V-+^qf75ieTq+5A(wV
zwS4SD@8{%~zRtPfQictDb%YZDR{%SvjV^qdq;BNSfNvLzTShn56_RnR?SxO5xUg7+
zpl$HZZR=aP%y^cX5Y5DfhigjEF|VLV5qP7#&jCF<jA`Yji_Twz70OiMsw`XfzU0p<
ze*Gk;{Lx$ym?+Rr3Q=x_QL-W8$D0oZ{+ugRGrMUP#bOoBvzr*|8|IyN4s+Ye<+L4%
znA?=&?ZNSsUUJPXE&tgLV5(0GL<&>nY3)gcIQ2qkoZ6Oo+;rWwtX+Q@^Q-&$&i9|7
zdvpp_8eyV-5!>&25BlOE4jw&0AMM<`{dN{N4D;=WAES48j3dV`aN|Aqam}W6<c803
z=yWgbi&ipv>~&svWha~Ox|{BO@6b0^vSXVnxvp&yH{WnAmu=X@)mL9mx%(K$E(}sG
zji8%Wa__CzGI9O@6Ez)-pFBXZt%C!HPBN#y%#OV$s4zwUR6X}>yMq>ek^RTJDNT$~
z)v=O0Zn=iS@Oh4%>rad)3?CKPbkjX-yX6M1x@;jYKmG{s9Jxr_!sUz}+rx>WGRqft
zaA4P49P2J~^X+%As-v27J^h?Jwx81j(=1-t#esKrFkFhkaq!}4qP8{MdCT>zS-yb&
z69+hS^ccmi4cvLt73i@cP956Gfzxp%<_ZK&P177Y3M0I8@FY4{Wd5RsoZ0moUO(8)
zwYS{PwO4MWt$LdMN6ykUx0&O|FVaw);LJ#lO3yKlcMn-db7uwoz;Z>IDcG=1ZJgJf
z>vBOalY#tbiU>D`dG9{^r^h_~iwH5}c&ikW$6zeGLT2=5XZ^0A2?5KWZHaLtD5(&%
zWDvW|BY|8gmv5!qQrDR)pDA5QUAyEi)vi6A830OvNdl|@8*|V2zrifCMW7`uA&0q+
za(<e!tpfQCFl{WQjeNdNP^9!CZEwD9ZEUHM&(<fcUy5xn>+e=`o-M)^Z{w4OA795A
zy0v{=K`(i*GTH>AeYtSD@Zu|RQkig+u!~A?Jd>Be-ZF-zZe)Auuja0=6JaN`C!-b#
zPs(8)NbIkgI$w&DtJ95)Rr87pbu5}chxYla*|@ld$AA1NCws?Gl`=yUiW~3T#-jQ%
zdk-As<oQu<xZ`fFTGh!*kN=Qe$9uVO@&t1>+`z5ZZ=tiH!h!uq$v4fVHR|KV7hfhn
ze*^OigB(8H>+$0Gh46dh4Jo?{>~Ytf$2tEDY$E)@7GbG}%^Lc`ca@{*V&2qvu5XCx
zV!Gix+IvigFMs;@hl?g}!iV!K9ZVA3WH;&O6~nU7)9%X{64X>Ms+HNF{MqpnF6HFR
z<Hc$rJUS$HBNAm=*2K%nEw$?6-cW2^d<CC*=ssrGH?nL|2mAND!{Gx*S+ezRZo76P
zXI_7dr+#ykif1c@TDoF|mWN-xNqjkQ001BWNkl<ZQ$Sw!@-EuH{w&`CuD_Vz3ch%7
zcikGvh$Rh|@$x|+bh?;$*wa<V>3f*2+BcY+)?C_no?bj3aQu0EXV6O;c)%Jvln+oM
z>eBKBe3({#VEH_ex^I+W<aUWP^h`g&8mP)Xui?!c#Wg=!r(gbcL0w~?=OqtQUut~2
zqLa`H+s!Da_5)CI17wbu{gTI(XW%n6`po0abf7o8pM`e+p2Wr(#-EAT--cfmSnu8V
zmEd-1T3NDWtTqOHLx0Nl=}Uhyt0=Cpc{lt2Hf^jBUNMy2{oUz>m^@SaH0eZJspjuC
zX2$)01>Y%$%xvNzEEW)?tpa>A<Hy@zJ{HL>el<h?6oz|a&&=ngV>977C{RL_nbCG~
zMSLde;Yy$>IB?i53%D$sNxu)}36E3ANnpUNeY}S&$lPk08&(Hu$<j-@?(lXJ@N=_=
z9igtZ@*jZ}^!Ia=#2{HK`8>#BLOw|3wFOwLw3HsPEtC7!M+N>1Q6ilrM7NCXNNCG|
z{9n2Iwtz~`f|v)|B|ie@W=bThe9k!5Kyh8;&0rvCW9}QXE9boOx*_JxloOAzXaD5?
zhL$jo{A7y1aF!L<VoK=B*j27B$g>i0gRB{*&x-&G?LM2?G@^|Xa@zA2i0vEex7yK{
zGD_R1kiH3A8E8&evv7sbl`WLCYhz`#c1pjpifU0mTm8%A?q+hPv7i1^!Jw4g!jc7?
z7HzGZqN}%E9`GprMCw0NAVWbUfRINm<nZ6w3vsb({Ly>~BV(jgqSHSAa(-=S$Mn;>
zv-ZrQpWRgO>hxh&QM6CU>o4Olygnn}Sud^aZ7)u>)2@B^rjW+~dFE748zqUcby|kh
zXF{E7A&I%?&X#!^&}lyi{nIr~7cSv>Cut85qG)ZCt3Mw!dJN-VGC`&t!uEcbY`#%p
z{3d$Ih-X(s`;~_*DC7#Fi9eGsV!8r{g^y(Bo)9Z9RiEJsAIFz`FL<_jFpxu*HZtBs
zp+6G~UV)I|k7|a$sL(b7KfDPzDZ9Hx3gz{;er4N9DIK=PY&wiE^kP_)l8XC*KyDo}
zD7G~|Zwc?Fu#|bx`zz}a0{OxFVwMtibh*=EeheK_4@wLW=5}TM)U<UAQOOv>!6H8$
z1x$*QlYSCN=&3}hC|jh(uL_lNnR2;ozZ9T>a=Ajq0ESg$_Uf1oJrmfKioL3{qC*48
zbJ8jjGz9%hrDFSr4u{1AO{G%tNSpS?Fj>K)(M~pVMc{{gu2a^_1h#VsaTX#glV*Uu
zEKX$L*<c8xHc+FRX*wV@XOa9F7tRS(2uc~kDFd2L9yfC`#hSB=1VC@wQpj!tCDD~E
z@VvrSyB=1@hM3Np0SP`-x~R+gYiDf)MU@tEXki~g+vp|Mfeh48h03OX6@rl#vPwX)
zWBI-ux|2CSWs-xP4CtT;v}5^3_KG0sYsydl1R>}8yZ;gTO6gIy3;KsXqO}WY>!UJT
znQI}GXk$-j8^@CaZWQqriby7AqUaS@8ew<$zw}=Ziew^HVGR4e?Z)u^IBVrQxscd4
zz!gNS+k39PIXEuIvg0UEg{h*!^H-cc?L-xqthJ{T<7eSEolBO9yE{dTF`O=voKPuS
z{5OS!%e&h8gUn)1CzT|RS<M7^=Cfev_@#$5L+B$q<ws60VsfnT=sJu`+dVx=3|6y^
ziv<{GeBZrKoKxCg$-ZKuYiK$5#p11q0ZZG{2{yjNMHDa933g1L@a58eUrXOCjwC@W
z7HA}G-=^t;)LX1j0iVW>O6VeN;}0gVRpXyqjYl=UTP};5B%qyZZZ&7(oVM%?M#oNr
z!aWsU$HiS(p{pb+6pD^@H}e;t`xu+Ax{@^unmBUg7&I;B6QB4Lw{4oo?wxy?tR$;R
z#soIdHP$R<Ock-^o{!TxeuQ&FCBb8&Xj2L3G7GM{hufCq*nhGoQDjW3rezf$+kQFw
z_8+67g;A9mEv8e>HSy60?xXj_Ax0+SCwu2=X0!F`_4J%QAOB#yv4?Vww0!fOY?)W#
z@R@#;QY_kdJJ&CZICQ!nP~vBP(#!0|Al}tXM=%;n7X;z#A>0OtuRFWi@e)OwA|d{(
zL;_UylLyAij3v%{NcL@ni9ok9x_|9jp9=pbexUsJp2?N?;OWHyzH=+1C$qKcpG|Rm
z`?5+o+&x3v-)7AMH>o2#^AohK?o<qSp&kTW*%_H}39bD0CI%hQhARpgd|G`7zt-+v
z%%Iq2FxVhy_<tQBOPb320d}kpCWFQHR?6coc2zf~{HyWhkt@zNON)ZQV2ycc;kPN?
zF|%P~8-FF<@)7za&c3nup-hO+E*?@k3}V~rz?fskm~nr#Fo43vhPTRO=!r2L4^AE<
zzrlyl_HGQkb|Rw$eky#`T$5wkcp#-C|BE9+-sDnL-@*ItyNT{2M;R-}Hjb+5x$XW3
z`N;O`891_+3nL}ZXB=O`UvW0<TyrD0u5IMV$qNFnM2scM3~6}<taK7&-ZB0wb*7wu
zin#OqLG-o$WONb4zsps=UnL)e&yXpLaTM22r?AZEO8N(amc&CdPOx<3VnbXuqR9mW
zIcdWiPTL0ZzWI4j&xaB5XvR|Fp5#pME@MUR|CquJ&!>!L!~m8a@w0cm!DZ8U(lGzB
z>-oJ8-Or8JUP*rBEGN5%93RP85ib0kF|_~y<rL}0Fn}Ti)=p`Sb5_t^KE)4z^eDy4
z-p9S0x|r%e%@3Y<nMEsCP#rnt<chU)&09c6OJnS=8Z<eY+d5e=zl-|n_;D8t*Idc`
zrXng=O-EaE@;DGT8;R*vXqvx<jZ4}+WvE<{w$?g&dj?GLOKsz9=FFK(drJ#*y5>+@
z%%QcSwWF2M{ys`c<y$D$vuMo~eDJ+@vtsc=S{iC%Lt*knJoShg+PQMwGU^*<)7)6Y
zqV=2Us*B=Brp%ek{5c)=*>fsaq-j<oP0j7hpF4-zVnnf~p62Ej=FXc(b7PJDMU3kD
zCg#td$DG!t__=e!#avKJhYReY%xp5bZ?1Ws>Q=>EBW>l(TH5z~A-HFH$OHOj7G!5)
zTKF9PORh?qzjDSD^5m{dKryRzp31D0+S2#O4QJ!Fkjao&^6n)sIlHs8L(p<}?W{|H
zRXEFcahjIbrzmN<&jvglO_4C<vkFXqwEXBv8_tjopt+S!WqnL1SmF3cu6y7w?39eH
zwT}4`!00EjxUXILll>B3^Fce%l?;Fk8O?Qzg}&-DU!Fa*0C>-LolkqoX9kLv4@nmP
zgYR#~y2{2c=`IO*rx)d|0)%uEeznoJ7O^5R0%H`<uAy{tHCK7pL~=55mXa;O0~Pd*
zoefoN;IfVDncLY#S9=Tj2r5%!yz>0h3>G`+npI`{s9b@T_BqV!YNwbptFMZ>#unzy
zpGWJgIux3Qj-{+$HV365X1BCbQ_R^FCv9Xjx=DOpVu6Q?S^lI$zcD)ZwC2o~vY(4b
zx8f|f6kT^&z?oIHRN<mRUl4I`TblT_ge3*XaZ>cTJf)m%1qF@4i{KR$IocMEa$MP1
z;B}%8Ii4%NlBP+XGqR>}*HetA2e-VTGunzgmW*#Lo7hKXh@gYN)kVdQSDxp&*N^e3
zKm8)@xw4HZ!$;l_afP{D<*_yp%%zRxF|Vx)lgkyT%1v{AqJ%C_(ceG7blE+xP3If<
z=;yw`g4`fgE$zJgy}##YJI`_XZ6D!7*UaP0#X&9{c$FW&urnSaSHqnjdx!-?uk($^
zUZpI4sNQm=rAlLIbHxTOzxrnGUOC3kfAJ=za>R`v`aBzpJ<MOXl${5AsT_QYuRgJd
zWoxhGzI(6W;G3_|J338OeH&L^xsk3p^SS1_YpMFxi~MG2%-1w<;oNy@Hnwut1AoSr
z`nPyX7di9hAoJE<%k5V#Lg(wL89B(;zVj5*t;_gN|L!v!-@B8EQi)fe`w690@8>VJ
zFXzQy?_+M)EFSs#*El}e%xC`i4~PcNQy%Z*;YWT-&q&ED9E|F%t-NGs6r)n+lZRfz
z)j>J4i{B|Pn8TNx<y&X4U?N;Z7DFnVmKP6Rb3eJtnWG0xiOs`poGji|z=KyHB>elU
zH?EY*<lg93fVU|i%&g3tMM@wuD^&*HEiR{Jm_c(EB=L?xsGx-+Rs6}kV!q9Z0nK0o
zC-?@Tw-RiUh+)Gr;cVmJPuNPiB8`xia{awR&N{cWiO++ylRuMB7c5e{ZspkdzFYwb
zSHcZ%?z32-F!t7>z{=2{G(?zaJ&Y58Yzmpo6_)U&$>7@=$y?nCc#sP6*7Y*_`hR@8
zS|P0NjXi?GA{aDP;HmU=M_SX^zJh!2e=n+~g9Fb$%p))EW@LDf(TS29Ugg<z+ef&6
z^IV3es~9`-3%>e;7ie636`%e1T?}9BW^DKrKYaMHgs+G-Teoq?hGxF|FW+Nqa@yg6
ztsR`rjkQyD3?MQ|TApg`3Qvd=Hb;a1iybZoO-xA~GVf=4rQ#%Ln}v6&&&rPrkU7Ri
zmK-USn=V;qg#IXG^!bVvYkpx$Vn#^Re%V6clKV9pIZPTa<VvQUn1=cZi(^YO8P)RW
zWW@GUs0a?f^-B&_DimgqFcIBH!Lw`JF$%Y!;zS|=PrF0%nD;sbmiFJI?VRNsxqI8)
zENh(P+`wcalrKuty!C9EJI0s(=5P7hPj_?owyorAnz(=aHGKac{)WH*%2#>p<vqz0
zj3aLO_@8le^J%{I#H&n9Cuihb&d7MhK~oy<;n^piXGG;}!(6Vy+b=%Fg@Iw7e(??F
zELuP@D)H*mPjhl;GWjh%80|gAqmMq#p+kH4-nYNYuMeE@^kIxSe&IZoh7J}qmKd*{
z$D-L0rw7Iv=srX5&;;djj!j!{q@y7RDDrBYXMglvzV^*;v-i{>`C@?!2jAp--~J9e
zd+WLG@&!cIvsutq&Hf#)@uMf6qi?j7*xJk-tSU`4lhU)Cv{=n+2m{rS@f^|E*i;oV
zhU_dt1g;i5CF#XYKSCbT(-1Sh17)*fm`*0<k7+Mg4Ll%Gj;;rliCn3q;jkrsV6EsP
zTtD+=leJE`Na@($evnK$@Y0#%7(U#LUlb~K^&-X~h4Ow+)2vp>mCBeK0U8oNe2y(0
zyA39I30EF%q7pyeYJpGtK4EpFjl41mB3#a#f0EmaNqbS?NuHHb>4V2MIAQ>l1-02e
z^Jc|zB||Ds8<w@7ge*PBufa+A`?uaom+%MA6~ja<SrGces5;PrlR@}+g6(b<P<!|7
zzh+!dXr~jm4jSJ#zu9bcZ3-;Mm?NO1&V>$K?cvL8vz)YZ+U9a)vozApW8eEaUwQbK
zY`X3m>Z+npE99%1xaIn_JpQe(@V8(2KJ%`)kvX%9Tzl6x&c6I3{_Y?Dk#BzgXAG2L
zM_;mKJD<DvG9G>8hnyOiu;bgf^DxJ}S9aAzj&0Sf;zB3ff>aVWap7sHCIy?r2_@{P
zY>eo@Poym*t}4N#l2~Mzl=(&*?{?!kEH|+nCS@nHyYFJxHkS#_%QbE?piPdk{)y*%
zT04ooRcXVJ$yeI>Uuyf>ORO;+7thXC&UX!F$FZQy1YYJEp=Xuw>2O2)I?>wh#A&S|
zU&9^me}KbJ|1)PwIge)``PnV`+j_|lB74crIEXPqq9Rv~I*S>`4!`j%fA^*T&fh(D
zfP3%U;{9TSPQ0N|U5}m^WMpcZk-k0(b@k+P1?uu8diuvHPnVdOGP9=~L+1`tn7f9~
z`YLB*;qR7XZ4LzbMwJGK3gziCrKxGk<!Pc^-YbBtV|ZE1yp?Vq8^1hyfr;wnG^!&U
z>#k*fBlM0|_~3&dNA(=wnHOHBe_WHxE3{IK^q;49crrO~t3g*78yRJ~TxNW{L`_`{
zlRf+R`~UGM%`2|vi+}#dtZJ*z5L#|&BzLvjDvMjEw<DbZ=DdwgoJ=SBljheb?`HsP
zS8vrRsUT!WVH4wYVoSkr7XX&ApCxDg&iSET>5>`W1PHDWGVN`U3EkP16~n)mw9Ty6
zBx2J;ZrM3Lqz;s5XLy!HZ#=G}VV~uyjXr!mWA9;CacqH|l1tc08{7GXhSQ5krm~++
z3dR<K0VBWhIhnLA9SQVKqe%yAV?#@>;<%L-91dyIFDbl?&nQ!fYS9koG(jQ=F1Yap
zT~qmai32H*@DZaQGn@+KHA%zV5cbgWx>J<UhiPx@;l?%F?$Z7yiOc`})d!VyOjc}y
z1nYS8<H5rLgQdn#O`#w#Q*KvjCHd2Wj@foJ;|jA8Y&9n_t^)nLxb};2P)tpZFg!BJ
z*zh2#SWV7b5lIqgRHP<9P5;OklOsb+lBcSWr=g*W-rfPG%hOCwOj0q?I?>Ddi5gZc
z?DTVqEsA8UC+I6uhyMyLXl=jE?ao!<VX?zA7sHNL{J#QR;ef9*i8b0U+${gvjbU`@
z<#rLqwuOy*Q*1h&I9kpReSbE7isv6eh7vk4y8uSMco5^~2Itbnn}{VTgd9@fv%xi^
zCvO8-;Jz!;1uiA>yUD2^-v%g@xTdU7CKrghN)_0;{o`z`I>_ID{}m5f4E@N?Xbt1C
zIx>n82dJ@1vZ=~Bjy0xYcE>_CY}m+_4a?~7>5Es>#s?WGv~u~T%V?if!@#*i3^%Oi
z_N`ZO``y=ZZ09~E#|C-(=s54Y|5i3`yn@Z^783zVmHExn-{l)WKhN(!xSfWooYjq#
z3lyyjmUG1wo4E3djdV4~g->a+kIAaJT(N!)&Gl6%6~zt_MW7T>WRy5Y&uD>-8#gkq
ztq}#3CdR1Lw6JdVGFoTVI#*O?30qN~9%Q)CM&<lb_FtHwDL=~C#5DPQjwmXyc-aa%
zTV_$r=TIoS(&s=R&+4nL<=U;+vUznY2M?Yh-`Gx9Lxo*$zsaC#psA*awX`YPjT4B!
zs9;4-1#PU4n1IvR=t%B6_RQ*$67Nk@p@(EZJ7jWSifKYYuILE=b&jrlKE>qQ%4K<x
zNU3lEWaUtvEZXQ#=t9zxTU#NYhIhZO>#rPhfv%)p+)AOdmyl8NlZX}blH3=CmZdBD
zI;IjIF+?n2Sk+*PCg0A!D`4ajxZw2V>(F?By`KQI{Fb|~Q02+(=|tGs_i>?*q>()*
z6Ywbul#~rpHnP$M{F}a6eSBSn`c+adf{v7PMp}|iXdf>YP15lF(5D;Pl(ami5K`It
zLD0eSHG_{Qc9pXFD^-5ndp=@)O4!AI*4}thPwu6e)wY!DZn%b<-*YYf#}6|$Rfckj
zf#DLXF5krBxozlDKL^g1x&4-#xbD_ls2Mp!_ux1?-#WmS+wSDbjT^c4noTt1Bce!g
zV&^aU%C{b8^F1GA-P}eemrTUM=AmrTb`nQiBRpM8MJcCP&jvz4PG_Dir45a~wAHB}
zYC@kPpDThQ%Y~Km7sHPB>jcZ5hwGxR=Re6mSrb@-dot-`CJiPQ#1_6CH}M_QMHEyj
zxaa9j^fhBToDf)K@hGDz3~XuTka}{E?#4)F_G@qbD4)J#39r0)jFs!xQD4ByXLRTP
zVuK=?sfn=?s%F+<jcv#D9Zvx&%F)u%MMp;)W2fHak*8i`tW+T?k8-kSnhhJ*F?I1c
zC(oVb(5XSzZdixDu$xDodWG?+DGu-5PxI2XES=lJ#ZyN(*E>uU<>)zekX<`>Q9W-d
zLnn_gI5{m8oxGwvZF9Ql?CfM-R|g{(PI9hqgmP(wbAyVF>sK;*;V9k1CHl@DXK=E_
z@na{M9v|T7scy6?bMkZ_tJZCxW~!TGXZx9+7@>D8$NCMc(E}Ga-7{qOvyIFe&E(`1
zM|Zu>!Lz*#cAsMZp%WZF+0F7bYlsGp@y5R6oH}uWp^0f!d4eOyPEks#>GtItSX4XB
zWK_+Gw|~w{JC33AHLO~<fmMs=a`4q>d3EmzrgdT?FBpU~D8~_!L;F$Ac|iw1`fh^D
z#ECpW?7fm1gEzbm_;Do!^2CJB2=-7G|8xc3bqVH@R-Eu>>gfZK4TQL(5K0LhCi+Pd
zr5Ve_fY~lnWREK?KtRK4B#^^^p)Fhtb}1Q)LE<vgVB~;qg+kkDvXEEE=<7Y(0x8=j
zJh|1E3_r25nYV#{<Z4ugKS+1>cu6$w`LSilfvfvUc;Lt3XM8_$pejB~7s2=6(4UPs
zS$^(?A;NSi1*+6ruZa^kWhdKB@ShdOR_&oJGvtwc7=35yz}eId==?V=n+E%96Pdo`
zF|ICYJL99%<Aan6%{0-?V^6+7-&hGnne*o^(zSdYt%V7W9y`I&Bgbi3yox!s6FmNt
zM>*X;PS2^sj1<~fyJ8X3!xuSv@;qggV`}Ikdv@>Q;$$_AYK$`%2kkCR7Uub{neb?a
zm_pkJn48NXm9)OGSRp=Uu#jP2b4M0RO7Q2!hQp?qPttUxv2EdEuE~N+iCDq;NOHl&
zzjQn$aK*`Pbs)x+eZ7Ybo68b=tS+-{V<(F8x)>vshqQm+FDS{D4egDqb;P`7%P5wo
zXlk3ooVF&8@7u-bbTGMEx?+VEXy5~?3~rQI9+yT(Fp(HpYs$&9)=&|-NLiy(iSMZ>
zN0f|Lu2euNa*_M8Y^4%^3nPkhs7Rs9Wr!jo1?6%%>F;}Myh2-XV?*37ib20psZgnC
zq9_McKt<?Eg(y-~G;VbaO}Sh_sdyHq6t=I5BBCe~KbGqNTUTP5N@*$;h@uFnSX!Wy
zYb1Oa#w)D8<3Vn1Ji*_7_XR5X98uEVKHDgYh?Fy)+zLz=VIknpkR%O)nxas<!<K&D
z5^YSsWaUfQZPM5Y!o27Hx-CDsrDThXjH1l9tsneKUK3B<4m(;sNj-Qvi)n^>%aY%K
zjHBg7b(5SF>msxQTS2D2JsU|13BL~X3>}#OUGbF}(laD#e@x?BKF)bEWw=x*XX<+L
z$!aMfV%=#5-+8xdcAPMwh4-xw8b9}2q>^V!M{)abtJ3#bCrE0-Jp;Y$d$OOy7(eOD
zvC=+lKG;I>612lun^C@i^_loK+jD9LU0mw*Zhj_tOyP_+&-sk4or@yd4jp50oRboK
zEtk<KqFjux2FEK}C-@(Mu22D@NX5TzjY_U-Pd-_@*drYoQJw9>{xtAX`pS%FLB|==
zAAY6@ZaPk~V{Aqr`~T@R6MlH(1&WywHzgj6oRih(-Eihoy#BWQ$M*yMXAa^ocIZll
zvYC7Xxm-Tw7i!6hl^T2p$W)jXC$D7kO8&i+bJ!$r9sOTn{@=xGoB)YDGq28McbAFK
zfzhQL#!p(NaPWc5a6~{yYfZ7Xf!b)A!I5#1*k%UynRy3U^<UBv-vwBI*Fbg2{@qrk
zX6EC!4x#{12-^QPK^XQ)x5+4o#NRBx&jeGlb?sJ<W(fLA6iQh<ruo0LF8@^_^*g@o
zYGtr|GBZ!X3Sjcjj0r8F^sh}Q>9b=Y?>;xb8<M0_b83uu9J~S`o4<EU?7ur!_~Xx<
zV6x-+w~z6#RG!h3E~SWfdCe%l3$dB8?zeF6EWf<8Re!fw{cT>C#?VVm%3L1)ZN~R?
z<>yd28P6j1uPKnUz3E~(#8+1!W#g!yuT|QgGr7wknAABbRfaR#8A!^!os~1PbmR94
z_ZhNnE3ZY~c{Xv-8StLvWR&Ux8?UWotXNe;ZEY>JHOb$qB1+|`Di(r6+@_BXD1;Q#
z18JSrO2^8G%&bcGk0=)M0sWAOPKI)Yt5P{?YHO*kE)s!VMUh=i;R2A$7pN-a<Gxy1
zmz0OWm6_!!XN=PLFoR=L6pIBbhhsw?49@1+yfqfg7mDN~cSt$P<;mwF!M_fC*H~Fw
zep98VH<o``+33O3pVX=BYveQiL|bM53a`_#cJsw5YHF$}<Z~z$Q7BfCPgXOF#Uh1V
z<miSD0>12C8y&PWZB&Y$Jp%*c8=x9<{9S(0Lh4A_NrmG@p^}GZ2VV>@_mkm6zNOw%
z&-e1~xqeyxqjX@y44XRnGc;w|YtsG97)rSb4MHD*E^L^}U#%d~v+{M|(YHRIK0H6u
zEBumz4r9vP3+f3GlD=I$+Wud;g=0Al?PRi3FjffON|EN&`j?=cA&ceLychSC@?+4j
z>TKm>7-*q~bR1{Jccu;yy0^(O)WfAeOBuX&2@m#=d)7m_GF~oB0zZ>jfRAS?kXPcT
zyljgfEpFI90X`ySbvm~;NMUb~m@|a(eKD&|7@L|f;aeJ6z6fG_)#0T1Z1`8=zG)Nt
zpj$Z$;L;VLBG(%Hk-H)GJZ69+W`dBy5wk-m{N8^11EFuZbL1<|wzG0N;C4DN{$OxQ
zd^Yc+T%MZhTB@t6h)g>bQC(9*O?8zm_}r>uOg}ffx#d6G!+pWFocz)%N*s{1rGi^D
zcGn{Ae(+EDz+JcTiBEo*^{dyhrmKeCy*2#F1K0A#>wDsJV9F&g12SgZ)HRy~93UCs
z(b@$J3Us;5`kU@$Y4so{yN9j$YueWHxsP1U+dB?W)<$`F;}|1IN6*){@xBNCfKR^f
z1`fXdD*Y2l@#9TK%sHtX+kgK*@q5>`^6Kk*nbKI!J!h+wS$F+5me!4N^1`6?*t(AO
zeDeKU*t`1x(-i^UGL<9_VhS6hGh+dbMpq(k{=g?#H*1J}CwkH3*}DB9u4*4*&+%Tb
zuiKl$0=yS`+DPeUNN$Bf*+eJ(+XA%K?%TbB?{w*Jf<?9!aw$dEs%!YnCqKpwTd!ed
zS2cT&4e>wyr~j4Jg>H78o#GGw!~f2@nm%?MI87x%1@kp+KM{rp<;|=o9@B31IyM|n
zh*^@czmojAwsKsj;&hUd!D#F(&pTM~v+);`G27VTHnfq==-H7<Z(nOMp0ta^0rT0)
zW(o_9I}>DhGK`Ah001BWNkl<ZGKdgtNxJwD#{sk($Id{lP7_}dvZ`bdX*&XKN}M2Q
zXB#V_XY-%MHK#`<+IjRem|4E%A0?jI7U;?Hp&kFi&+T~D-Z7)5_!R_^M;cP}ELg|y
zr45u)LT(qZ_8b}5Gegb@QeH4;XU$_`+_(0{`ivFR9G?-%IMk;bRK%3>lBAQ(VXc2Q
zOXMy5d}U?v>}R9Njo}q~;$Fst#(tJ|H6UPP!uYF7VWNjCQdc<LNGy^*Z_cVaK1s=k
zQE3RVBIdpRDZUaOl=7Y<YH-hyGyJ*2D0NIg7L~@<mMvv@vPG20FJ8)U-eGfs)0M!J
zKrZ9zHdm>D&lzx2j)RO%t$g~n2=VnIec2n+Vx<x}Lwj}1Ww(5U-~aI4+;HRdw5mS#
z9zM^i8{f}oAGntrZ@iKDHACz?cs7Nh`S2W#8OT~do`n+n3DS=9eJcFyJ7412sEdF6
z+86kTFa0-qhR2z1zmhp!bGdBO)#TJLZ|~gCc&S2t%Uss2S^{Ie?ApDb(Ncw)Ssh%q
zb~(}1AiH<%Wq7hoZR-LS&CXGr)k$r6fH!v?proo<y>=a~^$|z*?dD9+I7`-V<nC?v
zP<5;hX6<3eo4c5Z8d$exF2|3arsA?Pv)bpgX8A(Y<N&*O?PF*<$I=z6nA1|vNY7dJ
z9X`R-NH^d9mv6J|KYottTmbZ*lWKCcoorq)o95cf>1ceEWBrp56<M@m6${#D(RcPJ
z`ww4W-r5b^wQU<Ur)Cq?@8q2wyBSk;tX?^XGbhe69oLt%&0j%%={(2I57W>(pScZF
z95{ZFS)Gemy=)#+1LxU$;3(sziUsQUpJ_%b1=e)VVe#@Jh0*|X=Fg>aZj4zS3t7E%
zKGF0TM-J?#duW_OLo3VYHd84!(p){w?p?bnv@W8nsgAn(T6#_$;>ejEv?{P*=}H!M
zHZjtDnmvb3P%=j!g@5O3XR~_wd<umca#I8B*>{++QklBhb6C4-3DM*Ld-on-XkwbC
z&V_U~7O9@qLUE#(clI4-svML$Q4M##cRMFv`6f^9I8Q@;ktuT2H_l@I`ZX*$UCZM6
zEnL`J<NL4GhYPY)(Ar`GFL5X7A<2ot^F>~DsaYl>yq7HG1XICFBwPd9hQ7hNB)Vi`
z^NWlCpp_5$VR2`8@(MXQzLMJ-SV?6eDf=ZAFcy^zuL3+|ZVACOr9Z*HOyZdF1UMQH
zlsa)MJ(iu|DgVB$+$>(D?%N5Naz@E2Dm*}!gKO=pM%dI#LQ+AgS-)UjD8<^%6hz4Y
z9w|=|J{gls%>>5xMPFwjQCp0d0!noE6Q;Xo_2vv}8Oho@w#9L1KO3haIjIzjpcS2q
zmXRMl$LXFC09v}1(5U<AD>txe>3pJcf`x<Q6zf}9KEH)Zp@Ej_3cGjhBG<BrxwB&|
z>^*&$!>7Bcs6<~PPHaLpJWF{)>_G+cB>v~AS|UxBzNC)^{^hG3a9{k8`tk}p-~1k5
z#^B+$YWg^W8_|zvH>jOB;Bu%+;v}4uI^KgqG5BnX&eZWIgLz8SF=8U>P3CP~7LMn}
zk^1yr(%6p0pkQ{G>(NO59DD0|zVyapsP>Kg*DpT6&tH0rz7sq7r+@f4leG)@>;L2P
zJpb&cI5VC1_>0ieSwi0YnQg4BjArIS0wPlr6O4_IQ>jcdK0eO)#1v4WbMabMEb8Lk
zPyHKiUfF@FY3H+F_ySif>0teh_w##qT#m}u@xUK_k*zD|vf<_r^O^TvO-@1U>YMoA
z{{0`Zw4;Icjt&Yr#WnYTnvdOaC2jN8@`s;!fYzG$L3kCN{LsB+T~ny;<es~)qmYZW
zS!i6qzxm?lS+}5_`K#8@)liKtHnMKb3OZWbxbuNebIZDUm}OCKR;GNTn!hc}H&VFx
zCa<1QT)wysv}V~g_wu=q+)4ZF7FMrYNo`&wB-3;YwO!nK$5v|c@#on~uj1qP-9S+(
zHsAdKH>_-@ID0Xl|2Lmy?Sf9O+x{`OZCPQrWvv+$oIlr1U27K~c<}Rl;(a&JT3_Mp
z#R=vtT*1=$oh-iWdOrK$cItA_(6xrY`1gO#Rm(e>-P}ffb&fT+e2gzXcqg^BEqwT)
zhgi{CL*x8SeD+fxU~X$OOP4H)e@4uLzkAiRFXPYt;!j!GUdQ^I-_O0*t{`9A#)F^x
zBd%G!fc4ki!$<GBn!GNvbo1T(<)8dMi`$xLZEK~NPkx%r3)l+%LzArCw1qWGyO<an
zVp_x0z&VbM&f=b1H*sR$ex@p<UZJ0QNf$doOn}d~vB}wX))v~_fVO-e%--couKI?w
zeDFrZcQD%&F{x$$5L0I0!K^ab)d!zJdo2xLX^MUu6(j)V_fgqAS-GVygEqDZ5ma6A
zVv>bV-CGGFDe!DzXSS*xMz~3kWK5^->1<V`#`kC_SseNR>V1~3X>Bgb0D3@$zvvl!
z$Mo0YhP1Iwl0L&JW|i=HdsjNB%>RD=k<WHc$onzlUd9H2y)g0Pv!`SOBaThBu7N2=
zLlbIUVeyr>vhCWn)YQ~dTT{!`cYct~OIv7Pusp`Xb=UE^2k)aU2lZX6`SXAG7i?b9
zMN@Mt^)-1`-~182_|RR{);9B@2Onf*dmSckhWSwYtU8>LiqPq|cB|FuPhyEqn+xBB
zGsnMUP^lSwCyQ}0xk~XV=qt|wk_%E9-WWmOkXc!jDS>lh)!2N_m@QlWTr7(C6$2Ss
zW`Z2I)V7fHcnJX7PO;@P5Y6qtXEQ=9TkfRoP+vDq3)XPU^;dHK@E%6W5f@Gz;!Jlx
z6D3V)w4d?mm?>F|N504Te8<460y)f{eB99anRC|cG^Xe|v5TMl=zIL|mnT`eU^cA_
zuV6#R6mRc6%KrVw*tGR(>Ko^A#oTee^38|&`qzHQs;yh&#IDPn+VdKZKKTqke(D7V
z$7;Ffj*aZuxrd|scT?zG%i=k;9NqN>`;MLC;LbOA<=5{pI<2Wpjj-eO9SlxRTlqTI
zUQN@5U-OOcKElKQ{4o1_Mkr85S2eO=(L!doE@a)(wv6D>LRJ%88dQ!Ao37yOkv+V#
z`ve;|uO|;&f9qCW{^7Uz!B2k7_kR35L(?UW?Rk@ZN6&Cz*PFcZo1F|#Rj5o3vFpuu
z=o>HDagOhJjokcobhfs0<%&99*>RKwYp!8lv5&onj<N4Z4_98bo<ig&2&ELm=Py#-
zxsbxxNk)sE%&DH{!pH=uh?dTIbhWjzY}HyCsuh|Fy{Gr`*iWC}v8SHlbniGY!5hDL
zf#;rin!`i2tXbYcrBY_P45~cA-hBrco2pn}@Cp>Ixp-m^&pz{We)hsUtiN(S9Shd8
zaej`Sdk=Bo;7L|)yqx-~98l)a+b{Fz<4^F!^S@$vqU^~6peqwR^{p@S%i|?(z3=1v
z<-h$DoppH>Q|#S)mIWQK_jqqm`1#K070ucuarbm>?D;Ys_LJ8Vx{{ttKxuUL`b+;2
zeYIH%v2hfpIKwM$RY*HJp?l(ylXOiNH;XoM*;%8;yXM)!)1^ON*7-W+^@|(Y^I#s^
zLS7-0^=Hd(x=*|!;Gg;PY!y@R3tq7mEem3)Lpz?(fjc`BDA}?ubY=N9KIOObwh#pa
zS_ifd88Z}x@2Et-&iymaw1$4+5f(+QS2Tu)WsJUkdBbx~^890%UM$Z__IHiUb;FCN
z`>+Jj;ytpU5WW*>jvYM8tggk}{ee&L;k&mmzgcnMXb(z7G<VLUtG$h-tJX5B2GAAy
z&g|pSpFGZEPd&q_o-tsOw|@B|&p-1s4h~eac6nz;Y-n3Vrfg_<v&t}j?Z=H4R*~2s
zSC>QGyO+c%<G5fR-YghRiVhL`M!%_9g44M?V&PWvO=s=Dw|U6_EbXVAvM?)sDf}?l
zl8~`}Z<mh^ec`*7pQO$T@Wk3l8C=l`ekI%m#WNNNvGlb!CZ0`v{rfU0(I;iU#8%aH
z4b)c`DOWVPT)d!BsBPnchd#oK-~0!9wE5j$r<{nmuNw$|`!sb3I4SH<I{D4Rq@|3Y
zjKUBUrKt%jTA?cyR1}+TT1T{YbkLX^;Nc%VPyFTZh<q+0m&>6m+R_>t8bp(Py;~`!
zOJ(Y3H`CSC%rBq#A;){hEigyPRL#wzlgED;m#I`B%8|=OMEM*_!Q2hEbM2fFzWLRE
z;HlS-5G6`PYqW|`Q5=a`XA7c216N(ShLu~k^WN)Lv3lbcnrrh^Diw11Jh><$pUWZl
z%_Akw<d`LCacyeo6uT}Ix#hOosqa6?iJsBK26@`rJ1C4E<I(5eq->nhPPG){LuaXO
zUCC7MK6)mbX|0-MR5$X0`)}sgPkxuLeeVedCpF0vPZ%E_q*RG@Yvwe?LV;YAqmYl#
zS}}O)E&le)-{f4mna_OT^Q>;IwfczVa)n$jU!ahW$ma{7<0Z1Pj%aD`pdmNNV^92?
zv66;Lnc<O9DkP6?A-#02l%hP|&vQTd4*%t^{!cDeFJVbnJ)k(Z_c{LRzy1#{##0pE
z2ffu5=MVmG9V+WHZdUJ)ZQ2?jwr7kITN&&ydhonHpk=p0#qz#Ai{ZTw3erRdd7jFf
zbsD^t-L;v~VzOhR7Gq&4i_@FDCRZ+fegzPd;KB?`1MyoCJH{?TE>A}?zO-xNtJcWi
z$D|v>Z?7azO7s`H2uOQ&mLs};GuT?EZaH35U^5bIl`>~7yLd#x_Pzw(`AI30U^#Wh
zi9kn^w!retenR(gt_yGyd0Z!rpU&moXBnF_jq3zUB^%pbsfdBIN6F7w&62h%nip+m
zUU85M6V<%`z3<`8ryu5@9)67e32h%SH9j)Ll#U+}Wqh|-ED%LG^7))g-Uds^if8d&
zJD#+>uY|2^8>xGhrX}8M?VSL16EO=^iS9!TvQ>;}<NBFG#u`EvMFtC#PPPt_ab?C1
z?uERjSFV-CDAP~!lBzRQ))x(|Fs{%PF(dskHHH#fyGA(eXxcN3!VXSnPCwqNpS5jZ
z+`wOB=vL(Nn!T?*$5+1c4XRgM$HLh;a#gdq|C66a?fog=fB8uAm@y$QkdF@{<0-@L
zM{C^klnk?)_<3<Q3Hpc?+c_!tDuQ7x4$UVDzIgl%-a5XS)`lW|rE2P{a*Pcf<Biir
zKKZc^GTOGD{l9vOsd7d75*o;<L7x2C>)bfEiE~3eG&DA$E7PD9XU|^bn$6d9U8$P4
z-`dUSG)#@3r<Zx$ch{}F`r0d;+Vd*?_x&*+``{FVs=)4NpQ5|}Jo%Qb+<5CYF5kES
zy`wIcY;u@W{hC|uxrf{@e#Ne%=kVsa8tOV%GjIGI{>xwgE&Y?#eBrPD7glu6;up`p
z%xB*FVRG~Kq0}hPJ@q1kWyRTZ-E6sHE7wir*|B3U!&7A@N6*vG9Jb$i8!x~1D~_HU
z=Jns~;s5^2kMh(1_8%CTh7)^!#o-$k(NL3TOx07Ji=SyE{aK@!7#m=$lBe(30Zz?Z
z!-mEIMn^}uI8@=PYj0$2?P6v()KV<u88;Ic;b6Lw=Zc%}<b%a!tZo|T8;8y@YyMhp
zyLJVmQ;OliK_<%;YY=%p1fXT!TJHbA`>9^Mft@dYiwh@udGpW~W;awbJ)WbkDv#FY
z_iw!{fdv5@4@Iuo{!v!cjxm~Rp@WN@zBme6Q!b5jv3G(w+RbBaZ4o4Kcg{k2zYiSW
z566^2rqlP$xhUl~w@sXz6E?I5vIPyR0cl5TR0Bwea0<?dc$nzgdlNoyAi<L)5)>e2
z6-~~>l>sQfr#F1Kz3hW_Jcw0~)+u^EVgHVY-3i`!U-S?%r1TW_$F)oI8TMB$KZ&B&
z3}&}|l=K0guF9b6ZMBAaRT(x3gm5(DyIE&>Oh4zR62t9aj=rFfQIn?VUHWSJ+;z;a
zI0Gfeo5?;_Pi6?=kFu?jaw^7$E-<88sP1`<^G!F<HhqBp;ZZIQmAUeo8(3Mhh?WEk
zM{zcDW9Pa2#yj|6w3xLmll=3+bE&w$q#9%h&jd3{qr|W&3`;Q9pD38ZBk`Lo%*x2?
zIv&pHP!ua3jWBjRBT3(J5_zSBD5N~U_u7i{rHIa6PEdYZD{n$a;eJlc*d_4mSf3_l
z{qa5VlDbFg*>Y_E(GCLw87vLND=fY-5kWGRrlO17{*i}huMCoJT}oB=Ue5GQbM;66
zh!1U?#lvShxp(`W{PNkS>6=teC)i(X5P~XSFfSpAiwaZVg+?|r;Swi~9q07fi<B!G
zN~4@Sd5Q~tLyYua;N+RJ3=H<OW5;fERV}6AZuT8G!qDg_dw0CWWUiX-gTLYNmv%5!
zQItk{Id%FBy+h+x8K(}s!@1!y#i-1&L;E?~GeiYN&+&tFPexSd%bY)ZfvJk3GCj)P
zeMhOTt6`w~9R2;hy!qB{RI!@j^CvlU`aFZ(CpmCtfI?-2=bwC>{iiO_H!zB##NmAh
zC>Ls&80@BdU=*2z0CG{8Lwj~{{A>?XrEv})KEYsrHzyA7W#7qustR!8&;ic(k5Gve
zJtq#)J)x-1#ZMBSDnq3-!u|utDb!Rm)N`JJ(J6*|&vE47ZhpPvAmh^&D&zg^*ma1g
zs+#fsa~wE)g3-xo>o|5*wOr!Vk^SsDc$gC>j<R>}K`!+6a$xU%%DFtp-g$-Rf4!UT
zo<7D$hd6cW6y1G8@fSa|X7R?Wm_ELf{U=WG(z8F~=*1DrrAaDLfoOV|m!5fsBNvCA
zGYE7l&YHufrM0~Di=7<V{VRU)_F+m>W9-?rmr_*4^jIGU4jy4(Y?9HzK2DxG!+?1{
zo=t@Edr4*bdwM8o#d!Z2o_hRAPWKIS?(|8{UF>110^@_doIG`!{*iIa3PP9+LO_xU
zGMxZL(I@!w<1%dTUy~~h@|&_TV96~#3Fc`qFthy3#4q|9d9?(1%THRb^5w?NxFKf0
zZsjK^Sh1TTmI=4&=I$BkZSm?c?LbZnWc$j)`^IjTNoN>at6KYq-%RrlXS=TdFKh1|
zC)rWu`F<l!d0*Aud*5nrWxC}}%R3Mt#Nc6J<}G7(*|+o9**818GqYnB5Hkn{G$1iJ
z2_aC+BHwDM)k5vP@2;+{>hj)X#{1*mh&T~9s|E9U39ZV!H_SP|bI$Lah<o#<y!qUH
z(ZSu5BZ@?O&fkF%8wsABqV~e4WFs+uRKz5U8?U+W-RO^zl~MXB4-d0rW8`(m-1w%N
zvJ3SPRDp@q&Dz4MCXa~(ZG1z@iSu3i*$$z^P{Ox@LR6doy6tvUZkV$tkFamwehwZw
z$o>O|Y3b<T@PUJjq%xe?y^WW)@1o^mJH5T#oHbk+%R23{Rd9L_2Tq>h^%tM#*oAI^
z2iBkDOU8l2M<`|Vu)(A}{<C&97$@)&v7ro~$l=8e{laGquXu7RB^F)2kJts05+;a3
zSoo>4x%6L6M0|z5DeZBYm{)p3q7Je8DUSn8EQz|yb3b#xrjgZ)LD5>p_O^0bY~!J{
z8^dy;nOn+PXyGW%OPe`C^K^A}qZG8BIl#}J*h<@=Mi2LJ{N!2s2M6iz@1^nV83ywy
zW4=g~S##%UPwN0ppJOG#1%?)oU<eoDUAxGbctd;M976AXcJ~4(zljjg-JcdD#c&)2
z;UGzZGreL>1)13ZF?z~iF%Bb+%cK;F9?Bq1;3e}*^!4@jDw22%SNS`QBpyELD?sOS
zth)UZOl^6aU%YitJWjyKnj|f$L+ONteERmw_})MNh;Fy%p0IF2Zw~{Rzzf+T>|1!H
z50d1C{RGGfPZYZ>QIaksc~S#=uc%=d8}v!(+_${!q{(EP(2s;1g(cI1aC|%xMPqPW
z&QN$4OIy<Pb&!6Az%MzrEZn?RtT&(Z+N8FHViq}eL0pnAEn{0Ig;*oUSq@VTgf3AC
ztI_8Q;x^zeApR1D6DE=W!?FF|!5l8J7ux0uG4}MdvKs-k%-O+9^2=zJyR<%9yie+b
z1-?)$QQ(-z({3bjRJIRgQ;^TI;?@V5(f%$^zIgyG{S|^oaTu&b@oVMR_^%{~g|E^0
z(!R3`2<f|{^RoDE`zR><u2TFyM@P?K5=XDasGx*BgkJ~NRhJm2P+o9xnDo+uK7A>}
zjUu)El;|7!#|IDWak>53EpZ~8-EJ={;;P(9p+eUXLGJ&4o)c0QQZjh_u=bOqyR;jK
z=WVmg;7P8a{q{PJgXHSho4Ww<+61X%-E9&Gg39XZ`gkjS?-GzBZTGD*u_6-$EI-Tn
z4c_vbho%Uz#fwE(78y=$d41m=j%{j)P2#0v<r1ql(bwtiJVwQZy(~8D&AibmOg-eV
z6orE}t4Qe(I@UjKA3RI#lC}->@Sn*ga$mS*VKCPYb592GU(-kKT6x937bD8&j4;tl
ze~s-Xadti`aV)UY7)|Ek>B%FDErEd+a$2A9euB+~8KUrs<V@Ps=xY22frW<$zCPe$
zBtpmRhQ;#;e{~bfRlAFWut2rPWxT~Yc^G@PH11I-F*uI!7AYRyE-C3)QLmg5ww6BS
zT+#RKgw2J|ML1ZJw_Gy1KWgzCl#cr`(n;<$F&=|u{nBHRLi{Qgqn@n7sk}Ii<uUwZ
zFtmB9uwUxHw%&biqvQh1zg`g_x!z!8CcynJtjG63J`hMIcYcioV&O?(7h_j%<HU>w
zc%Ov@TuMpU!szA2y*Xo2ym;=5C$r!YgMp5LR{}%HUpA-&&S4CVwx~4rwm5ilL^7J1
z#pZu6cjDW}cal+j*~NgTPwWhdSh6|A>15(RF^>xI54c{%GRAE)5)fGc8CtZ7QU?Rr
zU^T$lL8u!ynabjn6*M={!}+#eYAcIqi?^wb@P6U|F_1H+Br~~9?iGwK7>i2ENacqZ
z7#cAnp)w^jR2S3Q+G*+D^mBk?WiY<4GDVCZH=c|h;KGF#hH}>Hun8oVU8OTLj2T0b
z8m6VWl_B#`y1>C5t}*!rM)DGgsR(R!Ad{OP&<-+3il$I<&5iT;$<v&dWwP6Lj$}8<
z)B1|R%bG;FIV^)2oq{A4>mx%_29FFdmlWe3fsS%p+Yzitc2Y+%qmPr*DQe|oA0}<1
z7XlBr3#5gM6vETYA#@2y-6m_49pA?(08R&E2r_m!hUbir`Q(^PBnB6sprlS-5o_%m
zke(q(tCOE(?L9%Fn6eXES28j<{fr-J<IX0A4W}5JYNvteB@?I{P5P)EJ?4a{z1G5e
z4RMb`zW!w=k$?q$zKmiq8vCZZ$4SHzzF&GS5hOe3Pr`m$Bw?qnH-;^s46tHebKJ`B
zQimMGF9r=<GK^w-hed#kJ)y6TM=`h={`CgL@l2q%F;rNi89#8c8aqbg%h)YoMeFX7
zj~Kr<#eo~&^l^E^_>jM7^l(LcGD=Rf*okve7w30&W=tXJ*%6=rP(sGQGh81LBZ6b(
zTrxgo=VTG@2_mp9#9Ph)GOqx(n3Ejm#f(r~1wO0IrC&R^vhZ8EwA9s?(IrHY=S^Gp
zbZ4tVaalDrrJCl}&NztE)QlNRMK(|Kg%$?m?*dfTjish6&Bf;C=$WvV<f8G?b@dHj
z^N2(!FB6C(356*FqE7-C08(7O<-=@RyPp5}l}}S$R>8!o6o<O&`SVY2;<Z<HGGs9d
z1IHQ66b=5dC@3UHn6}30@*^W`xbsuYFKXt{xpvGC&zCeT;#2pmV&{7Y$i>D>%nk9E
z1YB{GF0E$$HMjBkhi>K2wpX~=pZ8=}sQ7(~71w`^&)vJ3C2KY@yS$6thtHAI5fR%}
zl#;{{<1BKOV>zcE=8$qLcY}lNYv<5bKG{F<*}OL-Nkl%<@oYok18&t|ecC4%(?uz(
zhQ&(AZ;N-CJUky%0t@8_vY)LACLj5P?QrKJQ8XKWwi#SG`Me^?w~4osv|=eoRCazd
zYylbU5Pq>0l4J5!pjfjK2#kXM5=W(k?x9{W@f9M_!UeRkb3B?LIMXK@SG1Rj$DA`w
z0()!OHTFa0$)h}sT_F_@tMG3NFk=+Jx{#f0s+4UD_=g-rCK465^|&(@%)2C4P!_?q
z!bT>EB$B!ycpHmGO>0}r6wx7j<B-EQA%`hk#XJfBFi9*L)X5j%Di=MLe>}{bjgZF;
zhlnw}V$<^h-?tKr8T%e9v(0mcgHlB-yZm}?*sz4I=Cic-4i(f}a-5Z0%A<V!l8c|f
z4-LkiuLy3nuA=PR*3Yz<OD-%6_`2JnHW(P)UC!}JR^e;r<#wvYDa&hSF74yu;f9k_
z6wXQv<l!CE28H-2!O?;278YqL5w9XZqPfj|w4N&d?wiF8$1QI0r3J2<bV(hb`LBP+
zr#9B{>z8&g0!&_h6My>IyO_6hJxj+7@Xnr-)J$K)|NW<*X4dSbY+hW=>u(<*pHiM|
z85?jGFYPOWEF%2#?y{e&>><yKKlx`~U?NLa*7NO$zs|+pVbYV=QCu~STkrn_wZ$F$
z^yj~(wJ*n{d26`wsukqB&hhLs&vCwMh%qykvE|w|WCmK<`t+|k*D*lttkrCqQ$ebB
zDmBCB`SH^)F;p~$n{K(DNmXgyef2rsIncuTTkhoEE!R-g)WC!VyLj@azot7gjxD!d
z!9d3$E4@}JrY~N{HR~2be-m4udXC0{ESs;rj=7WT=x995^S^nEj^1{j`Q<NJwe)k*
z^H{tR63q5NI$OtWH?QC)fAi<;8Jf=D{pZ^m^YVLK7%b)TYp-YFlse8H+QEyjz0ZIu
zVbvAauyXDs+RyH1>vL~VHfIe>D?52{$5B>Zb0Y%>U*=SL2DfaOPbyo^aNBX7eQ6ur
zx{Rx?zLt4Y>gjAe!iz6&qg~ao<+=@|bTMPAG%sy^h7%WiDX*KtwbyNCVs)B5+h69L
z!_<X#J^%n907*naR9#$m!#du6@p)PYGF*Mr4V--I1x~j0+g(k$Vl=_$9zo+AQHh`_
zxXq0pgz}8U&H9pXdb!Fn$y_E-dzrx|>f~;d=`UBEQZ82@2uAYA!%Bc-C)Z9%CNOd-
z7ZqNyBvrR}qYCn~F!{y1vd5I)*4jcKsC*3ND%kAi*l{e`2%Nm+@7P$2c`VoCNYY}8
zDVX9*h5|S1N5)>3^d;7gXF{9WT{?la(Z~6rGb6$;Q9k)Q($!uuVlWDQ%!wzkH8`2w
zlaR~}wjNc)D}Vc~Xfp=(^0Cufn%U?OVQXzI{K2!ku#vKu28*AdFfs)Vft{p}`2HbX
zPZ#i$0``--Ntbu<c-R%nNd{{PkSh>GOj>*SV@h9?zHWHR;YeKIX-db<;_`Vl>^R!X
zmFt&r=Bd}wsWka~&f`3{>#JRnl_)4IVxBLV#dP5U+{BhBq=;>+T%1{?z4#H6#<J-U
z_;?Jr3+0`PMpbrS$=Sy_lN(S4$CX>KxB>?uw;0RDwQnbn|3vIdOq_qqjTLJd+aH0u
zYv2}X%E%M?t%1)bCRO0ClhsdqD)<|{rmwz+lJ;Ya)W*BS`4l(bvxS{6JjT(E6d!%)
z4kkVIDvP(=Nz=ApvF+>#H-797SX}=LcD2hnA*EyU_%0$}k;|yqAZA0^5rx0v)LN6*
z(XS>z{63!t@{Acbk#h&%hB=$LVd)gIwX^ur=kMdhj+Z&wKb}9hcQfgdIzIiyPjYeh
zHV*eq<jWtwiA)MAC(PqxcU{Brov-r7uA>Z(z-^!UGShS;&%bey+duXo)9O;}dGjUS
z-gk_hufM=kTi>8-1abpyY<qn_^Oh|j6a9)krQ;X#ho88aLpxsOor7ns6ZE#9XZNn%
zG_2UdmNhe-raFKGkU^X(s%KnD4`)037`)iTU`Y)XrA4gT@(@?g$neTbuW+to2&MDP
zx%^J<SvQ5ZUil4&o4QCVsF^aK6?3Pcbeg%#)-k=lgwioHx$Wvz?0WqL)QnBsym~qc
zMQ?i(dv@=kcIl1Wx^6D%;wrAa;Rc3Izsm_dncFt6Ae}Ddj*owe$@z1<^y+pxhaf-F
zPv!L0T(MvRrQ_yv-RfEN_YX$u$s`}KvMrvBLt7XG7UAkgqZ9Y$B>YK|Sx{KGm9uH1
zz4k6?m>ALtmo<fxEiQztlEsXE2{JG-;q8#byT(#^C2`R<HvBB*l8P;)v`=<ME}1-?
z98P&H5|k}q+$e&tMLatC@(K&D2*8n>E$d9O^V7A5u{`05V{D<KK`a2o?uD4wD+jE4
z0g-yuRVce7TEw_UCsUkweq?jK@B@=fgTl-Dfe|pCKhiDsA4%rQBIsje<cjBYJMjV}
zQ2<!KmPtNLqFP`jeA~F4IpnakYGFLr;$nR{h&|I^h-k*Oiz_>X$qza)hM8B!y!8!<
zyRe1BO5kRS9qZ59&g`Hxy2Z8%?5aI(6PG=eM-OOVu&b4JRnMmNOHk=DuDE^+moJ?~
zT3KU?!e7|SXm1vB!UlHHWMC16sP^pQxI{@iT0e1Dc(hXfvl^|BJMitVYIq!A&Z@YN
zlifwA%L7thfoOQ3z!BHiTU>Zt5DRV=6F7mDc<(m8A|;hDfX9lWD0RLW>uMHqHs?v%
zo!>jnmA~&`dkVP)r=yeO#v+1JoP7H!9(np5My&o7%&O1RSvHR?E63BCE1|BYhFOy;
zIoDmq1Gg;WRLc;P$5+|e%;Xus+l+8DE|rqv#h*mNk>@SBBn+M6!pXh7ziSVJ^Ut%s
zp_GQ6*~}VO%Jd~`$W+!ceNvW+inB~9ZsPBE>}BMD<|BV~JK6M8fadV~Z?X5tNrpA#
z^0lmAF^%To8EjZFL`}ma#?+N@;&2OHy#wSr+PK)-0V##jBeb`*Gn{wHW&GSF^dH;K
z+wUL5{6<1a!z4CdwHe*rN?rX}D#un9aM*BXMFwrK0?>-(%V+ZX4}ZdeBTdoKT%JXj
z&1dhM|Hkfvhok=aJmW+PB_=RMo>NEmaQsvw`oJ0PoI8!ugRtSsO{DtTsA(9>(AY|p
z8lbcFGzSlzM3*(Oeqj}*RdviBpXSGpzR97s0W0&iH{au-^&4nCQ$ypfH@MK3L#~+I
zV;uYdu>7u_y<PB-=wlO#tY`2OwC6J(ra}%$9x>jbNxWwsnX}3zGw+gwoYOW9&TIdD
zLSHkJe7uDG8hs(X3XJXKN~mw&C$Nn%AiyHrl`*cA&`J62J$$_}h<DW7N?H0rLEOZ9
zmn5}I9D(6No#aPEcOjrXW?(3&N9dQfv(PhkjzWH5-pis|iV(iS77DW?QWzUwhWKUu
z-HoWienx3AvE1n>6l1P9D&TiwUQ;t@?|jlX3SCZ1lB5zF&IMvB7#pPouc$!#OT=w~
z8~Ie=OTkbHITCsbj1>w!+{@A9r^yV~P(5QgYX+P7<^FBt@=|YqK8ukSO+>s(jO{wn
z>|T5AeJMjM<b8Ar%(TPZi}}Q7A*6>*$TxmFCui*Mp{+;N#c-40DRXbQ3(F`nEm+a~
zI@aF~70MK1i%#h5wegBUtiNhI7hY}Ta!Oo8OZ&>fV>NAGKP`dQ<c5YA&ih4P9>vf=
z51rkERDxZ|<a2$rwfCc{olbV+0Z2KpP4TAOH$BX1NOUl67;uTuL6az1jga}AsP9t@
z^mTHf@hm&GZ{y|Xf5HFyr=Kx6+{;j=hWfH1DjLS2`nt)-^A8RU#>Q1B254z+<>2m}
zY}@`O-}t+K;NZFFIV(A>DavLnU_kUB;B+yWbo7e^Jss^-)Q_XQBui0A2}S7?lNPL}
zzGn}QeCJ=-ajcDu`Nf8Oj=aiHRFsK{&R8Gou=$}D8v84mQC~&*gejEtx6#!%Kx@Y!
z6UWt&&15JkEg_wPwvIl=j&Gnio1wU*giH#ChjNsbmr+t$N_|}o(VcvX+Pblnl@?J~
zUr%3mH{<85V(j1{zVn^$^Ul#0GMO}>&^qrH7r@By0DU7_#y3=w$z&-hDMBe|+`ogq
zv8%Xm({$c=`+&=l!ujM&ELiN0fbtFQtw(Gh6^vzeVZ5T(AIJ3%MzpdeQ>dBCE<J1=
z9IaCA>Qmv(;XAFieLPXDzAjm4VR|{HmP2VVR;a3?u>SG<A`~%~gxpS$P_clW3Xh6!
zi=dGcMT;E|p`6_ra^2;QnfxmU^e<@`xsCk;T$CGE$0zMBgTY?NuaqEoJFOOcB~&Qz
z(y~#AqZCWpd{l4?j9*}CxBHWn&BWDcwsC7jw+j)9;|;BpXy^NM!uO2B8FNd&4Pz-7
zGo05uCj3^%eXNZ=-L%~a^E6YVLmtMd@tIh85znDe1q2O@74nWE&#8Uy@Z_^^Fw}FA
z?FU-8;)->Yqyyg<f85Tgz)AR0RI_O-u081SiH>;$i-!~$9qb+j`pGc%HrI?@3i+CH
zwhoWk&tX>ucK<8H*poeQ+(eZvuT3E0Tie|$QGI%2c$e}ecgD(VQIEJ=2k?AM=5#Zv
zv5OXC%6U}*Uvsw~M1whBkgzCJDn&Y-LMavfrJExcG!4D``0?B48I$g#wWEW>r#qNd
z)yubk{uZ;xW;xy1W#=~3n69m>|5_xB3W!#_#<Ad5Iq<&<i}F@55fT+#f8%mqe)bjm
zhx4Q=CvfGWI$nGIx75vB%Y>n$yuNck+3}0He8qfb&Y4H$a5K9OA17BmpL=iG$m-P#
z+4ifSvF~&%6%!Y*u%?@ByHCYN9^zbEimNxRW5)Cu%$iWk!2|o~8`k8D8@TDZE15FB
zlGBHe(Vx@ga|29SdNrF@%%iXQ7{`yFqHghKZrHez<trD{e)J$`JNj97%}rdkZ~>F+
zE1~-|+jgHIKRisu<i*^0%{q$vTR7g>8e1bOw>1i#AEbS-ihJ(9nRTm|u=}N7@b<w*
z&Np>(?VWeBYRO{ePpjeR!DE~~*TSmnZe`=jr7WI5iPHxU(bkjW@|$mC*_;`SA78_f
z9k0@qox<(cU(UqIGg&gNh^L->mUA5gtlo43%NNdPazhpLoZ+oQZLC=~lU>{1L64ow
z{MrHDdUp?P!<F26`^_v{vY7E@!yG<#j**c8iYBaNN%<*$^vv6g;NK5rb{`zC#P(Ij
z!Tt2)-`AmR5-<*KQabqex%lH~TjWKj{r2vXnZ`bLMQiKGakyZXKU%wayyabD6=#G|
z3BNZwND@_mdnAJ)8@ED~_tZq9h*fH^Yv6XQu=<#-5LvjH%c>!VCGX#f220+16l4#t
zn07hOVi^~$-hD~P+^C&y-fC0{-_cHHgH3E#d(R3<+>}wo{0nSgceXV4vYFsfTWouq
z6*}}!X@kf=w0GZ;vI{kz)5PwiA!VbxC_>z2)MzYsIoKs_)7MzX_q(XA9|OVebu72S
zBpKz|M=3c*O!neS2F_+aK1wN1U+JePKkm?O_eEYxS583eu9+6ctnc9Pw#YdiQ09n=
zQ7nX)i06bKm<q(iwKx_tox@r@g2d?-x5|ys)6>Jyh-UiyrOckxK<nv)>^t5VJ!L`S
zsj)iX$zo)8{$=>U#Ecz8VC==9m_!+ct*QK2b#wbX;<!R3es#(2wi^!c;#cT`>th~Y
zC69aeUrL#bBRHVUL{3%$zn6TZ&E@Xoj<dH*EFB`2A-PK*_=dETyl2&(L)gL+zsQ>+
zzG4t1juJ!jnzD2=pZ?IT%%3%#an)IN?>oS$^8?&|&#f$3ynth`Kfx<|8)>=F&NX-6
z%i?+S894L;zu0!vVrXZaHFvHTN=`xi$}_0SEicKWVFFq!v`SH1TS-@YCpoQA=_0Dj
zvvhX#k}a#GC_g~Yz%VLZL|uI?8FF-YcF;FCf=U-rUtfzF8KAYjle~sZaT#TqJl%bR
z&IB4Ns%xnz%`(v2O;=Apc{EC8sHv-`I6XpZOB=)S&wmw_R#I0}L2pM3-Tfn^vnABk
zR-<x*w6%9Kl85rDS}IFYbanTUNo%@$2S965MbQq~NN*?Y-F+U}*<`5F)YR5dq(*3K
z>tIAjLY7w4P+d_>e@_?Pz5V1h6qi*}Q&rAze>WXnedLMeF3)KC21ghf=%MDaTln&(
z8ou%9bMy`N(AnLOMp0H(LuE;t?(RM^Y3S)6qN2Q%zV03<Dy1}&r>AcKO^V8zTFQ&k
zba%GXH<+Vl+;r}F@FCh?`yM~re!>e*7Z@h!_#Kan{?^GQT{?yhFJ;JniEVH>UMMH-
zz8D##?zrSQM|_Fn%H4w`x2X%sDf<?ZJH#WzrqEdKz6qC@kR<+rG9~rLJMuwt_=(NV
zClho+>u_#2il{}sqEaT@1WpEvpfHt%kz8olvvw|bnaZxfm2a;^u_81;$S3c&@`@{Q
z-n{%v$9SY&XinqH_I-f+sQt7rOTi-1Y9w?He8Kg{!_YtG+g69z*dj|79HJO8^NP>m
zn8`M#AP{ZC7>Ikx3zD*(oW=JK9~*>^g~g8+|8ag8|5s~@$||WTFQT)(oxzcOkobk)
zdcN)RRAK&+xFp9&V&d|jj5Qatp(XJaBuT$?*oLus$@otckB+}&%~04*qMIl02V*l@
zZ^>KXc|U)-W9FlaDf24iQ={YIapowx%9tuHtER5HEP^UONb|*Za(Sq%si!QPr?s_>
zk$faaS!E4XWog<jwlb8*<2b;7*l!_x+*V}lidG+!Y-9%sgd}$XPmtjKU<|<vlm{z=
zbY|aB{<v~lIeHjW$pn&2Bm^@e&S^2FN1{g7Q3&00df8WNs-`Ys^PFOydFfq-1I%0(
zM<670u$6Z-cDjhQ*WJbZvKF5B)ys4ZYfr~$VzJW@U?Xw!61VLu2p?CRxYIJJySZ#K
zu~_(s$Br2Ag0O%{0PbjE>M0b2GPsNgVbLMmN)M4FZ{tD|k0ANbGea*8_ANa;C4PnE
zb}<>xCyIq+k%#A(qmj1r3uy~;1j`ThoOn33>7`9(R3R=h4wT4B62Tx5dkNEL7Ju|G
zZhd}96bLr=$T)QK_{FJ9%F>cvqApM{%2*5w^klJ<jNfEY;M%A_Cugs~M+1NH^zgXD
z706+~f^!HP6)IYsR$!mxEj-IBB8)%a(vq)%=Yhg!90!7{AquDT0=^v-06vcxHj@i3
z1m_5U58fBd8-{0ubjgAu<XvC-@WFmwHWbCa<4=FwDCU!S(n2aj&eNdqYqWNk@^AmM
zI2traEZAcK6EPrtE>Q$Jp7u8_{J8#IUZX7>k{J0%J_y6aJYJk#VuqP>=4!cerQk%`
z{9iN(lS|N3N8e11l$8Mxw^ODKs8l?bl~JSfv7;rt$rS)D?}sY_3VBJvaow<vcx7U9
zt1V&Zq^DY)+~tM>Ko$Tvy!2#-i-<JBCzVdcyyxce`;*Y$o8B3UTrP)JigYR+n{gD^
z(#b=XKLAyrOF=zl5+6l>g>u|(=P0}qwN9;YJ`;@T_c1<#sUv*(3-<s0Fp;EpiQcY0
zmGfO9vh9wCX>Vn9N(8lIfwrRwBH~KQRN$P4b%^08ZTw4RNz7gB3Ng*&NdlQ&eiI7=
znK2~)U&0oCflz?5h3s;rTc}MU#z5fi`@3yt0MfXg;tB3auET5NDd_Q`^7vVWI(YmP
zO%U#=CFeBL#r#qzMTa64;^APd!n<aH+JOxVwJ*Hek#zb??EqQX&B&MFw1j<RS{+R#
zv9kAkU=*x>H{VLONsJBHUsH(R+qf!}8$B8P924e8=hMcAWLyhOLP%6FhV@Z%-uQ5G
z0fd-8IfmgOCy3<w(YUqHOUIqP%iB(Xm~VZ(y}xnU<#KX#XS>|J2j%J=f9SZK+t5hG
zGRYiY$d}V~4Gmv2AgcmFXboV1wkuWTf#k0kK$SIc&8Ag!Ha0R4?F&jZqJBk{EL%F8
zjteb}7{XVeNi$>N3Mxk0=;|LaJ)EZ+8I&E+eH%wPnY=1nDKu$jF1n0t-$i<dqQX-N
znKYXENpo1UY8egX(Aw5XE-#%%>@tO`CFLK-htcw|xyQo<n7p)OoWs12;7+*&Cozi2
z#}>&l=a@s%-ce&xsT8T$864?H16ms!nd1i(7TSHYiAubz-<yAS#U$(}WRGNavRevJ
zDw#yJlg;?5^GQ5k^yAOjN4k&YDrq+_3kn;0aS+R=j8Kk%JW}z1iR@<=OA@|-+>*ts
zj0-omv4h1MuZwi-0GnWu;n_<}U<3^P%8Pj?uYp50_GA^qO5gPV28$UZnssqyW%6(q
zlRKVpDukErlv0TYTl?dMi>d-~t%CWz_wkrUT#p-+lho?rIxB^}N6+$wbQn9fws>f9
z%Ephy$@#y**<n5kmiCy_+i<^S0MXC6j=(a&3Z$Oc6W)DA0oKk}C9wo!CE{Ks>}7M6
z;4F!m<c9F2wI-F$kj<va=ksDW()m)Ll9S!oP@cB2CcyL5@Q+9BHFlLGZDY98i(i~F
zqOTXjLN~9__ngBva(UmY50s}%A`dC|Pl+}j_lw|np{G2q_ckfCiyLrbhXr1k_v}J3
zFqL!(*H`&Pk7+CQ7e%7|8)wP7NwZnEdO4HFRMOVcOg>Y^hRvH;IA<2K=geVpO@=e)
zTRkDn{G_tXTC|*%mo21H^>d-En~Jg1S+{xxb7o8?-`7q@?_eyNlOyGA514@Qmd9}i
z1896si~j|fq0mfPb_4(AzkHSJ7B@I7REli2h-@|!^{E)ogAd(9Z6-rDlg51eDwWPM
zeg0A=R2Mlz0O?GIY&H|kt{|OGTOd-IG%96+E=@L@C7m*kN)(92rCfL4gDe_T;(@4j
zq({1X27mZRpJnc(@!b8%FR^*iBpVy)Y}S3_l?aA(Izv%W5vjN!N~dGJh`w<Gnp7%9
zCY>f*l#O$Z29+Y6&5|uL<7reXO*)k#Ta+b}PQ^Qdk@IGXvSc$+Ta`+YN|^wSzu6*)
zBUZ=~VpRNTPu~bn?GlXkT3A<buIY-G=%lvK+A@8;4IJwu#(Q*pS>DVN$z}Y=^ilD+
z$lhiw+duwieN$Wenc?k<NX)0=^X~qoSgzRr?d1&d3>7)Ia=+OGT6>sj@Z|Qe6vdsq
zTx#uu;7c4CNjqqz9X{Up!6%XDqzncl0$t^$RZXC?Ib}Z!dug2@Pi<@@2vcHe7AGd4
ztbAsyz}`{_6ARX73>G?E<qA8*c~8g0Z;NgVtRvtEcQKYOJa&kOvxfWUV$jI%l8|RT
zYg1+IZrdXI8e0Xi6-Z_Kn&+rFJ47JZh%|<dol*;1O3ba}2HuvC(_m>Z3GL}5OW+sk
zV6K29Y-(qBcsm>gE{02`+-4}{8iz7^ae){QP(@W-f9FT|{HGsc)$Fk%ZrvE3PR4H1
z-;{R6n3dHpQjCm#TsZG?@)<5tZVnM!hLe5WW4t^ar7cZddW`1zhW)_-BSVO<<OdT=
z1!71EX0}(nWM2ZDa6W8(K$|=vV#pMv)(&_%L&mqyyJlHprf_puj+D04(zhLU8pu{n
z<ja5Z6=v0zv;4-7^XZ!|LzAMUteo=l3TCd|!iP64az3OHL8O_z;SRp|z>PH2)-z-3
zc*y6epE8g6)5bA=<}$we)h|(-ik?EDw6j6Rg6LJ?#<&W9TQ(v@jTVPh{Iq8i<bYPJ
z+qjCKJ^m|JtY5_wJAcKnPP1(D9o)2bE{t^X%#R*tcdsU0+Q6L;evV1gs`&YLA7<yt
zF6OVlp6k~w;`x0~TH{Yxuz`=>aV15Ju=9nV^W4rBKKZ3j@Z6)1aJ&^h{@G9S&J+K_
zso`-v@W7o+sLXO^-<$mS>DQoU1`j@XKUHdgDHAI=Xq?`z93m%}x_BdH&F}E&4<2Xk
z#X3H`aS<=>Ime6@S98a;Ybc_hH=lWe*LI&~+`QG?f7{iRr-#||##Wx$euP^;`B{!X
z@qP9VjpvIWTEq9h_c&|r{|_v!9$_R~%=ta9@YHYKqhk7U?%Q$&l|?De9eA53pM8Ud
zl{fO?^;78>E~0$+9N+uFlXMJcS$F*%Y+5#hTz?BsJ^o|5$F1eo1;c#rXRlH{djt2Z
zt>V$gpQA6nOso;6v<X7^OZ*>`QlgK_7C9y$b-X%C?39WAW-M=K(gdCTYop(5D+2&=
z%-*Ttje!kNS-g3RaV34$v{tsg^lgo^x#=e#N{hr#kc9CSCxLzI?{1j$TIvzc9k+Gl
zfH$ASP3rCnK;Nd42&Ag!fTv^3@^a@Rgeif$Dr8ep3wXzrOGG-5U&#)RKoNHL@iYD_
zj|I3$a;1<?a{O2l*#KPOsa&BKUh<PR#gkJQ-pU;Nx#ueu9jUy{9DcmIm~-O^yE{#U
zyNlQpHgKwj@!)fx)KLprMEAlrvapzBERK0LlK6~fLt&dl9GK~ZO#ZO`8R2W5Ev;^W
z_~yOf5Hoja!&GKh_3@kcF0gvZ0`{J2BbCX}-``)rR=&N>c%ID&u~o>T&Wkha8-;ZV
zcUhfoMj$PYyD|Ko$|OEEW~>acZ)jfzH;&Lp#8Sa|%$O3XxCU`~%FijTpD(WA1|8^O
z<7hNl<jcBC7-6D-G)E0T6tV}9DK4cl)5Qy0pQCDEDj%Cz$H?IOy!gu}AeH5z#hZD0
z>u>CBW;P>g(D^ji-*5%nUVMzx-NT$datP9CPQLprkGx+*MZ+{!t@{&di}Q30q`i4l
z5r_g9o68-cM0ScmH21n#2eyR()}T6F!-n}q{Oajfsav>)>e39Eu?zXkoh$jpqu=7;
z@BNIk9sPi&C{xV4FFwXodk5HZ#Zr{Yv+wn-ymNMlx{7QhNvfPreevV$dFDra>tCPe
z#t%I}LuoH(hZ<NnZvthL7P5F8Txjm)`UgHi)rEKY`ooV>v;1aOPOoM4P4`mW{1%V=
z@Rtk~l_!&3tWizv82Y;|l1ruOZo5c%eFNF5$$b37H}TF>kMi|@{s9NhwnK3>_doa{
zPQUh3{_)}Ovg2?QTBR5}X)+brG-OJdGG!bY1@%+sq0hX{H~;A;EWGA+W{xeRzoUuw
z-g%pC?;T~;jdw9`Yzf8H<Cs|1&*R_!KK&C`vTRZ%)iYOf|5Y>i$#);-k;i|<g`Pn!
zoI1*+rB^Vyx`b6%UP<@aQw-$uVLjk>isEdW)LHbfI~^`DMeodt!#Hoy&%QJF65{ns
z&Zkj9KTy`^><U_kV+b-b@3lK(j%v9LW8Wp;TdJ1iBqWzRiODU0aSn5Nz#VfdPvr<Y
zo<qvvUB0y=%ax)eODs+fHzSH~S*as9(zcsoqN@&jdMk17l=sH$ZcAHAPf<vMOUxoZ
zIgiph)L(RWjtnGsY6{BiwN?HOMIeX2`{^gCm}cD!0ape>dYVa{{IT8YKT4Q>X7YA<
z!!S_dgS4fzU63UGm+xCbT6fy>h3LGuyX6{s2~_40$3yOjvq7SK61!M#Y}enl(ZPAs
zMvG!%G&@TllCt?losUib#nYS|tmDJCUO~E~mMwSP%i0A~;-6qN^Xr8CW-(gmmGECh
z0$oqAK!-v*hCZfve((7-g7>0^vG-~*jq@M#s1<JjTv6ovo|M_OE2Njlv1qYtZX0>}
zMZ0gNPs}sgYw6j*#g>T&5#!2@t>XM=v{u%Zp1=YF<#zf4?;2=t<jsA({K<d&8lSsq
zCQtw7x1<!LQYlJn=df{7C$Assw7>8oYGyPQOqw>CRjZaVYw;C)^^ZSIO(qX2!^#^!
z%zyjpSLomS9B2B{4wnf0=ocFtNy=-&qL4S=KvM!R>-c^}2~a*`6|;)^nLMSOk%mPq
z9#_Gh%Bc)D9prdpJA)=%Y85>lr#W%@A|;cWDPCF{lb`RXsq`3TPpL)UbPr30p+u!9
zDk|dbSGV(jUAvsK6DM=_{cW@imNIANL?(~l$fqW)WL#OAy=7H2jDr1UF8}}_07*na
zRITCowxhJQwsQ1Tvr9E{2ND<_8X%o5MgiHPVse8$l-7))bhwHA$C~IK&eIJPRZnJI
z*&t8tKTU7XFumQNGR4B?hV+1Z7e|jZG1POB&fy}eD#}=~>~^l6rg{HpE2(rD<s~V4
z)d-De8tLsDprd<)veGp5^^@p2dyvx?Iv7@+@!+S}ek#YhwU;w{isJEKoFE^Nk=-s1
z1IX$d8`%CjgK$nUj&am*wG}1;Z<V9n>ctdTNP@7dBKy(GWNyCoR+EVL+O>6wR}_XS
zkOjZs@sB6<5M?9;I=Q472B0H>P;6Hm%0fZn?qh17axe8Lu!1!BIr+3D=WzGO#<V4O
zu=mq=-zFjW>+Gk*LuTE)0d|QMyC<?3SvkKl1EM^e+wrB%G*;H6e}&?>57y2>LKJ&b
z1y||*Wg2Xv^>hlLmX{mdu0`k&@w_4y!zGr-!O9LU3V$D9-x|&*5)NrwnIt5}_bn8+
zaWaOxFkW0+J(BoEr=oGBjYr)X2{cObN}<-iFUt4{v6a5*h6`+DPbyL==x9FA_Kvem
zoU@oYs)G|dyU985J|G^FGFwj+d|Ma~Z5O7}PK7qlJTxS~hTJMJF#clxTJMk$7|t?r
zFSy3)iAt)EXs?CeDPmcqpQ@w{t^W)D6gvBO$(=HhU(7s~{$f6jm2+d7D^XFv$zK3+
zSh#&U@9Iz-_W;!s7IN*v6o2v6zodTUy?p-uYkBSe{VD0RV*XV(a(>(Mbm!yPQyv2W
zImq|0^~XQqg%g?u5C0X@$CS`<eu!N!{FtNfZsSk?pFd&I*r(XlA|7u;^aTi~sP#oB
zyt9fhwn<Jhd0E0&tk}4bGsjLcebx+)9lprr>*vzdbcXbpS<IT$K;77J)Rq@JYlm~8
z>Fpb&Zp;|UON-I@iyS(1l9!(PDc}FsAMjUy{|L?9gIqZ9HvMB(bKmA^Y}<JRrTRE}
z_yq60{ydNV>!bXiuYaBGht6`Ysg>z7=P-86L}pBGDB$>!8=O6HfXXRLm^N+<OP9~%
z#KGfqwYSilZeYRe3Dnd#&`@2$@IW_BJy|YWFqP_>I>y%5pmd(zKE<RdW0^W*1~sM8
z{d_uA#ll51X_z#FhT>t`JBFDvYXZC8dWmiCA4X-PUy+a)sL02-*m#bL3A33tp`QA&
z<ESdhqLgO)tJ~Q0z(c6hJ2}%ffDLTzMu^-}>;}cb61{iMpSWki3k>@<{%efol8JY@
z(G)u2?vuG$%OgF+%E=@NCyQLz%CV*VW|vrs=kAM$1rPg5!NfSp)v=H<AbYu+pkp$Z
zGFW+WedT3MS!{a?me4+CY++X`nilveVP~nfEN~2g$;4vsVbi!Db2sD4fs{?iRyV7s
z|8fbWz=~GlBK$#E&>vr(QeHEGiMxMAl9uhO6lmq9#yL!cy>j*!Ms$meD31L5X+pit
zmv@}~yaGEurfq*=1AB96B(S+>NY-Mey;aY%wcudu4;XVhEH&1zjL*u2MIv!4#{gQp
z0p)IoE$r;rDhZ9Sh{R6rMBAhuA9FDv`AO`6Xy1ljRbwHLHZ^~hhf^eu5I8PSTmskV
zk$&ENZ7WZ|zME8kGh5$0#On2HC{4S4nDAaJ;}1?o{C1+yGKH<bYcKgJ<YB+CaH4C`
z#;7cU$NLo}(MT4AvF()-g^E`MMbh{W%#L@vf~2a1LwWTDl1Hq`F@>VlMe=o|wKqlL
zDu>YDa-1AP1#>xMF!wtvA7~u!`S;u*?Ia4#bheb@43J7gE=OruIRL3_1-D$igsm^W
z<L1$t`5U%y!|GY_Zx8qI{=N&$m^p<p6Q@v>8lt;@gocSz7*ktCQ8tZA6;o1_vi2~z
zr|aq)zGj2b>gTF`jXP&^ziDcYI|>7p;>sJZ=aql?2exkeExS&%bLH}BY<uNp&Ww!X
z-aBt$^|HCN9NAA}f11T}C-B}|??T-)rj>Q_&i*r?H0>RITz}{NTsE<Y9lQ5)@MI6S
z-E}(~*Q{VtMV>u}j*}biCO>8si%QS%^Q~_&sNvM{Gc4P5GuK|Rp7~S9a{Rz94j*k~
z*|m3Y`LbCI_jR)O?QNXz8F4Ga*m3*Y&NEWKh`VmRk@|td{P4+_>FjLdOlOKMcih4H
zm5Uf|ImL;yO`L4(<eEF~=E^mfQ8L)fp%drn=uLCmy|**INHZ|l#@p}gV)ok2%pNzM
zRqK{<__bg1=APqR?9FiFt+%pday4yTU7S0%kJeNJV^s(5A34wDIrHc_xr@WcPSIU9
zox5+pf#r*)asJ=|T6%}*>+EIiH5+;T$)9kvx!c)G8GC8W%GcV@77%_=n)l|o+lwkt
zbcvO%x#!1UDH$Iy_kM|D!3raH9$a6!DvFMo>9rUetCwji=W+E(crS*y7f-C<?9Pe!
z7M}0d=6#{_;qHXCI2sa>D|AWLvafBgV|<m-(UF?{RS1EbZ&zWmn7ow5#v~?(Uo^j*
z!_8$$$-~cT;bKO)NkY=MWs2|{hg*DqoO0R-$Bk_yag6WJ)CifRJ)G}2yD6)dw<Bb`
z_<ms36*?OkoR27>Q>3@K)Z}&mwm?b0TU{KPmlnq)PTr+Q724A~X>}3iuy*kJNxR|v
z(B404^pyi)<kEH$ZI7AraqI~H^zV1lzOGUyZ@gF@tv#HCD2{dl?(zII!3_d272=7D
zEyGQA-e{muPBTg5i#c*RjUE}Ia`FPMS}~J-@4U{DbM00pS2VaCC22+%i%Q6doyYT0
z!Kon@p<N^{$vFg9>r3!kxEt>cj>A^qE!+6_t_9=7ALC_$J?D?9!lQ|JHm%$OQCV4>
zFU1xNWN~>#$7v|i*MF3eish6X>A!5edA#Ab8|-OixOyS?-F7XDr<L;9_aCRRv!9ZN
zd0f9h^Z2jcAuk+b`8D^mw7QM$yHBE1iqpr>vFYynxpu=6cE9jrUftEm((CT!p}TJ4
z^7Sh@_2$p`_4ecBmD!+)k(oJnu11R>_9DWX3AdWDIy<9CC(+Swq^eZP_Rr^uzEO!L
zpU(r)qb?MX%WHQ|;~syZWm^@ujlP+YN{I?NpChkT{Ef{hi1PV7`MgG{_?w)7&gY%@
zsZ{)<u`W5;6<t0b%_|?p5zC@={LHpEV(hV03Y9X)aztN~jeeN+L;v}I(D93J@XWrm
zsB~KN%>gMJq)Kb@M7nv;veS7FvrJhHYd77-t;<UI+rR%V7yEO<(E>s;Ce?voYEy*y
z<~HrL`4Eg5+FD-nLz#Pr;xaHG4lN2eZG{sdoM^8Tfg^Q8$f|&_%#mZ--4A;iQH#fX
z__wn<h_ov#g7M`F(H0sOBd3o|z+#232yil`Z>+D`MUVvuVor&PvyU9ZQKDx<@m^{f
z;*0jl(fWk&Asw_Sbj-WQMv%1@M)d3_999XrEdFv_i=8#JC2@nV8->C;*mKEoeRN3b
zBz(tV=<<lMHPQ#-0p(A8dA5rigm#oKAEVLbbnTONB50R5SI7qpHdcPgn+DrpaYQht
zXB(qSz!f@<B!QWUU0Z-z_#+Za&TZ=rhjn1vFqRTH2X-)j%_BxqsZ=zclI>#+E$5DI
zi)ilA;@0<FE3Zm&rNlLq-P5N4uUj8>KzMk|XHm*L;d5}(Po*z9J<YsgV?m4&eNf&A
z(yn$FEHIVF4;HUN2>3DV;pOQd`7j14kvuQ)($Vg1E+1X)Or_GX9{H#SN~eOhQC_#T
zlGf;a-aQ*OK9<kt(I{Jc_}D4b%sKOPh>zu7Yfu3i2&G(=;1huZ$`3k&w0~AQEWxKs
zAN5aMimXGq!XYRSNSAyGPn4?>Pj{`Ew(v5B&mG}>XP@T`g$g4t=Z*5pR5Y+*)nZQV
zeV0=gdww@rFQrCY5y^yYj*sTM(x%Rmzn>h13zX6OJ0kLv_-M9GByvW_H9VK>BmLB$
z>EEk&4ErVdAt_gk)9)}}IKJ<1zXvaIDSs3Qh1Brx9y4tF@AvOZ&QpLl3Z4FaWc|Q7
z<@w)n;NKT32-)=#?SBV9_<isX7h8qLlQHYR=Mv|VN!b<6sNV(m!V9%R1yh2D3IKJ<
z`1%0ZFZud_&L3p6LY(ot<UH5+_m=o~kmC36nb1QPRv$ED(E0xlja;cS$^t9?TWFpS
z=8_LSbIBYvYtCHl%5=X+<S=o)uZ3?Z;*Atro;k(hT@LwJEpG*8go&#y+or&ZUNrOC
zjONM7-2k^La4Qkl-rGeIJ0b3TtYe6zhVZ_@aQwuoD>QzgBqC+;MNQyPXy-%4vT7&9
zC0)Y?h89PKOx{ZfU~xm)xuS85V-&WNP7GT3<yKw7DdWEK;io#$UTAoU-3)`Z(bq3P
z3b3XuRKpzuO}wjDV5MEKP?d5huZT{_?J<Khl4mC?os!Dg@tqTCp{(+odkzqcrJde#
z_roY*V!!YLKnp$n6|auxkUOQONH4TIo1i&MNG>qVid}xez}H3Wm<BQL{s#CN7XV*A
zo5itn8@u}*!ZwP8Xhx?vNkryJj%j?~ULK7d!7JWI$tH$V<mCP_KT#Hv)w78_2KI{@
zp`RBvRgxck|1op>&R%HO#M(+0u3@$l`1&V=PY5FPXv?#ciJL-l5E6+~T<Q?#!NlC&
zJTCTRK^JZo;Kf!n=B0L#yd5p+C(=NMX9+N{7%G1(IaMsTj&)DOf*~(EFNt}59};~+
zOhf-I5OemB%nt#M-pib`y4wE~s7rhTHadqd*~dv^a#_rQf!?yE<pet(gu9mMy88OB
zVU-ldw0bN5UMQoNv!5j$qmTkpb#wUBKl%vg_w1s5(EJ{Q^2aV8%RP5o#hC*~7&I*u
zXq{o*jrY(nbc&|Vp(H2*97#eFT;o9Ql!8>2%Wt`hlGfw2_UB@!6ACF%<rDba7e0Y*
zI?nlyKIgx&L)n5UQ^LyCE9h%!VlZ#;Ofi4`Ei9_(=2UZcLMAhg_l3X$&Y={fi|e`b
zuIp$zd5nP(jY?H>$GtbxcIG&JLwR3M8_+%$))f-OqrxWEI>s57TzwyZaNjMgS-G4s
zB_o_Zdja#q8_ou<uN_v#sZ1E8n6Y#t_uX|X>z2=Fu&t57ngx93BR9}=_yGB_i}=jP
zZ>8<%LE8F9EY^0S_@BJEX{lQ&kDwCogCyYH*I+SM*16^Vb5>vF>!g%&-~Kgnsdy~w
zFP_<jOabqh22#F2jxd?Z;=?6#JUL8~5T<dtL5YYqq-b=Nz9RM3;aGtW#=a)`*7>{B
z&#<1<HB1<8Y|!T0ypn*;II<v-1&uO=r4o7v{$kn~T8(Y$^wmhb9a|e5R1ya-F^go3
z5^@De%J`J$FG|eeVnK3=6j6#d+G0W@bQOMqFNQ}C-eo_6`0)xLBW>Vo!k*S2(Ab|%
z#0lURBm%ks4<oFR!})3<ogGt1935u_=OS-(IC}F7e=q`?D3Ta^7zJ}B{4%+iAi!AW
z9$U0pK9T$u%9@N1$BzhqaQx?SgEWNs7ssIWkvL{7R?7JRQb+4UMn2`na7otc<GG^1
z!Ih56VMTI&V!x0+fKWpiR*W6k(Bk9!ouAKLk(DW_V)K@Jxb6B)%$-oqiDRc2$}1+$
zU&}}Czk@YPXLIrNF**iDyyr+7|I3t*;kNr9;QIAT=sJIrmhORgk__u_e~4?Rjqu*l
z7NV=KPN$SZTpG=M*4W337zN_~%KuFnr%>E^?wV^^yXp$ASvkoCNhV88T^;o`)nrmB
zD5_%Zx)sz{R#R6~Nh&7n;_?de{hjm<xcj|GrKzf^qpr4^Y&t3mDyu4^d&o4DmRC@e
ziGF~hw4#dox>`z$Gd5t#Dyyiit7Xy3wM?kUxN(IiDGd|mui=XIS8)C1^X+|eDwUzC
zx|W9eT8c6$Qt2!W6K8PGeRnfs;y9|yOHe>jaT%F>ADw-1;i=M8R#wF0D9S1-DN3i%
zKuLKe^>wwB6lVn)ML25>TIHFuVm*^8%c-iWBwI0#bt@K<RhrWBO6uzBs4R=-)f$RR
z%PB1_qo%fw>WWfSDouGsB~{hc)YVi)m}w2EG?mr079Sf#u`La$Ok2E^;WN8=`h|Dd
zaMvGj)#CAJttl?8psuctiqhhk(^8a{S5Q(`L0w%v<t0V2D@D$)<DG-DvGe%MM{nTJ
zJ1_Ibdj}cOii$B)x$>$TSi5K{3s-Gm(^VT7Us3GcAT2YBEU1+TVl$staLD(G!ayVh
zLx4mpBQv)N+_2a)GMV;{G^9%@`^WV&G8w{jBok(>gq*g&O8{mZE4P+<yYW<vmA2#)
zF321KDDSTpm@8X=8O;1df%RYciVEF$cyAFQDMygG9Y-cHlV%JA(K)TPsS^Dn7heb;
zuTd~oVd4@#fx}J8=M~EWI~PN4E>zIg*u=DtN3b?h4)3s^nM?YB%S<jATN$NYrLMx3
z_V>Y@Rh(~%dHtk}eKwo*Bl88Oz}I}ejUnZ@!BqugAFX^nTtNA9nZhE_-}r8ESvkc;
z=J{HXEh(k6I76nWl$yGFYHO<^Ue=(}S<1^xsi>-nc{4>(Nf{LtmDJVOQC6H4(Sfa;
zJQ)MbJlSPY=lP<EXNRZ5Soo5#z4rj^z-IDzviNs4a`z}|XA^B<wR*Z+sU#2J9+hJX
zbggYM=!#T{j~O&7<fUH~;w~Yyi*q}O@NaMDPZ_7c?dm%GwDJ3ROlgnsK0LcoWVp`Y
zApROTv&E&P`!Djs^S`0=vOD?2b&F8NRebR)U*Pzw&#>cS9e?)eEttq~W{tS)%KN{-
zWu+I`eqxv}ed!^}iRz!)87sK^<{MbIbUJytV;PH;ahtnFc}ar4;Y|hZLzsMNonphr
zB|P^1C%J0vdWv5?L4U58+duh7tQ@E486M)bpZt)w+BD_$v$*fUhnTo<4lh3PKY9AS
zb4*>hh7Wz@BfS2tog8WIN2wG`ul)!g-86&t{xt3Tp5u`x-{%Ye>8m{RjsL;>O=-UT
zpT5Gczxh`*Rj%X@KXDU%UA?6HPxJ7%eu$pDmOuW??X+L)WXZzu@h>(QXAt`3^IW!e
zC9gd7B=Z-pq^|fan)?-3-uVaIc-c5Qd;8e?@-KO5Uk{sayq?+9XK>?fx3O!-YdpVw
zKMm6sargZX(DGsvyN<V_Ddkgt{AaxK-T%$bbBZtg@gMTycmFr%N-yI-eEcScdiznm
zr}*}FenfNch#&ySjr7ylImG0dtN7C|-_N(d{!?_%MS67|ciw+DW6LvC)s*q<W8Y=_
z;U+eJ=nuJe1{`ndV&ME9e)O9IeEGlr1;yug(?%5?`(EU+XLmAh<DGo$y7^pe@8!gf
ztvvDifr6G)<mkBA#Nqu%IN8!iU2O#w6BhEt&ptrOa4)HRH;;YiUpUs4;^Y7EzcX&=
zBrV-J&h2@LU%Y-m#$N2;Wt0}D>Fw&|$eE+`_YW{}g5vbyLyTLzf!b^_hfiII3to3o
z&G$3yJXMG+!dBK5;=I^Nup#MRhPFf&3vKu$yo4BDrj7dH1%(?+IZm?eVp&uYbx$v;
zpBT!mx=k$l#Di#K`-gLd_=tkRofqSZ&#veYTrA|4yNt3f@Cq~27wyTP#Mca_JU@`S
zx}rjxS_${i=fF=4j*7q+0$AKcp01uhhQ=H1bHW!91+m}WhrckphQ43$<?>ZwS?yTP
zU)%!8$>-@Tc;Ac3KnZzH*3`0@KW-S0uCc$Xn!E&NeMl+$*b5s2abnkbSbNKZOzn7s
z$6nY6D$T~*KgopV7uYp2pPN=shfFCw$KT?6KYxX?=_~o~U%rn+2M?3V5Af?Jf6SOo
zpXU>6t9bWdGxeoCeE++TbG~Od;m<b4k@?=mOtc_~+Zi2IG6o3ZfiQNoZ#Tm?rick2
zCTsk`+C^}rw+Qh{0F_wmXw%LP8#t{P*v6KJQNY83C!{W(j6!#>jVRzz1ix+S2vf$m
z3gof+TQ1R_zCMO>d?bLj{XkbU&p-DZpr}4}fg7sI$>&FCYwsYJ&ymlM(AwS^;1VH{
z(<N+NK7pOjWLZ+)PJ87BrZ!|b&{4tdw_n8zzkZ3k7Z~G+TL={L!P8E$Lia93D_^mK
zB-zG}R6Kz-llpn()oo;FEvLRLOWE{QTt91sfB5Ua;cx!->+Cpr5rC1tR(}5d|HC()
zc%QY)=b@D5@H@ZZwLNF;H%nEzmIptyk)M6{5x(`Ue`W4fH#4?!kT>^ruwm&`8fGu2
zw(mH{&JVHW{yRAS>d$%j;YVmLU&N|8<5_?0^&Efshy4B5|A~Xmg9S-Y1Nl@f>*rSR
z)*IXCDx1Re2~}hpX0m1dWWN88f6L$e?LYGTd&e2<JI@o3{~LP`9pv%v{R>;SA0UUK
z@z4&QdvhN;6&pwQuzg=UYnM!?Vb)S=2TpUWagdwuzLnG4e#tk#`E4$g%w^5|@izF)
zB?@vQTxjZJ?y@<wG<L9P(M<X-HqqDD$=-uUXzlDGRWpYxmdzlgAYE2Q)803D<e$FD
zqrZHE!4XjTK3@9yV|?caTbZ}vD(cFMm^5P+y=Qjw^B+IPv+o><<P`B>UU`aHcf;L$
z<<Gvtg7OY_>^{f3>ux4}Zad$6_}lEfP|D_YbIIp5>C!BFUVM^=zx6OrZ#$Go02)Q-
zxjp>onfJN#Q(xh)|MJhdcKLKtKxgww@-_1rn{MHJ`+%Ka#aOs}E;rJBdF7M4{o+oG
z_hCRgvWa$fuq#wuppQ8ygFE0YZ*IBr7BzvdU)<j%9zXVUys8zU>y7WNK;^trX)#wA
zvaZ-NyQTzvV_m)CP!L{>j##{V@0}jL(yrhNE*nV_jkWfQ5s|1};o*|DygySGw(_`4
zK~@&+<C`eza6abg6};CTHo<xE!7G|{IF2s(UfObW=Edvnd`t&zggv#{DKY>2Wa)N1
z&HVml-(-vh_-TUnzCY>!Z9l^OT{u%fMmojvnrGkHLf$hnO656u`aI(%&0ynocW~q7
zi<mmGf|DnjX*zY76O9+??uXS^Tt!`J99JWqJo&HR;p^Xilw%iqNEc@~zT*|X`|a;?
zR!w8|!twsX%@^5Q#F%2;HIK2E&!`jytTMXFfHJl+ddECv4#)+mT(C)h_I%leo4>J?
z$OB2+j`ni9r~Yxn8Bv@C1s>Yh3+bzJpfGBMR51yA+%F2VL_b5?!5orLtw8Zu(iR3M
z>4)a8k-cWhW!$m6jK^R2Eh;z6z5|Wi`q78@&@~I$z4!PiSe8&#Jj_V8mYEZ)>CZuF
zQ4#YmznQ8tuk+ic0c&e)j7IZi<b6HXi9&0NsB@PmWVP%*D4>4PI%ZVYaPNKBQ8i%+
ztEW}-nyO;ByOo}S5ppPU@jQcl?R52wP|`O*swgAk!iB6#RZvq?%~iME&Ds&o`6K%o
z%BOhmjn}#LBWt*7@&pdP{R_HBveed8F=6dy9-KWuQQujbdo**)is|g^VrXc9_Rb#9
zwPgaWoUw@c6UTA=mg^WlaW-q`PhtO_6zP#3+PeqI<?`e`JdBu0+BX32etR3YKCqH2
z$2D;5y=Ukg%1~2N#gw(1_}Glq6c0A>e%DZhquKh^8m)6&xX{Mkx6Nkj)*W1N?Nyw3
z`913AU&#a4%;T|t`xVXUX)G&BM>ZSj=WNqOMsgZd9)%(|(ob*SFe5{KkS!sb&hy4o
zkCNWJiI05#EA$`O$~PW=nc>(g{UT$8BfEF-)T_Uxxuu!T9?jC~YNpRz#RDIiMm9Uh
zzP;TjrO5YnaIU2bt)s`u_yt$&Rrw*dKlc;fed$SNuD*lM-f}f(Uh75=b@S}jACT4k
zTzUUI&+jyDEDi3SsMt%~B@1Z}N0;92lptLt``F~@e^JE{fkqTYI{uF!D^u6l!Xa>u
zD3OUBKe^+CD^{%SO<`jam<b3uwum=N2u3e$!`y3^#K*~JR(0lPX>jt|&p|SFfS7X<
z7k@>p;&El6WsKsAGl!{(Py3_W0c+agRoD;w`?iwFFWilFIOqcM8M}Ko6>HhA(0IZ>
zrH#zof+r*r=F19#c|+-f`F(0mvXAm)GdOwJi;?}yMoHZaKHg_LiyTLeEo9P86JIWq
z5&a!<4<qe9N!>k}#e=F{TQf|Gwo}I_y6z^{G!&7|O{BP_kKgoUc;E{k<=ksO<HZB#
zSTOA>((zAHc3wP3NB;;qZ;G7}y1RSHjSMp|l%uq?)SJii5h1<A%;4k|Se8Rv+_+J+
zh`FRK4e5mQIQ+zIP=R#D*P=KzL%E{RyUc5=D_ndRa5`5&VaKuDX&+%0^pPHG#|gL{
zkwfGw<`(};3NF@c`~@#>_b{AX4lkth^P=E1lqlFEdzClL<};tUmuDaOM-E*aqO5i<
z_gz`Tmp}DY^7X6u_Fvr16L0-5hJjQnO^SS!vl*acFw4lr1H9Y<*DQdZ;S%oHaxJB2
z-{!$(E0{XHo7FSl;{8*tC?QuydcP`!6+s0p>>7Db{7xy`f8FJm^YRlv;I;RfIF*~g
zl~*k1=^yN4V9a$~z2P!W4;ImX<_O1!m1h8#lXF~b?d0-B%b0TRx14Laz}q`d(7yX^
z_MGdbzA{74;4pnBf6MvO4|C6k9Dn)rNm8jk-hF2etIJRE!W##vs;lGN#TIf0Px7Jl
zS8{0Z3@heM<Ul-2WFWU%U$k-~$G820=U&;(j>Zi4tX>JPK278B7;d<F4Li^Eqq;9}
z;A9i|+z11zlsPk}(sQzr_U=Bk)^xPBGHK~zW=_7yne#35oZdrY(TBKuqvo&w{3J^C
z^4`09$y7G-(wqCKsHx>_bB96GB`q|~7cVlYb{YHM-^<kxK0xP%7E;;qr1S{+OeKpK
z%%uE4NdcQh&Q?-2l`A)`qpWu-w4I@=KgZZv<2ZllU0zGfU<LpHAOJ~3K~&C_a@)L#
zWPo7@huEsuw4Oi1@snrJN|92U{kwOwFk8v1zj>2vMJ?^8FJQe%S+QLbjw3*8O=<N+
zE?>Ktmd0~TnLUNR&J&E}Q>0RG?#LbnitD&4m2z^3hup~>4(pRLIR~pb<D>rehu>C`
zj3GiL+7<fJj3uuyIme*O%d_6rzVZpn(SR1==kFxhhC*L?39wLM=15ZuQpd?6Y#hb1
zDJ;SSX|tj5hwze*u(MH9?$~K9Mzm?k!^Avr)BpCBvR@d{V7bTQfG8Rac%o3X$zFbg
z%lL=FvzNaMq>%|Pyg$awkNE6%6Y=|rq?5SpEBqN@D~Gqi)Qzjcc5b!;$6`ID%+d~W
z8wMZ$*bQxgB>P(*v`JlwQDfN?exLMtaF`|UK}uqh*iAL|D;QH@(GtCSI~p0Om_bR)
z%d}LjVSaum9lbe<i&6{?=a@EYF5~N}$fVPO%trT8R$ac473VT6oK(uQTbo^M*t{nS
z2(&%#=?1?4Ap5yODskIEz;TYOWK0R&1rIA@7dftWo?~-5+Ua5YIn0%k{dJ(P!{1_O
z$rLZvT?0RsjIE6V{+Fv_pD@tcMOq}YZ0qA>n<e?u>^{kco-CrRt$XAywBte3HCr)`
z&;7|)82arKG*QRYv4dRb@1t9dVeMrL=$~*ILv5{=f3Cd$k61dqjc@++O;7{8vEvMz
zmyG4vQ<YS99N_H5P9FQl|D?J!!<eOYtSav0^u=C0epB%;Hki;dn9KWiOoEQ?jd4}3
z3eH8u8x>_!r;X#yt<P}yY%{0MHZiugoP+yz@!p|kmaSRG^f9FzKX{0afgGjTJckb+
zBbO;b^|x^N^hHo`;mm0!Endg`F&XwAI>w&e2bp=<8kWqR%0PP~$Io0KpC6=cpqSyN
zy}bF}VMb7#IeLK1m^rLjzL1i94@Zxk<jnCSq{hx?;p8%QzrUL!2M^M2zBHkn;U`X=
z$iCNJWY@8?oNYWud1V!64)0;--jmF{Y!&mT)N$^_5t=*t(77Qlv<<O({Tiz2;`o^s
zv{H0jI7|7I#Vngq%i$x(7#tp;eIUbd^M1DNJj}3$v&RmRs-MZ~<qIj*eH=S}(thLA
zUjGM%hDR9gZei#9zolz<ghTJ|;pC|k3{^~J`TPm&ed`UH+S)jF`U07fQaYPXa=yJM
z>YuG(?fND3whmBVImA;>KF#^AA?ha1X5)r6jL-J))GxNu)IA`4+!fVDC8cyVp5$z6
zwA-c>Ts(Jz&Qv{XS1+TkM04WUQ9Ao`6c=Y{Jbsjpfe~8?`MV*YRhmgN7qEQkLQ4A2
z@bjmhr+rYN`r9~m<^q}&#l;z#P8_3sAo@15vILU+mkfO0mId2O;t$5LMX2aoXoZF{
z#g8i@3_fNcn~X#sNpgnY<oxzm43rea$>ZoW6i0b+7~-OW<Hj~xoQv$EJiR>vRZb@*
z#y9=l?>h?2g0Z|`Z4k1_wkpxryhr@<K_R<|T^*~VE7~kadH6f|qhBQOea50~Zsax?
z6Uge}D3Vezo>NVX@5!JFo|l*<;{abab3DK_lwaa1=QMrgJxB>ml(I?P>x(b5ZD0x=
z{Fm(S+p&NhJYOmtmJ%oM<<5@`Go06)-oJxGjUBX{I>3RG=Qz{U%ktGL(9MVW?a{NG
zIeCKKVMRLM&(Y&&$;U~4+LBc?FhF^IJ^Nq(HQV-|BX6<5=L3@$?Vn^!h5Qn7jkS@5
zrGT#mc#npe);_N0oR)vV5D>~uZDn`&%!)Z}M-wXr>|tt%xPjM31-s_PH-k2a;?l&P
zE!Z82iNyng3j;pFECi$Z?4s9>shnI|2~IXcm<5Eg^GjbWtsKL=*%Qf@)-Ze4EJ_D2
zu<zI@_8n<q&BnEiFYe;eAN-8g-eCYL>nEVPPILHlD_SYe9^OyI^kvL%$nm2e{)F>A
zgLHSa(bCdFd+#v0_G4^6&`4T2T$NI&S##&cb-53+5zthP@lmglvX!i~kS3SQM^Cj@
zis%;y%u}v4`CRn$;dK1;;(R{u&S^-g6y_mw`CJaI6lweG4$%{{mH0J<d_IS^zt|9U
z&FAy{f6Tpi+#JVw=lQLknK$R0^SlvwkpUuu1PBm;AOe#}krJt#B}X~#+PiD(WXoso
zdVRZh=ezaZS+;ChE6a)$MS=iHkPtz{H|Lylo_D6}{^(TQ?*M`=`R*E@7c)~`UG>D@
z^Hk_=-BhY(J`LG=60#5=H0R033TMbj1gvw?c=ED!z0nV7=35s85~*8HXVzR#Shn6Q
z;WMAGtkNh<gL7!q>~d)fUD{S^$xhdGlWvd^^Qq37p*KxvNNKzQ!YGqy2|0Z6mp{f2
z|KlHXduqjaKdS&tnlW|Y4)B&Fg!4uT&5@Ht8gkS(%GP?rh0}t&MWlJHOE0F*HO+Wq
zMU-|>9Ysp#ZY}A|Bsilz_udkVX0N8BmDnRZmlP}_aM`!FbzCJI0nO_o-la3A8>cm8
zE=T$8UXY0bdr^*!y{RpFe?^xMrhJ^auxLH2b0CUZJpD%d9=$z~4p0+CuIB&3mW<eN
zK6?KTVik^d-HUx2B^ixwZ;*>p!zByV&b=D>S%+7X8)iM!bLMX^7vR;|&!)m#cOz%f
zN}^f6b;=VJ?fSw)o5Hg?W3%B3r^I;cjb``n(=?>6+xQJq!Fp*tU0+I&I{TmdBGnUb
z@PGX9I{19{6ZTy^S@@v>$A&wKqR1u+{Z>sj*2bk1S98&IAi@3So{f!0rDwxUUamUW
zcxGX{yIm)iInW(VPv*74LcZueY<DWKWh$IAxHW-!$E<>o)~p{E&CV)?_1m&YSAPgw
zc#i+y%XW>kk%;mJB~8Pp8M25(oS19Hcqz?X$cm%Vc?*=0ADOr(B-U0X%_kJyn><uj
zS4$URQXy1%pskgB*v93^MloQy;iNq!D{);L73R$;Q#EaH)D;Ej?`gyqA8s&n9z~!m
zWqDxeUU$&lw>#%zEbI2}d9atIe-<u#rkQP8)a8aKLU5ebqQOUMS~AOXbA)sW(VWP;
z^kEvJ@P1eOk4A$liYTt3CPq<`A={OtTV{^@o!{zGz(%=8DKlyIj+$H~dc%w9{I<TO
zhkxPlg_-C(HZE*HvTaHY%r3h!JuI4x6`<N^iyj-@VDetwDYU3T#g*(TfNu#$m6g~3
z>^jQKyrNM@4(=tg4Z1K%Q<rfzEWx?#Q<bX&;KpfzwI{{t!=&DA-WvnI$6kbLBo<IA
zx>xr%+WcE=tLpGB!+80!q9FI6%N2;-p~EdEORv)I(y}evV33N83_e_TqR^ts*X{cv
zs+^QAU4(7e=HT*scf7IW6xB&x@yP4HPCxgfQN;*@HPsfKAZ8B+6rD>ciAzo;CbG)H
z@|xol!mS-_6ZP;carue56z>q^-P?69?AfRb+XPU3MQoJ6-JOYX_S&TF+3xmn<f8hX
z)4p|D+I-qcMKnKFWTTwe;MW^?-cZNHEEkqq<VLqCgEXPkjvCPw{I}S2_;k}OJoI_D
znz#YSWux9?(vuT7p~;CY)~ulL)Us?zQ(3}h!s0}@glcslTtZVQ5W+s`!4UhmE#s&^
zO!}k)!d98OH^%ADQlKZAc@E<aE0bO;lUHb$(xJlz?v{g@G}*k^G%G0PNm`8$CB!KT
zbxA|v-?+rtr?ersM*&)?++8N^-x8^ZnVp{9kiglrTY*!d#svg8PU_w>Bk*wG5w`Pb
zPv<-0P-w6h1Z?@))8)!VY8zH{=d`rT9G(=J+5-=1JE9YC6dvsA!u~Ed^etTFE-*8r
zi&bA*HlontS`twszKwv=xn32gVtdYBH4lK{*oagC-7PG$E5{a8QK>=4j+P1`Rx+o!
zh3yNfI|z@mq|&AxG)>A~dkU5Aq^k|H9aXM&Yi_)>=$39~u$3)6GbXkWoNFw-N!4Jj
zCR8L<nzwdrIsBOwOu%pl7w5DKR}`L{4xDmJSE`=vCT(*x?a(SyXjfTQgSqRs+N0u!
zLMY2(DDo3lyQ(l9|8~=B;gza83p1U5E$gt7Eot>{^CDgJDk$Re8;gE>FzvXkEfY-I
z?6(hjLWm`jwS_qrRzfaXY~PWNeo$nlh~DuXxv_1lUln&%S-I^|+V$1CPU5N1VDoRu
z(b18WtXec#`^qeG?8l!dwru&UV@T3vyO!R(a!?Z&8{EuK*Q1MUCD~*+@==_{YTw3H
zsSH?D8f_Vw+L7i!235{B8#bMmEeJ(Pj;~pAb$&~f!75Ilc6?Fr)*;gD&oU%i*UmX<
z$xt;m#b(q&vE~A6b>yOSXaxl8w7pkOm9t&nsjGUrdtKjBmT<RYfro08R`cpqp6UP=
z-BAt;$L&${Jq~+R<SI1jv1Q{T#z9IuArK00mhoWA)DHA2y4p_X;%38lr^E?TZh3k?
zg##?oBTFTl@dgi}lo)&&`f%D4wkT}%s>BN152v))SDjr6HqPM&bJx<cLZh?US!w4!
zhwYHw0f41CPq{0`Zjh-;hhtN=%Lw<isw|xVw6T4wj^&uvGQSn<Hr8|6aoLMyXNqr1
zr6VVCbFNZnUDZHq`-2AuFu}^XMzF!Rgyq8yk9HF6!4h{sRFX^O(@r*oDM=MegnN+W
zw*QTXTJF_}W!<)o3rE1h3<?|l1$PivZCe$Kjj_sgo<5}a8k8Wa#5fmyIDX;c*fbzV
z{*K*Pw&~Cy6rXexEZ8`Jwdqpmwg-q&<R8uMtcDytEBx7XTXv$%hPd$GBWIxu2W*;!
z$I+C2O+T^ZuG)^RQ{l9s&g?n1Y|vpPPwHX&umiV>?1jrtR5jbO5{{2p*E%->DLS=u
zXrvQXfs;H~m}2X}5|<-i+qUiCj&jb#IrU=mt1UyE7-1zFDn>ZIt;k)mS(lC#o0V3E
zX&Z@(Nd_?%#;I-D!nE~lTC+=U4i74(c@-wqE-j44k*R}~j@%^5#DU8$9Q5(preh!G
zpKa4BUxqB)v&;fbqK>lMfm52^Yx|CL1mqfUsNY>ec9!|nr79cGym^%ZOyH6WSQ`y(
zMDcd)J%sDSazqtNY-~0Aa`;sAY1^@|&p9CDbmhuREafWn+dK<3(ecO;#leJ1hi8PR
zcCFZ~6A+y~T~qWp{kcYkt_0OXx0<L4x1W2Z?_jDid*>vM#x?dp&Z?*|WQ}I55s5d^
zvmc77Byt-LRTff79z1oWYw5}Pbb;~)&YZexbCyZD=yuVfE<-y_sfoLH`O3B@uRg4w
zwmm7jHEqRZ)aGyLq2Kwcj@Ge*i)E{7(r(k|(Y-q0a?dt8v<MHK!bO7}T9rE9Z5$|p
zTIp92#)cVNvrU^W58W!ZNRO`?_1rUr(d@;(&g(DHehc-wXvXVq+veRWyN4}!mqQ)>
z3B@K=c}RN(LG4pYhc0_Y*up-=@1@#@LW9c>Janp<VFhz_w#lv|9l8y*S~GD@9n^TG
z>H(#qP-s=?QxB`$rctFsk%{rC&U86ETlIx-`>+?^EIrumy5z3TtO}=1^;*jh6?%n7
zzN(;HwLSQ+Vvv&%D%V=3WbtWUB&BnmRl%7J-Pn$bJ6`{HVw%H)JrGdnbXLnOTXDcs
z^<#B)OO>sSwH8@sJ?R*uL8k)7(o~iub7v-0+DWydFc)8rjIAWyZFj<{VAJQKSFNe&
zL8{Cu{aIv)ieR;LXUmq9vETt`f0udQ_7!2Da}*`r#Qk#*MfmqG+bGC?m2RSxJup2#
zCymjL2d43XFOEjz1Go6V-TYh^|0~G$L1}z&**+*eKc8%W@hwKV`u|JcirQc_{Xgi%
z|C}~|@%|dMYkm%Y{ka-+s&3d`@h@%if&KNtY5aMPUpyAx<8tBM+Z4UupG$>5m(%=w
z$1mJpAGntXKYsDCFk0FF-(t9D|Dq81)seToh15wI%?<<z_lVu7A(aPJI=pHL8SH4N
z!Y4q8b=q#-VMdQ<OG6FuF&cshJuS_QEJ^C>>q$%S1Co)RHu@%)DX(cDH#HUs%nWza
zF*rqjMLoq?i2y844bs{@Oj=PjRRw8)U~OTHrq*8KvP!8h&jkeGwON|)wxcKI(pX($
zyqi^Dq2*Q+%Yihu)>Y#72?Qb9?=~|Nj-|1_nuJ&%5TUoVh0!IQn%X+j69XoV9rR5u
zQWmTuFFhWROpo-^IWR?Tc`c<m$w&!{(?hg%4Ut+{MO9&jLF3{$&8<DeW|mM_mWL1q
zjV(>>=!rRO3YHjmvxJvvYiec5m(1qcDq{Qs2+`Tp!t{E8O?5RS#`+K<LSI`eBa0EL
zYih|ziUA~}y&d#TEK*WgOMXTopff$%PiOxGIi=N<<)tE}WNBuE*3Lmv3MvT}Wdg9a
zI7v%uH-U^|>dN!cOd4C8I|wIavng1DU(?XTOLR21vZy7|P#Yx9FF=OqZf#|1%}0G*
zElDPg108J)&#zNeT}NiJLE~6oC%xkf6j#(zkeLWbX2$yI=o=@yBuIIF8WPFU%qXoL
z10>~@Q&XA^5<<&UG&gtS&nTk4q5w?@BB422n%W7+XR)ce)Sxl4OlMOo3nG#F+A87$
z8psg6t*uO~`KYU{B{|N}^-xzkL$e_&tLw;2i9sM4@9&~#be`hM8VWNF+nXI5puKmL
z%;F%G1?fm6%d=y&wDpmgTSi@3E>cQDD>F1Vb)cmcvboB5e{f`dftEY1tjA}uxuy)i
zrXeFMbTqdxPXZ0KL6gRHdfQqVUxC`%dQ##I8i%{v8Jt<AqPl^s<QNc=$-!>Ahi53N
zsH4dAkGY8f+PX){C=OC((zr4|PIF5y@i}EQl;<LmgjeUdd%F#hn$MP+VvwLmmT11!
z!dh$wTk0zDYk*v%{Z13JGJ(eW8sY*12-DZv!q~E;rmlh1cpsoM+S5kg)Cv_fo5)T!
z-lID`+(qZmG=&un6lWzMfrW_yTDynID5{~lFdZFOT^OgSshhZ*GB#J_p(BZ`&2#5w
z3o<32?R6#QV0wkS@84x5Hk};}RcIE@+`Pk#jAQGjI%53-O+@HvZenCvQs1!2q){@`
z-A3Q!66Lj<$xV$xN)u;>CdsedL~&*UAXuClprvbw^r9Mqg=t7>`0(B4ZsM}b*i>P>
zPdE~q<L<3C^prfd)sz?@sISm;)4-W+4VCycVPfH3X7pGZ8)`|2(M+6aW@J%PTc_eo
zJAIQ&lvUT0mlkJYVGkXHljN1vQIeAcCeE~W4Utx4=-S4amL6hEEEE<NHg&KP);9zX
zEg_vPbyWm>0$8W>ZWGht7&g}%I0F&-+FBS{(5b1dB|RzD(6xm#!8-D7EbOCmV1k^o
zI?8elEL@ryHgKk}ilBmpEv-GoWR_4{mgnG1QwQNh3ugp+WSRE6tt^^Y80$A^>}qai
zDiokW!NUIbHij402`V@<+Sf_n_yWb1wG^0GI5XN$NB;!br9sN`(hMw|9ig>jkmURd
zs*AG>EL@tRrNzV<8w=-XZSG)cUB`}?!ia(Xgftpzs)#dvxTm#+$u%E!wKXKi`3)@W
zY-4zKoywY8vXWyBEbQ;1cWj>GiW&+tlN2l*Bda7xMZSYGt?hjz<dsuvW8pN-&0YA@
z3TUV-L<m77G*4?&8=?42HdU7qu&}VXH7d?btO{ys>O45JMrCz9S;;X7A(<HHqI+bH
zqKaA@XJ*F-Xzy`j;mX`NEv*L5)R!ArxV}0|)7^Ho)B-jKix3F(@B%HjTUm?Cq_MW#
zz(Re6j;0poWIUVds%;wk+FKc0hML-X8)rs(+8LN$p(5Boc8Y<8Q-j@f4bM<mQAcrB
zqJlFcWE53XRcO$-IzLWROAqncWi(Xexv-Gbe74paSSTaQG~H@uH8z87brtwDBgVDg
zxy!7KH?h#jXL?(k8C{mty0EaFzNux(t2dF8;>N;)iUvxu5|9WMCk>oYv2bl+oV!h3
z#O0JbSQwh;&W#pA=H1lRyqVw1zU-wWCzbPTyi=wC%_PrC?rcD?ruSl!sI00YMiU6J
z!eCP~GkP4Q6_upM`4QSWGrgURtmqV$R#1={i_mn!Qv>u(t&&q%Mle4Kq@;gslFq>?
z(sPRmR%9Z8iNG@L-9sd2<`XoHVtyRjT6>5~&88|CL_@H)I7DYtJCTGms)9lM0wU|P
z^xkb@B{qrjsw(3B8nDbrTN^XPQBqz>dO`r9tuxo(#mGv8!jf_dGvW~<LU?+JzNr<m
z3(E)=rT~)u&<tGzQ>5h-6AT*8HQBdHN7oQZnfX)&OVAK3Ob9yKdWcKOrZO1BF9fR#
zLv*)vu%3`cWmS+EpCBBXVW6#@m6#;TDyv9}(U5YP@y-rrWE>@B6=WvGAjCQggFTEa
zuajR~MoCs8LPXGKhv}PKA*-O2>S7}q8VJwP)jvsUP9ecy9!SaLK!}dc0TMIv2nNd#
zz`~@D&bA)nQnIKDR^b;KtBb?*w00sAQmLpk61H$?hT)D5mi<YTl~<Ay=R@ktO!agz
z6^Ws!w1S-E7!V<rM*0|DguLQ1%CZv?M3D2N^iC|1m0wJCX$BI>pgvD$-vlYy1ylzM
zkVvKm!*q1?laQWEFj$F@us9u{qotde<V=%B!P?RoJuMxq#-~zV86@5>2#01F>F8il
zOQ5WxiqtqCy1vXze>W2mABCkA<fX&_>#UCVGqj+STU<s(b|MHJu`ovO*aDe(B?QYd
z0Lidiprdz;)T{!ktBa6QGBd2x-quHaS`NWr6-Zc~8=#}917BhWRaHTJ0@hb1=xJ_4
zk4v%T7zxia($UU>NT9T$inKT%QeR=Vzl(`=jl$9j@>64wa-H?b0S4#7<QA1vnUerQ
z$G<dA&&V8^`4){ZDi&$)9wj9+pX%yjq?F8#LR(7@@u}HVR|k>0WOZSPw%cuxm`<=N
zhycQ&X}a&UupXC8WmOfiK7rI18E$Q5PR5%)>;u-A>+5864GK#uDM$?fVZu}W^v#6G
zDJmzJlK@EiR>$ZZnjs^vgkX6(NEr7m(bhFga#jJs>LR2rS(t#9rY_=48g*R~S{|k4
zW;1$X8o^+-u|%QI(Q)H0t1(Gb2CE1N3ul^`ju<$T66-@~VP<;T8D5nXmR6FV5&)qS
zp6sW0YQ>;2HvxeDwQ)KICrQsMp}NdSh9(1xv~>-VoLN9HSO`+EFbS>AUBsql6AT88
zK)N(c+wE2xXEZ>M%+YnbnU&Z?Q;r%!tT5El!b~`xvhqq&;|(mF>+57>*}%fW^f-jj
ziA)dDH??A5VL>uT$v|j|&cR6o3oEk>EbLpRy=#!9jC_K@Vg!PP321NaCN4FLU{w%J
z2-X&d>1ys^#aUaiNAy}u66KYZBm{iG3S;eUOk?0oMq&&?TW5Ztn~|k3`6cBPXC@#-
zgviV=y_3si7nBezN&_SV;aR!{CP~XFA{fjuu&{rP_Rc|)GV-VjmZE`;g|R7FCN>Kf
z&Qw+fiS-#iJk-|CvOkfsib|4WeMq^?L{|sXdK|@N6=a!MxH#0y@Dk(~mr<H+V&U8f
zeG|)M=a*1jVg&KQ$UL3>6QpJr5)9^pluQk-)6p?NVtO9IU^xO<obuDz+D)vAGmE42
zwso?!?kr<lTS0s=iIkc+BlTsbdpnto1Sl%2BsVz*#5&8PeGJb_@`}nR&q+d{!@?N7
zV~b?vl~7$~VBwIQr?YRIl&k`(gGETGGd&!ky}h4=v>d8UoLQRbr?aICe{v?3CXJ!x
zae7+Y(c=xgj`tgb+|kYs7D=GAyo$7VAG*H6?0_3*V4cv!0E2T8a*N8S%rUS~TO6ln
zbb-wLVuEFv#y~?X($V9=nGs1_YcKI>*;EIENGVyFAEe`MJ0dZIpy?m$tCRFJwGmdZ
zP!BIK(%#O3LZj4InH%V0d`+0SLJU%d2~Q0$FcTuTxQwbCBRTI`o}hbp*2EbD3&*r2
z+PX(b&T?p+9}~1T8@WP_i8E`9h7ZfcG#d*e;TgK_G!u$VroyyGDVG>-ZDCfAw{;C6
z=K4DsUDXvVj1ZX~pl^DWoT757a}6x)UmK@$Xqt@N5`q=!AO#ctC0aWNNy;oB7%Va-
zfhGklcRPtqQLu1jjMf`1gpBmdUS1Uy)in}(Db<-?_wJ<<);T*<b8iwB>a~C_)#PsD
z)V;M~b3;9&(3$m9)}WoWYiE<9dIycY9Dm=kd-wgIW%s60ckku#(v7nb{6T2ESDAZH
zfUsrnDl@@FT~Al1VO{NQklD{iV^n&g$kAIb8jbG#1xguS8t*T&2bLr5{a8_HjAnaU
z6kTs91M8Vo|5CcWG$Mrk^sD>n`kwv~jYdt2LifL%#{1LlrH5a<Yz0W&TP8r)?~gOT
zAYb^!<BS&zqtWf9QQfGI`;hX1Y4rNFS?I<+<LW47zj)a`5Y9x!LU-T)T>YzixKv+z
zP#UA-jFny!9cS)^g&XMpSH!{x!I}S;u+Z!|3O)ZqocU+Q;a@x!s(a|}jn@K`A<R6?
zO5)5aR$bdVrCHhD|0a}r66g6P(rnj}t>F`Zb1#ayVNfdj@66xEqmBgD^q(}Zx6gSA
zGnw94?(}7}qs%f&PqM1=;$cXwYt#qfsfxhGnZdKN`Cb|dQfW{RZ=22rT4}Ok@N-`}
z+~k<Lm9{~v^;N*Je@kacz}=4V&6cBC7oprmYROSCICtAlhZY^G&YZoL4!cf|(U_B_
zNE~2Bm@~#Wj9NOmr;KdfTk<l$dF;V$FN)ly+SkUI==5jql@YF1O$kU3ud2PeX>j|T
z()R{iP^~~HKB>t0UdCXy=YF`Qb(#8{tLQfC<}SKK<bB$)&Vzdez)j!A+x1nSi56I+
z_wFj*C<m*iea?+7L?_SPp5?=?cA^Li6!ZW9AOJ~3K~(sl>Z{&!*q(PEE~F_-PYWtG
z|13Eub!~Zic{EMP^1F?Le4`O(!A7~eRd_k@;;t<Ws{@yDc<n~@UDZybX<x-ESFI@c
zzp>2YPaaYc8+EDP*l<r9xhJ)1J1W&4pS(v5@X`^r0~aQnaZ2$UZ2c>iZii{dR&Bde
zl;L^|jVp#JeY!X|PPk%)16`<*#^p{f`%~@zUYKaLYu${Y#4EdVY$x!96B`#TOpqvX
z$HkXvsV>8mHvX$I^j`QIH6O6)aeA@e7Yt~Y(uBaiqsL0tTpl1SK`GC?u>sCHZ=P@o
zM%u>Vn1!P)S3vcmxAf5A{%Q!+E>JB%4Twr0QY0i4!D8|(u`ZW(nPDXA?fPb^Vw*O|
z*h43?u5C0<M^b6`e!<WsLoAvk9tqg3*=?A=ELB+iSUPYuZ_>DdKujH3a<on;vT+3I
z0J~IalhNx7+kPB5S+=J-t6j(9&>ksTj%NLmwo|FhNr{Udss3m6Wjdv>Uah3ShQtA6
z!$zeoV@I!2RkvG?ihPh>IamQsD%UuVRdM(>8?ar;Ja56eur1W_NmVLBI^cs_PpV`r
zSqmlbxJHypnJ|6ct5@5mt-y^Vf2&6Y{_S?WeHjXI+q4ZsLa`n1p#ZJ|?}~gCpYYIT
zmnef-OMVvRRv>lpY}<gMFAGC0+rh#$H*F4G&TNev>MYF)x7Vw-VDV$wgxtuNR|XzK
zv1MoDm=_Q1Iw}=TB4p!;{qSc852dsM&qmzY;5BLlvu3|+y(xD>Iyr-bOj3H}s<LRx
zQ5x21$;3M_wi>qw<VeL<qPMHsnOeu5e6aXZuCtN|r%d&A1$SJ`VB3g`rwu$ulc`nP
zX<zNnk+k*rAd5F8)<;1MX+D0+`&upQPujVHWArYaSr<tQ+a*qsI?z|~(6Ir_UsauY
z`IUB$Zh5LXf#cuU{%$9WHfs)EOY;?d7R{<A40W2gVezEQygC0{eK|g4V25RQ?!*);
zkCCeXIQSzfgVoY@3p)dMoKzr`7b*)Ikc3+^#)s9(29Lt-!bVczWVMdMyorn|#*H%+
zLZw#;aB7z70JEh%2P~qE?A7)yaYY?{-s@WFFzX6F_9#u-WgFyUUS-@YVx8dr{VG6X
z&rT`fKpml2j`p(-SGaQhxK9~Ykdk&Ha<9=*R2f9$&$2DcLOpK?c+kT#qx&Y!_vzpI
z?j&~7wj~j5RCN#e+_Rs1R>A?bdrZH&W?NK2yW_3vx8>I^pTr$hHZHqaz^%}KFN3n$
zvFzYM3{Sb{p7K*ndjnQ@>k!R{6&vvAmY-#}o_3?^Ju0mp<G)8eJHfkORe3MnL}}Ch
zxWAn_iRFFhbYZ5QV{M>8^)p-Vo_3?prrx7XRW?!a<~}n{_h|Q@?e`n%;r4<1$=1B!
z!P+%W{wy1as*C%y_duAh#BD2%J28EuPpVPbo_)Os7P*rSH#}KU(uL_RAGf|Mv+W*+
z+--VtmwV{hXhN-r!mw8~?g97xFi&|Q4$+$2(7b~07DZCwccVN!D|+U2nk8sU62d--
zoxm9lD9c|`&1|KUISYvp8p$b%c6^Xhkd%@{ASRCV%nUL!(n(4%?#dDV7}C>I@o56#
z3y_kOfHQnGX|m{2n6l}WiZT$O`AAF4Br7X}%!~{&Gc!p^h{YcpPeOc*&9rq0%};Vt
zqRYsPEZ9(-)x85qjw})qlL`2&SqFC}<&eLTertq-=Q2m0mXSz&fjBZUGf7QKz&o`Z
zpOA=8GZc%!@iQS5MY|2v;xiBvW4uJy>^CMpkyyW`QeX{`jHF#U0PQUG+SQe06&Jtm
z(Xi2SOiYaNZd3(`V&fBu3;2`~mve<W%C;ZQYw9Twmq<oN21#)-E@7&(d{&{$pCxt>
zR%c2T7*zzJe0L}@{}<{{hgB7Xr1IU<zisNSlkRq;LaQY&3B)BN8Fwv5Y2OAv#rf@X
zR(nF}*9yL%%t%Uv<|iiLxBW&a@^I~kLXeP@Ou#Xr4R-I9GhUue+i~&X)uBH?Ou)Ax
z+u0yK%Rb^0%{$F4Jaf~Rn4E+r)j3@i+?4?7(W#9^UcE;pM%=rOQdej`l9Ce~lQWf=
zl$uUPMw)S#FoF#M&(n^rdnEqYcoJ>gi%zRRq6K0}w{gblU!swel!VXco(Z$qQdCN8
zd=hann%%e%f<SDHaku@w<bgY|8MJF8rKFRYnPL8Al9rr+<`0mV5Nq0-g=vmoxO~-%
zJC^RGMDxdxm=K2%w;&LkKtinFt}Z1ApO56!G}2O&j3*i(L1-i<r;?GLf?sp{r8C=R
zeOld1Hx63vs^Xk#16E%iytS}c^>aJ<F#Gq%#uFc7_^Ok@NXrx)ol1!iK4M}5PA5w8
zp=9-@n>owFHk~#=tI5Ph|FT7|;;c!BV@7U#kVrIN3~3peq^BmL;h8)Up0?B*E6w<j
zn36_TW;!vZ-KzFy=*;nPe;|f{&*#J(kE>X1TCqGfA<?|SL1D*E3>DVBIicz-8xCP#
zV_|T#>dxL$9Gwc#q9cm)X*@yOX3<X897MydJuRKDeC_v1jrD<)L_{86`RbS1dGvYy
z^3T7*XFl^K{`f1uO+#K1SyhMm#<%`E`zljNDca8;{?-f5D~fHaLQ#}75=A9xw4)f3
zKyofmf9MnZ>Tmr|eB;0WXTI>+&v1Hokjlf)@`Vfg%>7@^vV({(;Y*)?k+gXCExVXB
zNYx>PU5c(H@Zy(#hXdt_PJ7b1#+IddLaKizt@g5(5=o^}A0R2WoQog+5`X-gA0fqW
z=|ljhKv=&bWda}h;xAE~muPn5zSdg)6Usw;?Q$)K^B?*+b-78le2;(ROPt<R?CRT2
z@uZ~>=bTErw_l5Y_ov}Zfz%>C`jO|zG;KoyIsuNH`w)+AFGM#Yq$4*R$F~0|<@q%`
z`Q)$tJAUN@NAN3k-OCZyRJ1~)z}1dRFSvPe@u})}!#R&!)b{YmNVwbK9_`qAkl4wz
zK!O&>sTV%WLlsH3e%0Wn>d{_K6&~BO`W0@P)xPsKALAoW>@haiOAkMmc?0;8c;*vd
zpg!B@){}I%XMgwlf$}F5-*cab(!Ce?;E|wP2VT3k=+nb&JO2efdbkSvmDCQRLLiAR
zfAv3*hi+dZ6xvmrQQu2y2gN9Uz46n-i+xU^!&=!$E919*`*UPzj(kKQj=d+I=8yj4
zpRy}A)~@gE%koXPEFuJtyvP@y+G}8{A|Ge;Z|E;Ew}K}>{3ZV9-+GB8zk~6*FN2SL
z=3|s(#JX_C?GGSjfTK@+lt&tJ4Bn&!5#Y&>ewym6M299kgDJbN@hJpou}PeG>cjlf
zFaI0<=IejSuYU2fJab|X1r3Mz^-o+N&H`I|@E*m^ESU=7k`+=XHF%I;|K!so3gd<3
zTONLfCl6N}SDNvpY}Z+S?>B#mr%oLrC1BuDR`nr%_jkU`^B0bjoe*$r6Gx7gj2xVB
z^z7)zV%P2a&b8RSf!&5<Kk8szwduywALixbTkY50N%NfB=L|V#Rqfzo&z-~<(Ot4r
zCoQ7mxd$sPd-CW(v2NR!j6aW$NcT0?XMAQMPrUG1zV`cHBGVVfibJ-&sP?S*m_&jW
zVEd7C{JTH-GmcfIVQBeY{0MfO{tzd(<lBDa#UxzvfRi8pO-|Kkp)0g(Flh@z6cbSQ
zirT!IVB%d^bIvJ={f4bo&K16l%*=ml&D=<&vMJ9TWvCRoCnB8LB`f$MfDl`couT)w
zo1A<8Bt0YR9NU)3wazsP*KY7TfB5ID*IwXoat}AAQaQH25&z04@Av3DvMY-pyn5Xk
z?s?|IrF1f4*Kd;+eQkmF-@U@^-Y}c<r}@o)`zPG$9wohMJA2AA$*tK=O_oMS-w0Am
z=JZo%iJc$j`rQuJ!$waPTMlvI><Jpnvltv0U?mi>M>R&qERZ6GU55@cb@wvM8I2s>
zRzOex2!YfBPMtf?;hl9Xjt?@mxJE*56_4)SOjYAvj%=@DsJDxphW$LWxsH8@4wDmD
zqJLxx>5F6Q{zrM@u_I&#))*L>WO;6arh!HFZp!1zn?GeuxAfv?$G%<k-M+@eLI{Ao
zs%@M<cZOYcB}|VFFt@aB&&UaBh|Cw0&Z%cV!1<HMD9y_vGex7TYmmy_hbT#iW79)>
zNeNHUKR%0yN#dcSr#bV;KH?*D43Eu#Kb{?j9^>)j2T9Xb7#y8M`eNC8<Wb6t%Q$xI
z2+83|`o<S2tl7!M^QWmTOkrqX5Pw=Z&%F3Nj~(1iZf*{NwQ2f?XQ<k7h&?q~wB5PM
z(9|*l2*jtd|JWH$J+hB9{|f!XlcW~a^2FIQJhZJ27AF~>U1D~$hoMk9^=V7Ib*qbr
zmABZlk*3<T05v;~aN*1`ic&QC2Sx}>L0W#0v**vTZ(ESX@j<2+LL_Dsaq5Y49NDv(
z$lNI73p)Fc9Yr4-U@f_X<NNCv8W^SV&<XZ5HnMN;HsstWqch8-71Z#=6OXfZTODF~
ziqYvs^6MVr$c{R8A2>pBQiz_RNpyUaHtgZT6Q|f*k;O>w02!4#*;SEXJf5MTnn(9m
z)73G6E;NMtwhG~=e(y216~=M*)(s{W43JC8so>m&^X%VIPk45i(b*MyL?;w~D%*CL
z1C2H8JA9ZdF-P~v48A}D4?TL8Ge`H55T0RRe4g@cM|k1+C)wLjL3Uvt(>)zbuSD$G
z2Sdh2u-<v>G{NLBH*el#c2y@KtD2{uJjtHz^%RyhusJ_Kd;bK7PoJm0x`D&{wy-$T
z%hY0stg<a!ICq-eo2yxy8DV^G1>uXOasP47pE^ourl0PvVQLRP&I?bSV@GuXc_jt(
z-?_`8?xS}1F)lv-C`E}8dIracNR3VVPjcbJLuBQb5g2`+ce<v~ti)cLgEC(%M^B$5
z5RT{I!A2(gI++b6^3+pD>2B*{J)-g0lNVU(?_$x$0L!Fwl5(ne;`~_-?A(lRWs2d6
zc`|~#Ilil&od+MLDoxVaKSt@cV{A%aVqkKK^s?<d(vVK~-~_rkeQxhpjL+csvArb3
z=do*B1tWbuEG)0m+1gCu<~@vEe~p0!gO2!&63$(`!2Yd4W`_EhU0Fx?6AXVneu$j7
z5dDLrq*d;rK53kHx|TR`@))aQz09pfOx+n#MF_##!UVT_7dhCN%cU#V3F)Bgu{`|5
z<K&73Zr*BUaV<<%`F0*Zb&Q>xs|nU^qck~8_uwSeyB=nDWjZalZ!j{uiZ77H>F1u~
z{A0%`&dDMx1-iP2h`44E4Ny{T6Dg6A5G^;Z^6u?X8fz2z>c9UAmv43v$gbqbwn~z6
ztJzkSK~HZ#YY`}_-NnUor>HGRW^iDb)pZYEo3ji#<&9iCcbbagA_@{$dE@O{WLNLv
zk=>Q_G~c3UbRHozwmx#64?ll_%=Ia{2l{Dk>7sb&qrCK?4^Wx7Mps`SZ7uCAMPg{&
zdz2?mK1_DpIs-%F=)PDUI{YZ*C8ZoY_An`tDSF2jD5&1S#q+1BD@<i*aDX*`D#wmL
zLQTyU4(+OCdbp3dm33m1b2xhHERP=CNla*lq48N_Q}a3g_&FZl-C+2K>DS()pnf+y
zO0y}c-%4)WD*eNgNG+C44;|&)sYl3(k1#MeMqc$UUU>0Y4sNd}D<_N4<T$0<_v4!%
zU~VM>B8DA@kCM1Df=sLA!i6W;UR%h-P(KT+VG^@SdHmdY_HV6Xer$m06`jWYN7-Im
z!=c0biCG?JaB{(1`LS%=RJRtOX2)SJoH;?TAd!Kd0U`-m96$9qj~v)dW^9<jkqK57
zW@zb~WapMj-g)B+%ThxML27OVXD^)R!1fxJ#s`=V`8js_I5VA{i0m3p?k=ITXNcbR
zCQ>SQ66&~0+vExYL3Vi~=bt#u?#3YNvm=bp>Fhmqk!R1HrlBmCy!;Hh?zR&0#k2kJ
zDbAfZNSd}t-^es*vFtf^j+2KP$uBBr=I#|*#+U3_Guw8p*dv_*gdKyG#7VVzyWC7h
zE#I_nBDC;Z$;+Hc3gf$GfRfdelOS2~&yy0`pH40BCQO}yi8aa_cW~kS8Ftl{uskuy
zYRH)KP0KDMm>0vq*a8TR!KQbK6z!xu#fRFERNA+E-tm8Dq~&lY?LTz{Y-F|p%FA+S
zx^a^o7e2(MtT?2;%;g_mCAf3D@lZhldVC3=e)%-**Di6ZbHwv{9Bbs~9C@_|_|$Wc
z(ACn;I&pm9)4xRFS}$)k&GPccK1fEKMr=kgFTD61p@AE`{>~j%!VwA^k8pBp7O!2t
z&Y=s>Ql1(hxc@vaJW|Emm#<R&$TK`#mxFZ4*)awoR{8t?_J7hnz5)m$flOZb@Utv-
zT;uAkZX(j5)d>6skm>95T>0VmxYN<iPrmy<_|cV{gd|9jLP@TV>#bv)f9@G_V>Na?
z{$WlxBy;(#TkJpo0^3VcsoZy#rw^3!&ehxOyYMmxtFu50u<OJ#JRFSW)mMJP!1OXn
z`3-#G6Bp^b{w9kV+j;)vF4krT_~G|op|h)%Z~y%_xz;*>6q4Si_ZbUkP*aj&$W{w*
z;QY%RtMc>OYgd?A_Yn}F3FvRV$@?8MJp0j?DNNMZaJbFalz}9>dM_V)>HxRieutuc
zPjX>z1-_&LKJmFv!N3i!TyMh{h(*Mv@WIb~p6sPAUVr;0x<3}-k7e8b{p2MCh)FHr
zz@Dwd`ZP8lJj=Gsd4BX(Hy2-eiTrpF8Vt1G;(FU8Pk-za6vqp)g1fn}znnK-e~VpD
zJWowp45?+i_`=Jl3_I(cBsQS2w61aV<S8;@H5w0|q9QTGy0SEGIXFb<y7?~i$>mgJ
z$D4e|@Yn}FLiR!nuf2VXRbAMIs@^RPf}+MFoZgnsk6wF^6EA**f&f$=c$#N-rSZnw
zcR2I0U!gH4z(C72esKAHI&QtmH~#KB49u_Dj_Cvu0fcF~`Zg48qA=OeNnC0fdny9V
zr0$?N(#_`M=Sd6r*>n0uDrE=Xe|LgUeda}y{1Tt0(|PAT-s@T66QBPW>0+Johc57`
zQ#D+A<rVrCBypNf*Y!*M@ZA<#-u*Fu`}f~xY8^7_9_81b+r?Y2{+Qf@ALPuIT#_md
z^7$vXaruq+D5<GJ=+4O3Z5k4Pd<ki?hwH<M{Kn^=#HTNix#bKG6(!=&Y2=BAN?BfC
z--tVa=Jzq&ew(*%4e-pTK1W%iB&BQ{&z;!B>p%DrTQ7cu+SCA33-I*Clf-Ip<lNIF
zuP+!g&UomNlJu-{)+g>T<g4b>&pv`?P5UV{Bm8{u^S?s!WHUF$68WW1e}E)E?0n+m
zoZXbjr60e>L`cJLVxvDHhZjEiDbf~t7@i9`4&nlB6dSRE$vW@8dWEdo&157ROK%y)
zjg-gDlN#JjV0DshhaVv`&PUJf_n6Rf2^OXxCFtP=E`R@@XzK3emG69uAH8{-kZYES
zjgIOqvjE?oxzxxnEM&OjCOHp1&f)4@67x6n#gCt(`}&)#<m~1HC${6?z@EgPl+Q~a
zd5+o6n}p&sNKJ?ZAsO#&rfuBE=IT7NJ4xHMSGn|lD>tsZ!gqf3HVa|szw;KaT)jo}
zwKw?IcV1;;X`QOQkMqLuI^Ml<m9l;3Ili?BDg5j_{xnBx5_t8MpU^+OOj2Gwpa103
z^uPZm^J!an;q)H-amgHi{5*ltn{=)h^3;hv__R3Ae&~~IPM+l_Z@kM2G5Gxnoc+)z
zsg0fB?YrYV{nASmBx=g+pUP!C<wdF7c<&AmoqvfvWvP_yI>k$;HgoOD+Y~?a1SfZv
zFw%X8AH4b|oh>)`yKj7(w$T~VE4Hw+vViP@Vp3DnIdo(n32~Wx`iq}rrTaZP7qj^2
zbEk-pPvyl=f0nfQcCPn@`Or%rAT36te%~o}=ZARh`T!R{_$+B60<N^hh+KIMNBP{-
z5AoB>KcRhm*?4xM&(GXQ7jIv?&E}II<Zw+Ex^$wsWeWbp96tJm&l4EB!Rzlf5r~UN
z1mfBE@F7wJ#HN??@SZw+(ac+bPe{7%Ugu`t8ZUkNqh$J4xOL?fuH5S6z1P3TKYaHR
zi@HYT{-=2P;X+=2{cX0M`#5_`k|^Hw1kdl!=B;ZTlvS15_XoI=Y1e?nDpHqS-3go!
z?!?X-P;Kx9q5ig*b7nUI{L;I4E3t2hb&8ZbIjt#1yTr3k14;kDJVy@J^26`{lsymC
zG1>DfAz4rPjz{_Btb-J4o&4&JKD3Gk#L6g_TT(c<yBctwCa&IcYy~ho6nQdJM{B}0
z?dZrOOw09mxpn6@je~1sr3PSpmBraPLb}ptXokD}E1Y=Z0v&gLN^g6!DKjO_wFDQR
z#Ros~G2Z;<pV2llhcCW@-J5f1pAND+O(VCsg7lPlrVtFa{gi9B+F1xo&?9K9a_2@9
z{r&w+tprF-OrUIQBXM!F?AX1N%#<{=U=csMGrEBnYkGQeY}7Oy(85bJcaC!C_!DHe
z+~RxRZ#OGDkK!;4D#F6tEX%8F%+1a)x42@;dVy<izQc{?b<UpHMnYUNJGa*ov(m}#
zotsEc&!)7vfUL?ATHpUM*Kgb*Z`%=q!2(|E>?XWC#_O+LrGH==5Y+5HL1B6f6<c-_
zpOs8yVI|r>zQf$?Jgcj#%uG+SWX|e@R+d;?UM0rl&lgCbp(dBt{_3x2>z+1~Rv*D&
zBc~2kGq(i!c?F~;Y78toHfL(bmXW0^DK&~pE12oL#?_m5Sj}$Xd_x5(!*gUa%wJr3
zp9S-}gq$E1sVn^HEAKF}W-P*qxHR_&!aTxjle~BBF2jB6OzDqPn3K#(=_XDcZD4LO
zOkQpw=}7{e5Y4x4GdMiN@VrJwvJb6(6Vpv^arI^^x@IIkV)-uZOUHR=OC$C58T{}+
z-!=9QI@W18FM_q@MV409mDQFAT`jHbJ$i~$*=@Y?y?;_-wUfXBNPU^x@7-c#Y?ASn
zIMS2+R5b*7_qG2*%iU3~wqK;aGK(8;4Kg>s#LChlQ&Y2`Y0fxK@l7ElD~t0iuZ5L<
zbY><e8JnHPKQT&z?k7<b=<6fA^>!OWbAv=O9wRj&hQfy3oY|hs^s<kf++335VDs*+
zy!*ZXh34*FwC)}Qt(F&<TU=p<d8TIO0NA{HKfdr1yY?R>IW~s8O=Yx>HZpqm61Q9K
zkn!eSc4*EZ)Lw=#u_C<M$E!d7DSf^%;-39H0qw8(&UfGBvyVMQcz!2$fAVeSLl)jy
z{%2gDTh_p-$2K!Hw?=MmF`3DNDI&DpyutAJC_NLBoK%gwU2id*`Uh0zSF^ull|TO9
zM?GnoW4p6sExdL0HjCZyoc;V>0^&!`Qm`dmUk00USNVfCZ!ogl&j+9Q9LaG3cJHd<
z)xZ23TD!)e3v*TrcAxz)^PO+=%m1Z?h{eCPJwzS<s)w1-EX~iex~@<l!puyJGZQ<-
z<oFl`4fVwP1Y-+}EH1B+U|+5?KRe6HYRJHug=Hr|NsDI_tEG8QxHZ_Y#+=?mNc!7v
z^8SrG#B7<QBr}2H^eyD21PE@~O<YzI<vEoEzWY-m5otcI%IGjLwU`V>c=Np*XbZXQ
zJf3F^KSC=kEH0xbI^C=;&NIKf!t%l#GxG~z`rF*%GAm+%nb~<ldJLPlY$hpjjjg+O
zlAWGTLRAUenkKw5&ZXDhrf*;hfgrf|7{!?}lsE1sE;EJ7_$vIb4Y4rY$KB>0*2FLm
z?@A>hC5@WGSib#@H)x+QRwQDQ%Gj~Fl-b*Z>~8dtm0d(ZRuaSg3%1UjrQ<bjT)oci
zTkli0?>MDp#iUg>l9Ui(%dQ<{r>Bt`EMfg>4|8*itga+5H9bSb@29t8fYY0|QGNaZ
zJ#W84YIu@4e+Bg=NlXT}ksX&oQB^h>8QpBF&8B&@mhJfo<P;T>krGE_eU_Won;9HQ
zW?38{DGugVow#F}RMpl-+OPbOTdke8$z>K4@yKImSYMo{q@b9|%=iuF2#}nWPf_e7
z|Mk`PSk!y~_>;0+Y3xQrwhoQb`kkEFQNZk~pWK{GQsQ-{7Ux)4UL$T{mYLZFAch@#
zcY)r;-hI1Cj7_Ajx{%m{I-1}88=9Irc=Kkr@yt7`ODC^V(h>I{+NMyoZCB3aoY;u3
zQ7#tln>_qNbt?04j}V;#?<VC98@o-y2M34P_jn_p|Fy64TVMM!uU+{oq;C@)SHHvW
z{^<|-z?c7kQ+ulT!MH?d8h75h%yYl@n<UI$-pHW9*<7en=5_R_p6}ERM|Arl2TvkM
ze8%fE0HH7N^*{d#@rmiY^y|OD(f)3}b-l+V!rf$KZHE8))jy-|%<~+qZQ*kFGLvKD
zym$2q{c~%){*&)9GPO)%2CRoxo&8>B&(?arL0*LUxmmiVZ}9pXt!S4oF+V+SC`*rk
z&xfYDWuO{|#u`8R`d@H4KAAHg`2y#UjPn=&<Ek-}=vhuPdKdW2DPW^aH-OI;<vO#|
z(=4}N<MrD^ym9$;CPzovQ4%6KC51pBKuU5VD}yVBy+*=>O!*1{D=YI1_jYjkjZ3Uc
zL1<}~bsb`mpb0ZMag-d<b^L(<0J<J#VMUUinT+U}#pe@15(f`#<(030jrV3U`S<_k
z3s`=n>pGg>ZzlHA7;q@E5lB{6mx)VGBhK$5DLs|7#STKNtH?kSsR@3TRzVYz@cJ4d
z5l=>P45RC7_=F%VB@yW(F4j+cataAC#^yxLpFmb-BEG&5i7|q;^%(Xa+`(&q^EIwc
zrty2f|7A2ypyPdzDlE<~kyMjGtY2eI_z^NfWPOpV?=<tpmtSV2`3kLLs|cUZG8cPB
z&MFA&I)Q-SY*Xj<Yk$j2lQx<F03ZNKL_t*VzMa6<$6ny&51in)|LAX7_xTi`apdic
zTLc1~h2>R>)6>xWlcc94FtupR@aj^c`JCj<reK4sNg}!=5bz_6=ZV{ifHf!t5WW<$
zGvd(JQi&C7gu;oOd}I@U`_<o~Q&v*{A3lu`l9|~hYI8CW^x^aSkdd&lb4^Noe(OdB
z!Q9*&108pH{ZcbrzRdFU2nn02Nl3}Sk48pj8e+hqTPlMvkZArC(ozEq>1k-8c_I>q
zul*41ncwBPIQZi)ciU1z@!xPbhDVMc<kC0(kk|VBe5LklCWc5eQ4+Q<ps(`1OI>{Y
zS3bqW^=~n`v~CQ(>^#Q7mYBFyk`jD`lhV;w7O);g9MJ`S>#h-fl_eQZdR%}>UmCID
zHNuey)AQ@(W+tI^jw5_N^z{hBC%FFYzu>LaI$pf^Fn{uY{g{yY)-4sjbsc<uG|jnz
zMWCZg4WG}M^)`}NsmQ~`4_zk^2-x|d6;=G2aAKO2BxjdUl)O%R&nOWoTz2i;`4>GH
z>k-{HD}d$Yd4_x2x%|c@Lh=nlOS7!mdnzn~1>yB2!dg6ufdKyG6k_~7yN5_5g5Mu7
zdA?T?;FVW|xw%=oTiUpE=?>!ZB^G8T&@>GhUMCdM?TLofl{rQfEd2KJEFnD=X=~Ro
zgvfe`<*=Wuv;@TDs&PYzzRKkI7<b=&i{|lVUi<NP85>=)eNeb0?ISHM8DAiV<b+t3
zmlv6zpQWdzi%XYoBQC$e;>?86uGA5lh9)2)btVQoDZ2O!-OX)OY<q;&;n$d3TVQOk
zkGC$r!D3kQ!?i_bS5ui78RpvCml;|N^Xd;)8JS&Y2NGR<n6~w4=L<&J{M-_mS(ye6
znh!m!vuXE1=5GBXfAQTmKKvVhM8KL#)pgK}*ern1+A0xW5@`v3mO=ubIXR(AKe4eI
ziRtOYDgy;wm-qud!xMdJ96eCW-~XrIr%MF+>hFBaHOTd8KBNRq=*&#d&^7Z@UjOM3
z;?gB%Cq@aLTqGqW4WH&GJuQU^>0y+64Lp9fEk(cB0te@-w~iI>K$v^tTn#CsBzwf>
zzO~i9^F^KVQ`Y`gV{&+ATPFY8U;h)k&OO2V|L}Lrq}H-MXNjL&YoU8+nHN8Jg4UJ^
zwpPXQo$p;Hd;7y|iXY)0e*C`ij3s;i&%@L`4<~(1%nWj9b22}A{e2=*l3B5py7(F1
zx;x0`eGjwPa+&Vw5JIk!S^F?&4s3(jUV4^O`P3&~X3w@p3X?*-_LEDDEvz{LGaXZE
zG3?lPfZ=!lksmj&@X`y9ar^3JdY4jo{P=!?Rkc*-CvmsA6_HlTL&0R;eCH19x&$dH
z*}9MP`EG9X&9UdeLk!)1llMC&*!A!Uw$ub^sIOzL?+!yVs}Nczxc@O8-c`@yPzNJ(
zD;umsk4@(TFTTvtz1u0ziR0ZjUZ;E9oL^E5+geylF51MIM|P1OuhZQ<MA?o5_y+FK
zGNp5H{}!6xeS@ZfWez|3C=Jy?8mcQ8?Yd3dz#>OZpXbQ_J(MKO@Xc?(#zNS~_Pu)<
zx&019b8AMDGCN6T^<EC`sHd)>k;IiDTKgvnM<mrdk8o&54chV){i8F0P9!Fc(`V08
zk?y0ty`7PnFpod`EW5XCrmiHF);ld|*|nTMd6@cOknF5PT7G(k!I=<|bvSV5JliVs
z8SZLjb|qp5A!}B4aek4817|pScn_P);`r9z{S$+u6UeN3p1bfUTefW_erbfxz9E(Z
z1w8lk8MbcQMwYfnd-o9Wx%E7A@fhVrC8YT0xb)@?YWJVw;I=w8@7Te5-@APO${n=K
zDlVQmN^Mm&*_nxST))gpW&`Dn@@~fj+x8w{^6nLyItMA;dzv$ccd>ovCg!?Y8CzOo
zVSbf|&z$1gH~$;WV~g(X0zy^0PB@UoGfzHFeqxxrT_YSi_YxN$-Aheb9&Ohyaie?8
zbO*zir5SXKH|-&NzMc1nmf3r7Kb>#=h_<069)JD?c5T~1iNBw(|KppiL?mH7mJ??m
zr?w)`c+JsD*qk-Noz=+*B$x91#mC6-Ezvd^!=6e%-K$w7P2V9oxSf_OSJ?FE1-4h0
zvFFGEhTi!nF5m1Vu4p?KAKgoRu#$|dWNyCs6Iwb)IrPj+9N1FN&K*J8-o3#pf{={o
z$*0dzTAW4yohEww``PyBc^ZQiY~H$q)q(f9**d`P(;wu>u0|TF%Nf1%20!hdvIh?K
zEWSU6BPY&MU0TZF6GyoH{jYPqYXbCTM9DEWh2G^GKe<iBLLt{cH4LfcJpK4l>Z)tW
z$w{UAz1NsduBS0+n#*?vX*_U@x!bSOI~!(VWR{mcb)LWePyZ_e^K16|rkxw6K*Xo>
z*ki}3D68Vg;VoSG_TSMwI0I70ms-wK=Z}%$TcM@9mswH7ho3&q{zLodyz-B{-89JH
z@Em8Kf1X`iwy?E2o7Ow6B$w}`DrSiP`<t(_{p^dR&$iM&GVjI(%f8lDsX2Ux!#nC&
z9B5@^ehop0xPq;mKYoy;^)aSIKE*LJtR$DPJlaKWX&Eiot}(YBChSk=-1#RcOVw!Z
z=pn32k_sC+b7Th@@e#VahX_l__A{U2GpEY9eEEG=b@!uMj6K-~U4g_rj_j-A%}Z~y
z6w*n^DQ8!C0&l)^m!i%4NS$lu%^Pjx)F0r$_B!e|Z6SVnfcAk&?BS@1*`d`15{h>6
z^qIrdRo4(78s^el##WM$3~>DHMH&iH=<4ibIiyq6@DP~`o!so3um>YqRokeFndaT*
zKBSOLPRz0Oz%jPf2WhCUWof9Hftd)~_U>c+?o|e7Ei9ZOJ-CM>JL{=!Xe4oMgtoCY
z_U+lk?JIAiXV<W`K=S6*8%(b!^W?J^*|BvCrRf^YO)X4?k~sO;!vrg<sV>T(t@#ei
z)}*&%kmObG;>7-~R5$LTG=7qAedl$0h9=o^;8Awe2dS^GV`Zd;-pM6&5z9jlKgRZ&
zLY7AR8JG@n=-8v&_`x^H+PR;ht3Tkq)^21*Esq`CMs>|5@?uxG(bUAeFP~G#_7V)%
zQkEIZ-PR7OcI+eE{XU&bF&x;_z>PQGU}0Tz4-y2-jgQfI@>!1V+QQDAH4NUl!<3%L
z>9c3qTvtbFemV<7O<ZdoMTEl?Z9UAXgIfqq_R-Nd%6dvAAA05_o40HwWo?}H!AX*f
zw)5P%gOn8(lc10B<G1gkONorj<KmO2$c@)&>*ye^a2w|z-A7$j6&dOA+<yCY#ujx%
zd@fI(J5GL1BHc|b^bb#R@XR@CE6Ultc@q=u*J$aP=EO5Evb%8;b(Q(FzV!;N<IBcG
zf;x+4%{;oN<?M2CulkCuT9+B&%gn=Y9@XOX;rOtqtgeyHG`6$(R~;?drsA6Gv#?zK
zZKhsYfV9K_vkS{4rlb;DoJ04;6Cco7Tv|m6AE_xxtgfyR8xYJdt`L)uOrk%+?EJDZ
z1Eu1o%B<O^z*9$=ir0LxB*pufUswSozSsofv@rh<U+*0sSCQs>e^2P7R!c2emSii~
zmYlP3j$n*227__V29suBm>DMR&fUGc@9aAp?(C9=IOJ>$*rdT=z<^1{3Fn-1l9ko1
zZguym`^QOjTH^NSW2^g|Q&ms=Jx_&G6-%TDh9ek>7LsW*G1YuQ3ZfB`Ee$lcrU^&#
z$O-yM#F}W1Crm(arXyVi<b)$+5-lV%5XsLY+1x~%0g?PDA)iWXtc8}=B#P!I98`(L
z63!bf{+uv+rj2-8hH!3}RJ?_@jKCktB|kSrCY7M6sfDy*w&xFqiAHjWHP#Vt%d{J&
z28B>Km)ua0bTUR`bIcm~!_}+9186=!(P$o|64W=g;Lpj0wpJ2pK_oXsYb-|E5Cp?{
zL~;V8ldUwh#K;&B3`dCM_=z_+5=*2|6qQgoOu98jn;{%S)ARuO`FW_MX=!REna&^t
z_(M75N5ZtlnrMnAEH5`-=?^Gr>Ke=ukx*_Pxj`STv1Vd%Q;zx3JV>>Y%qV2qT4_t0
zz1f`Hd~$;tjrH}8UfmwMDIuRF!(_af=2*hC9o0vEG#_2b(A3yyjwUJ^d3jL+x}c@0
zi9}l(MGp|okC0BbVjzeo1&bc|BQ5J+<-0TWv@|r3Obb*^CqJ4GsRW4(h_+TTS^z(3
z;>i@Da4zXY3vFqE<_{6g%SEJ3T?@@mw}F%R*_@#~{O|wGrG(-*wI^S)U0Tt}FNmTW
zNopHf@CC!<<>nyL2^t!k$=FG|w0V(&f-jVVZX}7f8RX{Xl8iNx5(=Sk9+8lbmc|C+
z_7s{>$jy%u4yrWP){)2v50E%-Q3(Y_(<vw@KuN`@Z%GplDkKdZEz^n`$RQa|@|%Yq
zX2XB~|C~ssXl!U8l@X|VfP!cqnM8~>G}DeU2L4bk(MS$$tt~V*$DI+@NVI^EPoe%w
zEp3KC59C;KOw!cUOe$mG4@Jn&^%IXL5$Oc+WZE-}V85!L8_7iojgVhZSJyyBC`1ZM
zxc&G4!tpo$m9_ipOu0KaEA3aM>O>0)P|~fmrcGU^)BrhZn&w2x!iQ$kYLIS&rt;fc
zyYk0BKTAuS;aM7De-uR{HylLK{P>g<^$pD!7KkgVkAi|Iq~g>!#Sn@{G+Ka9NmF0n
zNZJ}5v;Eb!aV-!gsHSLXZ6lBq!6%Z$THSSV*?RN`b0wTfTdo<Ppdg=2qM2A5_>>Gn
z^<$)y=zc$mSey*zwiS8ND1Oq^HZ++d8@iupG~cAVF@`{(`*R5Cf|hs!!)`wd$<09K
zBa$1Sr8#Df2>Swr{i?b2A{a)?B#5`A(R>!ph&0VjO%5hF{$gQ(<_nOY7a`r2#4yst
z;;mN4MkO~dpBx|5*Ef<f4E(_`y3s~!GG)zXn06LaGsF@}CnorVVe%q57^x)9%`K!2
zOe{>qXp?+J)dNJMCXFr4%_P%;a5zMwC5G??2>BouPasgqjpP#wsKlF_h$m7An?Aor
zA|9hT-s&W&PW%<%3+52eRRX~v$(ANs5^WCd<x4cCEQ}6?^N8g5X=$t{)@BgS%_Y^^
zf*uTGB;)1_BASo<{Cs?>AlBSOYg-C|MqYkC0iQ~1OEa-V8^N3~BH2pPP{<7jiMO<v
z_+zgLb0G*{AVf4TOgb5-u{n-`Vq(8)(9+t5M%uJdz!wM;%@31kuBW**g`(=FjVT6A
zjSaM=4OCx{g8W<(@i>YqXo<C2FHxBOsTm3C8k#}N!kI*jwzNP|HS!Ch_>~NGwe_S0
zW*m-$$RraqHn)&51i{>VB0-hbM4EImM#6MDob8yh5;iACv2rHQQXkJul=-(ku4XM#
zv+&!5b1Mqi&sF#CBRolg=ey)v9!ImCP00!{c1q!HRx6VtvVOO#RnXyvt{(8rO7NUI
zvJ*~ZSP;@1wCz9HNueWXyJGkKU_7&s+4V^DSr;V1y9v6xNZlR5mqth3FV3!u?r+&>
zyk;QrZvE#NldyNzb`7ST>|K(*Jt*nA`HHypR-$c`lFa19(vc(52#DOSQ-y7iUYWW&
z$a0Dfc<xXKQqt;92X<t$9oUjBLhFKJj}lmpqiBA{PMbl?jxX4Cp~;!r^|UXQ^%B^g
za3KVduDw|@Ya07EyvsLx&!MPU8_!CVDOs{_CxZ^3@jfjl5l#!v8cOd?59D;<i|rjQ
z`x$|v=uBU<h&}JF;!Gm#JTBztvpbVcn=aK3$bp;WN(SZ7>bQ>A*F1ifB?BQ~%KUq&
z76*9s-3?~an%$wa=|Iwtj6EK?2qKk3xO>hdHoW^fJ5DutXv(I;!3I|&UOh^1Aa~$p
z|H{6n$nACDLoV-j;AoH1&InnSAIP96+mMS>u5R7ByvY?P?WXIUayB(te(Vi_Rvor%
zvUT-SGUz}iw$QvCc30@_k`hnZpHhzU|L`Hayh}XT!NN`iqzRQQo=j`Z_Bs1u&&fg(
zmdU<i-)~nxNJlN2mC0C$%!{mR4<A|hnbn?sA?tK5M}EQ{K}Tsvtn7=PVlpj`EG4W`
z?9e0do>-(<+mN-lN%INUCY?AcoCxMF8<E?SEYdz6(p!If`aQe}dsl)ieM!VA)xCS$
zFhGY=l8qpXDY-${`Ml2dQ}?r~|H%Z+(@vK0|5O-j5AxpM<}>Ym&8A8QAhVcju&z90
z01)mKM@WhzD|cTPSD0QP>Oc;j=xDVlVT8yMW_wU}f4KLAOS=@Qe@gBiVE5?tr!-_s
zP-@&2eaCHN8F4#MwaYjdDczBRg9rQ-^NRfTma9{|0+Ip{S#oilPw{F3ArOWEs_J?9
zz>%@=D%b%Ghith;n<qggW1y;<wQ$x2O}AMam|RJUj+8FhXcpP7J#+y2UZ2QLAo1!l
z>s((?P?h<Y?ifY})moJ7ia6VYWJ2VGLE_W4E5)mKo9FCPW$aFzEdQUfBO{$cDAqdJ
zY~ak|(~D_YJ#wc)gLK9~)6MmX9VRRi{q`L<ws`F(8)H2_-9e!A+OyomPdFrA`|$AM
ze6o|lY#)~Ek$$wJ*X702P6s{PjuJRejdGc+CbMOrSkKv=2J@$c@mWFEE+zBo+Npb2
zhHOt~s6)7P{adu-%%sa>dro4BNA?aG?dX+kBFpy4<|CU-%Z0PaHQRQ87&}q%P-EXF
zi+-nGxkF%HbPzz0<(rBJuRN40Zds2EoS5Yxx^>SD_KpR5N$#Mm%)X0(*&S;~4^nL%
zsXRMrCyQpeY)3Zkjd8BMsg5Jr!turdHvqZzim(J@3&d5vKqAo*eER^K&7CAfA#59V
zy}-3V4{fqA?Dm8ZL<)-O6bV^$3vxSG(!0FKdNtVm=Sk9~O5BX!RVYGIuu!aLTw_+S
zdV(kvm3AS=uSv8E@=L4fUD*jm3h3DK6T-uf;vk<ORM3@)S5Ictpq}_ti*`l0a<mOu
zWW|7NJQLDCD=4bLeERjN!rWxNz0N#}lhn{5tP59U_HWsKI*J#<!@F!ZmX2i^$@<Yl
z^zPe-u+M(ZrQZf;nJ&e{j-`7c(1UsO>(>*%5H6StK+);ZX8^_d)~O=a8F%&u(<VQP
zwJ=x)e&Nt#>sYbc5(0#()1yy63PPGE*K)<Jpy?W_)X{MIa#=yhVcD_+$F3xO+iQzV
z-rNT5%|P4lHRT$K4JDozm~0T25`IOlU)BLGJ#J?UG)>3zk^IYv8j_rCqf&sJ&fVx+
zUg*Uqr%mC|hE&#r51OB{@-8^9MW{M`hYTVlwF#hVI=Z!x-wAQH+evNO2Tj{8+Ak_O
zFDyCwQP3^DI&3@jxH3}%5k`!=hO5U7BiASCT-7Ko>xy5sI<JzW$$Ox?FG>lQc_GZV
zy@!uw(zszn{HjBMW6QFvEjh@YRkKZ(PSd^>sl6=hmg=%0(!pGrW-R}(%DRsVayysP
ztD@N2XkNw*A<4kySrLM0*D89H<U4hGl3zVQ|EtC^dD3W#bNv>1W{XATz35pMb>*Fn
z(Gow7-fcN}>{mF!MDA?MIz|WyAKd4hpf3~;UI|sBthzTPk)Q`1<%Gh;9?^mCSeF%z
zu049wB|m7f<uM#fX2OwyJ!>Q5m}3mK?mZ(QQk{yOE66q@vJz02Nja9<u}=%+(Z64B
zf|}vjhaSpf<oL-<95a-#CLEh|u}3&Ucj=bYW<96)!}ROlhoEYZ9e+(*v#-fK+my<9
zkK_Ng+bK>iVBx6EH&%?vlDF(YuHBN=u47i7Ypz~|wKh+&iLijs^Ds`JK-i6$mhQgA
zZ8@o8GioW&sY^J)+yxlPIjx(D9FYW&6=*!qi}p7Z1w<yp*jraHd0<zIeuJ*VuV>-p
zLD*Tju#Dd}ET<P>U9;tZ5Y9^$ZZpCI5VleUps0Qp-hD4s1tCioCZh#qeHhTavuhZZ
zT5Vn(S}cclan3*~=*b<6Zz5N-S+&d9GPLQF!Am%Fd4OFq1ihe&JC@!`o)5?Hg`_iY
zg5}zR4LUB(W_yZdsCGHm_JtGtWTUpZaq!O$hxT)_B&^>;2ncm%@v<d!$&mt*ZJahe
zUXmTK6H*T_P}Gg(x6L8f(!E2K;-lw)VRXs&yA??7yFB3eF4O6@?GC96J%^hYub^+C
z3&^f-Db8=(9^5!A0j{G2yRc1zTgFKNrFyV?w>BU4{~kV&Mu1&VwrQ{fun>07UgFjE
zU2MQ{ZNp7-@Pfar7wmv0Y=x5jDH%4MrIY*G6-iiUjfvSZu(C&ITE}YB7p>rdUo1mQ
zTU*Zha#;S$`zXSYWM#|4QK#z%cH)yY@@li>q1c1Q@@3(aG3$4I4Fqj!E{j&&PKC7x
zq7a}4^11uIdnoj4&Zv#Nc)A@;wjnDZgh9o~Te$O<F$8qY(S+@v!V!^+-&yADjvPp~
zV~^6goeYO*$EIvVF=eMXiJyZiLNWizm?#7o8OnxDVcwLh&<x=u9gck~(r=p{40?^f
zl^LUYI;T@$1ylREG(M)xTEejMh#ieUs1#TBqfbSl#I%IX**cU0vXCxiFQKt362hJa
zl<9S!5iU4c*kjjh*{>Sl>e)*eS?(s8j(+VpB{!>>LD`eI0EX&k;w_6AS6yhnDKG3q
zT#@K-(cTHumK+^ArC2CDqOtUA%L6Yad3@C}DlZ;63?KkK+MN|E783PI>%WCS_@gXb
zv4YZoVcL{~+fvq%-G;0_K#7#G;*KRm)r^B%vTR)(v~+6ER><7iqwb0<8_Ad45f-N|
z7nkk2rO`TJB~x-@Bjc%)=bB$U%S{vo-GTk~xaqm%48IC6*F1m&ONuvmU_AqhyUhi{
zP5L~>=hkI|lJ^lOsyO@C+5ET`XMQpnKT{SgVZpR<_=b0)`>>t7^4v;{j6rdq(fsz0
zlL$2K<FV)8B}NX@<}PJSpJE!%9pJT<?^DyNa_y`oOd428;>uB8eSJ0O>S9cqdk_7h
z8N!`Pi5>r*m)`%L@*$I1IAa37bTgm7yOJ%(Q!M=X9b7wlI6Z^EXU~scvGV<|$?Y+c
zMYm4o%BFJ`?E)=S$c%Xl8P=nSx-)xNx$*<z`Mp>=|0aqfel8sOk#|1$f~JOxY~H$)
z8G}Y5JW{cTf-T3<!%V$p4x{>(lWM-q$M3CSe~n<t9k&xs#wjW-;D?V^^5yQcbQ^LF
zOK+J-ywxDr7x&<lJ(q82`7Hg}opjF&l5UHk=n9{`{R+D-Yuqw#0mFK9;=++1S+!;z
zErC+z&6`D4afJHwhgiM(BQD1k#@;ZOsUv!jYB<lTH{aq^BA*pYrt<MiuVM6>%#7YC
zUVZl~s>a^Rw815${89W(M|pYG-|=@H%#wLG5%smP^@|T#x9K1S-G_18q8TV{ZItF|
zuJd>-Mvzn7lSOlHrb{%y#bY~J^Zw^Fq%?+JeG@Y#4ki=7#NXe1lVd^<h;(7`&wfjn
zQjIsBeTMxPS{X5QCR0cD=Dl5-Wp)Y`11GR(=43*mg|9za&G$#!Si1aXzFhS(XIpcb
zw|pVHK75%oqAQD+%%?1(aA?bVR)4kyU#DtryKNzPYLc?zAe$v%ao0);RDUj0W-eml
zz%m-oALRAdS92*Ys2X}T3ua8jmu}+A_g3-Up-YtZ8o|=J(+F!xwtV_7pYAxvyu0sY
z*PG9C++WSlW)9$`msc}>(XSX1H4uRSM>ntI{jc{@)N2&WXJ1QDO>=a|2G)GKjV_~a
zW$Eb7#PmY+x_vzT+FBAKz}Oq-F?Dz~ZO!Lc_2TOo)l;~kM?KHH{T;=9C$MNjA&)=z
zAwrj?NEMT@&Q(`&+rk?Ot0}g9zJ`yt9AnVLncOtKA4(?9_iNwbo4sebZrQKs<ByTs
zrHr#%KIDzhc7mcxW@&kiVNfyjTK-q%b)+xt;>njjq*VzsWAO?`Ru$5CZa*)+_8#>K
z!yWmxqEV!H0Qdgt4u%b@;-CJ%e`50|Z}G|AS_DCET6Q1Z2UhU$3y<*a;VX3NIhwl`
zO(UeF_~wJx`EvIeHz{z@2!E&U{QCA=IA5QlN0FcRUVn~*^+Dz?yNv-|a=CPL3opO*
zDRB|vmZf(xtTLa(rGq^6>ie|j_vM$j^yHc6)=)X+Hio7)v+A28Y~Qkl^E1Xd0Wy$V
z!HQqr$=Iug6aL44WXJbwS^dRMXVJKgi3mZoVhFb{o<XOO!sbuk;p2@57<k=6RxF-H
zC6^d7=rVh@?Vzd6u!4%ojOs7ouDfTm`lXktji*74a?ibU_~7leOqhNveaj1o*Pdd{
z+wXI#wwVdD?q)zIgPg)rT90q!<xlsp<nDWjq~Zi~RKEY{9lk$s-ij8AoofPuysrIO
zzUUSTb2JWq{~>EO>_rI_a?{)e46Eu)-PygYeEmHP!=Sj=2!8#?kpx-}^Yn|WX*6<}
zGIIeF2AKY_^3C_Sl<+Za&Qiwoj3V+YsQY>YN~Dat7hlUqFTa84bsabMOz`>#-*8~p
zHjZ3-jkD1*C{zp_$GjO6Ir{lw$JfWrzJuYN6at-!xx9NltJZBn$t!2k^2K!ZwUO3!
zHmrG`&Bq#~SY|3r3*<5N)`g7g*M)|2`+05U`vj^cF}-gaFRuQU!s;>19UJ9^mFuY<
zHI3O*hoiLCvUc@b>^OFr-s5lKhM}dTd{Kf;$9QSYX9#pgTtAQDi#l`on>D<(eiz~5
zo-A52m$Hb;flZ(D?)t5S%7?IY&QuD58mIPdWX&hvk;&=8{H05%%u~qkQo#2c1gbC0
zwKEqoX+T$EmyYxLs&}~1*yiyyP(algFz!ZXPZ@^NTFZNHzRsT0^;D0(iJOOaA*mM-
zil5}AmG4pCn(+(-6!jRvoN=A_+iU9(Dio9rW6^aby!8Ga7A#vrRi_{)cW+?jhu_ks
z=d%15LonI^03ZNKL_t)}yUD~FDeuyW?VqjU%QJ#ZI*k^La?AW>r1q`l<87zezI78f
zOdI85jjGXi^mOK5KO805z`Jj}!k)9u44!l|H;(I%l8*8HCu{h2&sj?Qjbp|9sU+fU
zME!;w_S_MCXPOjmF5>BA%UvNnak$;=MplmEwqsYPSe-DZPv$0uS&5J{LzmS)KnL62
zdlNEYfwIh%BAmfLJPQtS8&d2wGd7WmRZi}T+KfqES$SC7{Oz{o=_0$Fxy-03zI^L>
z-urqxAHVr54?eY$nl=Lzosdz-E6+Vcy64plDh^ROat70TrFrs^M>t^gVdl7=6!x0H
z>=F4q|JcLqGx{)Z@&FVB(aL`G3^(z@laKP&`t7KZa_+os2H(E*49~s0jaiH45LW7V
z^XbR=e%C%;c<doweD51tg+k5oZM?UBKZTw0EoB)DnYNe_ks2O<_+eJ9+eADqNX4$O
z`TMW=e8Xlc$KJx2s)CLUqn)lm5h;%C+s^vWKj(~5#_VaM@o5?rRXsVs?eDz&*#V|p
zH;G87fCUS0WaGOp^2yet`2B%)@-%zPI-LuGeE02EB7qiuINHLX-rc!s%51KR*74Ve
zALmNmU~U*)MY^?#om)2Y`Iq0|>oJiVhF7A5yKwW=VSMrS3p}^-?_6q1qUZt2%DWO!
z75w2sx)v9pAP9G^pnrLo_g{XJ7uKw&Rmo@WvPE3nxSGdaTFdYo=hD3}j~nOD<NWul
zS+#BpS}^2r@97l-saP#rfB1&aHf*Bws%cE@S43X-t5`gJ2y32wl&4;Pm-E)?1Aa|s
z)4JFA{J6^WvHeh#3|l|_fL)gb`MKu4b$Ymz<;!QW^TU^T=ItMtGHVV6Y6EqmGh_Re
zlUvrCL1lg})TX&^-g0tIZQ!9NUZZ5}tqdv4WBknv@SpsW=igk12<Cb^CvHy+y7ZsG
zjYD&J>fuK?N^cfT9fTgOWX1AZ*zm>^Jo5Cb9J$<r7L2m&?&a)X`w|a5_AEP3)`Fr^
zQc+G=S5f`BRFs#XDbTsH51EtS@W?Z37<$t}s-g<9y0d)!)fa5oe1P$@Z=)(A2o{u5
z6>i~~r=CV1cs>0KeMEatWck%ydGYawcxL5V>Jw?spFB#B5mV?A_A&hGDKs5FNLp1r
zBMk^p0ugTe#m_nL_ZN8Z(WlvQvKEBk(#d^%x&CuD9&BOx@;QW6gTik8@L$}@gU`In
z=$me(la}%Hpxtj8;mj4DeC#n|)ib!RZyuGSXLD0kj7J`Pkb~L~=8o&`?8#>*$Dq=3
zX&;Y0`V0s6|Hyy++kdid$4S#~^gK>(dzYs^JjkM3uSQXV-2aDr*|z$5{^#irnYCgC
z1wl=!&-FP~4=`}VNREE_CJ#RG0>`f;7<$7}h6j)H;6s0<p?nJ04=6$@=*ygOoq7HF
zr+MZ5uSui~6n`$2<;7?~xS$Iq(U1#gl4rNYPV@X<A7{t@z5Mkrf8nh!c9~nASg>bf
zy)YE!-gyTXH@?S1FMPuF3zpKoFu?B5-{gf4zhUFrH+lHUH@MiEaoe$y4<i#JQ_!8N
z!Xm~`97D9MCp`)@5-m;a+rEj<KVMI4$w+35>xCi&`IUX?8E)alryk|a_1j6KQ&HWA
z<KMi^v#Yl=bHOYk0mW&<8OcL1VB!o48+P*aQ-5Q_&Qlnf41=aFWJI)<Cmwm2RiFGo
zJZ%6vfpi@!Uw(mDml0f59;NcCsZ1Z>=WmZb!md~uvnLOvr0)c-@2By%C!gbN+}it6
z3(&Q!i-|M2g>)^BqPD|PlfE;1zvtuckJ71g#L;cEYc=79{XFsf8v0I{O4l5XtESDR
zq;VI2Te+5kiry6FxE(yj8P$XG!IQYIpP#25eVo&QLCl%dhnkD$s2Dt+(vZ%;u@h*!
zbPivqUfe!!0_$FVnm0F`=GNIa5cWY%r*Z~VMEKy9CwXqo=d`p5R4qu;=`B3FYCTt7
zJB?0$KhqX2$A4l24?XcZ<>O~ExGa}M!v!{evw`*B?O@<_a~Rwumm$~Aq+8}7PrvvM
zF)arbgFvThrjM`Y!&jf?r8S>gTQa%Hg`J88qm?Y1JCP5ce}eaS#8^6i8Ua-hEbPjl
z9)*1H>XSVC)~B?z*`u<qf5ci_DD5?nvZ8V(jO<5I&w=D5ni+Eae7d#n;^8M=q-5+|
z#`MSoRJsiu0Vlra;YXfe<Dp9^3TUAM=B&7n%J?2W`|%X!5>;<3^hZirv2+^mKl>PO
z{ZPl<%WlH28eBZFpD)*c&X%LCELb#)h(E-<rE}T+(aXHMVK;#sX?JJOV4aH{O7<=5
zhSI)}Q^vd{cR$^MDbc=&$KQYQdsYFZgB!X71@csf^qP$6jKTvN&KsN7ccr6NvrkTN
zdO%rJ{y!Kqt8<4Zoq2XA8<oSiyhzk5G=((B4jt!8T^$#$B*+cxbg%AB=kf@5-TMnd
z1)XVV$)|Iwg4&b&xX{puf7f9~U1bJCHPgcO9s6jCCs0&DUYEgiuPEladAD;l0rG-z
z{5~BL?%1(2t)R$LSRf2RRaFW5wydYFsRhy8f)IlK<EOB2au0UyJ%LXT5y_R0ArnrW
z9j#>wMk`pjcp-Ji_K*n%$klUDRY=BbIDM{xmcnN66%hy)P~dCk$k~h3G#sX`rPS4#
zoqSuhrqfB1t#O(gW3;CH=z5r{?%k;9>*Jn#D~T3$=IEISCDmP7vS2a?cJITl`^k$0
zFk)v}`{iz?FT0;<&F6XT#aB3<^4f!ACPtD&`ww!dA&$@#{E=#URh5xeW>Y;;p*SZ<
zIJbz>e1*@CpXOXsjB_<j4)5}MEd+EQat#Zo^=0?oWBBwSk+6@#P$`K^M>%q?o|LuT
zDVzrlS59*LR1KZa)etBPN)Bq<yC4`YrK)=g!CRLyQurwdr3mVR-8=TMY|JQ5AJ3y^
z-zMrs9)0?Bqr7Yi_xH(AoEKnMejxy!Kw-aS#d#dsc7&QsDUP10$?{5<i=v_u8crYJ
zY;6Mtdyg_|L|4Lj3c2Zv>^f3ITvKTVayoURG$+PO`_9pl5wrk`KkTW`@s4x@hmT$)
z6-!W;^ifohPnVIixv?i~Kh}V%g~<&mG>bGRkDsJ1-9}xrL2gK)w0jRO9^B5k+GY&J
zIvOK!luc(<t{Fd`-aRy4T6xr5>!skXQ2~5`94hnUytwBaF%)7gpr|2cF1w4|%ey%p
z4-gFJp=qFqW{w=aNLy<TaU(#`2Z}Q=pg4}Kkm2yY{WLZ=vHRdThE){PQr3@XslvVY
z|B6sy0gbhVC>)cdDo>BNo@DoWGl}!;J8+Rp6=%?<bpy48KHZ}jvu<NlMiB7_3Ho%9
zlON}!(r|7c`%cu7P*jBQ(YtRiic30i|NVW56z6cbr~t9|5btfN=g#|oN8<86o_O{x
znr%Yp$e&%Sa9^Epmn1mPO94fYOSfp6mB-F-<uqJQ1t`eRLDaUkw+nlE$X?2kNpa{{
zJp;#%XTtOeeDv`?8crTU?cASbi*Mn`o?QgB0HKKKA6mMZ9eelD6ic9}3Q8zMTl@ki
z&NdQDpGAZx6Y}dcTU(U4VMSs8cONr)`D}jo`-$xSYBg_v`XfE6N;$B71GSCKh^7{c
z#}p@zo#WEwD_p!1Cnv{8S!Fd{x|Olw=l2rKFDAA>pQ55N>dzhKa{U$d9yrEG&s?zV
z3)$c8B^Qe4SpbX}$4^`&5l<4&==gm)6<zW<y6reM7tV9^^hFFO{z%hZD0D6=;mYX)
zoUg6p(Ej7}>sd;2-M1X5^)qD9Kn7GtShxNtk-|w-ly~8##dn$;jvHt3=^Bb?<Iw)2
zT&#~FG!3Bz$+R_a@W4r~Tqz)))(Hl4>D{Z6($cBi-#1NZevsTqE&~QmXU^~_I}csN
zr-#T11<37O$e9ygajE74hmM{DMI~8xoX<9&WZpf$C4S{7FTe0QXW9(UXb=j7ayt`@
zpW)EOI{d~yeAkY_ud65`#o>LsxL6-YsH!!{<GygE0Ezk<E+ivNm@=KIlREL$Zo%Pg
zUsEyZ2KH~>PyLlk96HrN*Dg^w(@0y>8GhV%iey`YWKs|aLt%L@N^`@k+xd5CQ)pMW
zLO>u`L|)q&4qR-&m)wm$Ya0GQgj*KhPF~#s&L)C{a>Don`4orSSbOp;XSE=g>PNQI
zyF6`c4-R<VJeNnuvSt9YZLa-A`9&q`KS$th<<9gwR^A|7pLM3-vGL5s0^Qq`4IM5}
zICmtAH7hU^IXU;U3kgZ7GQ;+0r1!qJ?)E~GM&XofDcV97o<eQu3?<<(K3ylBYU@DY
zf^%n1aBzDA&#w9g;SXWN>(DCm$e-Mc;=GMi_vu0X#Y1F-nRqz^utFixdWoY453>5T
z7dhLUCZIumYZ??kDO7TEg81~fbxH)ZC0hwa3djirXf|3ob@~c7^yx`-=NSkDXsK<Y
zs%I5PHh;oLo9pO3>1KRZvLRA!5D1eS4$;z_ByH^mZcisF3Pg&!lb1Zsv+sSxkU6~=
zRHVDz30x3Or|PLo=Fq*QGl5Rs$jb?O6(h@Al9g$MDbAib$<F#yy!!V|sDTh|%?<Rt
zW+_d_HuCP;A6eXY3Q=7H#bE!p@3ZZ5!Q5Z}6XW|;aO}q>gqlN9QItUUN^<n$P78+N
zj;5vC8aR6BFo)N!Wb3I$f_|NbhB(6-GW00#%7wblloUi9UC4Y2q0*ze2dB2L=fkhh
zQ#tB-{JKWnxyuAP^{28lpPH18NXFy?-m=&=NTdw%qfr9BdfE~-965N1^>4n+k=i6d
zU7@xqiF$Yssax-7R#_{npIn7Xnq!BLaeD8&e7xfvfnbp4x;lKLnyBj5ox++XD!X)I
zi)i1jf}p;>hTPI>N~3vH_UuOe`Ok^BG!oZ}=+mu`BaH%Gw9%4ip{6y&!0KYQo^Qpk
z8Z^W+B$65>B?Z*ySCSv}o3^1x>D#x0uWMs;4jI%pXPDT#f^R;2n5|7ETsz}t>qLw!
zF(`2P>=`;w=tpt%2U@iN>6S)fZ5e*}Y9kN+%e@@_>_tvC2~<6+1BiK?(YfXzLwgmo
z`9gw#E@+HL7+5Cw%Tw>+?{g!ICKbC|vvfd$okTn9VbJK_vnP?<J@oEg%$e;qoK79%
zh<=X0t=fnZ2xG)*U4V0d*u)#dNF#g!a)W-F4B<V_%8p#(3P%o|<byX}<YYq|IYEuf
zEp3oRmt${+AqDg_CypIw&+*N?ux<~&V3=g%Wl;TmyXHAQPWt%cBmYhR!jJeak-(o@
zNHiLyM~@182d%m@X;SFq=ZEpdVx%+XsANX*5eWsjk}^+5$i_^CIOm#m`j(e+)(BFl
zx6#<#D!YcusI<Pv#1MlchmUc`{Zshl@82`^hHLohxpfp)TutWe7FK_-ff?PdrH?da
z?wyPh1ai92tE!mVSP!&BJ&87VH(D98+ZxaC{G)#%+;bBDy!2Yuf3<_tr)!wrrzZtF
zP9gjO;`OzzoUzwQ3oe~M#epAB@Z9U4WB7s?iAI7w0_2YCPN%#AdRA4U?@=()Z3rz)
zr=lpK?v>=|N6e}WgH&1}FPewn*GM{@MhHl@CCMo*AQbS^mQJ@z`V=L@`I=@%Ra8*W
zIYmWjXU<9M@#N&CuKo(qVbzorY@>Ts1@#wqAVeG6wjSiZIrE8KKETmS2{bNnWdB}P
zz49`bTft9?#ze-f(J<sOda^(qVZ=Cg<QPYHt>Lqs=LrM@G}bmUb80tsY<Qi|j;f59
zG7Z0OaH*z_zFoUhRM0~A$`TF<0V2(|b#L<R+5pRb`wxumRm$mYXRMb<+;$W3S`wiy
z^yr+A=+%pK{S|W`O@Vs?jop#3WnlYis)^$lQp_4t%BPzex#{Yz{N<hVbn4r{`08r%
z52fkeC6C=-G`fBybE`njk*z%U%xdnq|8|BR{ttGZYe5(pQbH#`8pfAuqb*rSi@yui
zMR~MU_aRez5urutQ<CBF=ibBL?<QuB?h8f}ms@nIyOxsFD(IAF9S|q)c`GGwW~bB(
zN}jFHJPDX<-!}2y!4}B@+N~#ZDmU9y<a{b?X9DYY_COYZR@kZNPjXV_ye;BgSmG#I
zUeM>fonv2@1tJmWEQGa*bIRDlbBXjO6i8Wcb~-FS<IPx_TUDmdn#IWeB^=tZ2d$)<
za;=tahb~YxU?@$8H?wo!Aqoaw%iI}L89jV3O(%DA;P5d5-AA)v)=l*AU*OeuKBXa^
zqO#u*G8YeW<U#|A0;$$U&c||CxNt6$#*d-YA7{^@lVl7*(pSv<**DRnK(O!NNzy{0
zxv`$X*UaL^F}*o<a32TvAEJEpbmrW2E&aRYb7=Qo&NQUB{?-MIAJl{9hI$$=9%bLD
zI*ep9xz%HsfAeJGr}uKIHtxt$vG{Insi$<%6z1G8iJW8uiBt>QcAucSUr$c$_>o3`
z5&g@8Y}xuF=bD2oS~{PeCHb_qTx8>>o#uh*lDP;qNZ<ZFIDg_SMTG%Q)n&*}*YL&H
z+o&9I9kXw|j^P7)Q+sj`2T#{Ae9CODA3Km#V-2m%m)L$P$>J4vGwa4FbV{D#{dHec
z*VqgNy;wMR8WF{y>B<SV?KwuYyceBBExQh#1y#jJw{qrUE7RxBXUfE}RCf-sfA4Ni
zTu5-sqQwlY>OwkE&zA2u)0nbf9k6vTXlO}s&CT<<dT2GRjrBBKImOm(dx+&!vUK4b
zMh@*o?Xd$~Y6~#1R|(sH*iN9dC*}THwjDeNWN3`5%$&c7fkm)w$4*YwY0OzLk88${
zp)8zX-=U)<5^+N1BN!OD$f^&&BWYwfd-@XNXUyk@$rBmSqm(22wsYiU4Oh=v!h|6`
zA=%8%P2Y1VmTqU_&2^UvcOS))xzj0C&+*Etbu={AbG|vi(%Tj@cH|(^myU7t>}5`#
zk1=ogawd(via_iFhfZFgITK;=@_BR#sw5Iu_+j%N`i#4l0o{5qcKmP-ZCJxsyH3)W
z2(ob55(ZRu;!1rZmyT?wK3GYGdYNrUYUnj^7&ZGga`ePW+Dfis+1#5LF{Fx9yLWS?
zHASo`&X_5a_+sVDoNP!~w&-@g6h*;ECpmVyo&|T@!K5+6$%$WJ|A|v1BK=q~?*=LY
zt+XgQTYuP0=ib9;J-nS0%__r(RI_vA_rx-)i#yie+|r&5?p?{q2~#L&*vcz^-^Qg=
zM<^P0BXe(>!sro0s6V!yGxbS#bWm{@Eoa)2M0$*3!Hlc%HJ;+o#TJH+9LnymHV_YY
zVN{i$uQu&w|H(K@moMV#31g`)P}s5e2x&tGc@tx_V3c8fyRh-QZKPEd1;K?AN9lF-
zEM{Imnc;)`asI$oYV=Be{mY**?b=DCPi*4DZ+Fq!R7+mp$;`W93~DOL<rBNub>a$2
zCPAyeD~p%RrJ(I1dyZeiFw!Umy;wMFDn(i&`;J|3)*0I)b*dsbak_z97A@k2tB11f
z<5hgW|2zhQXhmO2(ihly%sgw^`6o>~Ct3xQMs?$@H`X$6Y=6F3`vu3(USPmAv$^q_
zk%)#GQi&S2?m14^-a`<V4szstJqiknFU*7~*U-JZH{(ZD@!8ueIdHDgjUN(JSB<)f
z`&KMq;?OEKfA#^}Ph95W$>WrboW|T6ucLpDD2I0KL+#vyGG7zh51ys6??75l?_~R)
z!$f+GWB$x(jJ|3x@pA{*cjy@Ty(ck$#?|C18O|Nv!R|w+Pzw99V9qomN`}UoV{F@d
z0wa?lmI*O)!F;OoGVDEgjI@B3mKY<a&SlDwat`d<M_%`Sh;s)xQkQ1XfNBoy+{&S2
z=NWeWY_1zIfX+p^?A`PQ$7>Rv;{z3->B?oIy~eU|)(sS8PV)NdFK9_+XlaRa_4FIr
zv+iwnp01^><qGwbuweeJOd30ie4~#2$Ip`AwHHN79Xk)60aXE2qkG?e#E$RbOx(x7
zK4t9Q@&gBs*DztmJf=;az<}<>96hju({+NG^A<9qR~MR_o4I^yKO1)(WANl#xncZZ
z^mL5FJGO8(Q^bn9?&0RClL*!y<h{=}x*Jdn96U|6Hq#(Fvuw#+uIdrwot1BMx;9R<
zteR52f$axQn>0!b`JLPV1ALts+_{mr*B|GqJ^|iayP0$6FEI4_x!gQuJS~U5;Jpp|
zNUMGZ3>v_ZA2(5-G?QpGRKVcgUD))+S`IhoGi!2hHgDNWCY>S~=)$rkv&l=GXXl}l
z)Fh(Ze%m}o_Ri<cm*3!2O)cqYKbFs(PFWyHb4IXp^Y<LSkYVBSCG;=vL|d$eAHLm8
z+|axORhF>|d(D6LNSvJ8IQK37<xZ8;DYVAW6({SF@)<j=X*UXnJNsxqD~}3#2XH_^
zsp{QFkd=kILyK~6L*hr8dUI#|<le7hUvh+`c$kuf<G$_cl(CqVa#-*4a(S6vhZKW!
zDvdzF=hLlBUqRCpjC2}Q^Nc`|Nt>f(ih^#PziD4nRL$IKKv6I<X;4&DRh4?NhSQr<
zH66`9Al%51N@q|M6<yaInlqWSd7hi@Ls7xVWXQ<vAk6wO*LDa+Mb}gW0+Gp(P8(=G
zpLw=nmJYyN2%0ejs%B2ZYMM$WlR?!q6k(oisc9NmbfcK`n%h2jjYdI`&SX$k6~i!J
zVHPOnS)hhVH;RgGz44Ywr!fR*nz?>aS5?v(iyn(V#gbz>W1y<0Ts2ig7#T-5@+;hC
zoo0DW(*TQZGdiQ&=Za?85<n)MGV65Ys2~i3R66788gpMZP1j8tgyL*1;n<I&;?s4f
zZCTe~?t5k!pj&5{B817KAy8En-IA?MkBWk-Yi{SqF<6}XT-&qWMwD#Xv{77J5uoY1
zDNoz(EF0B)Ar}APU#WZdVLsS@37>A-4u-3LRRL5DRb<GZqN@t&Od3_wF&{NZr!$VO
zeW83tO}>>mBSZY@&ri~9XdY~E9M}T4bUNkGsHqA@#<1+l+=55bHH=KgJR?zoR667E
zC?w1PVamZUM=DfRN7v1@c(%-Jy{cLC$i8A^(xlTFG~I`$D(2#8-PBP!BP^RT={D_I
zvvf@de46{e>AD6^$Jh01*FQ{MnMada^(hJ>ZQF?gikgKVX%tn*r)gH5Y0{YtimFR?
zA?0E=#-vjiLa}VZ!doMafpz|=G}7St2!nLm<Rxv%L;(x4Oe`_=s7Y9;fKX6#iui|r
z`6ciD$G@?sK8ep~>Ojc0;53sl?ForSgoK4Ex~3t7Vai`sUHedR?8o$fv%HKcilU-w
zs)?5te%UyKf@AxxU)y+L;+O6-u>eIg&lR=sGZZOiaR2VK5#i?R2JyyIPjkG!4S6Si
z8@GhVhgGK|kxr#hbsbevC7GuYm>6u+ZGeT-II=<2P!$WWY)rA@i7hh_4rXZz&K{OV
zCQZh`j2m`6=?od+VzWIu)GoGLn1Z6>(>2p>GZtQ}?y*{SOm<#=M3}NRurS3iG19_E
zGfwHwndMHLvZxRkF0UeE`lip)mtmOxp_#s5|8{7^9NBYl24U50%14fSu4&p?m2hMs
zgR`><I`+(>t99WiPkZ08KBlGH@^N!V@4k8Nd$#r<8B)29>K8(wRQ2lZF1_$%WmZqt
zDJR<1VZYknZjj6thBbni-6QidhNF0oLu4`TQjlG?qZ-Zc0y?N!V%|wq!QFhw+k<!H
z;8|oR8GQC{j}f?ax<ZoLkd$iQbAQWHu)ceG1480d0!^!VXD=sQJ=$z~6K%JQEJ0W1
zNX9L<=as(4E+nUJSH_7W^42ZcdXsc+XYh_(+i{)Mx(h@|?YZKV7vZ*-m(zk%WwvvB
zd9aBTicFEsmmN11y!sUuORn=etw}p{OGG>E*d*A!FsCltMjg6Uss;|F?eu=mHYL2g
zOJy9Hbp!x5@7DETK^c?B4r2f2uQ^%U>WIW^vyKwl$rkX~g_L(&&t~!Iu%9JAS$EcL
z$4u<{h0Cie8}o{XOONc@p<5PTibS`p3)$AQ^(MSOrMj|l)q!xkL+lAM9objMKI-gg
z=jmumoLH>N>$%9H!fQ*4gBu<k0~9UDz#;uOvu8IADR*Blto9vwNdk~<(2FbP{~`--
zI-=V$Ea!&2Ok1waE$iA}b|lI635Pc8qX+XDK4uJEBMQ5>ZQ}R^>*NiYW~mL2%_~wH
zvPmey<wv4KwjxhQT?j{~PSPZV7YC4(+5y&j|5~M-6E@s>Oj%m(SZ&HaE6cY1I&BHP
zg}r@H))FgMYbGU*k3&Z>!4a29s!5L!k}crsRtRSo3&-jlf517t$ni59avYhv&)FBu
z`Yc*xpRs8m>*LrFj;^w4aYpMLIYRb2IC)j9voW-+bL+cO-K&rNj7x#@Ri5IM7;ubB
zS|F;p&adD|+oNPpzM$9!WV?XZ0JCydi3X8nnDWO3P?1HuN5Zz-IvKW9QgRSemhF=v
zoA2!Xne1Uuuk*Qq1c@_AbDj>X%ah{a)uz|QE}MR@?BwKB=Gz10QaL%xcZb7q07jNc
z_IFvD-j78al0!*Mnm}PYlj)s80ur!2HcKZKz>rOgEdsl=OrvBsZZhjGdbjo35kFaB
z7$jLaf7+X9)+nimb=nb&9YB;U8&KNQ{*!v}L<i@-mp9iRA$z?A){6<Y%w74rpoOyq
zO*T{6mnFl`0(cxyM3S>(yDs&zmw5Ev4jA1y>B!YRFd{o}c{_ydt_iQ7W{0e7Tld!I
zea>6Xwl`VF+4ai*chr{m8Dx9@DZ7yH1zcZo*7u0^{^>sJ(kmfmmR68{cO(@rBki%n
z9ffEw6B#G7gyFel;Uai!I?L(W$?T_eB|Ej~K*sF@vmFN|Ol_Cg$Ygnaxn1AEdgp}S
z001BWNkl<Z<9qTfl@}8|aU}~UIuK^NSfB{$qGHE!9GdKAvvDMwZZ{rEw*3>h>=gv=
zOA%IMvQEuEa;L>jVl09H*>}z7<iyUSYfE181KF~(KXM}PNwzHC!AX2wPtCSnBpR|M
z;O?d9a3=YP)sW)#EL&$zaxEpSswq4-8B*+>$nBo2kSn)nv%$dru2=&1fVmBTUZYYJ
z5A}|Kr9Wo-w$L1nNJ6%w3zorITcDU52pSYr4rk`IgH6#olC=y~nBQ}{4&>%*2BX=$
zi}nr%pyhUE!HWC&)!mCI$<eImJf36|<uL|lXLXNqOxio_sd@#B2Xt+~wB=@1*lpM~
zjPG0Kgd#gYJC#a8Rcyc%D!qnJVbb7o2PC}7Vpi~Q0<rAO!sST_g88Kk>E8oY2!s&$
zqUFq(K8cVb^`|Vk+D7XJHY=zIn@Wv-qpxRluOe$tCmefoM;u+c1VTD@&kHz`aGksM
zJ21h5GM3)+0QcW9mqNdAROy<FYZtc3x&~_9ccYt|Oj!I$X6Jy9-Qh9i;L6aJQXsE0
z!v^(2br=^YdYEfx%%U=?IkxF;PAQzWWu*&C266{P!l-sOnOAug^QR1z?AMi_qIe71
z^3P7-gb+llMlxeePsh*XfMWCCZ^A8%?Nwm<quYohZy^LdMqST@o}JwV)IxD11~`1>
zRt)7=5B!c5w@gHH=;|O~I7*W^k?_geVZ*r&E=}3xZM``*E#&K7`P!q0a*%WDkvcD;
zy`4FJXZ>$KuShbJvjFc>7JCFm?!dTy?$NXz@T?1Zv^z7WjU|{JNu0lum8*UR3>`wA
z-$GMKUh?ReJS8o^eB#`fWaAl~&kC}xH{&J`SxV0qp)E7-WzpWZtypHu#A?JT>#fJN
z5*rq^P%$%ZnNDHImA4j(GHgg+d{$W{RKN{0uBXtaA}3IW&=@@a21fQNGN($jgS+Db
z7G8TZY>&Ttjoq$85H9V>jZ;RW3xU14a3I>1C3io-{ddi$&}WbIxG~CUC%X*+pwer^
zb&T&<jA3JuZFja9ZJd&YuOO|n&sLc8fXk=DBa$v;f3>cwex}U1i{C$RAN`AS%=m)J
z@aq@y```YWan(hZzFa(!_3L_?^^EN+LO3IV3OIR^^SqFF5ZRG5%QzLwccf%UWbrT8
z=Ye+_N36@LWrFTAc4U(5*v3#>mI`h~4pa8CxX;^_+Pr$5Me$JQ0E&~Gi7W$hNBNvU
zV+&vQIaNb364W-dIE-q#4}Tzt-|r(344^6s!J;0FAKHshAZX3^n-%D~k6_TerYH)T
zSS{~=@Ck)g{pb`_u^mX$eFOqQ`~jb%B2D+>^Z5w|gQ)h>2SE4v35EjBoIQ$Sl@C~F
zGz#aM-L^*tF4UMx({us>KVAD=#lY^J0R^AmT#KS=);b4at$Fb2`27L=J|8Hc>N@E}
z6Adj1r#@Zxq3fn>>~#t#7Tp0qKA+EA=aQ9(*tAp(9K-CXV+aKT=$eY2Tg2EgLx}|Z
z1pGc{7j;F|2m}K7{dRp80hYm-j-t}7{|Ne3<b$G`ZOilogXa1<%TE2e51-FRAmBHT
ztWq_6y2-OCN3*x0X&S*m0H4oXQ<QAH$cG<&M(19G$kV-%-%f%|05Emn^9Kk9{N_3`
z1$=(LrDFx3-{-t(sr&qn9PQ*-)pUZvAOXMb$js+6clXhJI+_Zqs?)X42<F{5k&xeS
zmRHQ9xSH#0Nu-2h|B9*+2nGoRd=?HUt_}l!G(}DtrI>2@i`jE_rvh?HdNX-=wWD`i
zC8}oHxUQ+zC>*A&gRX1=6xHOfprSwH`j<KKv)i`AvZnh;#p-ElwclP;@cI1&0)G5{
z(^eEA=+butgS!<G2nNwLJD4l@0)B#l0J>@h&Di<<y!6oy#*XffW<?p(-%Q`~TkD<_
zOgr-X17;g;w+Np<Kqwfr?B0}*&mVB?$&{0d&+jJ`44CUHE!o*vWm71G?VCXYK6}lL
z;@GYI#<`+u_<g$Bj^F3##tvp~2S<RS;Sbn!y9&~MenLS@Hx@T0egyF8<~3E*(XDl1
zy6!XAN-3tl+gRh+scrbSt|Fz3A3c!VpvjAKM^W(yO+2yb$%&RQcfmYL^TYUc4Ndo<
zt8PclUH9u^ij7g(Sm->j>-cSKZkL2wa@Bp-+IrK-G;2+yX8D`nr`sXYt<xWH_|YxA
z4g^eJw#A_PO*^yoW$Q=k!I<lvEdy6@*Hr5|)YsOLGG8`DQ8kLX^=HA%>&VM7?OE|h
z7&&$%g*ib2{(#fAuInUQ8fc2A%<!g|Jn53IJn>PcO*L)Qc}}ss!IL}*LY>MPJ7OR?
zKEJ6$1(J=I`1{&V>0CXCe4lO1fTEgy<a5`l0+!DN&~+7oLPg&Z45}<Zpqkijd4*%o
zR*bg&$c=j@RhsT25DG|kCMU_l(v{DYlWqT&?iE!v{lkvKMvCv&t>#KjcY0W-<0u&H
z`u6YaZHh3cTY)q<AT@=FS&FJSH0l<p%f99J`z+lEC#lM=%bt-_B+0k?$T^GM36SXU
zbbfGc-kJRrR-y^!i4C4aQ7Y{uR`P8R4Q^R?w+Aojavj1U+6|6!HBRsZ_Z45M0(XM7
zgACUSGT}~DEW7(Ys$(12wBs0QLfrnxKhs?_)3qX(6Ey}0KYEfcTA_O6_540>5XGGo
z9{Z2Kus@c^9d|9GG~y#CM`6|9o?^%8I^yvdskHkNv`_;~nZ2CxeM?XS0d{}-25Y`Q
z#vT9rAu3yr5|5P8vip6WUbB&K#ZZ2G|5CKpW*RQ;<N4=Tqg0IIxA)FRPq&e{xQ{2E
zd6QVUiU)pkH{o=Q){6&t{N*(?B{J<e)xyQx_uJo4?oSY@=*9lGzd$Xh;-4Rw&5Qr_
z5X!K*EbZUKe?RvjLvH*TE3T>J#Q8>)rV~8-+IvK*Msm-+cXRp6zq0Gt6##Cy=U<u9
zxtVk62%*|-Jp9~;_{xUy`(G}BrY1_ecj3*4|B3bc>YQZ7OmY+|`;TSetm_$6s&O9?
zK3)AbM}Wfe0o?QJf23Q_PQ3Cz|G~FMo4H}h&zUx)n3N&-X7!7#-E!2KT65~OzpH*m
z-L#zRD_VH`#WmzsPvSRs&m`TNpy||Bo_O&+(goH0`JW%4=IlvyCBv5=yvoJm>-eX6
z!`O4QhKkPc)I*PPHeJZ=_x*x?om5h(7^|Lnk{xFniN#{hHZa+p8IvA?Kxf3XB`m#u
zAc?j#2flfi*VpZ3#%;eOa`F?tI9J5Iv-|Pz|2&Pq`*?nL?_AQc7_sxac=VZfNQcY#
z`EPzj7p;|8{35SB_8i90Te*Eeg8zJG9W(CyBZ2KN@oi%Ti|5|RfNnY5cb|_hKX`{9
zPsbQ_{cP@<J%PXd%N8!j3{V41UHl7X4DN*C3-H;if8~=swfy4G4-&n+pG;vTHCx~2
znfJGVs(84PN9^nlRo38y(@FB@cjNc}>jCmJ2?C;q2mkUo=amxfx@#H5IU2!$!K$Yp
zXLp^(@?ZWVgA0<R^D0SgJK^<x+l>X351hb#_y3$jZ~l?Jr<+hh-T2F2|3Gt1D}`zU
zElPk_9{e{1KCYg72c_3!$W0#QU;h0WMEP+3;jS4NDT5qp_&@*W85+}VG`GY(0nG@N
za?gFYQxsGQ=W4wE*N531FXFF{{fUF$ZKCUdeth}dzq4-N6?%`K&9CQ=<w|W6C%1mY
z%b)y6ziSrri(7}#lvFwO-CI2O;buw(PvZ}_T|+}tlJh@)z;hq|=ydvA{}gDU&iwoz
z{((w8M#5<1rT>1MQ-SV0@bd+PREPwVJn=tIa6WGc&;DsC+qWI0x?d%)KKNh!c)Gz!
zs-0RDovY{F$DEN}$oN8h{pMqQu;~OP{jcG7civ2EV<VT3{=oCAKEq!&jNjkC1huu9
z#*6!S=9yItUh)TepI*na-<;t$|N9^L_L)DiqbZ+1{rjJ3K6eyUjUPXKjn8+ULtgS>
zkHnPr8pE%D*M-88Tvk5zAm1Ko<fhy1XG;H0q!E0v>RG-zoM!H_#SH1!58vJQ@Z-kM
zIqn<E)%k~b?43;j44!ru<Mbc+`>7Isefu<0$u#wczvbyy*W#-f$#3pkfG^!f;?jPe
zc=~k;#xLRzGyAjmSRIv}Gd%h~PjI#=m1V0z?*79+vgwUS*>Sp&3=!`A!vlQ#_KQ?a
zxRt>bQT#z4U$1_NPq!Uo=#0C$^V%MqIM;yEbdnd|`h-~z{3k>F7dYLbkvQ=k&%N^%
zg?%UUz#Y>`G{-r=`*U9SU?Um4P7*Ba$!~u)ofjW_ii@!{p-z4I&CjOt(vvT9)vZgI
zKemKF?f;RcWQM}>fh=A$i~jvf_}P65Uw^cQO&1`ipc~8X{SC!Ey0ZS|M_B#cQL2Vb
z;^%iQV&^;m#s1R`D0+~&zy1>=f|sdL3NTK8!=o>MO4=UWu)s<Y(5dGb?wNlrK21Y3
z9^<KJ-sDPard@bPK>2`){O*qx<QHnJeC#1MpR6YqYawkI|F^O;kCU@1^ZoZ!^}b~3
z&c3J9*;lfXKp=#KL|gy?6?I0(>x?raav4VSj>FvR?AIAbM@I!w5fu<bL<B(uAq2w~
zvXH&^^xmCL?`w5ez32W>?>Xl=Z&h$^@d;h^zGr!!-?N_OJ+Ez(SONP!_F49?orcQE
z;Wt0|7SA1NWbx+x{KZH2kQf}G@zg;ceEgMkomdVV_kENbmZ$joeLrPzBJ9@&cFp9)
zTe$sWdx;YxHqyj@ee1{c=g#4dyFW*n9%ih&p6~qNr}T_?2LM5=fLlNNMLM7RPhPI>
zhgcD}-T5UNfAMdqjepFo+vhTzRJ{GGAMnJXY7(QvjE(zkOpJ~&GB)AikS&cUp1+a1
zK6yO^ke_Vlt6%*A135Fe>z>aud7_`7zlE=V_b2p)XGdlvNX~%5iHV4hjU-jhM}|xD
zGFi&<(Zc7LL4A(gD3|WorD@TW$&~UFxoqFfOc{2@B(qE=XF@MMfOS|#d9}3PMmV26
ze6fZ{AAOcPt}n8;QsV>s_E(Ry@3#H?^fyi1yksE<PC<YDF~0hLzsuU2zQB8SF6Z1Y
zPH^DRn`~XRn57$cvFoyhywlV}M5R!v1jpZcjmbHeF>BsRuHAhFzkah0;+jKGJ<JOO
z^ZCkOUP<Y%UT4ooK1$V7-{sNQYaymcCSrWz!#8r~*AMX6!Fq1H<8HRCn!y{R70k}L
z$bI)eKwWzu<H;Z+z@jw^)?Ur5p||-z-*|#OxBm-^3`oiaYwL`2a~WuShi`rBXN(g=
zYfb-|*Lm{!^=!>^v!5zQaq_^g_~EfJzI^w`m|6S`%l7Z5{>7j0;DHwI{@?ecPeYI5
zK*0HvukxcuC&RWvzV=@~BNdBLIBy|iy{-K4TmQ_geV^sZRSP*i9%Ij%Jihu5e@|k@
zCT_oF7YE;|r+?fmTgs$9hs)ppDVDdL<e$I(1VgDJK61<T9D3v%{Q5*6fAUv%v3}9Z
z9O(m<ljPSAew))5$3Um3m=ohd%@Mx)onLX|XYXd+qH+qRUc+L2j=%lVV=TGy7H-&g
zIY;jQ4dP`{8OCPzVdYO*z>RxX@V&460}aX9+;#g0nf>Zfo_peF-2PX0@}=TI9{kUL
zr#D&5CqDQ-4nO)0o;=pW$L{z%TPl7_@5&pQbpCn%_Qx-$-@}_qG7qrNIja~-X<E*{
z#t$DUp<vH!zV@{T3Bo5EU;p(lShH@KeYHx_)cIV$Wh(#p`M*OizLqb4Y#%S3eFRjJ
zSDyG0Z>qKY{U2|p<mopV3a5wc081&$s0`YYYcmvEx$}B*tDfO+et3ZE|NI}g{_=&~
z_d+8FUVVeDs~1zTZU=j}R?u>~j4jhzxa00`bJ=ZQ;MSr`wFr&JU*^e|cd){Cs~Eaa
z%M;Jka^u=jUK!uW(%EHnsTkF-{G7ji>?EK4`+wt#rKSAtd=)ReHlG#qXR!6!eQcQh
zeGW7yGx~~+T;O-Fyur5Bi>SEl8uo8p#Icu#2*&$){DE&0+x}<VapgLGQ+=3E{PBCa
z_Y0rq%z&BcnarPmY8T(Q^A2kJi~0Aj-9`ED4zqCeCWfnD<=#gR&@(ua)p2XBS-kTm
z7Ih!si{E(`9g9OMg&u9@#or%c!;(3y*}0!9mQUgFc1ZLz@$mP)$+8dM!{w{yaipow
zxN*x(-lTcluyYRo`h|NKoVJ~>-u^zGKYA~>{PBl*{@#1|U0n|<rbtZ$-0-nK;_Q>(
z;?Xx6P;o_&G%Fb)(!%-23*)@>@Vy*vA3;SAYZsr<u6n-n^?#*w^C$TDo{dxmgX~@o
zU-|q0Lvr?Z?!0+72T%Nvhrj<r7ECMTp>KVYhW-SlOD6Hb8!A|L`Bat^oJTF0PR$Dg
z-1^Bs;kOU`2fsVp&*#4Md6vz6gO#`5K-F^(^3ww~eE9aybNQOtoQuV{P<5F5zWXd6
z`l~OoZvJGNPhX6{K1NmB5KCsyr{b#jbLz-xrl>wTTRXY<#_QxISF>Q*CU#x1ftQcf
zqH=N>XgtBU|Lai_#2{V(D#@$A_%XjY3tzhD(=2%DH6|@tMXKcp_doO;7kY<C*<k84
zd1UYc7bXf>wrC~SZm8f`T>{<PMt5&7&;RV_tXuNOCda3{=^cLfU@qmiZQ>hW`!2%)
z#3nBwHPXiuKlmyG%WvY3)~q1^jcTgie2L$zT+0mQW&o8+^7iwO^JMcR?)l62QeN;I
zI>!w~Zs&M^^BG=!dpb+!&1LJhovfMr6ff5fW~!=+G1zjJZ+_(~%-i)R-1MGpy#DRy
zd|!yBWLgD#))(`C@A*4eavfi|=~@m}J;4WV+{1JC{VOlk^rWBgRxwa<HoxbSEa|M`
zpZ@*(3`|6(S`=ul7-~Jsfj4KdZ2oMv?6{h>a~|VxY5^6c!`%PPdpXzA!+0_y+M<I5
zRrMWQzH}M!^(*N+b%=#iCU|{#7I$7dkAME_zhkIm8DGBh{XF~T*BSPj@E^&rvNY@8
zVHRapDMmZ0_}v@xSTTPlTdv;ChPglH(Aa#IPaWf%|9(GJE!~#J=pIBJ*(Yasm)he!
zJK!Z`egfx~wB<f-Kl?J<iE`4sxO81^G>xW=cJq0-q?T^eJ%N%D2~7fK7p}AJ#t3gm
z05Rl;!&5J@ll9G<ZMeQ{_W%izr5{sHGBKVc8Kms70)72Gj86pg_4ZOcx0G4y_VJnb
zSMb13eo1G15$p5fc$yftoIj<4&wcjey!6Neyi+&Kvg^ZxOM?;mhb9P;Npvhdy<R>k
zm+G#8^dX%A1Qb&?wT#O5euzceN6A&gJU^`IsXf30zmD^sKf05ONxl5bKYo+ep$V&N
z2Fs$NNsJEGGZCcd>FH&Gf!2^26Uq=w(B9EOBB>#+iQ?|yOVB?shE66)5hoUlGo?6>
zzJVbo5@YlZ4!Of|GqN3j$YAgv8R}(ZG(}=GLA*3ZVNn?~7A)gaxBnF>rRZrrK}tNj
ztd;v7i{(Ocgjm@^W>3neroVt`)2FlQ#*edlPm;L4$m@wTD<eJabPbM&O9vW7z|in8
z$z;I9MEcl(($aDkuUW>Och4stkI{VmMR(Aw)Ru5IH_c+&Kyq^@Gkx}aKK!S*GnP_}
zUu<GLh>;k)NLydPrm4d;UKm8h%9uW52J=7gF_vDFB(8gSXFSH#sU=+Oyhtht+-i&v
zSS*$uOK-1~xSf+*O727-{mFp5o^B{!LtH6>)G!0XNrHk2bUcSxOrw*b`!Y)^3UBX6
zwhh>6`NfztX%c<C7fA*wdb|5rKDm^+>-X`Q4^(pR4}MO2Fo!J#IppO{B01bcA_(Zd
z*iEp|wdZ_=`C^u+-6wryO^W11`sjh2SPaEDUETeV8mD()l+w}?HsACK_D?y(_n&)?
zlI2&DpXU!K&1d?$_i)Eem3-s7kI||s*j|`kuAE5pGcu+rOiU2Z%OMueWpdsq?Y$EO
zCTPYcQ&wKiZGZkhnFut4&9$U-4sShqFO#o-51;$He<P@VhJU*6*G!lx5bZFXS~i8Q
z_M@Z%4M7r=V#(FF@$oGw?tAQc#%63HKQF!5G&0i9coN1FNpkYSWmaS4A!9kYCFCdj
z=^Iax?CmBfDj_GHLs^NYt$TzZ^eI{^rcKG^bk`t(4j@U!$8M8AG2Y)ndw;^(45D8-
zs2>{aVJs0qe>ZtWC6tupF=N4U{`8K!NU1m%T8^Q$ub%*;ZM7sOU&;1O3z=TDka!9F
z=;8!dmzQzfhd;^Y;R$lbFEEhEqpYlq<?p$b1=~i*RipfNC`oK;iowAlk||Ac0&;Rq
z)bN93LscW2w_nNRnR6-6pG>N&f$<q@x%(5>^VGu+Q`?izO-s|qg{WYHj?fqCc<6Id
zqx26Y2$BIwaw*8m<;3qEVT#(%r$7G>#9I#Xm3x25a3J<g1A1(b#?~P=T)mSy^UIht
zX@riZml@THe4*{mh8;e}KPD1=3=B;$K9)d*N4j_)<C0Qi3=Ad+Qb_{RN39qitn9}B
zIM;mm&seWd^4N<nP_}Rhxw$b*u#L}Yi(_c0m(j5#!vnqK6;H}|xe^+3^Ge7W@27tv
zMQ%?I6jhR+mrrSaiq7s4v<^t6K&u>Lu^eK-C`_KuyeS3rUl?`U4V2kdlEalZ{~0^-
z&+zbzFEDx0YVz{p3{}6ycOENZ&xih!znGciEC2BCH1v%}X0J3IwKdGWc{`KU4EjoP
zC>Uy{Ta__m&I10&9e1IX;zDD!@CTA9<^3^t<wJ65e;;Gpjh|uf<a0du{PUD7*+PC^
zjPb^!{Kt=`u=keF@IM#E`NuE)Gv@~;G6z4xcGH7oY;GmJq79+8yn9e4dbX~)r$yUx
zE1CjgX8BY;K;RM~qe+HoWjFM?9LgD(6_{8GRpvhxCS}r#Ye%fxV<PZ237pB`w74sS
zp<y!YZ)~KOqG^j*FtePzoR}{?`%AHO-8NP&tKh2btEf45p4@^WbYh6%i2^oWwt|v^
zymZ;|F-9q3(USR;7UdJm$)g~iqHkz|mFw40URsnP&@@>A$4)e{`#rl@JbylG*RP~B
zcbMbH&eCxDZ60~(VSf4C0nT@HQ!sTVs;7p>9($a=yctX@%EKn5qUpA#DyCI#Wo6|G
zuG+dLeGbp)Fsb}$tXi{@?b|M+IM?l0@{+7DX&M|FVCMX#%qp8~*Zt)u7E4i8+rfq{
zn_01X4eOT9B_<Mak4puKM1qpC8O)eEDa2bq9wyg4omEuVHgoXVCwSy%Pw~ndrx;77
z>@1wp*d_EJHOA||euy8vaGp>8$t@Jo&(R}is6X`v4?Xl#p8oCcY3p{c-n!&Rfg+%>
z_7wFE4Ltp`hxyr)&+^u(y0p$rjFE_yuz2x2N()_9VCOUyj1Dz%;&cN?UwM|FJn}d%
z96Un*Sc-MKZlrSjF!#UI!>4ZBM?Bfj(Icm5JbRd*JoE^^`t7SUclA+o@(io5-ph)`
z^I5fF9pwdahI$8>wrDvkSFdIB+6w#1s>Jv>g;S<8tE`l~oN$+y9;bgqQ(0NTl+pr5
zhuXN9Tfwe%m2A28YK9tW8BfY!A?Bx5jLY_alE3-P`^gIeq8XIb*}_TXELgCBg$ou?
zJ}IC2>S|VOyN2aUma%hN1yxlo<QEmA6N8KdMQqx<n!?;%db(;+Gd8kuX$5<(+JXw)
z^0d`mG5G51>t(@`6-=L6iacsc>P$cp+jd<^#gdh5t|;bQ{Y8pOiW%weK^4tm!^#EZ
z=jW#J8SW=GeF5|5%%C7wkyluP9_wdlqL8gy)=*^j+4y5il8GUXHz{s??-u6FTg>_u
z3sJhAw~jaS&Vi@-@xzbt+)IZUN{lmO?i?DAzRZuG_#LyBEFnMKb*w}Jhl1MU$5?jt
z^{iSnkChuQV_IH}f}#?}hx<sxrm$&!C58F95%cf>3ZH&UE8o~q8|^IM+6~Ltv}ZSo
z##+Y46C63y%Lm?jCG+PkVEvk<<mbTKM;qDmo?R@OH=lLuS5cG`qql2_%C)OlvwjoH
z7fyEoX!+Op7~xRE(Z;+bm$80zB|EOzOl#eF+MCZ%+t9+x&-|Q6e)daVdE*p`ARtI3
znTQuLe^xm~1$m^#yJ;VrNkwrh)dTaGTi8Qye>+Ex)lhxpU}(dK=<4f8`@*wt^V855
zHnjKPeWcvXI5hmUHPy0w^Hz?%a){+yH*=w}5gjWat`iK5!kTrfDJw1%F3Ubn1VB80
zCRbgvjTP&+Fg?~!SMLZ@rq86~%prdI<cpNfSwK-d#KWGd4>2`CZDSW(HZS48%hhbT
zd>Q9!no%)DkQ!l>5-JzXr?e;!4NQ!WlUG{K>}iwJuW|F(a<NpAVqkcbISVV9KBXj`
zu!%uebV9+6j}%2k1q=`NQ&cvel}qN5m+R)%{MZy@UgZWhZdl2tE$eBmu13ccdOX3H
zDyE_$?F)zdTj?)Y!c}XRv2FL23^z3}IM~m*mIV9v?O?%zMXXs_K`xpgmEg_i9^w1H
zKFcTn^rMu-P1rH<JFVk|#f6Lv_K`n%E^C&}BQG~jZs{}%hMV~LqmR+1r!sp|fj`&{
zfAn-UGPz<i9q$}u#@e0q)}LW$u$~iVTR8OG6Fl_jFZkW>-yx9-KnDyCPOxx!1!bkh
z#L@y}q<@&%m6c4JGAZ0c>+6$3QCL#UNPjQ!k~yqjF`vBL9O9)jC>d+w@t;0S3+2oz
zaWs0VqD-#DcksioTlsW0XYmrWAg;-WIgC1(uko^J)4r@-&o<#oA9)}eHA4|WfJweg
z9XW$xH#XblRAuYRSvxtanZ6f}*hQc6oEBTb?41iGVIOc%f`ISx001BWNkl<ZiqU}!
z4Cc;Z|J9r6I)9E4okM411IZZFH+B#o@1?$Dh{SL|3s-Gsy!HUUJb0S!w)3Q>ui~nW
zi#hVj%k&KPQ**u(f-xG~hq!Xjb(9C)oT;v+Wgw4huh~Y=nb)Z88l<+So}9vBI%=!v
z8;42xnse0+oIiJxWcf07U$L3kPzN=Q9n_yYP0q}f?B2PRNwHC?Yw8)#oyyMLd)U6N
zg14Xl1#i`~VV|P0f`!rEHrgkqu=|QNG@Lp?eO)z8ZLPHQXtwXz$c5@tG&DC**K~oL
zf)XYMIym3fLntT|n%)bolq|fAtJcn=rmC7$UNHl$^|TKrC@spRp|+O#nrh;+SF>@+
zO!A5fs5*R*#-2pjhA-pMzHao?3ij^W$Y4_~ox>@Lasz6r8qoR01cU9=)HiUtzMHLA
z?PmS5IrOwP(A?RFCfv8B+(^5isF>jk^&CBRlGuzT<n}f3=G(_9Ubul>J1(POqMz!z
z^CW^Cit`mU)paC-v=Zgy7ZDrnrnb3@g5qNO+Uu#VtD&hchuwRxp<+e}O|?~Y4J05r
zLQB^e*Ic)k;?Z_$+j_l0fl`WKVuZ8j&a-0cRcu?gl(CB~G<K)hxMnWTKlKzR&($({
z;R?n(YB_P@B>D5!vwO!z3WGtaY8vQjtEXwSls&t5QXU`XY}I-CyW1HnUd)!|<(xWx
zlJgC<bo7sr80coacmda4yOq(_I$HZiQ6y<?>SW`t{Zy2Ua_n>sr>on!V$WWtry6<i
zv1b`bXo^dVX{kQPKq^j2?gUlU=b6xo;wdvJrI%AxEd=441)P&f+fgj9kYy`Zvu5=g
z)~#Jl&R{FYkG{<XW^>)uo2YyB7d&^kj^55@^t4r6wP8L-4!lUu=m;lIoTC1sV&C4&
zX*qS2#@0rvo4do@U2b?tf9q~(VA|@P>|8m6lV@wmFHqFgx1q-SY3v?jwDUYYiGYFr
zaW-GRfum3VnAd7A(q31?f~|X5JuAlZFC1d5zm2BuQB<Ovma%EPZ~s*c*1tnlbrq?Z
z>)5@yf;WEq8wSQlIa7auDJ8j_J$a6xpoD^ni=3`+=giS#EZVw<9qX5Hq5do_7yCK>
z_6b&Bv4<TSR*>j!rlzTb;u)3f-@lhNv*SGalb_MtKNk8bGnh`>>1cN&)jhfFyKWb=
z3KE<;T}#{f8cLS!VB7L?4!rO(!$aNFcaBh+JI<M^MpR)b$=*g9ItOe!#((SND5uXh
zv-R5Rn4jCp0}uU*zR?NlPaUJAatl{)UCYRY2I^aTXs$X=YWhlcU%44I&_>Pq4lcBw
zXG-N3DkqO~@@x&YRaNvSprkmT+H+Nm1)=qMYd+~rMdjq769bejT+gJz(>(U<AqM-q
zINi|4<=0%xy2?3RY^kI9LO;Pof{wl<yRO^A%-mtB8(JBeh|}9}gkudC8Sbd!WL*c<
zr%zM9Y%^D1zJa_%FVzjr)Sf*>Z2AhW*}0X;@iA&@>PW;2Q6m?rZRw)0xR{=nS~>?3
zAvP{PBu7Tc$qRV%;6Vm7oIZAxrshWan8vOv)^YymA)0#nsjF{>yke3A9W=D{q}hz;
zvi|Zd=z#=_7f<Dx$9_s}*9axk7qR!cJ*=Kvz|%i}g8GX?p*-Xhs*1$u1o2c4um1iB
zNj=7qw@)%W5uj6Jv~`ZKbI%?oPjph<(!<1XKLhb;T(|d1baxZYJtLG9=1^Z%%Xqwi
z+=)JF&bJdN=xT3c=8DVNT3N=qsybr%h4eJl(KQ-SlCP+%u3;=?Y%APiqykzRn_05y
zDi)U}c<t~Bx-YcT);;WRl_I@CUr&ruzF;*4y(f9>+1E*=fYd0>og-Ym?>b7yI;d)B
z;%waow(q`{vhfBU`uT6@A5Bnq<_uGpZDr@C6%2MX(9qUPUU3OyT@4&Lc9K-tVoHWv
zY40DCLlczJw4QHd@upo=PEB&~&=LB&+h`xkWyh{PT)AmEr(b@GgQuF(F(F*_Gjp%0
zi3zldapH{wbS47oj~}M_LN_N)H?r-Ty<E0p0ex)^oNw<zYfW2oGb=9N!|GXioT;iO
zsiC{Qndz&p;PT}&s5*U`M36?%&8TbI>g!mrWj8CP!;1&rWTdZ+mi{<9cJF29merhj
z@fW;urimbY0@ayLIAbQ?_7Ju$pI$Zt2{#d@#KZU6`W53cy(L;LJJilrE0!(O*yPJt
za@fANw2B*9d*wxg7~qBCh&na_w2v0MHGMa2=r8ej=fatO#FKSVTJ2aXNY4bsVlmsM
zeIidBoM+mMsqk^{Ag}@4B%JB8LHe<8rBEscIv^0VMroHbjDcuVsq{3r-*;FiTyj(4
zsnS?waAHW$bZXbPGD)$c4?x9Y5Wa^mT`YWtEj+(12>UYln1E~OE}mAw%{N^~NkK7X
zu}<#0|4}ZECI#-wlbO~bkKr2w(odtNanf25vYlQ63h8rG$cEMQ?;r?qA$(7t{hl31
z70NEHBZdxz4jf;$T~zqi3l+|K=n%K`DfCh&CFW7tT<Xj=BUeMCQab(aU8!dXygV5?
zy->06jS8XO2SNI+1o2Q_X&hDhRC;tKJu$PKqAecgBll$Ja0)xHk8GuRG*A5*Ju`aZ
z(GB(qPG<|s;^^Btb~tL@4;hO^Y!YYR+Tt3E=#H`pqUqb(gVjBsM&C9gXIdG##$wi4
zX4;{{Cua2)OS@rBDV2UZOjf%#eLCM`Wv)y>Fe@EK`E)32p|@cjXA5b36yZCFt?p&^
zZT8@Jz9X~^^M5lyORG-AN148(HY{~0)D`D@LcC)NCFnM7ru}2o7djcaQwm%d0qmjJ
zzHj&2lk!jNl=DL>w}?-D@k`V{{54*zyJ+8}Vf?7&FO-Gs+fL#{W0s9mf%Pc{HcONA
zkCs>8Z)Vzu)VG)o_*QzMT}AD~=%(8umF~;fj1K*gbHjn}3k?=VRw3(J=w8~zyF|s8
zmk9jErll<yeKh(M@xcb{F!%9&mQwbuUs4}q0xRP)V;R^&EEab$*7OwdPm)IG6GC5T
z^w4qWVuzV_Q;rL(Hpa!7Sgb^9WZKfv)XPUi(yNqmuif&!kuhdToS&k3-P#N>%9<0T
zUGR}in7dg5q6;Z<BVf8kKY7GsPkSYl2W-avA3=i!*#S5s$J|ga63z4tWfKL7m&M3l
z%A$fIphzBlf#95lRRV{g$8uDfqQ3c|1bya{p#&NHI;q8CIZQ1pryv*ly1N-pOgM!`
zN?7)puB#)mkM``2P%yfnJen^>BL<snxMbz?63Xv!khxFu59x}u2haE-eVW9@>X9g&
zO^?VzZA7fr8IuLGY}`tFik3`c6}{s-Bxh0CXp3IUY|Gj&nfjMmK8v)hL;FMWxkO)9
zUmUt_(2|W_JX|iN3%=G!7n+HGrjPQ`_kCQpc1;rOn$O|_Jf9^8z1j8O_k8{pUz^{}
z)(kDuXL-D4@Fa0@Rur{!zqiOh9~S_ld?|q`7Ud}I8byraNs~wp4>Fbttj<ecE&I$y
zPo_kpSV<fsbY!(lNn^I_m&WQ+_8E;m+1i&jj}AFdKL1gj$D{w<d_>rnaalrp{2arl
zM2S-A#3rn1^~=`_yqMr@7lqg<;KxMMrW|BOW0;SJ$aOMetV31eYIyNth4l$o`!;O2
z11A-T*+@@PI=o^ADHY)x8(aVv=4{)O0W6Z_cnO&6D4Nrm$6?*8GYB2D_BI$AuWc{6
zNROOKmjdt@^q4hLP?B)hSHyy{&H^Gv7)ZdQ0Tid$+A*kuApIUYBMb#Pl_C`c#NzSD
zNGR$aaabCM4(YmP<me!FtjUZ{u$me4CC%yg)qzUCXVD~V0zNb1_MfCl&w`}i*=h=F
zQQ!Y=XB)z4W-Fcu)>28MpJ<y--$ehR^%|WF!uPm(qeTf-mVtzalZ8?fJvrTc34%b#
zRasusV$X2nL^rWY+mgZ852+?Fxd87G8qsmXf0}PIGlIh`3zPnU!gsP}uSa_N9`E%u
zD3z&a;<|I>_+}{_yc@|19}>17b-39vl6K%jo;j-@;t;{ytvG~@*gBcGNc+l``2DxO
zo4Vm!Orjk{2RahJ3??(cxCAg!pNnf(`tT@b8G}(nr|2tVPQdyvU+27;OqA1uC{D(2
zMgym`8&ChT^&j~WRYD^7vS6k|%si>dCdjm3H{%+CJR5#l$aEKt&AI-4Jf)YABGK;M
zoFaxfzmUbpW>Kj>_z`t{_yo-%51tQ+C{HvNcz8%371qfdqTtEIk2?-n&vcCLSXuc?
zpjK{@ey_fxD?^u}7}?Il_QIsiOK79?NBH#kLLrW`GmJTfGe6zw#IhBWXesZOb3T|C
zDH!p%*WoBv6fL<Dd<t;1{A)W?xPYDht#z6)`_~4&@Vj=)H1@uB|BN8*Z$ne$yPbB;
zQZ_2k{hz**T}x)Vn?Z`z*WAk2@BJR1{lHe@Du`6{p~TV{mMU4m=kNMUW}DrkIxQ@-
zEM|E$|HA(%BP&U>2@qN{y>dH$|1V$ROP{-q@`AV{$tqd8{M=RB*>}b2a5KK9Y{d@l
z_|y&L2Pq+>&=7>+{N)YMR;MI>8k5L+a*F)3{m6Pcy*ywXt(<KuoVkX-y5oOPo|noZ
zS8Zv&q-?`~Rt_qnLuiP_*|6{9{MUEx<um)%;T*hnZ=;ZMbnk`|P8AO)tt~`SAsogC
z2t2)!_G<Ln1PkSw(n3c(6BpP@TQ!(jC!~B`v$VRUE^Tn|J-Z_ngQvSCgSL~C5=00m
z?5lyOBl^QRJO_#Ut}`%G*jLkKb<&o7+Xh0mUIJ|gb&?iIuecU^aI-X*Zpt`O-^Uo&
zXY54sX|dDRok#T6`E2JST)P$yftj~55!R8uU-};eZO%o@NMk@ZeTP2U;A`wb;O_8|
zswrjYY{(2$JjjfHN7$9&NBQ*)jn?x?Ws>@ZP%-UR7Yx&0(i_o5d(qQ<H>2xZlK~Q!
z{6vl3`WGKtL|sFd#Z0-xA=;EDQ$d@(AH@}C3&uT|c0`Y%2|GJeB2kmcY_<WkWXt@H
zn4`nRv?Ka4vWPw$<Kl(!DVpd2Uk(Vp<x<>ri<$7T&L2i?)Zw8_l7!>O^k-eKefSic
z(IEU}4rr$|O3ajat-9W$GY08Sm91|SDovmRME;g-+Sxq8hzuM~sYMciY1a_yEIhYx
z3QLwOq&PRAsjh~ufibj(;;FM(Rxyu>!3$K^H8T+?<}F-6VjLFCDW|>R9Bti06iuDO
zvWhtjU1+Ah<pL=kkY75T<&_JGPYhC9ThHKFlAMA`EM2yQA~jA!T@5`$<Jdi3R)m^K
z<<pruZ!womnng=pH5Ud(IdkA~9w?Z~`=%CI98rpCb1GOgYZC2^wX|Lw1P%EmWh|>)
zOnjoB>Y4^dVtLG%IgQ+yX3FeEOxArgb@Y)}QqHo<MdT!hsH>}IU{pK;Efgu1TS!Ia
zQc80Y)Ku3pG#)T#$!gyFkq=YceT?T{JI;tX*jVZC9j6*}jOp_ivwPq5tSe|^U@S(}
znX|<6i<voVHp^D7BQ?=SbwdluKvPgUjb)V!ndtAJ_IxW7_KXrE+O+QE6qHiAw36c3
zIF0pnTpUbLJY_l+3+E9}4O3rR$KXVq1&bFkr6`Y~;W3KxG&MDKjHMLQ=PqX9%u?DK
zYH9Bo3KKbPWubsrZUHl<Pb03Ne9mHulif637$Co-oaIXwksP={bwe}B0H(~COL074
z>de^${q0mWU1a9mIpoFjDVvl_eRVZ`qZ6oj9t$d#GOY;eYpUrTnc$rlALR!*C0swb
z2o1zM`^g-U8{I}{I(yF!Of$w#!r*hsC3<6*!b!^MZ<{14Cx_4&9W6EpIcJ=%Beph#
ziL<dsJL=Z1y=*fnnOh0u42$p?1_QIS;JqW$%}B`kI5|@z^}x!&U}3PA*`-Ke(g_K!
zzINBapQUp28vO9exO%32;UlcCH9&_gxr_wkqdkY{in`f5fvxkEu8icz#XA@HOb}C2
z#zF)Vb)Vem(wZUF&=m4&bcIl!A`DA9tpu%}Nc-@YiXBAS+J#pY7x-i%D*c1SK}C48
zeOtkav`6ZUi@1_knaVhftTT2r2%xPDBDQMzFiE}P2`Mi#vEs51LApi4wBP~*j(5S6
z+wZ8X?^4R?Lb_dl8P!jCOrHi4lM84Aflcz9?)Z9Z>2Zw+e(>b#>S`fCg-e!15|l*;
z<>1SXqlBJW|7Ge3Ut{gScC7@K#<jZ16brmcmo?;?KINglPSxBbl$@E1mrX1GvfN*w
zENxl93LBONVinWN@(lyV8bT38>qn9~k7DTnq%##|q9Rv1U|QuSc3rlFqNxkH`TbXL
z`mMJJ%2sjD-+qqLV2qhd7SmgGj=p4^JO1fDcFd14IFUz5&_nZB8TZ_C7n4=MwKsnV
znom;QrTNpp{sPM<rzkC-$7oLz9sQacKYbThR}>N}nMK}MC(T`h5k*QHz^W@h$aU+c
zktklsE!VB(*x_Rgk0+V7bTi8fdU)gXc>)bHS6$2h`pXXzkC$-cP1n(Q>`nS&Gr9AB
zeSyWr0h8t~B-wtRj`1S)T)BpZ&Ln?y+xuuc`wp%BasK4azh+fAl$OnAtha^EzR|RD
zC}-rva=G@l&vDD;^O>}0GkeyR@z#-ZOrE`nbsINPI?_Qy`vuzCyO_`d_b@S^GJOtf
z)?dc#`~>IQE^wiv9m*>B@cXYNn9%II<z_~zkI<GZ<IXRBo<$|Fa>sSdO15#Xx!aeQ
zQfQUW^?&qP_O6;jtZ*6y$!;1thgi1Z3N|g9Mfsxj?BBM4<7axg{q8#`FP+KF`_@pp
zU?X`K&k!uw#oeENAE;vX@4t$cQ%C6@nXtNNVr=fDIb6AEF>U=heEdV#QGenHeQFxF
z-+ecglb~|z9u~$sId#5^?YG{==RdTCp5774Cgo7yF~pz!&A)Qlq#<$^Ze!2NBHlP!
z&yElN8K2y@230bj{X181?5$%Ybc`958(Esy$)PhXn1o9@N$1>z9jGuBF@Z=papO&_
zJ;~SsXp`V5W9H6&U83lPD(7&C+&5383)^$Kgfn_uT-dr<evl(-vwgs?>)W>rLJkYz
ziY?!s^+$r14KC&$?&Fe~OgYFe(WOEJE}HD$t!>D*qRE($flLU64|Z)S*_8DDUPE40
zIU^=zCxegUNAzd=ka#HfSC$Fw$n}9otL4vcUC5dYx*bO8maQI2I+Y0>$_Bt}I^gh%
zByjeOF6GB0OPlP-QD<jhJSI(_MwT)b3B0sSL}edg8{st@xJ42q7iuk%!XHGFYZ-H`
z$OZoC5R|!Onvpa{?i&iMuMjYaKSB?bU)M`OLVi764CNu|kbA65pE8+I+4}PJ%%T_C
zl5d(3xo5~PGvW30!uVKEZzFhMavYWA;9q5J+@G?_+-Q?wD>&Ozq8(ExCTEJ3U?^SF
zjT)SS$iAGB8otx*%M4$HZqc5hJafI*&x+lPSlwP8yS8t|7Al=mj1BgZGi46TD;KhG
z`DM(X8fW>Hdl)(T6!$*(5Z}M=N7N0DqkvS<&od7{%u~O7oR^L_F>m#D;!Uscy&pcn
z51u;1j_oUm$KvGX#0kbnIrio&RJZhjQsm_3Kx&-o<A*p}-65bh!ZB=&@#4=P;d|fy
z4jp+5sF*$3w^AU`F)rJ_nR74wjPKt6JzlQST)uV=vsQ0oTJKT5b^n9>@csul*4hg|
z(d-rc-4{Q@Tfh7fZ`XIBRg4@%<6Ey%)!bvrkVc+cP|l8Z<vjSk`?>c!KO%qe24+vm
z<@~8PIdbki=Z?P4!8cDZlG0WSY%EZk)|z)Xe4?7#vqyR1r9*TLkE77ERvqRi4?e&z
zUvH&yei`#tT+ZTx4qiO?28Z6RW!sfo$c-uYDm9B%_|C!91l1?s<nY;6VoEbKG{ofD
zi>X|+fMu)KQ&t$z-_y*SZ@k6%nzOuFdx7awW^m2Et2ux4Ag>)dMDyrWHm{lQEoaDt
zVcL={eDU^=^2%f1=S0gOb1FBoVsbCPIq*8Ky>*stJGT>83e7mjUVfIxfAJ(w{N{BM
z$$-(Jc3ynu8J>Fn_ms?9#FR-h*mL!2UVHHc-gx~Dg6XSSHm8K>M~Av8m-4fITx9Z>
z1Z)DGt)}wgv{`;JrCn@u9$4C%OwO{d&e##SyqXydteev5Wx8}#=aAj<Lu8gL{N`p=
z{AFY;`%37coLO`@$lw~K&$=9op|;pX3TI*v)^iv;Cn&dInD*>O20zR7>pSo0+`Ekq
zf_VsMJY0KGlFdwpH`^!W9nlBlH4P1tUado(jNFuR`e*{NW61+a&Maimh<1EP_AJjv
z&$0w!@iEDUA3vqOh<!|ke~%jNKnT0m9!=)EnDxYjDuKTU@J>`>e#-)_GV!p37Fjf$
zy%F|n5)Cgg%cA#giPI$t*zQe2E0_N=BDkn;QL_9hlk8~l^pePSBR}mFn7S)oozS4I
z4r+w1M&;%jv!qCC*0Nn~U*l+OBZ^fdv$Jg=+i}SWUfuy#h8`Y<7t@8GkvW>R1|7{>
zWE<MMDlt|F%(THQeLoh>`Lw7koA8}^<RcW!C0b<Yj8+Z^n>dP7`@^2RZrmQJ%w-s~
zF`xAHAeqC>pZpX9C!XaiU;7qkT8D_o(vJkEQUNg)vyU4C=+Qy?#{*)qc)Fa5p>+T%
z{fNAZ#mNc!`0jsvh2Pf=aqH*4#7C}LNjw<kiSPagPaSSy*KK$3*_(HyUt8idOR%I#
z>GV+yf$^rctQLZZ*+>6_AWc&&rU-&m8Wlk|qZfJn>DRb=-(IE`#EGT)_`!dEm4ns&
z-24|`;I?bl5)(=(jW~uT{rEyU=*sNgn+-_oo9EEnO(kTjNsK3e^jn-&ToH@M$tjq`
ziZyGPT{ytgzkM_P`Wo4mhL|4Z=^y<UKR;B@mA8C`J8rv%g8b>+_L28-^qB|w+I<hx
zF_=QdjIlaHN#he^>a6*!UB8O1GY5IAy32Fo?xRTbHu3aJ?{MvV_fs6##9}$*6qK-X
z&06M88snGG9ZKVroS=Vb7!`}9AMgNlFhMfqM)E2aBNmS{XYo>2uUNqGm!IK$Z_>(M
z_<590zev8cqzF&bJtydHxDmH&L(2^(k}DhdoCA-9TU?-w?g^9a7`(!AA~DI3y@^0!
zvvTyGkvN7MvP?3TO)_p4$JNQ4)5-jpeY!4pQP#OR8E6p*mDsl<X7NNno)5Rz1uaSr
zM3m@L+p@9^6QJxrk0*t>ZE#4l@B2R)fUMt=!4O%_{W9hj=Ert6$D<>%A15sblc0n|
zTXKA*iLhi77ZnM@Za=jWasq@sdkKLG|Cc4<HGr8n6{raRf)kf~Mk1?5)JJ*x>yl!^
zL5T(2w(ll7<P4Wc1_U0yK83v~Uq_U4vRRZ}ShAK^{8>m}t|D<UiJZkTa|;GS2ON41
z5An`EON5;bl2k@)$IBPA(Tq%+i}D?X!1I~mpwVts)jE?F4=3k4oln-DJd94-Sxtk9
zz|)CGg}x<SKAN={h|+yoYqfe~_}8M288+ECY-m9GQTIn4Fm7(g<Qjci$c6~#2(@S>
zI%-J+=Ru7f;aM+b${DK_l*#=eq6LGoCD_Z94R+E}02R>PH^S0Y8@T>`H?p>3I)z2~
zRK0$X8QX8*=Ii%x!>u>6uq1twwmTat4Qlhbx5=;E#RuQ_UT*uqdX62bW+HDYd#}BU
zsW}r|?CvKOXo8$VuDE75b0@{<>h7^GQ8$#>=}rJj*n8s*-0-1~Fjsfb*xm=R7=5j+
z%vgIlJGX6MN+Fy$dWv<|+`vsY-o!PlOE`L_mG(177@NC|kKJ-T@BQG7te!O)P>c<B
z^Xji2<k)y6x9#6dyl^VJuh~g?UXt#f0g@?mrj4N|txCy6H%IHnxb4HYaLY$;C4Qlb
z&c1}D*#%N<W46iojgt2E^f7nYdbVA*iplvopp<=elv3#@Wm>8ZQ{N40w4b4bB3FfH
zXL%rj&M9Ks&Rr~;noG~c9>$XaK`KH2Sd4WWFXzS&T+hN8Q^?QDwdZr0r#@q9j6*NK
zM!c|?f$koHcp)lCq)ozlX6I2x`Z{^x$)8XYU&F_)TSt4{QO>oeh$s3P7}eydAf4RC
zoKr9(+;rdA?-Qe)yz=`qlosdF)7?)zw~!!}0IeD5>|pk$tJtw=4JA2g3TAJ7Ki~fP
z-!QK*E|tWZhzl>qX3SnRKX|7_6Uhp~WI@?1#MzzyK!Eb^+b*5`$^ACQ7kJAz!Y80i
zzuMAZ5x4DXm&jyfGRCzzpOEg$VPG;5xd%@m5q6`!GIrmPrP0`d*%yUv%gzF32|Thp
zFPNLUwvDi&l5J?*@c>bL1-%As+nx+I!VlXvO&=mKDedv+5=y)>Lf|3+@ELS7g>pZf
z@-Q{4DN4#ji?q!qi%#xY=~oUmd*9%}C2%$$FWiV_F3W@QXJO;QA1Ea-GdRe;NY4dD
zvG!+jwdfzrY><?f4bIl)+#jhUh(6sQMEC?{O~sR!UDnoSmS52C=_XDWjZcs|;)ibA
zzQe<3TmG=QwrvcHrESM5vioY?lF${SUnY?@hU@SBHQ<Qt17?3+Xf)>RPv@UP`2g;8
z3n34Im7O&XG4qv9&b%Y!?Np4=fB8PshgKOomA+jLVhH&bKGi&p5uL*_!i$$pEB~^#
zKs$xE)9L2AWz2IAHa>VWdZA?+BhudW@T<9Q-wYu(MkAJrbhs{VoWAHUUB;oM6x5zP
z&O}ie<L#$;^0|X_b@$TM(Zn0aYne7{CPQuYR5y2$3N#lx+i7iUV?33HD;VkG&Es{H
z&nV->OHXm|WFtY4B8U|*b4D5UZ~u-L4_6WBfU|O5KmY&?Wl2OqRAdlm#?0yT)V#%0
zFCAgr?khBWq`wb#HE`r~3nlqu{OZYH(b}Ird3>nrJm<TU%qlOXv$cixrfN<#_A_mA
zoEM&aoHK0$q!N9+_0|cB%4ZVvw{fnag~WJ*&h}PX+dDXO;v~tOT)I0tNvM2g&M2ed
z$jiL=)>)F;y#7jI4;2hj)Sfy@zbZxb*YWt1FE9|kfPT2Io3^%A`bH8$QL*36OkIC_
z18u``=FXf%S8FSM0|T_Tw$j<%&rp9it!-^w?CRj{<L8(>V>Yow4`-|D7#y2OV`H6$
zM(KcL5M%m`>GU=n<>?pSVsv7Zv!~9IKc$S;ldtmpYwyr?p_7)@Hd>pTX>RYJxv`Os
zjt=V1y+d{TD6?ixrKhEinx;-BQf?2R(J`GIrK7Etw$?Vzo<2h=H=p+NH5@*AhSC|c
z$(`uq^toCFM#mX3I1DAic2e|Q=%BT|labM3+S^*_xNwoWGw;wfUdZ&xc{J6YO`lw^
zVc_C<8oOb3c_FP$O(ar~80w|5zLxsdE>earZABx@Nn`?Iq-}zlvUX!^MBC)s&CZ0$
zcxb;^n+lg2tkaQ8Q^x*CJ0U`Q4*h_$Gc%Kp%Z{|5+w^ZEv(O!Enh}0e8T#e?NXVm-
zGb#e7=##dBZ@b6H_zUYu6lUhx&}{at_$S6#?jvnsovZafre7`nnZwHYtI(GU*}MB<
z=2h^S{wjT=!%4)FG__*3MB`}jT)LE>m9OB-$Qu>m!S}T$9(c?2M!qI)*z1B{g*~8G
z;$vx+`Yd=iLEU2QvP+o_cs6f<(UB!ZZ{|$ncyWM-F;8GE^g&yla(1hXj19d}qxNjb
zDw)Gv2r6CQC6*>W8d@CVGNvkvnLDD!VCH6#tgcyock+)W2RiKE>4E7sGJ~lc_LeQn
zp25PuFJKXy_m%C}iqEKY$g4a;#$6Yy3-Vh;-o^xNY0JN}4G&L~5L(JayVm=L^|QDl
zYxe?NlWIDhGBk_TNvR`b#xc)Vg+ObiP)n9A*BMjK76<{jEuyW#8nmtNk(sS!Smc~?
z1QAsnL%0#B^xct!-cy#yHTT_J$+quKMF<e23);dTf|dx`ejC~WFNG4_T$=G)@PiN<
zR&<$kX7(*a7<!p#-<~5eZCyeZ@7Cv~a5ulnc3*4fJj|Ww8$m9?+oe8HJ-B53$PJ0}
zyS6U^>btd<aappaUA~i@G@ZpEMP%bCh-*6=8PUP7w+}A7=G7H#%Ol^iIcM93&Jr}U
z@)ouuf{yK5;3CF6wqi7~l02xa?K_?S|Ngv-QF%{^U7}vpuF_TiuzqFqoz3_7<Ye}f
zjr42<{~Mn_Xh(mjeze<5+KEf9vbXc!#7_2+h38APlc~?|f@>Dv5w&qQ`H<Ouv`u56
zmy9Pm)b}g}qO88Fm_zzx?36ym*>T!}WZbcQ*f5qEFMXc;+`tT23?@<A_hMWYU*ZjR
zWQcI10T&sgenjw=(Z9tcGC&quZ}Bw*XJixE_?8&Iixp*|56NrDM`o<_o0Iq`1^(O0
zy%9(RK;?RoXQ4Pi)8CB{?Nn)4&I%Xph!Ol*!!T35%3ZhFxv8MFS65v2TFh8}MT*M*
zm@7*1EEBj$TUz#O+Lc6ml!Y@wp~88xu@@yqIL*k)rxw>_<?ydP+!1F`k$zo&k`L{g
z2>ZzlnpxWLIE~P&1wJB}OXc!qsXZRE^_g{<QPAzx*V+v@Ji;>_zUfiQR%M~lC~G#o
zlk&AQvf0`YjcFlkqbJhDtc=38JjyM;PR73Dyu>~fTV3jgkd7QNM(xOy6?_>Qq)m7|
znx@i5o~Bdip3zq+FV}x03Djb1g*I*ac52&wS3UCk&bV(px+t%szscgC5WXPNmu$nx
z)ZSB(4bmAz8J(7wvJf88x2UhW6iqG&WtMT}{BAvaQ<3}G{AL&zl<-TgE#vzYW)G~X
z<Lomc17&nZd1ay!w|i=Fi4aJuyM%vLk-JtA-Mn(fKC=iuo^OoKN=X_`ArVJ>Kb*za
z+q#B7-=39V*7<vhlCtG8XcFNpQ;xEn&O*Dg^^6PDks{GfoIUzcMVk&)xLj(JH*NVe
z)to2weVqu4q>+qxD5>0pOU1^yx-MLP;<2DTKy1b%c=x0(MPz7}F-0ll9!?F;Cu*^*
z8ZNWO%)S=8U(FDzby&tY%rNuQ*bJEPyD}hKCbe^bkv1(@S_B*07Ip;U_Wp>v;-o&;
zXPVA5UA~8L`v#N)K)ZV8n%Pqzg=A>AiYS!Q;)DDy>pB`tIU^D!k~WiMS-!%uHpqo-
zdwq&_4I5T*O&O&{zdnZA)^*pFrPC>j#n}s(5d{u$)1eH!L`cZq&vN87Q&;jXi#po;
z_Q?1+!xi*eUGXu`)K{Y`j%Nw(CF^+oMC!UI<~cKok=6OIoWarQro&k3m&WhkIwpf5
zj(#b-^z2u}k(eN;4X>eI8_YDGUN{ppUf1z!`7mXS$%g&dGE#)v=_rH&-r_3+rXBzC
zqi}s7?99r*jB+Egv-V)+;j5-W){J-!t7h3x%YG0xh>@|w+4Y56dX3n!;n&IEPYgU8
zw{kVXS9|=JAf==rtgm!B;UzPcrgZ;4{f2+1=b8RNiMryxp<mdqFGa(Tk6(H);`;-O
zjZ6-uohdv!v9lV&54iizmb^VzR^PPqKe&X_$;5-f$x!fR?a}Ih3+C20J2Mko7mW>x
z*r8=RMwULEmOtDzFDaL0Z5_jdoN^(;i}Err4OtU0w~Cf6!FAW{5;0(oYEf2yM8c<J
zZsGTl){p2!jngr?=hMYEXAj0#*#%uW<F15Xb~&lge4$?wIoTd!RZipj4C{GyJ(>}1
zOMfQuboikaIe7aN46}l2l`g0VM>{6?C?kx}X@qHIbh^+Qr)fR}!5LFJiP1uc?qE+H
z8mCbX47vgo?apKg>o@@_Th^wVl9&(_XFJ*hRSML4aZ}sQS_&hSN)_n`Ayj{@+we-n
zIl1a`+Bid2_G@UA(LPXq-^O6Y=*doHyC`FMGyOPv1)kQ(oY6@+Su+yOOU`>s$7OWJ
zo6Hn)wABpef@Zrv0Z?KY*>|bVusl2vI*qf1XfTM%&G3+B$;rdX-RL;Bj2|$Rv3uNP
zeHd_*#LO)T$UPa7r?l+|OeJ*I`W-&t+mv)xQkU$kaA=!`H9Lp{=a7veI0s-Wc)A&u
zk<0jo4}?sV))pV@ZyYxW@Z1ff&_ZU;4Hy87&ft7j=-Z6q+B2)P&M0H$AG&g@uiDWs
zXCd5oiBH5Hm7~qtMAZI;4md7_&+=UjLMRe6r-Q=xdP)-}22vJs6PP(JL`Ct^U|*pq
z2sD0C?hR1VHmz+)j3PWEunBcTi@K&moBSywJC~Rky~ALroqR$djiLxSg}iDpb0bXO
zA{y$Q=Q}KiW{F?uw)9IDGuOAJ-(n}kD-s1`U-s!^<J-gu|K8C(#_kL@;djrUdcG)}
z#~1p8pe^j(VrwTmB6i^7gYUPcK3i-<Oie8H+6x;oho1ZX6sx0_`K+<V(Ud7m$CIU5
zHf>{bh)=}MrLVKT0KXqErle~)`Isw~KSQWd4VyW+WKKbjH!Y0wg^n+$gw}l+y;t(8
ztbs5Xzd`G?aK@YAkZm6k=j7DdFJq_{S*is?38RS!D~*c;5!a@c3gR26sb_cyFN8|#
zmdAY3eWjBdaHF(E_Xc?EqWtR^)-&s5-B;q1i-)n4v(Rp7M7EBhU5O@)A2dPED6aU8
z(r(kLH6*8$Moo>Di(>+P{g5iIEE$pZ#Apb)uOSnkjO8zbsTSV_-Y&Rg1J_|DUAhwY
ztv)Gmd$}B6G7>mW718$~T5v>gmx-Q|m_<ial5Z<-nXG0)Zmw%_-P(jH7WxG@qatWl
zUP(uT?{pk3VS9d%2{pyjGgnzUfA3>mhBCQ=$DG>}Bz*@KGwIhn9h3tylc*y5SEvl$
zd>wHzld^QSZv&poUNYM-7%5R#l8nPi3!0?7WnIr7TJ@1S?E4TunG{Nr;WHe6*=KEB
zJ%v+SPtTO3BU9d<eR{Gs_Z?<wMR$lf{*41Nbc8<MnoJ}n5YauY@odqPGyXM!n@#p2
z{W#mq;&!zEI~pTIo?$uk<B@DkqTu+qB{FcfisY3sd9jx#IHhxK+6+td8R^GQ_9FV@
zm5Y3Nw^$gpOaGQni)l+JRyMu;L*Kn?;{TBrGxx-SA18#r{C@+Y8LO=)mn8rI002ov
JPDHLkV1m#?3UdGe

diff --git a/docs_src/source/_static/logo-dark-mode.png b/docs_src/source/_static/logo-dark-mode.png
deleted file mode 100644
index 6b005a283ba6b7299a08cda1d37ceac8f693f535..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 50546
zcmeFZc|6qp_dom^CR<rc6td+KvX$&>B_S$BWKTk5o3U?0!d0QgQe;a?B}=kzlZr~3
zk!;zC8IdLXexK>{{e16!_kZ`l_npUeJ?b(uXI`(fJkL4LbKcj#r;PPjS$4A^gjn^D
zA2mUUJqw{NUQ7(|o8SW8CHRBc^SA{LA+c?=e=&DBw+<sDg!GT<ob~@U@fw4Hw-9b^
z6ihPFr2PH)tAW27_^W}x8u+V$zZ&?ffxjB~tAW27_^W}x8u+V$zZ&@ev<3)WRjED7
ziThA?GWF;4`$eKBtJEF!_UtOSsl-&<5NW*D+8Z^*|H0(qyW1x2Jmk?~y8lnB<L~AE
zYT&O1{%YW_2L5W`uLk~KYv5}^5f&l0cW=%l#3)-bqs+tcAw2I6{NHj48SD1bW)FY<
z6da}g`JeFjy_~B5kKgD=vi+aG(dNpc|380o>#vyqQOIB2{I6&H^_%~y;%`j-R}=rg
z1&T2(Lxi+-qhkCk2J6DE$Fv{#6GBfNq@k|g;`9#3k=BCn#o=!qbG<)0+~zks8b-{m
z-~S1~#K(t{V1Vp;p7|3gV|k@kZB*sQ$I5@(Fqty(L4mn{ytVNX7f8)`d0=GK&9rt(
z^z=7B`mq06g7)VU6YXoC?|fa{89;A#c>cg@XvxfiCpz=>&pfeo0NzD>{crO)*-!rH
z+kqgMYya;L(eGa}2C-d`m#~Zp7P`Uw!*OKZ&insJAv7E~CHz`t)7!;+#qDyXoWTEn
z8lizqTI%4wckN9CuGUfhQ1AHGKVK!#UR67o>g{fF$WMN2>x~YfKVH^er@hSQ#B#{8
z_4m<y{+y_v5~rQJAts<JJJDs#rtyytA~mtYjA+UJg7sy*uz{BDQCFWoUoAC)BYQeM
z#Kk%9%xQdc=GBbaF`k@14d!->NdOHrV9Ta$`rmPIzL{`(bRuldj<y8w-yZ@i2c*d!
zLhepYcSW%pe`ol-F<Y74e|G`?`#wY@ZHG5ng?sMIAFfZpYn}VwSKu#xdua!KXyNbv
zO{M3(n?YrU${#Ni)Hvaz!4XeCP2(<|`Eo<(|4^+jO*@-H>d`+rAN_gwo;WzXo&C;N
zEF^nI?ARajPzOyfF@A#;p4`4CZvEj)XgWj$j|K{{&rX%;-m(!HQds}=r_5;{+RL7A
zb3N6e>e%zA%xQVrOB_d^5TADltq)3kb2+JO$^FMW2ukpV%=7iQ0gn}#s2E<6hbMGa
zx<dc-K_NJnq{B=3Y3^d7^?A(B1&%*HKTrr4B~CJv!d-t<?VC{)=<WNHDdDGGxATR|
z`5B2&y@%-Bq0~5=6MBq)2#7lh_MW)@;Vto``3XI(=xEzNUPRn9^HkZf$QN!Y9-}UZ
z{qb7nc{sUU@d<v0Qa43`nxk(&*lhXZ<*$c9u3Ow#Ju&@+d?lmPA1|Rcns=lM^<fwC
zr!K_A*nev^@W01^JdX@3$C+XBFU7<p7ldAERk$kl9W|z1;QrCa_|x%hxkt(1BI|;9
zqj7Dn%kF-yRf)Lmgkj!=01z!CvKugFZEw~;h9MMw?jU|-8aPq4FDcxWGwV%l9L6<c
zN9jQMn<NFJVGWfDOSfO@mkTH-Y_7)L;QskUY%QI>W7;+H_JDHKIA-a?`is~YC#mod
z9z+O|V-`T=RW3gQ_(EIg&$?e{ZF)78_U24G1`WiR^>6Yvdi|U|vj6eRs=?@3UaMBR
zAo<=Hqb&~@F@=e)OC@!S@swDr>s{~05mAF=F;HCa{K77T%xOiW-$5g+<~2UE@T+50
zydoDkvcaV`$ub2zW*6rDlwRH_F2Bpu@zRH{v8eN8?u7rx4b~(?5UAP_WqrLPxuw!v
z04=4qb9rv<6K6nnv0&!%3JJ}{IirE;x8V9j?&_d;N6x1tkMn+SRL`|47+C}e(fO!Y
z&|TIrdub^PvgkAF_B+hmd%5A8*%9NRIENyZo?DCmV$jl6nl$EcF`{FE{K_laBu|I^
z2zd(3y53l39pCjN)$X;@w!xa(l;u9U)S(Ak(1x<Dhpg3XUDYIexGMVkS>ssGTs4lr
zv3+?jLO5CkJbOz)8LfV&MIOeqU=V@tqibxJ!np8biOV9gnpClB%DF>IJSHE;JLOrC
z@|;gtHTzfVReEjR1~0c4bbe%G%58)=I5}xWyeBLB$<uq%FWiiwcpj7_KAE>psJ-z|
z-$_YugYrbh(EdlEd-@Z`vP;zWGw)<8mL>M3?bqS1)?-0sRSSOW>}b`oe2WeEV^11f
zb1BIpcrsR;AoW}ST2%EA^^;0DT2&Hhn_X3R_v-mdZOtS#y(&O@J)BPl<wxR_MJ_nz
zVl)nl&N^+6RdW6;e^)=}PHm`Wy^eCsjYdx#diB}QOz<TaBIZwR_Fo+{phu>L2E;vi
zTQSIgJj?(#rM`|Vtd~wa+Ksx)(;ebsmc4JN@Y6v|#@$W|Q%^P#*%Mya5%a!P!Et2Q
zmm7X(GtFd-#-e=~5dTns#TkvdfX%Hz2%Tcgvh!Vd1Z_`n_I6b??VfD;s7ZQ~(~BL0
zCs<Q&0)Cy{`;L_M1+UL{(k!Aj;PKc%(Dj8b#w`TZwQthvwG;H_I1JINzqv5^VlN{i
zDyf1~Za$h-Nxb=HdHZHb*p*s}Ki)H_AXr1qtyvy_3{EIHS88}ZE~djJ<&xuAhk{W>
z!b6Ub*1@RJx@2b?Gdh%Oa(-i`x|*kU|J1(=7-Z*APvkG(X!;2L2qz_GFsZ&Qu3p1k
zo}1d#>a|EB%iYVy#Kia)DcTw7(s4LLY@TdQNPJi|B{Y?tmiqXE$NN{xN;xs*&m&dd
ztMN5tl?Zyq@W(gx|D+!Xx_PLkoJv<qa#|#7Gfg!j#D7#AJWUJBXVF#lrmLybFm^*F
zBW#uuuwf{TH>DWnQ|`%@y@%gR0z#f$N?)r{OQFxUUS5zfcw!)-?-|J;CPuBl!G*Mo
zU5j>T`RVf<XC(O4x!U{XBy>%wA;bg<>SxZF)z=bWX&sy`=+m?%C3jDa=SEd`c;D8k
z?b#EpS|4u=wNB2|o16rSxoWmb;nz}^l$WFVy@pM*EYblc5i<9JQ0Cnb0tW~;O)$A+
zI-tGKgHVh<GeU#7KKo?0zJ?)N*8-gCR6vvoQ#SqC)wIcnyV}n_+9WKD_~VZ+=68=S
zlKB@rD;16I_TMhjZ0K<~vHOPkBF1(5@y117!IXnmmzeuxpVog^VMTUsU{)d#8@kNe
zTRuMfb5}^WinUx?|Ce2mE}f5GkC}TRl|rA>{K4LqGdD8MuGjxG9isM?v5l#S*!<jr
zi2aEscO*}R?-;x(f{5=U$rwBBddgdeGt-9-cUGUSV`vRYU;A#v+Z)V|kity5r9l8&
zZ%eAY(V2F_6OWt(3!%s;bqO7w;7xj>vWdFP<6<6wxsA_}*&2GvcbSmf0bxAijgcEF
z^k&+(C?mZY)AV!o;KtBFr{y)LSuKRfUshklIXC;y<aZs)vN^xHk?9rTgyhz2wBL%}
zbeBaav~bga+v+?El2fFKB1{jS@@9H4J6)~5d3davHM{>!mK8#op%S6^qc~J!qcWkg
zu)%+L-aw+^vY<@@s6NK2w*@nhm2luW9k-W@1R@gQnwbvulq2tFn9?ZYDlfgKc|4Ac
zH8f}DQ<SaUZzs*%Itzj4JQ!leTVvd+Fp_gdAWbr7Zx;LRTM3z4M|Y!vjzcz*Io~;s
zG1^%jt~*<zS!}xv$;rS0skf#BI`GApSY4JIXGbns#a^oL7m~G~tGW*O70Yp~pw+N5
zdcuO`^{L%Up{F7iREXQq+DO`WOWyUiP=vDg<vjA7+2fPn{1WJmn|c6Ysv^OCVuN-I
z(-|^j4OW{;ax34fAeT7*6oAu7cBwh`lPn`OM1GX;zkeJe3jnls<hmYjF9;J2S=6~Q
z!9$O>8PMbiX?G^_a~5#kPA2S|kk&nVo^FgYo}SR)(3rH9aTLI1P!IAzra(QF``%-T
zWRo3q$_C)j7ojSc001HN`x$7*8b}l$;zdM%nhNdfDZL!OaxRfLXM!tsJ+G-sAI4Yc
zp4XgxBmzMqHT8lmLr^9tej@qEwX>7@%-@uZ&X|GA!O7GUOo9teA6)f9h@R^(BY}ZA
zLrGTe?f91v-t!{2`YJ|dKh*++6L$7n7)qR+Q4XH8_OUGxj_f-a&GPhq#gpIn>|{rT
zjL!J%OHUK-<W(WmAOa-`tA*;<KGTQ#vYea|6#REfA&qE5?#kle^x2V}i=OwuJDv5e
zH)3+tjav0p7UYdutMM7x{S=mG*;2d+)pfHo+2he}GXs5ltvJPYn|4b$;j)$(BCOCH
zLnspkT0(`0*Jy-)_CGRwVxnG89r3=g`b8(z*ox$JJ?6~PAXwUY!n#D=U#~ZQu^~=6
z+7@G`WOR4xGO}x1C&ZK|n>=`d$sCzFkeg=cf<X*4fIsUt7QDf)$Yc47i*q^9cREQp
zC+2vvrFziLm7?INfC@WeJQFC@Fdg)fwe{N`n>Twdkxa^+2`m(UG3WGvEvR!s;BkUU
z(mKIH1v$s8v!$Fe&=2*)Al_(WDms;AtC}Is{j{$@fK+@(TV3yM$k;7SDZN!d%_HaV
zV0I0OJ=WyvRQLB;8<Q$)2BrX++?CVM0M^^54&)?M&5I)G!e}Gc@%`Wr|B<;X?NVx1
z)4}Tg*)IYbdT(z~9$R}<K78VDl>i7#{#5lepPJe<c-Y8Ja5ROjoQOr8UO(>{NK`(g
zKUF0zmRWRYCqh@rP=QH$RlEzX&}GKVrHuKr+1?#$5m?Vx3+&FPyzjuB$dZDdi#!nY
zH10rZOG~LkI-lgQU&4YToMXY}NK*8p9q8+&<<MUOx~{{%;9vxKDmVgTt5InnfLwu7
zxP|%Y>oKqJdSU;@OqfI~T9dra*EXiViMI!Zs^N9}i3&zI30=8YwyN?NS8p$7<e3m8
zA$GDAZ^D8k?<5cy0L#v8MRIg$WXyoje-g1U!RC2#&hWB65>TAy*Q(4GA8{sq;a=Yn
zzjX`P|NQMCuj!sDYUN8V{Q4*2Am04P8wDdxJv2aG<g;$U<lb!3aJe(T0}**?nCny*
zNxFOe?1-3%vEfp%8C$0*gm8_OdwShY>Df^UEM$8^X^)|V%)eIgpGyUBnYRo8b!VXr
zLEJx?KYc{R+Y=Qn$~^lJ+NKS$?)wTWb&WKBXIQUe7QAVRS#o;+&Axx&ixJ^ij>+|W
zsua1abkj+fS&q#je5)ZMq^NgqM+4L;5wT4B_EX^W91xZQR_^k)?x*Xc?2T%DG_=M7
z+1j{{c_idR0jhn~L|rvgENAJ!RK)try5=nW(}}Af2nErh>5M61=Ohg^MA%qA$&BpO
zKxO{?pA4U!z6SVnm_mL)X_8J)c3K(<^u1TJrHPV|Y>4GIfdb^bmuyZo)BBJQuAS^#
z1{QzcuPWK(mx7Ua4zgpfufh+h)Ze#4g!C~mF<*I)h_$;{d`ytCr^QwN%Jd`bCP#Z@
z%gSYwgURUkWf?S*O(s81tI(~5;N+6BDNbU}ea}H_iG~Z{j<;*rDj`aqfqTgUcAIy7
znK50eW|IY{Gh9-aV#JGHlM6ia+Y29v4Cx*ME130Z2&hK)2b$2u4G6gkTAigs3a`XZ
z<>_EL?@aAd9X<^H7sm`a*&u0>BIfp(U%D}VdO44uT(YB{YSh9_OUplN&Ty=^S{-hm
z!DV52gMv+<w91tC!{{Kz?++w2iY`HlzQ>`8f?K#W>l(9HRT@8VdY!LKCw;^m(HFki
zx9#`U+WS;|v{n=Hu<O6nJvkVQyB8r?wXaUf<kH)T>N8XIB^+tNph-In4;ZphG&ue2
zu$)S0B&pb&NQT-&=|7P#UzrgCA<#F!GHQV|XM5$I(;@N?wOc}}$d#)-=Zws4ETSgR
z&Qt$oy*kq9u4s)FNuD`_zV7y6-Wp~IXUJ@vM>lz=0@q*4e$MCrG<leIBd-W5%KNaf
z+xizD3)0R2KOdk^nqJn}(@RomC<CSJAZk7Gk8Ek?NepVGDJIi5f;yF)h|E9S<x-e3
zWkv+HM<&-pFsSzJB2yafH67B9rb$Ul;uzLAe6IH@QwDv;JOBshv4ZX|+#xK&5PF%R
zY!B)n*7~MYA0IrVc92rKnP&GHp}_O0-+0h~R3v^Z<OL|oi6%+#w~;mkbeH(eZ^#AG
zL;k<^+q>8KmBTW4#HdN1zfRO&1J6ABu@?BIk=3jDT_k!$jR$qCz5XBbtZvt2Ed3Q+
z@owFFHrx0GUKifKdyu$X7f>*sT*O|Tyj33&Y`Wmvk3`~jMMgmjp2i8`)o)(;z3+`q
zr6}AZ=s2er&+lvW3Y)?$F0nn>d<R_}_lo00@%Cu5ZVF^L>U~g#X1ylkkwQ)IksEM`
z>LR?BtbOqmQ$2;qNlQkf=?2pMTg4Lrgd|6a%l#ErdA1E2sRzmyUSHdbh-hLfnm$=8
z#e4H$Cqe?WW4wdR=#STOvaP3Rv!=_I?W?EyRHlpClH{-D>qDg1Oh|3q_p1n1IsU%=
z4wnK+7sTwtwqTZcf?RICtz<%(qKBzy+m!c>fU(f@vkvD#PAteNw{@@Gei`zl#BYsA
zni_zUunKsF%lex{j4lJF9TSORBla)F+3kZ$5@?eZp-47jp-kCv!$q9ZiAgEFxB5%`
zx#rhnR(}cTIObCt7mq6wS!mV0rbNt!p6a0QN1el^>E{wNugHi%BHat(!a9#Qocer;
z9^nr&2@r&>-R1S(YVg~AI}@k3wDt<igt?q7Q?{?kkAb@)T~yY(B_snh)mCH3@^CY0
zQEA`^|L4hwt;Z4JH1k9H>F45Nc1PQfA`}8}Xt%F2Lr^4oT7_H+_%)fFdc*K?F}CHT
zB(OPW|4}iYKbYoOb*Uijh4O=5O`I0gzB+g$G8Dwr;YXMKGHe?nV*q5LiXurqTKye2
zK+eBAIQ^_!#*2d+sn<arUmNUaH-GiDOE{#(@}7&uVITs?+=$aYo_<&m5gH(p$u&8v
z0nDkpf&wej9V*kW9SWaR2ZYA!7BtQC2aH`$H-%u#eQ(Vi`s*3C*T8_7V&z7@#sjG`
z=Bz=H5+WGt^Pc|?FqG27l_>R+y7b~=?oO-75kSjFbd!Wu^x^G<^U7`;5Jtt4_;Gz^
za|^H-E#dm-UHQq1ZW=D_6sgw*vb1^oVa80u%8V%)fS1t9bb<YJQMchVq+c43XleUY
zb5-`$D6N_N_A%vibY`P=mxmv_9O$P<)d5aiM;UpOn)rl8Xr%^1-^3!>wEgf}{k@<X
zIc4h>DV&C$P36N?cc>;VPfrlq{0a=n;>ps5L*@r;OQ&?9quLO)7^l1Gcz>~ZeR|*v
zL|GK=Qn5|amwHKli3OB<P~IfcX`h?hEv<u(wt?)oyS#e8$YYljQ_NEIBW!aM)XUSB
zemZzU*1g%+vyce3fQ?f8po~=|S4wyYsz63AWG7^dUAMVE@^VMN1@yXu1`>TdzyaVt
zY^b|J&sS>HdNCcS%!44tt5f!>5){dj2))t=2Yd_;*qD9elu4|0HPnZop&eV6w!-C#
zgFVs?1HX)vmqJ=}W}!_sjnLfHpLgh-q)scXZyCsl<ie<myKQaPMo0q!HB<k$p0ig0
zInmT(sX$#?W-11lMxJX`LsXq!=-c09TIw?|u`s@>x&;t0&_89Iv!hOwe(t9}^FW=|
zMHWQ<hju{_GHG0yK_8rwPffIlp#H`Sw(l2=gx}eySly6UD0fNuG=<RA-N-_;^6Jyn
zsQ;2iT|D%FRNy$YrIJuTt~0Z{sVuIDd_9>xZfY%g`BEsww(;@4DWAHYPtQxr0YTio
zu9Khcfh!)B%=vWH4;#k1T5TP7MU2S1`x`XkDENI%g17f=c5S~qTm%X|$`rrLg~$<N
z1etxMf@!3#)m<6MVaq$T6nBUD*-3Y3Dkd*WuK)7b`g}HI8`PUBrO%ue_Jj_gg&A*+
zy48n#OHWJ;PH!QAJrw4q@U=}V){AVRujw)}wwo*oyq}#4VjF+8yYUx)D1~miW;kfu
z6z@O?2kn|~zAy_AelSP>G?0sWkv?qO*d`eEy(+!CcApC~55;!l=dzLXmsFLXAJ!Bf
z?BE>pKG<(I_bEN^5X6aSg2`m7LK=gN&Vmd>wY{eEgEwy=Q#gd7%;ze-Rq|`yxJVp4
zGAB*5;CZm1Wc26iyxE@59vdgi^fwy>5C5o(YHiC8BSE!qO&XNb5g?7Y+^<s)0#8df
zkq|R#O<-J~t&1YN#PbV%mvV^qr3K>BtCmGZ6!_~AcG{y|6QL_K@dm1QCJmHqG-q!<
z=D26mCObH{oUA&2ukQ)8W&g(9HPn9Q&j8TnU5+3_b#`coJm%aSXjN#=*|BOM;7#C&
znu8Jo*?P<7Jg7Fryn3E+r+!(+zkuTTR2XR=goK%UeUei3F>6(2DoBPhV#{)PYN8{5
z?C@A`rLFsIAKilC`&B(d7o{O!C!Qu8IN7T394nwdh=c_nFq&pvscl5aO!(#&a_J-N
zKmZ0&Z$koWBd-UatB+e7Q#q0KCZHtpOI1?Rluq*1uQLyf+To(axOCB4TRP>G<U+=_
zY;iH-y<G-ST@Xz7^0Z+Tnl;6P2|nyl$HVEAd$=A3v*{GnJiziVk1QJszwLQc7x`s9
zx8?DUc)7}e$~ek6qzE`%PJ(yFW0$vvqPWgR>TN%loA-;V0mg}!p}h)7c?|LOb6?J0
zQvy3qUJPlA(qw-f*@;uWSkBW+;#8gLNj<ePQSAP0-TR=^xnC#J@=U=K(jJ)@?EZOc
znZ}z9wNBlVQ4GGAmwR{?S3%-HcGe_L8EHmm&q)T&OYi%2q1N3C$xayXE-k;(LCaMF
zdpmkIRc5T%i=6NZKNu<>Cb+S`Ioa|%&|@%rDJ(k8e!njAND6xK5iqN;^7{o@!osM#
zSZ-Vh#Wy`I55U-G6&h7L55s*6dldT?ZXH1(Z=Kv~dxk<QbS=^k@n~{%;68QW9sni|
z6@4&XH&L&A7!(>G7U~`Oc@7l4FcqGYz}b%}j;}Rj^S_XHa}Y`}ly?_%%*^1vX_lr8
zu4oiUKMhFs6PL$+JiANPv`Hemm|dUDc>W4YYT{DoQKb|lX|L>vKlMU8aDCEHJSZ;2
z{nxKgOSI8JsX|ZqbC;BAC!fn?LvFca$5P)of!aFVY9@lYwfQ2Q$7g>vX`NY2aiW#2
zl>p?0H^lZ@9+vZ!eKKxg=S6V3yphwb68wIie>qt@&@Az?hAdBo=i0$x3qa9Qh099#
zXpEWw$$%jLr6dphxH~`Dx(9noX%=7dOLp@_Z#sZ#raPS8&3&v-Qn@TXSflaLYCs+7
z%g9c@*SOMYX{##i7lF0>GnG9Lt=!otX4BP7wBpusw->2V%`Ag|&)eXRvUA{+g_juQ
zPM*21ZgeIt#(YCXN`Ky}njW1}0cce$#$Vo<&`WY*e%I20n^W}P;8%T9NZ;G?+>hsI
z!H|l|wXJ;MELXPq4gBcN(q7?%6l6{6r6mlvO&e2#qDi`3|8%p6AJrSfn+?U#e>2il
z@vj)aA3Gm?5KvZ`PC3-kWlr@cES1^ke%4ye)fbW8^AFMie_^*>)MmnED(gr+7cXD$
zZ%5&4)<TjOJ+h7KY*Hv(8skuB*BV1&YLm`VeqA0Cyfx-hj`K!^egx&~gpnVfv3~qE
zw&l6MqWABs^HtBrwzFzrciStRG$1QjzeUJ*FO}((SXS4{clM1{a{$k2HqdF_sAY^|
zq|>4|6eWGC{+C9@{3pAzm_6#LPYQKxuL1y7^n4Oz)Z$Tr$`KLfS$$%>QXWr;D#h&r
zlIbyJf>!@6gp}=`eyK{heE;tA`CqTYGAheoaiL5Tn*1T4(IcG~!C=FrCU5&U8vON-
zPIutGkBDrupHuDG(h?u`k2*WJ@yBBmQIm3Z(fl7rlpHRYU3*M}OXbK}F*JH{W9TGL
zS|uvk^}*%<(oTc8UV15Z!u8A*!R5d>eN^*-vKJak+sbsU=&gLzNV!1M$)+4R{!Mj%
zpehI@H%q5}ggiT`&zvb$072;asVdQKr?v*}CD&QX$c@b~bzqCu!U5DWN+U1dNZ#nJ
z-v3(hTcGjih)P9}8TGzg;Gy*7CVS<DfN6fHI$iZe*X?~CjkVw9n%>I8SEg}}^bBk$
z%_{fI<oe=}>WJ*dW)W=^#ZEhDLQd<mTiv0?9mSKStQvxKz8cZ?OxfpznIE2;+3yb!
zK``CspW9M(HtY`9^d;kikaq{RPMvH2ilT9Q3N@E)c^<&c1!MxDKqPG@AVU24h%5SH
zzPE>4Fd9<2+er=liuZ*@_SnzGyoZK;q++_JsK7%9-!I&#$tXVl51ecZiSvl{spTIU
z&$OUmU%a?8eX6u(A4XdZ=UXsdAVyd%IP~tW4`bTY_KS{4*-hYZh)sXhF*b^t<uq-Q
z;R88ppf^b<>+`eK<#wZcD;YO-D2fl3g)(QavUQyU%42<ROepoBr#Fo)pEecU;{U`Y
z#ZaOzE7O+kcB!-!260y({7woMUl*>P?7g}($Hd@E3-<s=JDX2FJ7sZ$GNEBP4xjV_
zhpE0X{dLkNpORI{SF&{C5xI9)-xX$^leH(=ym*dk1+PK6{qD(0kER<GjLw`4OE>x0
zL+!8p?EFf>Oz)0HYHvVXA;l=tscQL<8{pCI%{{C0RTHbb=aXw#>zuySt0k4}B5vH_
zIx8MFq&0<e8@4D0OXWs_@i*}m?=;*hC-hGhPEzdI7{3m-CT&%`WIxy2E33rwfM0z9
zxX33QM;Vc7n`QK9f8>vvI?q=vs$tbp>t&Nvn<H<M+oz`=V$s}0a!u^Ak}EF-TX+)5
z$*t~gTcL(ttDz{Cf8BvHMK+&c(b|WRonziNxD>4x_7ZHSX60l_S0^cj5n|gySd@|`
z#{w$MqlOzoY#KCx^hkZ|vN0mq|87{j^~T~Zwp+&n8A5s*<g5cB0xedN+{&KUE?qpz
zT|AjwGDg_W<d9g^j?2qeH<vBiz6}g9B0?a{NTw|fqtEM<tbZR@`3EznnsH5O5ND?g
zkvOhmESymF1WcG|l?m0$fBwG!YvoFiHNGv356;>~j1}gzEIfG`U#-UW<4!{6*r8hi
zyHL{9(JXvk0j9ykK!3DXGGxJT*LbO7=ShbDQD;$PE7pB+RytDOniM{*GI5bPJ8bKf
zLJgfcmWE&&O5sPYG|qW0Jz!;RUmk7~?@N1xJwme8+$`;?+CtEpVqV*A;CEVJff=pb
zO~^D@zPqX6T5>F1E$BM9B=mIyRZTyG_B9)>hGNN5!3>e4L0(9dGfb`*+d6y7mOuM#
zcnxbrLz%6eyy3aj6U62ku$yX;64JBId5iZvO?6EG(tVDhxYGh|@49xP*`K+<SS{Zj
zFUbpTK28G)bw~^%L!7m-9NPU|fHUbcbymcIU4*OB)-5N6A5=x}5RM6*^H%~FT3Jlh
zRt=uFcHi5j$JXm<UiEYS<}xqRTozdQ=hoyMRmH)VvS@Z9)242ze=*32Z$g3&odwR1
z;J>}rQR|7Ae?yUj?GnFaNo30tPYwt62r6Uzyg#-j)&~(VYhRd~ci%d>Yl1xd{3IBG
zHBQAt%+xvMNEa&XOD7oqm`>k9P8S!;#a)?yS9jFxonko?9UAb5c)DsyR}N?)MzA4u
zDs5zWS3aCN#<AY%m7ZMW9)gfuOwRUiWs3rgq~+Wv;pvH8jlF{5--VYybtfev)E?I1
zr7zG|j8g8~HW$q=|5|ZL`Ny@;_qTLAn)LIk+R24FrB5Iy?<IsTb!NndIN!*pR&gHH
z3e8h<#v-|UVmCF%BN`ZE7bZtFKJY#cVY{waYjYZiZ@le5x0p{YZkeT*j48bhHkM6X
z_vu_x!4I+_ViABR1B4`hJR5m}Vi|pgjepT_sfXQzywAGDPdLV=QJDm_pImy*aPhp*
zl!?K8#+BjPmA6%*x75?2MfB$7LknHoVPKnPWfFZzN!#bap*;N0S6^(M%#2(ioo*O0
z3E0UcTlTzZ=<LIi;{|5xDP&HN?PV*IH(2f^;}56bCqP~zfMT{z<*w@+$i=;uX{O$u
zXF~kbY<q9Ul_(Jc61C0;#U>=aV*b2Ihxm_!Ob&oyF#?TIp`$j&1q|Y-K)t30*|f9r
zt2GpZU)>yo$y_*YW3L288W-cY4^s3(%296EWtkdOu`+3FR2HN!lnL7Hb3W+G+_Ohl
z3ZHC4p8Udi0>;Yywg{HYJ}p8B{p4>Oaku_1S;e|VDH+?sE;!7#6N9#W2Dz6C>Ceer
zB>0Mn$t}{MjM>pUszLPCh7#{q`0Q$yzdkshQN3_2plM3()$4b#wZaJaPVIp@nQ3^>
zAn-nU+p6N`%LZW_Bo`-^JFfj*5!*7(+iwB+xFJVMHBA0XxYU&HdP|lEqLUl7LBu}P
zSAupYf&ndkq)7(d4LFxNPBg#^4kBQA*df%avwtkGRH9_oJmbg`e^jZ}nwQDHMobR{
zwQ9FORpH;wEl^umz^brOv)$)?xX$bj5IXRtpEL^)ztqd*>PdK^;%7N_-Z{JO)s4_R
zA84btf;QM{#+ZIO28Gd4SL5fo6hp#eXw55Wko*n~952`(0epAx)~gnLzpN!3_fX@<
zh3p{5;z@lqDm<u@_=USy*JrKWE@%rHZSis|JvS&)`?%8si<Ai*s@hIKXJv`boyL;=
z_$h|GCPG|G@nU)7x4y5?l+oS38)hJ>k0oTIn_sZ*ik0ur5C`=TNEQo>R^(VeX*5gS
zS86k>?Ap%6qYItONhX0zQ4c!m#7PuV=d~dXoG$&+-Eoys&(+PHw9|IiA4BNq+r@mR
z_9%6Bh0)l>jlE*j<TshkqzX+f<V*>&Jcr+y%{MiesogTo+_dJx_5#;@S7{^T{j;8J
zzrW)Ie8=|6!-57VBtt$O*fRUOJpNC{ResI$yKmZN2(8RGSavcYLJY@IihbeLk;;}i
zrBEkRYSNp`pD#;+sM?>kKL}f$n1G0DUALPTuO-hXl$;eMJ>BXDH6GAB<Zv;j(Q7Nb
zbky$nsqc5T3`k*pm4sv5t)PZx*1=!MtFd<w_+;cp<A5_YU{eC5%|J;%N{VY&nKj-#
zvvBm=ZK&q$w7@f1TX(ITQo)iJd53Y4hC2xbmULC(iROuLOgzHE>JYYN*OTt)FR@Xz
z%AMW90+Xp3t|6g_yx_Miu@?2#!8-jchFIMdUOlXO*=+Js=Q#!#41nRXX@)yEcly`-
zenRd1l8uac;c0B%rTb-DS~|pH+)w;I>s(QPGsEgE%NcwF#y0nQ2^Qh)m`^RUGEbdH
zg2Q<i(iSF-(-W&%W$Izv0Hm?@)joXmrLcgp$4$P&tM0+{mcT;Nm{!4U?T|R9@3BS4
zzycA|!+6(1jsp3<j7QjBFXt+2TDchlQ`zYLTX{{GrI<%p(|niFv`z7!v9E?{)zZQY
z^nT7wT)zw?y_hg_R~mg;24^p2#4fFjj<=RoT&Zvt1^!g)%}$`WmyNAWD|?0}gMJmx
zJdiC@Uf#0>2@P=`mCf<*-!)ycpMlv4_B0G7G-qm+_jhup{Y=epfL4t#*_UBF!z60?
zgtb!BR%S06n3~O598|60FARk$O~%{h>bx`}8Ux<$7TNE&4}*6I!pqjFvj=1s$u)ZO
zEsiX-zUWa2#r7(#_B-v5pg!MzRLfU=D`fELUaq6bUj&+*h5M4pEic6NnZd7xq28Nw
zVit1ZX=km`Ec;Bqq!bkEqxwVb(*{lyC^kcG3@Q}|Z$bhO*~JO&ZtO~<!<<X`5))}d
zjqF3PyAifw<6}%YlO$AG;r!0Ylw+Mdvmg3@qGfy3)tY$L1xoWEhut~;ofFrcihrwk
z{k%-x_M?KhkV)&U1F{AZwS42&b>FXUJi&Dizob>-Y5*4>nHx+8b(s}+b%*xk0e9MQ
z>`2d8w{l>siiTc@aG>Nr<y9NFfQok$7np{3A%X)CIEE7KQFH3G+;n!m6+Tc-Qa4s@
zI0Vp!VQ0B<bXs@GW>1odf$DTU@EXEv8C>YAA*x{!7jB<#oFCaKww4?4D%mmkB=qj8
zTW>t}UsLw1qjjiWbv+ksM$az2aLn(h3DO{?tI)o0SUTGMrNK?ucVJ)8Q!HH_Dl7<K
z!bzgSt1c<VwW}{$F-!U;5t1GI^}@njY}(j@nqbcqXx!YdU&PL%b#ppugO5ia1Bnve
z;CDUa$k^=2<0A(*=l>~powRMV=2)MenP8NMCQ?8qdQXj{jxCM7O<8<|9X*ROnM04f
zG-edC`g*!(Vk(@SR{Io6gUVQ>b*yFIHF9a&`kfdL4SjI>DkW@|KVGJ}zv__`bH5eo
zc7c=595?erLA7)z2e`yFp%vb!9}A~-#QEEk6NpYRD%_}Zq)IEghHe~6Dg3GYw!!2P
z>G7P2Z0D<60WnJfR!;50%t<n37N|X@0IB4mf+h7!y^Ggg*}uO@3A~N#s8SLJDc^fz
zg8x)Ld1Eq|6-REhVnMHTIeG&Hh&|?aB+phEKOG4gZ+p_RjR(%|23a_hOI7zAP$|9g
zA?&GYL4g(#l!h6KdZXxmOT3==VShiz2x<fDYB0h+)9YWDZT_BLgo(9##Sl&c6$<L$
z5d+1_A<aheyi~JhoNu`~9n3=kwYbd>Td3*^p7iuvmrpzEfTNA&oJP!JC#gpxrzbx`
zz2*oTV~Y>5y?j4vnuLiP)XyCXjAhL^NITUvoFDXT!^nFE?C2P#MQg&p>M{uo*wInj
zIJ9j&PV~o)S$}uzNslBc&A(8qoSq;?`kf1Vtn9m=9?2DoT^wDH>f>$ol(UZdl!dgr
zAG`bt%JoqrI^%}&SlaRpFI(QNUOAg$|AHPBa)SBjfy9n3bZ5nqJ9YuFktyERc{WaP
z5I5|rB@x*Y{ZCA7U-7HpbDjg0dH_XHPxu1+k5#WV32Qst8;05;nhrG=+uHTA$#Dgp
zY8Rb)X*FpmY9)t3g{&a_>VC|AK8wbti0bDzDLaG#Yt2~)XLmp1I3@_Qh(OY9p_Qy*
zWm^4BBqzy;^j)F4oFF|T^YkvK%z@XPLYd1xI&5{q7G?C?h<#_;#&1`AsXLTB|LRRe
zFe0!*$HK2&M!o!9?9}^?#$vg_s9}Xjl54^c=dp~$3xGo78b>0i(Q(84Y@8T$^K<yq
zIM%?JFI9oIces{5$@$TtRh(BEC`Tyr?0CX@O%Vf+5wK#AhwYqxt9x_B+_+(OG?FyF
z_<Iy7ctdFA@|TfOweLi)2S=uvkexYO;^JoQp@EF$w#Gj0-sRm;W17D1VMcQfDgQXR
ztk~3vn<7uM9Fw-mGyChFlG%QL17u-4ak?r~PHkIzbv<4ZA8q8rrRnhzx<|k8Z(d9f
z3mR~^3FY)y2SmuKNPv_CeCy$JFrs^xgZW7#f7Zee9_lMYmERB41Rg4E99QB7`iEc-
zeA0xrU6~9W)pieQ(NVp)wi=u-$Jyk%US>N*seMu3>|fdKN0s6rS_tk`YraDqm(~5=
zh7Ua6E9M+14Gs1apNNvf8CD`&?lC?+y%po(njO_?%Ye+2IaG-aPy;``idH%riko(n
z-WDDD9ueuTE=yve=NPb#b!t#%fQnAlZ%%@Bg%Ud7@}`ysAbG$EX&SYDUpt(UHyCa&
zy~;cKT6Kh>{g}#w7cL))VOEd_yHIvVK;tUbB*kz|wImo++Hd#WD%(+9)RhJNQcG{Z
zHoEHPAsyxi1=d1ne8aGp^YzaSvw1AmUdIbr^_f#fRt1qiyUrY2#R(SPd&>nTO&;9R
zo}AAPu_NLwNLz{`1RDWiTZalopo7K&<18d9ed~IH8$>yd+Ujy0)xvM1c-{hoo@Fz_
zn0g;CWR}_LeD+9m6^1a}v8|nTeI|d)&5xSL(o$Sf21L%1)Dbb4CRON_W!iqi##m9X
zQj(I_dAjE}2Q3PHx22B*hpoh~8ik`ZvD+H6o@yqV99y@t4PLR<H#jw~K0GUbjt;f6
zy&BVJCbVp;wmo0N5SIO3v*Z%M3o?%eWq*wHeSY^EU7bSGU4uN$J>tnC+azcVmW)J}
zM=h>)va8tp!HC+&MOZZP=im2)r_X6cg!gnA#Ek*U46DqtY@)~4Yo19vZ{@J0ks~aV
z`9E^6gIrO<Mupddn<GZlP~+<yN2k(^zdWDG<5!D2IVi`8h|YPN)OxPCOhJ9-XQR`l
z6<mndX?<YvBuX)b?Wv5XX3bkFe?C{BSO6r@7)Lv?!&Ro<!vl?tUf1Q4&Ls_VYLg6J
zA1*J&0XENw5{rgAc^P0=F#nSEZWaE^w&kL#ZD*w=!d19XVH~k)r#Af^uCGrAwaa`?
zTaNF6i3*Vi9666-%KS`ZZdC_r;ylVopyJMDX`Q*x(xe)vP1i5pAJ`jTY=wQL8|%%V
z`<^mv6bA`L$1>;L6U7i_PPATRWzv4Gpp$#1NoR6m_#g~LZXSh$YFIEon*o{f$(El0
zk4jtT`HK%s^F&b7ELHVaHCIwz1?v?}RSH?aHrdTadvDd<O?~fHhX{6Q=?O%BX0%Pe
z<0RC7MqamV+Pt5K4KJKaxeL|mfGaqInaG?~?Ea-ydBbE2EtwZ;4B|Mtsl73Mi)$(M
z|19k0zucGM1A~b2mt17#*D-Mku$_6f(dmLa7BPIMQ$xGIf6`C$X4G+C-OuU;>kh;Y
zz_+}n$Mvt1@{6lmxwKD87WMJlZTrO>K@Is_Ac4PATKiHFcyA4uoyNarH+2-vl<GS(
zo?;oiXY?4XkA_wimohrLq#x{Pd%D~&V)eO|pH&4628@E8wFNVq*xD-Q@TKwix^b0D
zjsthGEvqTEXCTL_l%2P=03y3^d-P+f^ZuaZi5qdIKKcfBaMGwtM)p_O?b3%0Rad0z
zD|3Zq24L$PI1mE2wD8kBu4oRERU}+&>Np&q$n<dWU>!?lfkIu1rJq1maS3qw<XyeX
zD}x~=GL;PXw_^z5(mdT46Hz8+LGH)bR@I@lMi*l+De$bs6*tL=lgEFiL8+c>nc(T{
z(Oe`>_?&`m=}vdBo-X>e_H5hg3-)?;z^pZ)N5_<=l0XY4AM;r3jIrpM{ZuA<s35k|
zQTBuC1$F@+yr8ZLRA3}%W6it3=8_XOIN=gMbGl2Fv-y;FoerWARMyW0bky4J7r-=A
zkDEoftH-WI+g8qEq`V2;4*(jjV6;WS3sf(Nx9hSN)iI-K70CA8p2Jh2kzf0Ch;Hj%
zlzL{x(SR!+p+?UJt9n<U+uNgjypMb8*}{*YhevD$_y-hu=#aSsI#ps~z!H?*&c1PC
zEj{ECBAf)as`HpxM97a|jpy@qO!r<)UhFAqEgr8pR2N5m$M3pEhfZx7_rB@)te9VD
z)LfGu2ZOt#M}H><o+%4Wy-Yob)<iX{?T&)3xN$+;|BM&h?3hnFCU!oEC4_OvS(>t8
z@V@E8Fa}7sb($StnVp~oz(hkctiY@_Kj$LTGoq@y2?NUY5~GI^p}+7m$~^Y@$k|GF
zs(<`pB|J#g?f_pe9nKtOOX3i!D==cgd?@)oZh_K{?47kroE_J2JLYkKB>5)aLHQ{l
zb(m$g6sF-IH>_33G14JZ&T_g;Nmc%HFDM7wx*EH8%>MVBN5x6K%J>C7Iw<FEes?A{
z9W)Y{eLg*@$bU*c3i#KUxTpArYennRnnC-M52VmzQ0RqCqN*UVx`DBswfGQc$-bOV
z&me<^(yYRj`_0BAB7->-+N-qlv>8H%C8U*FKSdbS_cB$tD(}>0L<6FEt!e{8rv1@;
zi~_BFu5AK1A#{qzDI4X|3Ta1I5y|0t7DQmDS?88Jb+x?bE6j^cAWC#m+PBUng|Y>z
zE($60+E0MyLvQSX!O7~Ng*R0%Djg>2QJxBHwOhTnD4C@HCPU}u{rNcUe44cLH5n9E
zh>I;5ANS6on4vV59I>Mx<C1O|e+{Xs@HLCFZt!A>eg~oTHpQBm8e_=4GWhVAPh$@w
zI(rd@m@HSef5WO?*sKp6bZ*xj-G&G<0GMvDIH8!F<jLL8EqQ2%c4M&K+r`f<im7FT
zFXPuiAddumJ2Ttu)-L~aQ;ggq2DvrBY=$i-yU6FTm=#m!&RGUTRHI3n2#n)*FY7*H
z62au~v&p#f*B3&S(8@v*s#Nu8)JX^dX&5}Xx5rDaIh|z9`GyC8u&9oF@u5vXf>!QU
zOt-@nEFFe`t1JxqR{iWcdUGmy&$Np8{F?`jBRlm`Z*X^$IXENW(dR8Db8j_6w9*r_
zYvd^yG|y3fIenH<n;j^43Fub!Ru8Urre2`Q699R>nZ>^}@eCU>Qm-3b|KZSgfP$>b
zF?)&=TH~5~9)1GQ^q~EbB9(XkM_$_p14QJ+AISd}ll;A+r%W1_iW8vWo1z`g!NESr
zeKdsJJLCWyh`3|sI~CFh-#F*D-Yd0fiLcEWQ05DtYFLd*e=6TA#z^n?9Ey%nRYdUf
zHrqnT?xcV56{NCuF4r*9di4!jHWWX6PLb|ri6nDotEx^bH*|C73}>&1JE-h}qvd_D
zmr&iiqb2?k)-InOJ-d^bX)Uut_sQ0rFVCR$Woh|)U}XhqM{{(rcSue}3nEgl>s<G8
z=!?&$6UREnlh0nT9PRF1It{@!AbV`0dRTzhN3jEgCSEaoct(Ep2&<hvVg1$h`|Ou?
z-B^kL#Do+0-0DYzLY36(?3Gq)!xlSY9*Me)g??fLx)As)huU7J6MQ7-i^L#W6>jQ@
z%j5WAEXIUbj~kqNa$7dnVq>CR_euZ_MYW)%>};9mS8?0#X3TqVD6pM#Lojk~EY_VW
zxS7t;+1u!1v03=@o?<!%b>0ctant45BemwyJ6!jM;r_ZPH?bz`S<Sw1pVJO^3{@~_
zgH{iRDV{5R!nbn7EJDlMRk+vUaPC@_u|(v;q6ne3zc>SyF~OZ^+dWx)3jt*#Wjo<~
zL_J5b>Ci2OqCq_E*tVB?t&eE6VI2?$g$H}dSFjWBuQ4_-U5y<VvuhTFJ;`XaV)GKM
z$T*xwP%K-wFB-t|pPKyAX9gsB!07IL%%T)LfMae|;$}3Rz0-iagXe#yenk1mt4gI<
z*64rMoNmhP#f)od%`O(6u1*F{TYs1Du2$M!RcFeZ3;2(FLd5x%`||ROB^AbKLw3X)
zc~E=)rB>F?=9jW8XgUZi!N|eZcg?7QF<N4C0@gWOxK51J9nzO9+6uf1kcOH6E}j#k
zMlB9s>D2Nd(-yU07ueHy3qfPASnl0*<`U-zA*TVH0hE;E1g#Z)E4rMwfvkq5%<iHq
z8>=%Ko)x+?)l+&XB-u+EfC)|O0@G$njoRu`1{}J|=pi^)ROT74wwGLM(Zxeo^m7s(
z!IlBdu*xumFP3zh>{2d%x~P<#O1b6UtI>b^tGyqM(sQLnY~0YT9Sw)J)eARWssVca
z)W3Py=Gv`*y$Xk)3<Y-KN)&(S(EKC=kc7V}s;Tr27q8Q%-H{ac*x(NlXhqmqMzbr}
zYHKD8;PaY-+{CeBX21kIU_jFrtqK;&sPO*Cuj)r!?U>8;QRy&a7@)Z?;}_PDAXRHV
zr_EbQg_3jBfVVR>Sf4HAMKMAxseB=13oyCY36oPQYaQIlvH4BHhB(kA`YI`*Q=(vK
zlIp%>z2!fW8jC9mrK5B9z~xVy(0vLI;rOaCx~hwM%VE^&d~bbO)8E79!xy7%xLt^N
z4ra`DqMxstT~2vy!JQYYIy7_!5tJZ6*&Z9tFW`^FGcn%UXlPK-Fcke1o@}{Sr}qeL
zUiPtYtWz-`^46vG0xWO$*AxDMhkiKA&?ECVu%%q6nD+1Uq{IQzW0#MYzdpD~+Jy!f
zp^8#vV)AfZ;v2(1U9W0teyv)$piZ2X%B|AE%>8UE^<qWZhmz<`?A*>AoFb2t2OF&b
zu*(&^&}FCohc+^qr?dBnfhYQdXXHYA*X@C2+NBrS#J17E?t;I<T)ePEG2jf@=#mr%
zopqM_LPgdY9nz}VIc%+GLWko;L>5@gE_|FL3hcWo>{>mK*zNN^{U9R3#%1OnwtwAR
zmzq(sz?!KEU7+A_jdep2b{tFq;?_Hj8=x8`)=pI#AWquS=O=rZgWr4nVfPk7iI`Y!
zQZZ@DDfTOG&|~<&kApoEIXL<rxKr6Jm_w($XSaQ+3DP5tYpj*ycWCinQ7eT?6x?IN
zK>48{8#c7-F4w&`V9iVuRjv*xsn=CNPvIu+(;=A3YG1)tl_n+-c~`gcwc!h*n|8wZ
z1J+#EJzouW8ViH}Yxhn_*$1CdW2CG5M<Toex(0N4c<6Oyh0xTIk^@QIccD)OW~v4J
z_3Tllxn>PMhud$O?}oj?qO9?N!Zt5hvw_8t$Tx+0oZ9^xt9k$*Xh69yVD(nQ0FM8W
z0z8Xh{v4f>=V5Pe+7fVCF#>RiNChAH;c(Udhz1^G^ZGg0+HH3#FU99`uLKq!zn4(^
zbE%fEACA)a<;4@h`mY&}-A82ra#d2ljH<Qev1mKany{?yZ2Qu+dgdjULSw}RWJjCU
zCAP4J;3~8D9K!A<5I7qG413ARO-Dea=x>p$lVH@`sKtIwf6@j&qeS<xi7R}`F?hV`
zyalu7=bVTu-ly4-oGQ)RRFq!|_mX4fhv~mD`N(3d8SU8D@?nq}9Q=E0FgHpjB&0BT
zPN%X_NeU=icy^$$<BaU;(+tX*dk}5uhW9?`{~G=o&5{lGW?oNUuJhE<Ui9qpyEelL
zqr*F}6~uV(*(0pcXA9aI2<$n3e&bV|u_9){oO0J{lo{QmjU;T`E4y#GcW~N^HZ9rV
zVSXRNctZ9+h5yn#c`EvMJazH%XfvWreYOPok7`9+uSl2?_SSb#G(wMHQMD~bjxK2D
z{7a>oElnd4?FK*ffh4&J4@YI1RK~l1X2^*p*0rz-IIJVXeH)VwDhX(%)WM2o$4qzV
zpSG-S3c>_g1v*;7H|ar1v~>Luw(Xb3rq9B|?qMr=B)1>*Rmsfk0R$PwANbnug}avY
zvg6r5U1QY1!4Qv2yWjWbZ%8}qPD6^yf%Vlie5{9W{zoYKkTz+;;%jr?TggKT4b8wZ
zHPb@$o;x)aCzW=C?IwG!&{9n#ZVNdc&}^Oe_u?_RLG2~itAd0fjR!W%)=*y&IF+l;
z$p&p7|6o9VK$|5UgFUlTT6~J#Ev(i`_+fgHA#b(GFWU!ufGXvKT#MG=DIe=;nGH=g
ze3|%f18U$Z?0WIjHi@%?KlMlVA-khAmudx<Vl(f0e?U`cC}7Hv2b!+>3R7?d-v__t
z#CpZN<XiCFd~N^X5+yf}1t2Dw?o&%nh53JX0d)EIO1%!8m{pLHfPw37P?jPSvxlD2
zTyd-p9PC1)pl1@rKzY$0vPc}x0d{0E2mLOI*&%)APoNHXiV5@`Xd2O`BG|Ww#}W26
zp;dDR3`;E*wgayqq{&$OZ`Y;(hYg>nWueND<WqDEgL00Hw4L0w#2OFWc6g!*@N1fL
z0rvwQu_QGHGUK?BoDiJGm+ATyJtaJk0I%S5p^NewtQ=v<u0jwp%D=tJsorDTYx|Gx
z>JpgIe-PnF7d+3CZIyCPHa1RC4jyj1_D@$31B!y@(8w|4D&FNY?uLr8SGaZya`ayv
zvRw0-O3%66z5@=Yp+yHXMYR6jfUG`64a+<euR^N=E}wyiQBJ|~TB?_CnZ^fr^qR&;
zWJ)XUF3skH?y7-NUA;h~Zwu}@)H~r#(y+MpP8n=Yy&e}L;K?x^kN+Vr4iTt*MWHPB
zUR-Z#yDyxH2KL8<;Y=L8WF@EVt-6d2jCsm5DY*0PTzSqFuLkphU_&8Z(*=C`%wOL<
zfcgj-K}jb~F(^w;%vAZjgC4yBUv!Rde(;kTmCsi>gOQ5$d)@PcDbs^UI|1Q`3e(aJ
zT4&u1h4gPvKKHeG#J|elln%#(v|j+s?at;~8Rb8YYYCshuxGHf+d!>l=Wc{0D+$rc
z4>@d5jy5a*VMwo;exFC$0KgZp>#6fJlw^XBfBQbr?TbIs!P>h-c*ph6CN+$*CzSWu
zzi35~bnHKk2B-g%NrJn=ru|gDJex3N26C~9fyuRJ75pArG${FGKfO88+w488xRZh0
z^tC!UE3xmm<!Q^B+h!3N+lu9SPOEL)zGxys#62@fym+8@Qi=mB5nPV)fNRjcdimZ7
zju1^nyIr;oj9+I$n_h1{L`YA#B9A3I{k}lqo;Py-`IA@O6%tu*8y#Q=Ly*;0XLf!X
z^k;YMcTc?oO0I$8$vMHjt@sOE<ACOXtyXADv6?OnM))yvyC#g~qmU}(M`#*5Y9C=Q
zx5)K2fHtYtCK2y`xzC03U;nnB(FLJinwV4`_1PH{p$pmA2FADSjIvhvYsLm8Ov~l5
zIw&M?K64B0`C1}NMoD63h$>-ou=kh)o)44)`-3}3HBC@h3{GtIz{wZ_^vux!*1p8j
z$LwDVYShL~JB8Yt>@iCZXJ?^>b$s9Wg8t~}DSGNCY<dv5zl1#IOCNW+y-YKFpNgDE
z#Y?|z($gxgd(gTh9Ro=r#x<$&$#e}JepnBS#9oO8_RU~lf&JIfO0a`14NuXOes|f*
z;=1aML;op&sfnLo-=NgI-OjY3)D+Bs@UT>o=~Zb;k__2qz1!C6{OXdO&qB#;#i_|(
zOqnDl65lvTIqI)x01x?QO23i2`5QnR?{Mu;UgSDCa_TFC3U&u%60n?LwdcKfh@Rvb
z43C4&K0hz8SfQHkfp+NcW}P_1>|*g5V^@s`eYkQ8bP&Xcok3+*7R>357&NMY2syO1
z27-IrEX`&s_Q4NK!@2eCi+|jKa~v#q!bHQFrJF)BQk25a3olBy`%8!|FQv}XR;kh4
zNBi&_6l?0NqnNX|`)^ItW=h)+I~RMarX^qR*FRLqG<}?oda(R=DMJiPf0vjU@F643
zjOxDy;)r6CU{}F^yo!l!pWEH_QP@F(P*@S!r@p<HEXrrw|7(cJe=lY(U~*mpOM9AN
zDIzvG#N11K_f%o`g<AMUrse$l36p={y*!D>@Sp13kK;pRF~GVw{xbQfSP5JgpY1r?
z(5BFf^-Q_}``^CYH#yJ2$u|y<DGh|X5#4OhcMJNzd%c=8hNqo~1S&pn7V&Ocvr;D~
z;WRh60C13VjfT<pCyeV|8am4GcK{v<n*Hm&8nrFsgXQZuYx8VS27AEH(-EUDW;EP?
z6{SMGk^UW?;aG|c@oh?swtxSz@B!%v^Cx&D*bp8ABHtfY^!A^TOFpZXlOGTq+tAU$
ze-?dm7A0tQSErqQxIUhhf7(hEtd-|Rd~`ZO3!cg*I6*!qiU~yUz~VsC8?}dzDXQak
zC{qfuSI8G;Z%yOegSEBAD_2<v7H&U;51u%VYU;+be02ru!-4Z8R;%L_+^?mk-91?&
zQG1o9J14d#%R+nJW2WZ(Q`f&;EKosYZqOZDn_V0a`z)Dt{YtRokwHV7J(x3G+8Ldb
zJ7BkK3MA@x(4J?zcf(!#r)Bdcr`~rd3DBVc(bx8ws4uotN-CUm?ZdrsM92r#;eV9J
zp60%UQ>?{S)!z0M?+}!OCt?5nzuJ5Aa4OgKZ~R_nnPnsM5E6w%nP<vaWGK^;44KO^
zPb&$PX_q20WGu;$%FHUEiDf7<lVX*yWFC9Z+kT$!^L_q&kK_I0cO1X&WB;++yLH{y
zbzbNAIfwhx)eBTzL${NMxP58`4%g+Wq=Sh_PxaBXx9JVTjI<7BIzK%Ki*`LKpt|WA
z)k^UxNbjuJ;3OV0ViM?y_i|)kPr0NU{CzDef=QWTID&;sGiPfw>y_4#L+_fEwu%Hk
zJfnAf1LxeU@%Vt&kjL~Bg%j$tMQfvQ&G_uuJHU7e)$Zkn*iK3$7}`egNuS8)KtcRW
zEay&?ai&Ti?%>Z^KdYvyk2H47O!UVc`~>w$6@LRPoStM7HEY!P8)MD!Z2lzF>q7%x
zfhhUQfLSHBMYbk#Fl+M`Y+enF3MnT|M}8nHW+0*5w~!~4s-|!M6<hU2M-}b@waiE5
zag&N|lZvqw4`9_bvCN&iir66Px9!8j!?!G9>v+?~126L+!rv#y5Y`6}7fR5wK=Z#i
zvJD2a`Wml4PAd005Pq?FMUKMG1c{(g!21aJ5R1y>rp`5ciJW(40*$z}a@K*~<~T3k
z(3g5u;1}gl3cpKcC!FTnZn@A-DnK24#sxs?>VPjRDB8UfoOFimR@&0K3gjrZZXp}5
z1B*d|J2DjTk(H4l&2wI4uwip9y^fQi%&i|gMxFs&0tCo1G^}tR^;g7l$DaOc@T;FV
zl1mHsG$V|aPcG;UfV}Tl7Uv6RXvQC}TObkVmYbyUquJrZZRB`lqi~lBG@dRXlU9n`
z0TYFj8n(X{6ps~Ou!K4oOsUZoCHF5+v9NsWC@0{P1Pe4Vpv$y$;ZAz%z}k_f$ci8v
z4EcITK4o=dJzZ|^-L0Q3+(|>+le4Gz`}aeQV((%C`_V%ITW(<s3yQ5Y=RuWgAG5=n
zD<d*6`y+>o<sQRrxUxfFdTiLTtfuo}%b)iz?=9X(kw(>3+{~4u0|nH}E#`d>+h=oR
z8x-VW(L<Swy!{JP&0l-H(XI^yQT~?O;8HbRCEOx=!P5Qib_=4}3mUDj-6%h#w5d0M
z$%4nx(MUmvzSLIT$C*_#$12P5w&77X3zZZZ9~1xKHmp5*VHZ#uW~7B~Qht~1`)Y@>
z0x2bz31M5gES(X=Zt^s()s7A!`vHBWEUv1b6?3gT)|5~#SnyoX3Xw`n>shFkfGZo6
z-j<D>RMd9@6+WIZ9U<nPfn_94HNq<p`^%?Bzl2D17nM>?=^}Uo^xP6M$y35uuW<WG
zulodm?##S`Zah;#8`n9@uQ`3kEEz4QWSOjqKz3kl_%VBhZ1l4CKmJg0L%z+6QmWZU
zBQp_!=z4K5_rT@>&juSub~}1MW#caFrSJ98i<eEVGCW#5fL@P=M!hUJ1mrYS3Ax*v
z@w*#JzLgjp=G#57U!u4Do^gk9hYGI&mFq=H+@FTw(5KKtTdF9#-bo*w+t1+^zI|Yy
zL<LXgU1jxSxEzJJ7ZR&}+%0-gIj921+z$RuBHjwWZ?XVWsYrJMB$13~(wVIOEqn<c
zkjn`^3UVJ%RqG<~D9~M@N!AqG`0Elcq<5crAS1&A*(NM-Xb_2ZZ^k}YPV?J9uJ@yS
zjhiM3|2{Zsky-t@p0cO&Vh{l+Q^2-*$R2+DT6(3;`fJ7dF9|m-9Lap{kl&_Py#<&u
zSMN~ugb9%Z9}|cTrJ}fUsl2727iYXX)=3uX$Hvz=Nv4u6XQ=(vs(L#{A>BrnMGq{o
z|5)ZTl~(ugR&IxLJGyQ>r1viy1I{g1Fk_ZhzLHe&J8v^Wok!qiXZD{My!LTJkTBIc
zl@2YD*#oo_>zsaq*xI9<KPI!)N}?g<IDQ(qGM>(<_TJBc+9W!eLs7p*N4QAik>^;(
zup-NvT>clreqE~1MmFjvS7wPYGy`&BR-;#6Jc&z;l2LCfB0-rCq8WrHx9@gb5w%}1
z9kheJvcg_<ndn?VDo58sXJR}KgiC$O73(VEt86074*$ULn?`yQAJ9}<H?BNVAiD?x
z`Zd3es6wen;6F<i|6H_D+~nT4b%%g1NQW+{`+BfNTi&qlfeLBdFwB`c<Jr3;qW@r)
z$S0&usZuR-vhTq`u9=*LnndC7>kKd#FZb%HO+(52oGe;7F!Oz%kfno?FE1-`l-CpG
zA4fwjcbpB*U$Z}6J-LegIB9&a5?Xall+uTaET=UwOZxqPGdr(cW?B-^w}u^(a&#UJ
zuH;dvzCnBIkuF}^zoi|lq7R2icz04E=~++|A4RfK%Ngwci<aN6#pa6slstcqM6-3$
z6hZvkq#_Mw>2cp+W*w)w6fCy34^s7ad3FX?)tdrI%HRujPMBt2yt4c&^85hbjeg<z
zdmrv0NHW?i?1;l<ne<F-5VdGc9k#CMFkri>e5Ny38*F?RLn{IsE*;vBj!N3*iG8Wu
ze5o+?tDCDU;0d`weZ{lvrc`rN+pmAoO9dU0x>CCr!I}as!}ZhM1)^KoPOH$m-C46@
z*NxLtrW-c({729;PusD~*s|EiqD?HF*=eb{7vB;;e4q+L@ILh2NNGkJ<@>2Dm$3%l
zBTzZ=0iXr^?}sDyRffi96>CK9y)_&6gp{S<7YXFO#I3&OV0OQxX0sP45KF!$1K#3(
zDXrd>NY|-?8E&Mjwtxzm!$_N3MMs8=U6=zxW}>Yn?CjYlB@a|^I}L#i3GWcIJK!Pw
zz#K9z5O^w#A01z2^~K=&Tb4ZMPjiD0vLVZO)lGa{Y6^UHq|U-{pQY14LzNwMPNfnD
zf>lHhY;Yo2Ks#umZ(W}1Nm*r(A4av~<pb&7f4C>)iQ0P2Uz$;b4fj${KbTU5e-Sc2
zP;pmrdm#7oeY6)!xf+TT7By~8+7}Y{`5!<C<zRWpg*->z9h7V2A;`ClMq9OgCU!=X
z$bOm1267{fGJ15m#}2BbOjLT3&-tRd*L_XNkUnGOi<=AcKb|~(`C!*=h~20cRc@}`
zuR54+1im=L)r4xQB-h(9Hxes(>GN8gzXI=!WEVeIY2}`P$isX1an1|<I!1KX8dK#&
zdB|JhJH`(Tkc|XhKZul?n@8(GPVHdbgG^Y@6=Gt70+r*GaDrRHh^i}9x04mE)6n->
z(@RP5jfT%cq5_3l7C*^uur^6kA^5)FY!4fTEH_;H#ZHvXv%*eO;4PmDUKnUdwn+mc
zVUFOH(4*(`sP7*A1020AeSA6q77kS!ef1*W+&DDZsNN#g&)oBN|La{f`lZh(AJy<j
zTWN?d#56=Lcw(K-Kwh7R2VzTe)tSThSJ@tKHUI@IfC4DXTY9*CE@i)MM9~`6W~10v
zCLYh+`?mdw57A;M>wJP=m*M;Vm?P4m9Y^(TGAR*4&%pRVLu1b0^dYgdNF5BeDd|#;
z6QW#&K~VlG>OmhKA)3EYQNNb9cagQ{F<+qHPz{@IoZ}GPkg0Fb4gb$nvl-Yqh2Bjk
zOrI&}IIq5$587{vmC%Y*T;~P{*5yY%{o?PZE@C!<arrMAi&jRt5SxR-IHb&~{`H<b
zVlK@Joz@b1T@k3|n47*m_P%G1LoEF5=aNLvypH7%dzG_rV}v90*@~s9M-H#G8>v4C
z{Jh10l;SB%Fntn|FP}2Cq*yvV4_}$GLhvU6f%Ur$4iP!}JoCk%8>jc}uol#13n>rl
z7f<i|*m8#NVUKv+5!_RK4Ik%<SjaKBu&H{F-pom)zX1FiT4XCq@KC~E82HI=b1W`D
z!xn>xL-im*tsys~-64I=gy|Libq6z6{z!3mm8*l>TYRQY-s^RK&9k0N3VjNxwV+Tq
zmwpyfbr?n}ier0a5bQ{Tn{}=f8V%j;m`GQTvXpJ7K#bA;?@W3+;ycN}wt+^f?g`Jj
z9Aa*#5(TM@fwLyIB<Bfw^U_xlkCwZiKXr@0o(fiko?f>cBidM}3Lk2pg*@==P1}#}
zu7>K8`j2=&tLtwYgHl_M*F#C%mdWbky^mpcHZAk}^%|(V?75_D41i;`6u?l1;Ib|{
zoEs-z*n8gAUlYtxuC<ZsGkSd7hwZ7~F8&9^*;Z~mlRoQ4i5Q8(@kemP`zGxBcZkQ|
zAH4Lu>MEm<2reYMb&dl_%-(d$%TnEky34xeE)VFNS6m6;lzg0)6LarlryP6R-or@z
zf@sc{nqWR`ODE{aJ(?5(Gs1~r4}yl-x{DuEjft0`%NvuX&#O8QRJagyqkH?&MOs?<
z(nvLhzUTGpYzQJ?w)!BjP;6UPdHiU*{BfkKzL6dGa^fVucI<xA0v?tdH$s;)WLgh7
zEk`N*<#&R@gGWPjc~$zbq`yXJj#<O7&*mWaohuMLW=V8F%th9$V|o50^=Sc7W?eU<
zZUs$~^}R*XH$!quskm9DV3y1y;3u-D!pBt0^Oqk5W_@#HR;`I(yfgB)O?M-Q7pIY9
zkfqV-bk+ayWt76a;daf_pmsdLR0Xfe^K9MKp%|LD$BU3gZnDF=XE|xVIH{)l|4Ul;
z<Fd0l9Pn%9m+tCJcZS_h6ji>X6E{8Zac<h}j{EAyaUkmYC2t_XolD>!FNRr0yB-o*
zvo}1cexoS3j|Qo;>@Vso?kmIB=D1uE5@^$NycP5cA((@vkrkU551~UGSaqAD_-0L`
z>y`qDY!zAU|GF|&ubqukG-dX-pBB!4fbcdU)`{Elfjz^(&$MuM6Z=Jg+VM$eGtbpi
zOhw*3mXln9zvMJWjig@%D|7pimS*u(M{Z#LxA{u(0|E7omAbUd;N-oUt{U-e^IJRl
z{&GyqzM80r0C36Gz~3c5dWO%LXl?IeB=!L(tQaELIk(eK*|Fj2VL*#tP@ax81V^#n
zq9LZYzvymYF?X_UR;f(exzViidtcpJO(<m7zVS?=-4;@8Suf`}Bu5?2)khr*Pb5Wl
z#>}jHf%3th<$G0BX>~{#ncMe-ABEu^pkmIlB9QvS_S$`XyZw;zASFT+2X6kFH1zpn
zid665yy@T6vcGveqe-hO^QM>lDlmzD?ynL&R{eR$hg{X&ekoY#URVT!VSL!u`Iwa2
z;<4)MHGn6fah3Aay&@S<baqhg%_(z@lJO-UjCs2q1<jN9NUxQW{b#L=mIZE^U$6fe
zB!P+Kob?-S6T+(}k4TzlQF?J)4DS%bcP7k~g5UM>9JpOY!s&yGT7G4?QxpgxgB#xo
zb$r32e0GL%ER={Ds)M9GuwD9@A#@BoK@Hy8$A370@O|;4<eX9qS3lUQ>Jc>Qzm`|w
zG9b*P8vNv2iX=qZv8z+VEvpo>Z$Kg2q@Ca1v-AqHAGw*53jB%AdT_O9xq4PiJPlXA
zgKMDA``tNEa~^Y<^g~r@gjj#+;i_8j*K<-c==_n}6-7S%7Nf<a%e{A<xa}rUu!;4F
z<CRZOo2dwPj7aHy;EXEvlnj%e+Ku-McP+nIPyX=oTA%{sHqC3FHJ!g1a*XdIfxXYR
zZw-P!3Er)tl>*IR^L1A~*bOOtoPqG}I5!LSwEwS%dk{huYAxxs4=%ED?!*Xflou+$
z&@UXvT&Bw*nyxNJmZgE1_3vRg>-(*)?U(?7b?VV2vxa7CYK^@Y<EAelxkeN+*(O&$
z{W7J%Dxo7Yf;rLiTI(Eas!4B$JQlyd_UY#8v8=q~B>cUvtm;v8{~TG-<|9vYlF>{!
zj1;xEigq0!wWq~8`FT?!jqKV~HpSq0y}x(cNOH;r8J7g(7#t7QrlQh^^LhQD-~3Ro
z*n6DU+4!eW_)ba~7_{xr9|Jg~L$c;m0P?H3lIzjiBN`>oFOm5rMG-a$(-o+ZKBeEu
zB}m0_sQ<vb0qQaC6cg=sJ0_-?-(NA(M83^%_95fyn*~mF!I+EF#hW1-{U(jBrCIJy
z9jJd-x(ufmAGKBd@iMa_z`-}p?L3m({@i!Zm{Oo88Vcim-9r(q4-FDe2P6<6w$+6c
zjJ)NF%IVJXH{TqQ>}=yX_GY24^{m7+ZP#`~`Lem%3F>pUK}xti2vX)Y`$6Fr`!gqQ
zn*O6mm*|)ZMd>NE7%IF=XE(S_NHQu=I-h*9wJjG}nHA>*gg;ffHY{ZE{f>oEQXk4=
zVW7?YRUKaUh!HV#yn;2WjrX8V^l`<#imjLr5Cr`QJ^DzuTJ<)?*d;o8!ewX~M#%1<
z$yM!`_7A4Wv%DOiZ_g6iz?t*=x)qLVGfynWm5dOLJ|jJ?%MkL94?fu2w*fH6?63m=
zOW^XCq1H&HF9>G1s3eAPdD?xx7_%Fok4t*Q5lh*t$a>@blZWGkHa>!<;S$vIJm?#<
zf~r*bj*vq5&gk}i^)lgAiO_pF`j8Dm><&q3RrOFtvdnMr$}zS8XKK2Y>Zv7DNI{QO
z0z*YOdS?a@u8&{z_<Zp3>#w)wK8_{T^F@>VKQrW)_k^ttPw!}QVTc7h9q-vHFEMp@
zZ{e7Ed%*;I?4lff@9#I)m@1lF;-^IHA5G}YoF?WILf4zP2zE>enF}r~dp4J<idFY_
zn;xs$<F>Cc-J5BXhmOL2u`q9<QFrd^Ib|Pbi}{mX@3&IVfUfe1Qa=+dUf32DQH0{m
znle9}PBh)5vRREl@QPDEQBc+ZQ0wtWxWa79!7Q&cn?67JtD{Nsp)C11i7h@2L26tX
z5z#zz9OR$vMWa`9<JKkb<-mrmscd)^7R5DQ=IArF$d<q$Ft(YlfG$}oKS9KO3=ABS
zjC$d{pJel8j!y|ATZ?OZb#tssCcA9BEST+yIUYebE`h)BDyz}nVon7oZ_sLs;{7$Z
z5vikS19#U=^Ba_I7<4)J&clW2)fjvnhy#|*qec!eftaW$fv8MB)>p&k{5K0}`fSQz
zteeKj9LJ+Tji7fp4dyaQ<;>y_ry*U$PrTus@$H`b6!VPn3_2`K;3L`4{Ca>NHxIeB
zkuZ#O33vu0<H8`nHjMm_+{EH{7T0)1&*!tVRF*p}RcUVI+0hUq4x631nx)LN)Eb~?
z=Kpb1cpv0D0lpOS%y<29bTRL#H|r<{Q9jq3ZApP$=F?=x&e0%iAJP7KF(n^o$laU<
zKvd_=@8X5DpyJOlGB$!@db)#I=LLPUNQp2uLRYu%UdX;54;UR7SH=2u0&~+s4xWW1
z9bpN~G4!_6Ka*@-npe2}R1xiW>QctwW^C{`H!+BJ=R|(X5e&W-+>;7jZNlP-dW-wl
zCYr2UYOXTpuvD{53>kn(pEV+fYk55M_wD@dt2U{eUsd~VkPbnn?mVf@JbJPJa%NK|
zjDkWmX~PYC`59OzvN$x~qq;JENCUv~o{xmPxl5|%Osorw$A@F~=H^gh&1%#-3<
zF7d)YnShSH{Os~MYv;q;=&Wsf$YL)Yx4u^l(&!M4cpEbOUQWM0r;TXNz)+;^atkYp
z@x9Dq7<cQ-HIj(zG2m^^M>N&U2jv*FmS)-~#QEaMrGv=|vT7fRgmAyc)pdXWeX7=X
zT?VqwGfpfD@c&RqyAy3XRdS&W{PuQMcW-M+mv_X@qIpHscjs|z30-X-bu;rkJ3grf
zff9b|><}eVwE;a-6=(kOE`pC|>jC{aj2k`prbZ+c*`hazTZ~`4EylPPBpP^~bf)V0
ze6yZD9iRF2X~Fa7N(jGz-Bp~T;{YB|TVhDlGiFE05s!>rqP5JJ&j0S;{y<Oe2HrpF
zE9j(g9FY}7Pc>VUn23DvTB*O?_2bT@tifxn^x>&R?wPS9wy0}8{g>Ta!>oh*eRVvJ
zO*ASM7(rGp@krM|_%||0LvT_*c`jGa!kfG}aO^y)zhTrVeQwy)8H(|i2;o{81Semd
zJZejU_=6N<7pS`30>DcWeG}Si!>7YFSr~D6vOTK*q^w$dvm*t={bPn}4PkQ+mK5F2
zPJ^Sjm#Ibeu6gNO-ZO?LTV%gF?i^80e!k0Hq>N8ZyrnKzl83pFjfw3eh%7Ih<=iDY
z8MSGV_{ghZ=0+qhlUVE4h~KNCqS#E6ZS*zO+Q*6$88U;k;U#rB&Yj?<x`s_XQ}i{z
zF-EH#KIh$~#*VlrA<ArQd-?8DFSR?xBij|Dw1_K&6`FgKhTha$cxjV^4o7EOL_Gg@
zEdc3QQhPjiU@=+GJ1l2ptv6~>_@TNOX7Ntf$(pdawDc-)_mmoAgOUzySf<@GfA=uP
zn>+TU8#QtiGLk=XsR(hKSEp_9%+rDMGML@KyNgk4X@EX&OkMogbN`2O1O3y=B^0Zy
zUw>xLO)nky9SDZ|WsqlVu`yhTe4y4G@gnr}D{+*o&Yd<qxv(<gV$Ou9PuNpwMy;@|
zG*ckGo6ybVev;_~1HNP0PUq9Gx_FfbL3!3xZqD?yHqz+Ei@_;pR-0#4mu>>MBL3?=
z^Z9JzAUZQuQ`EDUV?yZMPNAqc`EA52HHzFZB>@DBPBJ-K92;3>_yL;dljP2}T&Mkn
zUuIQzyaqu3seXa3_rn26)roF!i<d))H6n`CFLeB9L03k64?_+^m6<y9)O8&S+lRrw
z1-;N!z_ZqIP>xq?X|wGccU_*&st4_af3VY#$_VlMrMaGMD4`fkxs!WxwMpt1l+F?M
z8%i?geUnE`C$^bEvl-1vTL7}13p2o~_UbnK+Wok}oK;7clhg8kF#CI?Y4T#43GAjT
z#fe+>w&IV@vMI&L>X#}}2)28si$~L}<fVkFGez2J0`TyMWdx}PWuAKxUCgR3$}0(G
zx8k(c!Yy$c#qH}b)eoSa+XT-{pU|B;`DdN}DE03?ER&2#ew)UJqX|c+UX1vYL32m~
zUuH;1E0*&#d~K?HB&GaJ?Kgc+c^q72U2IhnC5(E@v4n$F`+PMu?4x<fk6^EZPd`RY
z=HIl$zu(l~GcSVl9fhG{#^@YC2f93>1USD*Wuz|7iN)n7!hG&JasKIZATM3Vuk^c)
zY)BVv+VD~ZA$UE>!h}MOF={_1;$4E+DieOWi-{jePXOoF9wzouznQunSbU)VHdlA^
zzI=tR?)P7515fmg7m?SMp;gA$-)5KYb)sv7x#sVJTcWbhl+vTY%C%Aji`mG-r(Rj|
z`O$=r?_eqfP}kxSxWQdaT1#=q8S;Kd-WRBri(x9L;O3g}cLX$fZW?zi`5NU>$VM8R
zrQZl}w#kxUAI2b@&mx!Xk?IN=>a}1&3}KiB@skId)rrLKu!4h{@ToHCQz;Lb@746<
zf=#8y4(It1Vuv%FR`_MVtNmueO5Pb0R#+#yB=yK$^YG%gYN{3#X$^6RjUbGxBJ2mV
zNlFsh(4osSJ8I=A;V}7^z=^f+JD1^vQT_u;I8^{Z6<G1!rzF2+xG50*z0vqX)zd^W
zywmsiXT~)l@XVBRq|QKjI&^>{b^CYswKu7MH<`!bx;T2s2G)a^(hHcQYP~eZ1QbgJ
zXSUnW(yV-DV$>|UJAo&$#4(n<YheRX6n3Mw`j&{zp)fHF-Vx<lTLrHvqdOt=d1flE
z22Qy2zOXe_9D_qXpYI6KpfI;(9jaAr=rNyN8@I^K5@H_FLCXlCCDpubaD8|+!}w*@
ze!r6l8@3T0+tUm_u4canS^DwoQmXFhebv0qTyxbbtu`vuLj{#EDzA6szN<R0U4X_k
zR24>sMpmuYGoiOyYac^=!geo#vIESvu{kwD<ST&;RV=W}h>Fj(5J64Wh>c5`GoKYA
zCk&q(k-=RQPYiemu{10?(BNtc{v4EWwYN2`(9a==eA!<XlHa7mJB|4+(AZth!4MiD
znFLv9#7c{GTNvG2{VNjrDo>AUyKNd)#<<_pV%mu0?EuSInX<l+dG);mrJU<ohQ9H2
zrB^$9TqpOj*L;nn)J!!8F2UEM{sd1niRDt+70DvOW?1YcjBm+2+BbWuJ|~m+yQ`0U
zIpiyI$iqX~<;+Imr+NDMkO~2X;IbJdmPsM`@m~T3dyq5PdjDGMAgl#5qulsgUXe-%
z!-B?n)K|~rMlLr5Xb7UVyt;cBG;jE%CH>%{UeLXQBkv_@D3Ptp{A+T-K1^@&AvUZT
zuXjU6L&P!292#r<pv%*MfN*Pa?MT9$k!~H#9uV|`fF5{HVsU7*?k%hbSP%*ocNO5)
zS(H;0T2z7{LMlw%ADrZ7fW5dXZV8hg8eg7cLu%fm&YA-1K)0x$RB#31J>H*tbTRQ8
zpKshjz#|fb3Hk;<(j(=p{U>r4mH0a@NqT9cPWEjzoukuw^cjtP(138}YJ_BX+VueG
z3cHmQ40)i@=?pY7T7g0@g*H{4hM*NylGSF4oLXD66V5TPw}9uJ@x*)Cox3AOB(E~w
z4p4mreuoy7=DzT)22}<$3k)T4gu<IiSDND7-ZBHq;MYtsUgehgbz1`QlR_`t1TNgX
z;ac~xS65kh`<-K$A3xI*y6syNg*FAiV0$O-HuD>9H`;)uHwMH$xkryPBR0*6%f8f|
z7iaIuJ*FT}IM5)1v!Xv+DG?tO<oiLNr}5z|>-L1Rib2HL{Y!1^pDtZoq;XVg5G=v)
ze+ZJ~V}zBBMZ;dW-z733SYh>;#c;=yEPO>&c25l$@#VoQ7{mwe2oa9o(VQMR`H+0l
zIXof$G+mP0Ue)P6N~08{vyvoiCum~8J63+|%U9Z@i;H>jkC&6HR+zffp**0>IAeqL
zUe!g`<{aeCX;AE%4ZI=iw$lpWLwwbhr*9M|Eb>JK8F10K2Db57zY`8O)mU`cGXZ48
z(!j+RatLR%eMUwTU@Qdgg6{}jUnN1g9|4#4uT?;V;UiMO<^*ccswcs1rtZ#BA8n7<
z(5mHViot3}PY;vG>JnNKi#cHs-^v#Ajn+P!Dy1@kYBg{&0iGGxhT8N<7)S&G9H5A0
zkSag~fh&y%v+{Ib>-P^8C$IDQ@>&~Q!OaE81yT`fuaTA1I<Dq#N&pllR6eoIVC=K&
z*dgx5oB|Z1cg$7$Dg2&9#54{egaA;k<9k7Dz^kL`<~q9KPgmeC<{aY)0D$JP)!r7n
z;~BJ|UkxA`AYB;DeB~loek50`E|)T`L55=d9BMXp56DH;E%>8BMGtc2`5ZUvXfDG-
zTGjsIDLQNSo2Fi)Lt+rK@HW$T$n6mHZ0GVBl*whOx6WYjyz~5%vW)rgH#ES>#QVbO
z&J1RKk&bWr>?v%(RA8|6edAz!1I2Pxk{^O;4m$4hJ3+!d#KAbg6vqNUNA~wm{`AJ}
zu0}wy0tRJ2NvP>OHfZpfNQ-PiH5n3l>MF^Z4y&&H56RMoTOGM3H^*j@Mz|jMy9$My
z$}BqnbDzThL_#o!9qKZXt}p%%4kBC)5)|XEg{emwb*$)N?gGe}-?e|-Q{DTPlfs?g
zcc0hkH%Pi^5~}@Sw&u7dVY-zM5)8a9xwmW0*U<TD8_Q@7)=_;f+&DD-L0mt4QIPpN
z6%q+)6vSHDM|9q_l3r`cU|`;KL4@%6>+>Uh&HSP`L&sY;DJjA^KS;LQt}d2x994HK
zVpXRFPi2;snKQ<*R||IOX_I?N2DmK?^`LeM$O{Ct0|N5BO6YJrwlGnk>qJ_ABxjb=
z(T_V4_0I}}A)H*=abu1Ka7=dlt}7abl9J)oOoaJ4v3#8*1V0b103i|X^l+;!cZBHY
z&z}O0Ms5Wu9Nm7ZqIbPIF{*qy>Ye&#xqVEBsF(7T5<q3W?co{P=yBcO$TFfIY$2<F
zcZX;?6QcpU;ezdvjW**e+Nh8wC<I1=SaqXrEY5MD@rXGt%sx@!RMz308}Xm#O`~?I
z_@aFG*y(9D(unr9HnS_bN%FC3%lMa)p*pIoL?_sqDrhgXa^}8t)5IH%GGNG_|8qg|
z=mizu6sqg$nJgF-vn5?>bsle74%~MtdCm8P+W85sWiyH?wkDzT>&)#>dd)6o?k;+r
zZ&d81ZyDr}-D7o9@qm^2Qy>-tajS*U-it~Jmq&#Hz_!eoA_L`lQ;L-EgAtbxMKYE+
zzFQdU+OYGYDWaWn$dCvJ*l7uhLhKAr)722s^jud;B0D1308rCjw8}di3J(x*z-Phl
zo<Nn%Yg?fsQnW#aWwtdp$VweB!W&XUkv=T6La#b-F!4NBI0Q5_BcUF8FvlV&WoH{*
zmO440&=2gou`cVjq`7VZM3>Pdq>5F&IONqd==VKeS+(QB%6Q>4ty=VW=uLa(%pI%s
z7s*nxwds2#x*764K#!3kK()lf7Zw?(LxE3wk3%gUl;k`G+}K`l?I|F|qzs>6zcg!9
z>e)c*b1n%v=`7Z;cPY&eZ2bLCBSJEe2YX(jqWbjpI5ep^ILm?G1b%23k<{N+lZnJ7
zL!_T~H=l^`oUI?cO0`v(`sLn>LlkvOO;&Oe%VHq1WONAYhw++3z_W;XQp$wc7~(|-
zrku23l?Am&fFx8L0WSw~$CCuPad{cTE@q6qm57wqF+li{aFnIwYw7abJ_W<XW=xrA
zxap#L3=!N1tBa_C^Hb1*IfUm@sJX5r1F!SF8~tckMpgRyF*`1xypjgKEd_8AA`h}C
z2=J5{%#0>X!~3&Q<{(7ntlmM&Zh`(qh{2lqFxWs(HqFw{&ol@HFlx|uvk1lSoPi1S
z4R6^h+0=_&>Mh_>I?>BO9Mv}|H6Ubs4p@5+f2r*inYHW&;pp1Zgy%*YFjrR+#?C+^
z9wJac)176$r8>=X&d3l`YMcb4>8EOzpXMkw#t}uo@R<ezXRM{-DePKT!3&qR=R8NH
zF3S3OG#n1{0S?lt2D6`a(3KtrW{*64-CGdMke>`oX);gv^MDU|0Od}<#Z82uK!Ir1
z^WCQk*W<!1DoNkf!43N(_(;@LUHv4KE41N18=yz497qR0v~HOe8*S?HYX)bhCfi4z
zoan@j7W`FUX+5pRp-_9!N!_h5VH0}9!7WBaM0Q{tAURlp<M_wU?2~%gA}9<cB3R8#
zjnw%a_i;W$bIy__?&QRNdx)&-cOe2*>zC6wAxZ@M_9=57!I}z_Imu7uxy-YoyNZ@(
z;_1*2UOe(xQ+oW`qHsj=-Q4~f`lFx~*TPvfi!FiHp|V7%kcZnYf@#xs!1vEwxM`R;
z@bhbj8xNhOxA`9GdgfXg%*8dQK{Vm>O~Fqm6&0T^PM=lC_GngVHEOt~RCO6)Nb9hE
zRwjTfvYTqcVbe3<u-E@}*i#fFLK+n^1RIk|YuWssUEo*IX14l$;ftW}07Fnsjc>w1
zdcrM~f)sq!f1?nb&_-2Z@JvFsPR(AMqN_>M&&p!qJ6UiAvbT2~rs>0LLt4PnHV+dk
zM97r$A%kDw)YB;+cjmP6k8I6|;F@xBU(7d8?feoDNo6MVph{W$BVn;ZrB$f`AM%0&
zY8IT#u=JO88#0pJ2X<3c_iZPE=T!Aq7-@djii4AKY32+DH@*k1DP!mkUcvu_N0g!D
z<e*M>VJb)Rz~+nc=CP(xGY9G(j7>DE67GJhNi+#&tK|T+-f;Hw9<sZVh#~|>R9zQ8
z&2c!ZVcUgO;0?)O)5tO}L8v$nu(VX1rWse^8nL<<6U0h>(Rn($tf6#W-Of;Q;eMm9
zq{zy*L<q^z#1g`A<|?Vl{an(jY^eF3aSWQuQwYz#bCvTGIfQ-$71(TTH^#O1yM*C}
zou8|tuWnVL3!Y5**i<k`AFgot)wEG93qw97`3&g#ic<=~B!*%aB~Z?s`n0YolTT*W
zb7z#Epk*VQUvD`Oz>h3FYenIA2w1jZ`4OV*m-w7|Cv+Tp!tW5y>#tSdii3ssoomC!
zMq5aIbciYnsv>tE({#sy2tT+<GB1b+$n$IPjMe=+RBP9+b}2o93J1mq-~!^sLYe08
zfZy<@1oT97UomjW^Kh22yQNg*98VER9qZMC%4rQOQ%>~}S7ISYoGped3t!68q=YC!
z_qCQ7r`WYl5+UL;Db#e*qkj<w$-<)Fdc0{wUIjs7SNdiOo!?DpNCKzH&twUz5&}1-
z4HK!mewXfrj~R@g#(~GA4_do#ZpiJEO`lFUe35NS+TitL*yLk-qT{Qy5xA@;Z?Y%+
zpo)zk;sptPb?UMn&#rPe=9rk@4tonZ5)W<s>Y3P9Y*pR-#iH?1&SWht%SbX?0ZG3D
zD*0#u&3z!mIn_%~f?&wbmEX#`x{4BAPe;Db3UDcB19hK~6dH_NTfG`UfQ4hU@wV%c
zO^0Ak`^MmjcU_5q{WX?)N-*Y52X7u0A3K*BO8UU@V98%n4A@4_7_*z^JxT(mo7Mf{
zviJ+;?ZiFz-|nThN10);c!*Id2-1`YrzXN*bCRjG`>wSZUZ$MM|D2;a&X3%ti&<)s
zWt{Ylyz>5c6jQ<Psm=Rm43%J*muwu$&V#t&Hf}Znjx;H~*C9|wGAqQebg12y91faD
z6mR+%N@gMQ0<#qy_GnfBjpyK5Wz(dco6vmOwxt+DSH?7)FW?59piaT^rV)*hj(9!K
zY;97QTIIBd@DaU$*iR{ws7%JZzI(9M)@>=i8vo<r{;ov$$V{1MC@>?qLH*25i$|m`
z+=fdv6UQ?TOG#>3d{`zgDx7ziIU(udwLCH-LrGd^vv2eCZs!qio*slY*RN-(@RFcX
zG-=T+vd@sIjO~l<IgU*r(cN#Xs>OQ)W>d6TflRqU166`j{Hx&eO9Knqqh&@%r*o;u
zPx@K4=R1sAG&UPIe+o7=3C21t-t5|f!Ql6(@Nc*BsgSO?!M`H*Dq20qHmm3jWPP!t
z#1nq;i<}3~Kp)BK+eH~KB~^2M)CL=jWL3S5zY~gj@Y%YbDc2IY#N2Nx2}U;P>syjc
zOD0G~A3MKPza!d>^^dKNdg0pw5o3MOx1<wHdDGB-!FteB3qFCQ^1YOi;cH!iR2|Ac
zrK2z9g+0&vqohd@{UVe=H~g4p<Cfu3Fitg*Sr;N_jBdnc6x}gzQ)$BrzyW^dG^#s(
z<;gQUwo^+qT9Ze*)FDbGbzS0hW>QpXEoG7%eMQ;~4wK88xk5p3MkyuFT!fP8T{g?v
zfnXRQ8&{@vO$AR6bA)_%tSM^VJyt4H*^Dn*IXk+s{U)k!wJ`Nai`rL(FLgSWaZaCy
z*>ug|HgctrNF%VX%9c7GTIM*8<%G9c%!-~bS5a0?@JyWhIKF;!Qv--J5Bi{YJ~_H|
zmrw#H5dQQVlvKQ;A8D-}rJN;O`T7Az4mJr6#b`{cF^w>@taY1jOZ`}e8Aarg#zLBI
z-|eT<+x;}V5Q*kA-2G3}zqPHG`U<-E`KP{Q<f$w><h)Ajfmemkj@SOJ;O;aj3e7mI
znzZ7l4Hu+`vahleyl0BKPURnyVH$Sq<)8FfEYQ(6ZwNR{T!`R~*jw#FfzzQK??+uX
zYMh?$XjsrveEij<4!<e@t&*vT-%V#j4{O2;2ntW#PFNl3guJh?rx^x^)dtsVG(wYp
zT)f=r_hsL6bjQXvvfemh0A2LlyvQAi`o6WxT~06pbr&tNfeEDb73Cw{{<;l&3R=>b
z14jK5yRvvxqv#!>8U*pXS|@i>m^6a=%48%C>Wf{FB0sk@t88O~vo-4(`-~)+PtKa%
zaEZ(8eKLvD(%4+j6xy&ctabUF>jOnNH$X>~CX|szPO$w=)iaUISC`dW85(cAhkpiE
zU(2$sch<C^x&p}#d?J}fwh|_(DjmQKi)Aj>RVCX@jeawK_e7)L^}%w-*znCB-DXZC
z9KB;JL+hoo0@f355tr$*dX-*+UPfF~)ZBX3*P!-Li9VOopIlZX$NchYePb+ZDAd<Z
zYEQS?&XtEenAwt1NoQ$EF_K)5^?di7mN0^nxfQ23Z@%XWZX;EPbpOt0D3OcjT6Kk0
zi>!``Nm4cL@`~#%;I;#lvd>lOo;}5fMY9tkc4JcJQ4;Yz#eDhqw%_k(be+R%ez_T4
zia4R(syouf6^?~`0*n!W_Pfz?4nd0V!UQ9ggtJz9=x}d5t#S;rE$+<}C?Qu7JoAU|
z03=Pas;+$-Joq#DM@QO7UCz4v$S*;SUn7^F^oDUQdaleihp&1n1#{yK07OSbfb;%3
zv0M&{hs~GQL?;c$xj(K;Ny>r+KTtmpnt+T|is}09ZwIAw9~BuGdJ+CwQjDPvUo>s=
zipSiH`3;xG{Kju7X)U+sBx+MHcRYCc{YK93!TE4VZzNLssetT`%W&CuU;DE0B00WG
zEK4Fp(4Scw8}LJIWm}yANW@YsPm|Si4JL;?LJ4nV*u}R>y;ivEMMYR_pFLNzemBf3
z&;&zR1yVz$tJc>BD8<yi?3waq{_uCDn4Wumn<xKn<9NJ382m2+#kY#?dr2g1NJOL(
z0lxD}#RMZ^bb~;bboa);M$LkXQ;)n@%MSFWHG96Hgdy93r$I=AE`^}tHxHyX1`F0y
zQ_DOR`dN?1v8sYwj$Hsm&2i&z2odnt`KXTI4Gt8hd-8l0z}#MltTU5g8g=%UA<8-B
zh#>Niz(<+_y8lwsdwo{VO7c_l`Jj!(TiRXee9RK~W;#?OcHQ{LL9`Biz_1T6;X9Lu
z@Y#1Lf>S`<{&K(=6E?s56w*XZxi#Ac@*`bTC<PBHxT`|nUMT6?dB%nyqhwa&dCR0I
z$7kza6O_B$%X3$!dY^Lc?t*~ClTU#IIQsTzNb@O*^fQWFfg#DfPo9)JF#Pnbu_{~l
zE!$pRVdF8w_@SA(ad9-K*_#vhsX)YT?QnXT?aI?{HOITV-KfX*q6Em@TRy=LmBSSd
zN;nPLY*9(AuwfbO<x#WMN6+z#1^!`@E)99=t9DWvqxpuC`hrLmU_FSBm5T5*&nuP3
zbYp9auPy{_%zQh^{9xKZCA=o-6Q7R|dRdyjyAqJ?e=+Ou1^Alo^Q4at*n&moop~C3
z4`7VnZ+?%wW8QO{W$yEsEc0Sj<h*U!U-^6RZUD?9b5<=RP}I)BN}b;3cK6S(3+Hyu
zGUwRk<Ub|M?^3#gLJ61oU|S9l`sz}_0N8pFfKm?zHfd#v&s`jT5=R@9_QmBQ6=-=o
zp`5=Oa&CW5&bDfqvqdUp$G(qhqPyY{`aQh#TMdch_d=+=Ayz#ajrvMIcm@|6wZz1s
zl{j<L1oqE2yM*Qiv5G*Fw{FUbKi8@8g{XLmpRUiF<{90q@Aj)Qpd)Hi54$aI5%-xT
zXscE~=dkqEaS&X@Q(-q<sA~KecLg+YNkBfCra}mO`ugMpm=6$+01fm#$&PP#dHG)p
z2e9c2>ZB)2Bx>A2B1KSIB2ix}Fd2dEhrw`EJE)Qz7^L-}h2fg(yab`4-^M(4?x*Nc
zk6(<Gnlh*yo&9*2h!zDKot{4_GjS7@#G0iaXYX+i#M1&Sa+%Htd4GY${_3z9HV|Z{
zB=y#Z2i8ijZKwU8Q(!B1;0v!<Q{WqL@h5aoA=%j7k_^Cn&~VZOYC7Nxt-DpmDA0)@
z>TlR8_r*Gi5-`#xl46gvZ#P8Y?}ccLJa^-7y1YEBw!9ru<M*C8ffBA4KGuj9Ln%V+
z$BIy4^_LxPYqz61x<+q>&*Z8zd?sPEKUO)1(x{jr^lFUr#DkgI0>+V(iCYO(ss`Z0
z-8C<({XK6a+597Bu7VetLZM{U50`kAM4rNlE$Zm9H+x_aWNwjXw7-WH1NIfrEq9|0
zjj*7Up#$vH*fqHyVbtZ}`Cf_DO{cA1N<2PGmBv@yiy6|yj7cP>3Iv#)Qoyv37o}f{
z5akI+hjSbrrJ7eQaE1O9`BK$Buc8_<y6@Mb|1&oV*^g0oA>16iC^&mjuzv1K-E$Fy
zae0T$WYz3OIq!1{9PA@F<&mj$LflulIe}qI%jbJ@DUDLhJxX7RWF5j7w`_m6l-j=6
zRQ>nrtm1~6gkz>Fv*uf4U@uqaBg#mGy@^VcKTOSn2^!D(z|ea2oX(|^PEGqElj6^8
zT*gF<d17w(DErs9)0UPBl7&svqmnDN&cyk}eQbA#JV@%B{Wt_0D4tL0N8B{{7*v$i
zZ-u(b`c8F6Z(;|<5JJ(j1%>4Xc1jAP3nkz4h#G`vop-U%Z!j0Mt6Q_A#|lUm5-Y36
z0{%jh(QmT1!dI}n^rmx;O`yyz+tkNifj9sODiS?!J9A~vH^k7{94D$u-{$5N7f-N%
z9$Msi<NS;Ak`ntd?+~FI<+JCJgxO>Or40QC4S3mRVNXs!4iQQM+Myf}LQ*9Cq@|^|
z;alxfb?9kJ`{YE<bt_dIc`jxnASeByC}7yGIMrN!p>kOjtCHJO{O?)-M`hjD97BN#
zk;D@M=RZe~TL|6={v}Gr8!ktb3Ci~ZQ6Xymg|O@G-H$;^fAL)CH&OEqAEoTuU-;4H
zn%?EZ(d+jQz4S2I4j<2TUgwzpiD8Qz2?`TP&j5^w4L$nad`jsd*sy(Yb{%<(brh|~
zM8rG}VW<~xL3MwCoj)D%$`i8oj?3lW7}p~U1zzHcF0v!`&WyF4=|ApAgfFb{Z6#o2
zuW10mY;N?b55~KXY2-0uL^UhxCUN|1zDb{8#R@)P5=k)ztw-mK;7=#o=R)%<IS5wi
z#&vcMNfcF{EsL(60r3~-7qa7i8c7zq{`9<CQj(EsUha0#!Z63uayD|kRPL)=tC1BF
zoDL3=&HYU7MrPkc>hzIKgQFv9NpI1&cp3DsroPb`N06I!MXXtrw$r2UqKQs~QkrWm
z((ekQ%I(Q&U&F)W<0Z?SlGOV1OeH`4u94YMB5%G3)ItQrfV7-5?D+<|y{*YUqm;(4
zpP@o*^F(pjvrtulf)+WjRh2>F8_yhMmp1pL$2zdYv`D|<=xv*~d(21UHKtLV-0Uvs
zYhmb3B<=|+ZZA%aEpwU?D@RmyK%h%sT@*BdFTv}JDua!}dy@q4PhmbhK`kwf@;WyX
zN!H}n7rsA~%k3Sk?){`FLou>ZhAE_ItNQz7c|%!Am=CeqPowZmJ9+wR<JZ6N#qppN
zEo*Od%Ww@mx5NY+ubyY=MmgAvnEPEwR?$KM(w+5(<5}-hGA_H)b4H9(P6aU&A3y2S
z{X0naYuX(1_HX_i$-<HJq2qq_mutqTN$~N?EQk-Y#eoo3-=8HbiVQw}+bEDuwWi0F
z73T~94&vp&JI8XWZ-)*>XXZTbOmhrgXCe649WQ;sep5|?g8E@e$w|ptjYfw!$>ds=
zIWvnGQe*Xu2hWD62A9KA3PP_gSgsQL^Gj<PNux2>n~-}XG$~P2ocYR0_(1Cb2#9JS
z(*q^K84sfh<D%Evf9E*yTVzQ&x^T5f#78l7KQ1ZJKfb!O<6D%fE@)GqzsQS}3&DOB
zC!hVkaj5<~DbDZep3mI7l#nIJ4eR70B9aK+QKNcui|Q2fs=3u=b!t~2#9CS3rwj8Z
zaWbRcf}e}^bK*yxpZwq!=rYiKR>p)0{)<f+sZg@+?UvCh%KS|@cpZWc1TXPyBAdeH
zC)iQj1!qJ;06E}j__i|i!F{&}A9CKO-rDS)35bt({Sx7kW^Tx`)lE$8xs>GJx)-yd
zQ7(hQ9zYkpID%Vh!{M+@y3=rS!eiWREomx$6{u^<Ma(oBWPB(;$AF?pT~^}r`T3>t
z;bw|?=*{@aGZx(-3JYuUvsI?PwAhG)AW39<u1ihVgd#YL8;K8P$-#dKE7RIUJFimg
zhn069?0GX&uH>~n-I;9F<=3aPY5&TlBwwb04<`!Zdb?*>gG>!$n41M|H|(dx-h;dO
z*CU(H6+>cUYmaw#{42a!NIjV~mr|Df3Vc$G#eOS<Zep6GOG-&rF<C7n@nQM@*YB#h
zu93Lf<Ku<*_SDZiw$Eg<zzb!0;FknVd+vjK_^;DtB?{V#WJhI+AujBogZ_CmF%Eor
zW6C?^`SxJ-Gk9jg!yOn3jdv=W6xLEE$FC+ooP6$EN%imjA-Qpfhs(@?9vjogi@Ehb
z>c2h>Kl+UB#Ei9ovblCor>L<K$l1T11KO6B^{M82--k9;Jt^^+_GQ6Kf&d{7x+%<d
z+gowY|Ne{M;|-LQc$gb{hhHNFyv!Vo{XUHt8-x7HQfMiAT^&c+^`8ejP*+iJBKhgG
zYeHmcNy)-7YQjf!1ETfxJu_={l9wuuR7gjoD&(IB+P7r&xpF9FrGO0xF{YdDqIrq&
zQwL9QJ=1ogLYd)T&k$~Je!tu-^3+-P*@z-E^P{WE*Y4>N{bIZ1VW@0;!Vgvb|2%{z
zMhOCJ9hNETZHq;({E-{`K^hLczL=w9W6|S8WvyX7;f8AUe;#~OSu_OKH8*->HW)=b
zMZbivpTjY0en;OxT_-Qr$b4FPDNtcVHE+yB?cYCw%*!`L!{oJ<pU^HA4eA$*(f#f>
z)`9b{L>11z9|8kPQO*;ooqfZvr&j)Ra1b1}>+rv<#$lEC;NS5@n>{#p&Pk8v=V-|G
zKhFnUAajLzaYRx`W_+{w$(v#KIy#!%qyBU42y!3+eZQzrQQ<MRSGYFOqyM>MP}PS~
z%Epc`^ooj%YW(W=T~ON}TT_)|N7n(;`#+DBCcKB>Xu8?_<^1g@$4*@M4>iGxn23Oj
z6Iy#OL*P=><&A4LwSGZ6oyLv-`#nM;D7_j8S#Q6OMIJKGY0J8}+y_Aa|2+7YsCa79
z*PYr9<O$+sn+|LKZMxR<V)^0Z^xLmRcPR<K`1*I&ssHnXWz-cpm2g-$s`cZXVq|>(
z?`1dO5W&x*2KP*vks%Sd)qg)&IaD|htA|Mww_mZ$3v&MN{UbY7|C?UuZ|u-}`LE{<
zy8r)rE}*CWujdHR^1q*ZK>GiB@Tgq;*Q1Hrj{kau_WqBM|Leb}puF&3&!6r5ha7*X
z^B-FL;hq1`;{P|2^`9gE`L{n5_(Op|6!=4dKNR>wfj<=Ze@TJy-z(#IG*Hs)`rSWE
zcjo`)+JDyiLxDdO_(Op|6!=4dKNR>wfj<=ZLxKN$3QQG$ibfEs;gh;rf4tNGd-DDH
k#UBd%p}_yADKJN&uDL(qkzWpUIwHtPJyYE>ZEW=a0qVmQf&c&j

diff --git a/docs_src/source/_static/logo-light-mode.png b/docs_src/source/_static/logo-light-mode.png
deleted file mode 100644
index c07d6848c98d3084b6df4ef4f21fd5d8fd32b2bc..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 48816
zcmeFZc|4Wt_dk4Xvt&$>F|;d`C_@NahKdYPQsz{YIrF?DLzx<g2J9qMrp#p6QB;Ud
zrpyw13*ngOXWgCe=leUK=bz{Q-;>wtyiQ@SdtL8qt@j%5ZW-zyXWhcL1tG+G@`R=l
zLhMfwqIYFtfbW#fHd?^{m|ahtcSi`n75fi&kCSB>p<U>trurG5?uoZJ9Q+FL&PLuZ
zCQQnouRjR<LEsMpe-QYCz#jzuAn*r)KM4Fm;12?S5cq?@9|ZodMu7P3&EpaIyZg{H
zg%_2HA8#xCWioldCB1!c@9h4Y&eC^9Kdyw1K6%-8#ymx<yKJmv+5Z1(IQ|^&4+4J>
z_=CV71pXlK2Z8@@1hP8VH4q}3J9>T5)U@4=h+F0@_^WLG&j+oUgLcv&bS3>(WDa~=
zqMaZ9_Mfi@gP9`#`Sbt$e~@sK?Z3Z8;Cf2?-`}~TN&W9{pxvBG|F7?y{3GUn5cz}6
ze@x?#-TViMKR)#zB+&nFiQ+de@5qRYjC0t`%#FD|MsfB^4F2`5Ba!ic!GR(1^Hm!r
zgnYuqY!=NlWsj@QuRpR5P~Y4beOGg?j17$&j`01T-zO^FYQ>?lZ+FQnXRL!vIxjr4
z<eZ>IjUvbX&+k~v{C={O4D&OsFYF#thRR&)%6fwTe&!oap#16gyXAQlS`0h2g*n%$
z8kgth9ljfoeqF{!*Z=!vt_M7yZ**~1NszfMF0lFi*#G_r{_tcsJl7OABg4sQu{!de
zHp1cAPK#}p+W-C?B9kB@$%nY-$|x#UZS6GNR_pPv9~0ZL=(E3T-;Nzm+!gF<Jv#aR
z^;5zK7KvC6DUI}wwH<BWeac(qU(pSnIR(l|8|$ZB`Bfn=Hh0LpFrp^vuYc>m<M(fD
zOLKpqAOG*LXUGTvjT{yE`9<=+e<Nf45PP7(?%K=E`kG^wwg1uuNgo#VKtx8qWG^YD
zh+C)QfBDn@>&MpG*eAV5J?sW-x8wh_3s7c>p&NJv|L=zk{yvq$^^n8813%*DxI+F5
zkWB1u0_r&4YcsVB4ee3)+h)1)ujim?Lre`8#h9Izc@EFwB2`6i@A@z4GMFNpyN*UI
zbEW?~upbSFo3OsbU9hFaMxE}z?r~8J?$N{`e~TyQe|@A^8vxGkxWa?K`NFXH<Q+@V
z?*|r6|5qxoH?kE+Jj?mC*3Z+4+x_3zlYK!^lHWyR>z^FyE?!@@y4Q7N{re;nV9P`a
zR+};Tn-sE|htnMAm*bi<wf|n2AgaJU8M5x>qb)u1Bv!R)?|buq8$fmf9Le<P>wT^+
z8h0$i6U`j!#^CV(`$fKuX{W_kJ6p;TPQ%_-3=dFy<Xt)aZy{MJn2_ip&F_0ONo1C2
z{o@CR@Vx&k10lkDK$JYeta?Ln=S7c$_5b=2dWi?*uAJ;;ZnF4&(`$-?eF6Wri!<r~
zVbg8nmK%6JkX*+iTQVyBFA3HkG2xAh^8#7#hdexFl+ZL?YB;h?1Kwa!&Eu7j7=Dx+
z>^#m&J9?nWAXhzLJ=VT<N@==^tt`$)4Rst!^2Q<49N}=_knNc%?x%ngj7H2Ucz#sr
zox_MLljKy|zm($o!#2<-pK(#pva#idw2Mrqr?DIB_v$4h{8}t+z5ikyOSK(Zl$l^8
zuhkoMzNuja%Q4o6v!&?YVb1=@A+W<;7$NB{%ca+<8$o4NysDNz`cg{#^n7+Q%)8di
z71I%q=RADuGO=thIny_eh*N%t>5kf0u^<$T8MMLcZ(7?{t;*!JX-%J<jNZ7SN{7&A
z``8KodiKJ_nR(p8#ScV&%9DO+boin^4QWAT!y`3#I_O6VwVDY%F*`?AiJxBGgU}uf
zE{$6UaMgTp7ZgC!x&73_woID&i&<r%T>8iIlE2>1y0!e04THqt;j`eWI^8j3-OQ&!
z4(&|qNJJ<AV@sinku54zde>u}zYGp=JcHx1w?;4U4yU#V=o^pMiC;6%EA`F^>PMZ2
z+>N&4(38H|)b7QeRa!#T(fc0{cRcR6i+-oYt_sI=h~u}u;1F?dxBdpd$SKNo;f3sW
zNs*OH%T30f`Fn80o`v<yg$<i#b`&+L`nj+p9?Rf=XX3r5jxf6_!>yxf(XY}~ae<Bk
zLpS8BC5GY!tvC9&s&7|?XCJ*Z<+90+8b>KYM>TAj5aM0E$^;O1Poz0|&Ftjd3==+z
z>(%Pi9w?qs77dCh{RH;i()n)V#+;L809&w*&#q*Z)wT-XkEpn}!BlmeZ7H=%`Q&x6
zh7z<gm$R^aFjDz@{9wHD_sGbE_rk3jY4Kma^2<_^K1v*x<bN+Fz(y=XWck_6@i`&+
z3>u=+KwtltY`i3Rd)`fOFqbzhsXg&b`5ZRDWF*nW$A3AU5Ptml>MG@1#^v+O6^8K$
z$tSZ5WR};+A(S<x!s_r^ffo@6un!;HrpjoVDh&u2KIC?BIbpX|bzCOJ&A$CUm&e;1
zdy(NLAJLZ<Ww)g0mGH9AAi|mfGa|+bNY&JwIrZ}GZKq#_3ndF_D`Qog!<D7Wk363&
zo)%4`Cc=Gus{A}0=casQT#+up|6J3_H+$acj!*6GwiCc1r)=C!Pq65784h^^tMG>-
zA|7Z^?^ta1j{(<3<6eD(yylruyOE`^)K{l)7k!d`YxtTw*hO3Iy;JBtMK`OIX012D
zvFbgSCF{2$YvACegN(-u+D+LJ!VXr$EO+R>a9BQj;)mp-s^HMEhPsg{zjsb`UYcck
z$*G*$5VH9m3cnGlQd3ktk{{=+YTF{2b5f@@%npx;QXM!^y1#JNQsUYE`N;wuw{a+Z
zFHni*&1uB}lVg!9oMRj3S1I!I_bdJu^tPE3I0mjj=$o~^F&7k{AIsGCzQR;EnjMky
zW*!oh&`~LI9Dm$c!6AgA^?>gf6vB3-)?UQBY>TuUnEACFus+OXOdX(*{WD7LhGEA1
z>G{&O6Q#^m7A-a^ukGY_{Pq1!*uqYBL>O@2dcy-PE&Fqq#7FlbRKEw*3gt5TAzVA^
zsr&WdTMt!yGz44OAD-J*#sxapN(4urtn1p?iZ~UY>t4*JL)HfXtEk1@#nA(|F@<z7
zg~Z~IRB{E&Ur(6r|88Apw5E_0beltmGAb1v#LDjm3dxU_IUje?l^K}WD4!Gf>-Jte
z^_U3_%HKiSkEVGXgxU@{(;{o@V^k!~G&F0&jE@^RqB0fS_u$v8aF4I>o4S!jhS+pb
zK#A79r_)T%QsJp_rrm5)?zcf5-8n6U*0br$1O<_^k?AZ+=o>F9vNi`{t%^glPoHFL
zde$lC8ENiTcz$@E`SO-$R%e00>@Vx{*HYyqZTem>+HqXClR#qG3$g!uP3bfKah&vu
zKj$?!S$SH-o5-PrJam=<m}uSybk-<5Ppy4QuklcI+^U}w^wBU{V}ACAHAhN|xgt@;
zU3Ty!bEPquK;J;@FIOd`9msP`@1D#~L{0*`A{`D1v)HC&=iCr^L`aHL#%Z*(x?Vc)
z^RnwLha-wRK`D(brBB<b+#RAfCI{Gy^7i4-^hZAeGdO3x-=?}#Z2d)qY`Kmx67|7}
z6o&S+Q^Q*o9dtPFW|;k@+v}X@uIoqnyxQd(3>L+zR6jXt_PC|=+%cxAMY{u%wA*#T
zLho}SWF@froI{(@dTWRI3(unrh)l$^bE^%t5Fgre?dP2gtGv?m)MLA<%)U}CWGsF>
z3lJ0QM|1Ixssr~b;%2g!pS&m!{BA*mz+;F)1Kx(u1lXkL5pOJjC9AVk$%5~d*W-^B
zxzk@Yc6i;hOp|S|1n~6a37gsF4*k))eQLHTlGmP3jfJW*pnM^SG?(zCi^rX~k@ZpR
z;&_K<M-yCo6rENj@8lj>q*TTQlxfc70&DTz&#=nO$6a&Eb3d^%mri%4cj{|3cBaEQ
zjPa3O;*3Zzze9&*I4=#>JBK*{Ds84zh34ijb3c8`-pL5$wkE%*k@)(C@)9#AUHA5}
zPASKb3sFjo2gGsRd-_EXL2S4#(e;QRnyhIddF8Fr;}Gw=O>rD*n}U1lJw`k!_sH#b
zWS_5a&F9q*G*l0%_XqB5TrYd)xkc*hjak)C@PpbIO_1)Note{E`P$_?b5<)*;#0Uz
z=|bDyT51O^vY%IBC_em|2JuD%W@jc`Zv~VsUC$N?p}3Cc<r?o&VM$-KsTk=$5CVek
zySH(WEJBw>^R<KxdIYR#(Eg7@9;oPpf0W53uv*@RP2lOD?tpXVg_7^woLYkd{T|Uh
ztYdi5l+0a+V;-%P9}?W}dKOHU^m=*B_|*83Pp_FgCpUL~`b5wwIzGjQh<PWz=G~Xs
zjaa91)9@Y9IK&_d?6944!XbQ6F~Hz<q(|AiFSB2(#|l?H%gPkN!t7m~`Q%d$j&ZYf
zOswye_=K8Hq|%`5kt6HfJ42D-^6Hig_o8!vW~PI65b9Aj7uB;-#m)I8M0O=F&Wu;a
z?I*CPc|}K#4{>#l4YKli_BTj5^Fy3Lj~*w6YZN~)j3+sy?7TMKc(vywL=M)jL|Q`5
zJ+9VW4iK$EUAHMANBb?-LUp1Sb96M1rY<@+gwUs#^~DNty8Zp6c<h(SoItfHh~s;F
zKb*gHG~+>_zY&eZ&$^k<kaAr*y3Ex<hq9tn6pEB`X^=H?25X5qbwj08s(nnh&!>7@
zbl6z|_Cy~=bI~_~ReHu&SD!H;)XJwoYHn#RS=Z4amVAi54YtQ$xCasRK(Q>KNVL8U
z<Rs{cG$y+GU3SvZoXD8=HDT*|CnK60p=%!YrN_PhxZ;#84-G^Sf|*gTGvP3MSj%NR
ziKg1@YU;cOBJB`27s8Pj3b<OO9U#)tVrHBmAa$25u`FCGvAcJ`&oZ9&<(=;ayeg8%
z&3bQRl>I4V);FnvcKTr&9U^EMnQlYYW()WKs`J@`h_TF1Y00&}YAMx4FOM^qrf9Gj
zX<`TC)OLw7!y<-uRqoJCQW{-ffyjfL%lPROgiIslaH!-?Jdw>*fdNsUfR*mI-=eoA
zwX-gV(kf*$$56f<0;j#<mdBrk=d<bh)Wq@=_TueRG$e7vZi@2463X8*WZ}0{1BY5=
zAVQmxhq!?067x@(hH^QpM5Hi1Q>f2@oSzVC)a9|k<i4<x%HFzx5Y@NosvaWB4JfyU
z{>Bp{yq$vo9DmxERj0R>0-sl9{?F%a|M`4PQ2DJ#IhXL$ieQA)YZEj?#X|uPRlXu5
zh~3D;Q$z0j#--kmoK<*KBV^#|@BVpu_TqX|A(uzvELuBrng)?%8p?855SdG0;e^XQ
zxZSyv5afhB$~0BYzvK6l94F-hQW9vwcSrYgl%@fz#c*nFs?NHn9&BoM(wTH8Fr(Hq
z?hq>4KO1!N6Cq-f4-wh03t)#MWK5UG81#s9zJ}E47pPGCTLs%(5ua5HEoS1w-FRxq
z;y%>+w@=#$H)>jUzR};M%ZggX0l=jU@qi%Ky@D<;-W>?2R-SUYLf;QcF#T5IK(k!k
zqe1VonUX0xTt|F7$yW<JhRr-i50`XnUd}He0bE}VxOJM<IO$8GPLH@q09!zd1FuDm
z#p^d_U*;#_A|{QP(t^*Lg#1dSH;sz<Ht8-6XJPRVM?PmvXp|PxG2`Y0C=Vt%s|7Rj
zCzlu<TN3bTns>eRO4_e4W7mp4kP)OG9GaUi)95VJ7?BnBTx#SlB^1Z;*K@m%N0|Rd
zi(7U0TpQyt6CI{h+S!vL(Uw(XCUaIISd}R&-Ebz{pnSQ+O1`n+K{j2pB(R6a2TM^D
zBpODPvMqw;eI<;WrjD448kDB)Z8x=G9&)!<6wi>ZFluwC*uI_)*G8C})xErAXcCg!
zqn7>l2IHjrE;!9><%bG{1_HeaQ?Venl?Ma1VTkMMmOU4k&Rbz}RK~zc>)wYiT}WQM
zD|V5`S(gU7^C@QGF`=n4nd9e7XiR?EeL06v+fmEynv8?5CQe@FKF^HMiU=s>oiV*E
z=XmXc874}(K|DHlDzDX7_;SVezEy}{Sqby+;>vM6OX7hJs~C5LMw`)p!7ZIKp<(#@
zH^7*)F9ohH>vpvkm2lNk2rB>3#<(o~7jFp=(WaES)gfw1T&MMbg2HRr^=qpFnlva{
z)(cZc>{xlUyu8jQI2t1yEnGgV#mYs^P6dCn<%ZJpvam0OMs0R}c4e6FW+m)e>1i5T
z$!IRz%_vxO5UK*{uC)&vVyNH$`0mTrWG~1W86h>`?ckO!lJT&>gvevW;U?)9_<42T
z*Bt8JcDON#ONGmrpoDnj6y?ngM!+E0aOE-@j9H|OuE*U+C>c&=eZ;#o)ojqT^W>6%
zed|9)15!S?y}^_rRg|Z5tsOrd;;bT$V}V?{5^`z8;3=CzKN#ap8@&&-BdlV|-0o>n
za8sHZ?&RJ<XMg&(8Lx$!$;02)(*(MlITCj!ye6r1zOUbgneB+Z6(pM0)(d-Z)<Ii4
zmJq7gb(4+Ek<rs)ReS56g9PAf7wfE*_Z8C2$RjVn!VT|o%kykBR_0iBE9uTZD|!WC
zn+OKXDRQ&1>)s5<KUz@xA!xbIwm!o6ov@4G0z9s@X!)YagL=yw$_T^zwq53)kikuB
zoo9Z8rOqHAl1;kehYCsofgQjItvPRcTJB%VzT(~A@T;=)DL2(p$^85K9IEHTezi9o
zj&PcvLmY)_P3<e5XK1dBhUYI_227TkyAiqU9v5-m_J8EyOH=cHdGt3N1d}7cVVZn}
zM=E(VA-?>4q%85)1@dhZBbuVUdw>(rMjd=S>88uj9t_bpfG8CZJ$LTA*Q&3{7Hezx
zd@Hx+z;7t(DrFq={m#ad#=Y3Oxi~pnz-6XKkDTKWw63fcF7q{_K}0X?Bw^wKzZPaa
zCn5PPUl$luC`~(e#qap<2$A(x#!+eOY+>#<%@5{t2`hexn9@*jekTea+TpR}FAyJ(
zLnJ+5CRxa@1WKoUjbhF|`MkP>4`7?#19C<y;|}%R{#8YfkYn@P%L;H9BO{tC7el>%
z`%l1;ez{;8-p~!9quUU&06Qa{2RrMzQxwZp+S5{KgaSV0P!k@8P(vV0l1FE?@ErOR
zrY1B(7)f>vAf!r3V<m1}#uGR5JNXgmG^WbONy;nb<i*$StBT~(vh7uezN$+o1C^nY
zCz9cE&1pN4@Z4SMFb-+&x=Bw;jo`#C#y$>$q@LUA@=q)W><wa@h5&Kyl9J(N9{G2`
zA^e}WC6M*XlM9?%o~8W5(5QNM*gTGyvg$MF)UgSf%A6V=k+y%;E2t)(k9*wm;{2^h
z%Hl@N^PXQrfB$;BRlfW06v)G(s*Bma+8Jil;qKiArTEM0CyD#1dRs)!2sx+~HR8m8
z;)FLxUB&hhTqcA=;~Lb&vqfl;tQM0XaSzk2oR?1)<;ew~+><w#bq&6{2d!kPJm?OZ
zqO8BMqM-x_)z29WjOa1|?>tep*T`PoKb2Aap%82*`@s7y{+y=wE6zPyJC|%~2t7xk
z03YyvQh&PM2(6#cL8+=@^{P;OK$sw1b5^rDdn@ujzu&{<ou9pfSNINX8X64scJ@4&
z+A!M44AvcZ!P*C<&`f$u&ZuH@gf90mD?-*$%Qn$fUtZpvzas=(u(dA_I^KbukxS3F
zB8w032qixggt{RD5Q=+)swGPQ{+l}MHxSf&ah~3Vu5B~txwYSbk!E@w-YbOac{DCU
zcj6caFpO!ibCPXuvq_81<g-pMRy7-oawn|j^Z6hr>h%e=9v$bOmQ$pIeCQDnIl&WT
zzvOcMKC;?k_h_RnZJDNDiQ~IeQts$_xscT9&;I;dBzzWhG}o>(fnntrboR7164m_K
z90XN@OV00H3Qzwha!pSs9ocyfCp(ZxYc-28{3K?h5+PQvxcq(;Qe?u&zT1C=jQOrO
z7PLH<yy~~9$ekWbak4uK7Lz?87pa<daf6=9STWXuLq1J%qCw)cgu;p8kUZTXB!q}p
zn4Z!a(gOiF@1=g=Z?Q?)m=<?jdDX4_)Faib#|Wy8%r`BGlPRzJ=FFEvH~<DGeO_DV
z5<j-_ohnM<k6V|69zV6fa7b#Fs}18qF*-VEAm|EK-nSWmy)-lX=6sr@tgTLjQSt&8
zpN_oYp9su#Qo^n+FL$_H$vo|*#&wAM38SGhqgP%DbV&Op)G{374YPx$b$@yMGB?4$
zfQwwJ*u;%UoZwxKSGMQ~)iK|3a9b_6lQIalw{>aApj2vVR<oqP!B-uC`0JqgWUi-2
z&ES9U&PIy2wAsX$oLY6$Gs?2Z<1|C{E=hHs^&<$)mTSK|cp2H>UiX<kteDMCgU(=j
z>z#Ug;zq7#*?PbPWq+8~FPlF4zpQ#R*>234q(dAO+-20@KIfpsOD=>QZvWLbGqsfF
z(h4Xk?IksiK)FX2&E;HJL9(La>IvpbUl<%h!K|0SJ*}OLBOAYeR??@T)~Q%QN(=SN
z!LROnh@V|3fj?dtbcHqF$SKpG(klrL@)|PP3l{DBt>x6-dt9WPtymFAzBqiz_A*hv
zEA?FJ^+gt2sDFii?D0S*!wa)#6UyjYql~uBm_N@?YF6m&yOUD<bWgf?WUke~#<y?f
zoR}eZWtBeVanF$&as7Z&OF6rzYM_x^|HX%e#C($Z<!r!WjcTfViU<fM-Y#brB%TEj
z&W=>ZC6g|8wiu@bl}|p2NM1ah!T(qA`$;d4z~jkS^fJ(Sy?nSZDl!>J9<h9)-Dql2
z^&*Zwfzs0Yw75VoouTr&M_+1GOD5|=4S=i<AZrOq)wsCmy?lMUk305Rjh(o#lv{EL
zr6GU1o+d+j#ev+xP^)CY&SQ1w%yf1<2NseZ9R6MrSDsrEEq}}(OMrL}({B9*Y8$IA
zyZ?;G(o{>Zk^NUHa?YYaL455bMNm~^lc7aLH@kL{+ODk6SeZZL&J_Zc1Nr*jL^b!B
z*WTh#yN3tyBVxpoGcQSBJlmy>I`=WvcrPO~C(oS16g@R`#405F*^@;Ys-TK;2<6O3
zYR{MX!}BMMt)9kpEiCU#!Kg<>tlwi{!gdNGOi_#!HVrcMoN!#*YG}84D)7_Z;Mf`P
z^?}`xLS(Zgo-p?5xJXv=GCmT&kotJPkA8DT&lhj+(r1NL+#->djfI@mMc~AviHxhh
z&97E|F!1-gp9#rOJVJ-aOFIbzr=0W$*2dfLU~GhBh{H+ELtKi$IuE#p7G~>H6ITb9
z<@W1*y))+JVKe7vp6M;!qssI9+`Zan2M=ZbG&Z848bN+TY^Ruvo<pPW1F{2V^Ne}?
z?Quu~4@&N5;}PlJUe&K{9-sEn4sXyBWfwlm{OsJwk@*9TtEZT-^2NO|biJ~JcO^ca
z_;$6VAIsjlZmR4-^FBr)KjS%$O{rs1eg$Y|`p%d<g=gDl`$Ptje^O;{UTKQ3%E38p
z-YUM6996d|Hk9*VIag%6uhf4R!ysgs>hYV!QlH1oIZb(JuAy?WaDI!wZ}(uUAne0H
z;xKI+q5Ff$rgVGxdgWWYcR7@3P2Q?4|0Ljv7G`QJoNO_W@{T^Fyf@|Ni;An%?a=mp
z<?_q|%eUoiQ+JRT?s1(FS6kW~UZz2Vf?zpoAGouN-5dqM-mPe;%}4p2RqT9fsb|Z!
zq`7Pe{9}WoGTlnq^K?`*qmT^jlDo`JXoz~>6wFc&%??DohCB9#`&fD)ZoSGS&z$od
ztj3X51tdi%*HRyE(;V2?xeu~AgpN8Fm0Qf*^%j@Jg)UsJ@&z$ho``<$B-ZFpt%am?
z-KWV<&ZcXR4XmlH2*>2AVd-aLAHC!G27Od@i}HQGW7eVTD5DFP_CTAbUvQzt>cM{`
zOR>)t?hy7)Oo=Dn(o63@j`HW;{LHc13~%=qNso2gi3aRqUg^JIEmVfy2>Lk#%rq#O
zfgYb+_wzEgk5NjEkX2!qZ+xrpWbrP0;=Zqi!^s6$?$G{r&*PGH=vPSzjMAdD$wV$k
ziJIBY)TVP;Z8G)UyS%qJxKw>9O%wdxe@w3G{59aToUA5)@TtkMJ7`?j#_5^%fQn((
zlE7ovtq!LVYKM3>sA1e?Vtq|T>qPzvMcrqIkhEa_>7xd(wrS>(CJod*CWR1*ZTXe$
zSa7|Id}`rXuT=dvkF!Tzx~2O{aNW>8KwCv?W*Wwv_uJ+kJ7DYJzMnY1YWZ`v&wJ>j
z-53lJ2K_f+c;M3l9zxgJxtnM?Rd@&5A^I&EMw~vqzMF4alwZ_e`wQzNL6YXQs3iwo
zq|@I^hSugg^Ts;I1L~iI%Ly|2FHjAPLWat7UORyW<*QD2b`w2uC8Gp7w4N)|z1Vp#
zsz6Q*ON<4<0DQi3mlfNWq})e-2`%ba`G2#;Mg~ZJc4Ki5>w`p5x-XLUi<n1FO4`gS
z_B4%YfA{?vHzjaFr**E25iRwYjnni$%vPzEuV*lwsxk;n30`VB@9!Kse+dzFFqcXk
ztEVJR<KI8>qEu;p`jisdIbE)`-}GY}`<Bt%;~asj@_ALXXl`#rGbQ`$XYHY7+&a&^
z5BU7jUp{s`Y%gss7K%~!Fz}~f@Ve&diYvP@b54Oov<BJ@!(PLO$G;^n?kKwE#{jx3
zQv&0~lN>bamFT{dy3Asl2R6<uj=I~bjY|JjkzkiGP<>bCTlVBPdg7P<WZw(hrUN;2
zbwr-flkx<OeWz39R2@RADiPWacfLzh6mH;AcHYaI$WL$5YDqxidjE0Nso`^`6IP+S
zRu(4v>ctRI=0};6lK1ouciyW)6yLG9T?N%q3&1kOh=J#9FX~MB32Kd%LKezFCdXP}
zP-!K0lTEs;4*wT7LE*HOf#MW>tK`+qT8iOo@h5MLZ@st3e2scaJcogkWa5|Hw>^rX
ztepY2whH)_{UO5<B^j_qw8^OXTcWu_F^R*`jrMk1SrIuLu9HE&FF=A=oqLGBxvON7
zB~?QVYFKA2D=jX~fyx{fAj9ar4XH7d`jCJIH<#i|P*42x?*imgZf@+xPd|Kikb(F%
zVQKY_lJphYEMub(iDtPV37v0rs{uG955pAcEOGlijwMpKxyhcYX+>G6JYI2V<~>X1
zfw;!24~_gF=?MObo$9O4ojQKZzeVKL>sUXSUV4<FwdQX*_n3z%2?aYbhc3ZzAc<RF
zIaZyUj8-z7HsnmNp62BnHJLlhl;$$<*-09j$LDUjA|9*M8rHeOZl*5b@uC&UIglEh
z;Ce!^z8|faR3EzX$RRsEp>Ssh)LHTnxuoxiN3L@1sP8CIo(fbivw2*jeZ&Xn_uXXf
zJ#$*;m--mUFzU~K_oD4SDZWHL!Jk_Y$N%U-JZaet9A;zn9B*<e$7h<Pb0%>5K<kvL
zi`{py3_%&FHxwE&T1@dy1QhseK`MYP&|PxnJ7#Ia@;h+oPF=XW>ccmnsvq0R#4vB&
zeJ*M+jwoCcA^5r38|*eIp-$yzJP)tC%%~kZBJ)6e*g7Z`-B>}_T6%R#5$u=O*s$|f
zQ6mOvAiplmpK>&l7N<8Vok*$k-x~S^-(CvbK7V)v5tZU2mqz-Uc%Upx(lQFU)}VD@
zP5FFys0c(2C>&bL#g9HXbJ3HAI$k;-Q1(vO)IMbB$)Yo(xgk{Ger3G6j$PHoP6&x^
zKzDU9HdKXn4ljPgF7YQs3*ygulbXtB%QtCj(5Maj#bo`Mb^nJ}Sd^Rxl|6pC-jb!i
zu2<H17OP^3)qZ9*;bB(1-M+$Frrt8*$2bCq7jm3v5yuQ=^sudlb`S$q*NCQdiUtjo
zurRjI%p7erXR%J%h+m1X->8Bl%m!NDz!e_$TW(pLz37>TJ7IE-V!X7*w<GWd5XE>@
zqDW+QV2>0uhsd%N8TlV)g{XFc3vS+f#nl%bw>Ty8lM%@oFK{NJd<WU6fa}cFkHxny
znqPiG3-$+oq2$i<>9Pf2Q8Ft~OCc+RSBS*F=2o9`>M8L8-;rc=o%wl{lq$Dc_C}1<
zTbfR%hqdJN@>PlDJBUFZ+`dD>H5!J+^a%As<<xptoS5<txWzLNaCA+n^r~+Co4+1M
zU2D?k2<$1zv4D#1<;|GU=u#m^Rl1eEbEROj13k`K4L$;d&2Otdc300od~CBRjmV!c
z1*PfT>03)hh}F*gH7^;a1&xIRjc$Eo1~<r!0!=tVsJ+p$YVQ1c!3EIBz0&G+-hiGc
zzib)r53UsS&QCkJ;{uPAj#D<AI2i6o)_Dt&ndG8@0+Z!airoe|fzvrYnTG6IDVe+g
zwIcD{`zLljV$ID3ac`6jbkNz@1JLN8oa$hT<Vd4idXWwx-K}Gg<BTmhOFxA4*8DfF
zQsdhQR@H$u{Xx#JnS8o@H<ja0pRqLwq4tzGI6)KmT%Nm7{hX||f2QzB=8E_Pf*K2Q
zT#+i(CNA$FFJ8uH{dJO`e1sAD_TWk}T%Y^m(TdV!&m@JM{dsj@-TQRRc{p)TZt+!w
zEEDoS2Ap};{IOU28LP3HLT~9fl$NbwT%0wv-Y1?F=4~a^d}}PbqSv9_eHXaL8q17W
zAJJ5t#LEMWjy?Jjx|Q|=<a-%Gy*v!F6%ubpohq^*Qxl0)Es!-;KEC?i?zZX<wDe@^
zPIM!#)vhGhV`E^LA^`LE{ChABBPeH>X@^yoh=H(Gud-vdEgEZ!9WSo;2fJ{3{p|du
zwwU?iz)`ior@~tP$v^sE%`Oc0-+x>(mh3GK`IY;^MaLsgxQ88MB~^d@5~$0OQ<*vc
z(-udpm=Fz!kj=jo%{Yy(&Gzj6evflX55su_vn?jvTr)Gtp{K57J&9(BvcC41G1Jwz
z+<{%uIcjC>my5EH)aLs7O;@GEMKv%s&R`Krr2C@a@)Zkpl?87Djv7F=o0%`eZGEeH
zJEGqA3~_tT(L}R!E%;>M-`blN<1Ld%_{O0igc6d3J67=+?ljkF^<JTBX5?iW%HfmV
zPNS+%f-3wiqvG!7Rj{N9&mW&X`YeR{xb&(zOr<O*MZat9tbZ8Cx-k17pE+Mav6s}x
z(sHFIP!hEsRG)Kepp0HX!e0$KUO27|l+?E~*qR~Yc5v@A`jPhTG}OkTac(w~EUn3>
zvw40NbL$4uRiUXdVS0=_+*7^Zn1G>69U2kRT>EDIdDrY~-n|6VfZje|qY&p6k+~d1
zsG|GQAead=?8=skGs!6mYc7dni899g*N>cnM(F9yOgc2r72fj%tz4h%9;G-Xi(Z8G
zW8SRl^#+ki+umbi2VqDBiw|FMOw$=JS`JFcM9I!CI(~hids7BRs+?a1DCt{=o8B|5
z`W8D)@}!EboI!)D0ei1KVIvO&rUw=1`wnkz6vmNuI_JQKVG@TDA&f0?;B8Ji-(yaK
zRfa{4edpt41s_X>)n((}9sGI5`&5Lb^lN_9(0!o<{({SLYQ$qrn6i(figmn`^+J;s
zYA2Z=+0|vLZb$Z^Q<^bfab+iU44?C{H8!d;v_PLi0gOB0q4U^w!oUNxvNov{(bG~Y
z?Kiz9w%LC#w`D{@eeSpGJkZ4|o@u)nB5u?g>|pd)w=~Is9tC~&PD6vF*bCch*pC9Z
z^N`(SeXu~ulT4(R-i*xYr}Qb?dt5c2(6!Xy-w!CH@5S5r@%RWg%|+`)+|AcBlj;U7
z^2X_5+*34OZF=0<%p@eEff@R&0TGuf*+?$w+RyI|M9fpXu=LCn#^eM%UF)k|L{*o^
zpN${uzkIE~8q<?Qc{AHjvrHT7#*%Ioc7?M)dGXtIa^0xYHy&GresCP(A8{ADCC`dg
zW>-`oN!{AK__Tf;t!Q5}`nvJp`>?$ZRt#RojLEt}`~nKJrJ)v<UM=m&9)>a_*{sDb
z;rot9ft=dH8-0eg^-<;K^cdWV0g4QWSa*vW=c^9{Dk~{|gTjXFy(~vo4Tn96oU2-a
zw#nBo*9Z^1DymDljEG_I1V_g$<9iaio*Zl&z1ji=8;_IU_Pn9lZ0@LS+QLtEtz<*o
zcWQjY(F%!=h0LCb{>akBF1jq%!YT+KaZqdDcDv!GOjNzAQ@=aNz^4Q%R>*krMR0T1
z%GCW*7M_TGf5{cKO?4nvdWB5)#SR}??PRtC8VKax3>UG_rA=-{HUZ9Sy@k+1*ft=t
zvy?XaUCc+%mu|_mOCJhZk@Qg!4<hTSQY{uzr#cEukq=~zQqbL|ninCxLIQast@oUI
z3iLD)DpP!vk~A;}!O}fItu+^F5whSoef=aFtzB}AzI)*Tmfi$`iAg?o-ry$Wg`$<M
zlhF3zne;tfOPTj$ede%9%c+?<FDM#WJ+q4S*ux(rkQ_b=Fgo$3@>pRzR6e!=gKrFx
z-(ou5*_XaC3adVkzR%L2q=y_z<lDb!sE$*+i5sr|pSx$jJDpjoWbK6USS@v!Lo-#6
zOS3Z;hq~fhof~Ek;7M-Il&_ThSZp}bZ&+(1r_!i{wn~K+M?Y%V6%^d`Rvog|!!YkO
zy@mYF-nO2qn_T9Tr;+MV6}y-jBft0kf#_fE{5R25Kuhed3a_n*e5J|eOUez!uC^Cb
zbZBLGRSb%qc+!REe-EVAt#$i-Rsl9V3?o?68zAtL?^I*K@$SF5cbs}D6WkMRwiKWc
zMsxM+mmE-bFj?>KCddBRJ|Z)cma=`T8pFqQKy@Ye65d|hh$e6kYQ+zPE_@8zMhv>c
zS3GT@wICyhH9yD!5aV~q(|ggkj_)FFn40Muo9Vxld2@f-_ngtqT`T+Gx`rQqD-opR
z&+sDaGq9H?`*ZD+o@}7oo03JQl~WKh@d3ef1giqBCC4kg!e1B|^$~0%FbP_#J4fe6
zp%o1$#fUoY2Zq0g9Ji$`G8fT$jg`*QJ)2y+`m(D%ZubBR={>^)zq<(}NuZm-e1s`n
zdY=n6*F4@SX(!))%~^jGAx{k3NaW{9*DCDLs84TB(yLexU>bAf%oe<XrW`9>N?aNs
zGi}Itq551s*xB8Ek~fueMF(1;ZlRvE$nQ?Al6FoCKR^E$Hbw)`QWLbq(NNnt=hn|F
z>v#VxiVY8nK0Zx%t()ZlPV}|R@Mh<aeT-S*k@w~qEz9jBwZ+0XOXYe>Xf+o~!UncX
zmvCY_I@Z}T+ZgAH=6n4YaOeu8CDMnV_WgWvGcdR<*7#;gB{A9TO2nQ>RRnwpHAGMN
zZ$6AZ!5d?F2~YiWn)&5}c!Cu(+N#m@$yA8s+3_%1X4ELav0eqCXVB44x(nz2<m)2V
z%--#Gn`6U_bf9AWNu(J^-)y^LgkF=VVv)^PdGp%DEAaatbqL3s?5qXlUB<<5L^pVV
zz7Y)}^*H_Qfx|3?;b0=P;GqwhNar${a2r{I6b?da&q=c%!p^=zp_C1`bLw-GKCmIX
z^~FB*bK3o;^Ap2UI#6`r@vX;cP@Nlpg^UW})lI|kGiiNO_~@wadywZ0%7CBFzP|cQ
zYKL$2)h+KYxF#Fif?XXKT$BQHnzwVs$qSK-ZvDDFF)WoeQ0LeN-nUF@nHz!nIX<2s
z%#3)C!vYYrlwOQX?FN|_V|JoJbxfwXj$uq6_r`EkhP0#6t>u-_Sxe;~#v#<?$15lz
z%}4)&08hFrw7<xau(oj*Rw+*C7~=UA=3}rRu=tIAfS%^r2c5yf^CxCF<;B80uRs9|
zG}?4&%cJcHUA-r#7snw?4ppb~6+*%8tTx(6=cqhoTbH<4^v!u*7nYeMbO?J>ep_e2
z9x6ZVUthw*vcke-+Y68B!=`pup@=}lR4~t^M6EC<SjOYAwDRORr2A6FU^U8hRZ&ee
zvfobFpH;oTynm8y!88~KjO4Oxmj(Rlw=qp4niihIeUMByk)5cJqO2%J@6EnjTz`Wb
z_G=*?!rjkh+-$2>|KRv=VK4DBE!~&ZpSFV2GQy8dJ4RS|5c=obqp$XVKgm27g~+Xt
zi;Iclum!bwiR3kb1-YFKG-#_j*n5x`vph6XqUc&FF8*u;>pdY<!MQvLR@ciKDkjhM
z(4)Zww<A^1=P=bY3fZ$P1bkkCtYuF!M~m$+33#ICR<;L`*&&l}YT-FBzRhS=6gQ{U
zZE3=AWp`wM(B`8vwmaUx32a|@HA#b94O|nQM<3|4CS=o{J)uJ~phMECQ{ns}xG9Me
z!?pd6FoJ`96JgB2AClXC!XZzfKrc2mYw@domS1XO*PI8$>#j|nLr@q5U&d4Gsil70
zdIbL~UU1M`U`0GgmaNV{A1my~uTfT(eE_T9t0953-nnDDiD>$QoH6+n;kwX29m)L@
z5|O#V^Uii*3y*6PC1Fs1csS5uzWcCy;SDn@CS)CLlS@x>8O}^mUVC^uu2{T70f&MJ
z7)wy9dP1@y`1S2ZyevC*gA~ELs$Nwf2Db&W+ZOHXey8Ix_9B27UR&!F`e|m7uYM!?
z4Vv{@-6Dw42|WA2INsWq=22?Lal-X_y9+3pb;(F`Q|aDUUzfF_?@K4)kcjt&?-i?i
z&=-`vQO$t{h1p)ylC;4^&M0ksO9`yh`#txa0=!IP4_@fF8$akob8l9)1;SW+gI>#_
z`MDe{Ay^-uz_?=eg3}Q?c=cuJ$&8}PYm)(XMtSGP(NlrD5V__y7cYhO%pOVgV*PHx
zgcCMX!iZc7Ua|jD#mB;|ci$bA{2|(AdxhL}+%G8AZH*6f*fP|I=VlDFuLI+~c#q38
zlX3l)DhDE)(hdh$b1dBFQ&?J-X851bT9A@p?Q2ZBtu_zj(&DUSNK@j~*6Gw=I?B;+
zal0qUWI`8c71_=Y?Z)R<cRtLf`xy(uvTo1QqHA~K$v!q33sI%-g(MKs1$@BtPS5>w
z_u~hzY&rR|;V50}i{`T&frZucd^=L+ivx>n1DRkOS>kDA&{1(5brlg`a^vkSf%JmI
zXKL#T;=b82pjJsBy^(K&uaCYZt2vS%M;Pv2Q5HSs>OCfNui8YJ0g<lNkK4DjeOUA2
z@4(Lof^`Xuo=3%nL8qroZQ^G_*1tQw9^gZRQCQ3zm7nu06I?2_!}}y5B{^N=r1<Uq
zk|A%RB<fGRD!x1ko7%#0Jqc*?q0U3fONA9xM3SO8ha3+8;@aI`F9h`JowS+Uh6Z&(
zTF2>lYd8JZZ5^Y#i25w|LKPm!wV!kKe(T-tE3ACk^!Wh?h-*iW2A!i(66OBF?rt%!
zoyv&Xp2I|o-~E|eRe=mAY?#7=fFc~xvy1w=GM$c-rj4DGkcoHJu)^}`k^ZyLW+=Lh
z^MIcIbT*ww`aX390i9R<)LFO%_UL7t<@0>CTHdEZ3sL-xIOK60%%0vjzc}k%0bAlz
z{Zng^TG><E2$}Cy4GjGrK6*J`yPm_9{nrEIrI78A$+^+C3Pb)C2!pQ~M$`vYLJsF*
z0pGUby^t-F8?ZwTcaTQD8TuN>51P@nUOjt#sXm~WFQ;ZQ>p-S!5=?7mWx`!j%YO|B
z41S9PbBL~H19J#GjU3<W)z3?WZZy3=KEUk{vshIqNAvH^_G>3^PZ#%*^D~otRGY}G
z`B%lWpedOM-QBpkv)t>GN|Vs9#-SwN{dshR=Z;&?*j3g*jSDH!1tS{rYQ)2sfD<Wo
zzTNzcJ6Uij0CRH-XCYIt8mCF1$FZ=l0l`$M{PDim+1L9U^mm##w1B}@9+Xs9xW|=s
zcuE2QC%#sbW@4nazuLN1XeV6r8dg*c;HV})<e^n)3lp8C){`hXHofls>c|^9uTFnq
zsGvcNY>Q2Vhr+qDA<nz;rm*1^D1gWc=QWVEo_IEU`^%ehx6^jYzVCNOM0W7W>=qs=
zmTmRX9Qolx0+}2VR>1~L6S=dhT+sAT**gjA%Y$YRp7?=&gkj7_&W9-4xN?)P2;1Bt
z_h1Tvxt={LuC94xdQeRNfaV*awjr)_#TR1G%HNZUVF+e*WN+Zta5<rQEsz1Gbz?@2
zSRIwG`jo+`U{T<o0Z-39o&2mH+SZd3hr5}Q7Xr`kR1Nr*H^lv;c;d7F)YI+#Q1!s*
z{B4)eBwzjgc)|b!Y!%ZG-WPE3vdfaSjGRu|Sa4%63{X)|etv$XtRL>?W)Ww&hX=Vf
z`1hKj&rENg!si*b3)OlNm{#L5)tSdzbXsGw>7pQjxX_}dr)hL3n;i3gzCf+JEbB7H
z_#3bcZ%u0~{VMBSTC5HS=Z?7|&Z5b;%;UMaux=6T{}~2ZFYSJHLo(OCJ&+$6@-Pmg
z{37m<G&xBf(hk<by)0;u71N57&9ZCgVqwg#!Az&!FK7gHR%&YcX~A>EDS;F5HZxv<
ztX3Idt4)XE&W1=OiM^6sk%hm}qV?^b`3EoP`Q5=%Ar}zE@5a9((c9+sEV=5SI4)T!
zHt0hwcbn&-vD;I8xViT3Vs8lCX89*?3zaA536Fl7{RHum7D=bbmeOP;trO-YbgUiQ
zaG1Sg?U(!mj};OVdOdQI18YNMyS*dmJUjOibD^DwWcPtdHYHLn-+!(O)g5XIUe4t@
zMip)k|6TJgJ%&mryf?i1_A{@Hn80`H^d7+Iwt+L(%QU&#`fh>=IGMeH^;)6334^c-
zSuvRZ#TQaNco_r{o(rU$%d^89K8}Ty2Q6s;&i>9XEMcc<a$=kOzO+DZYbC>DhTeh7
z5y|`eONt%9Cq}x~7=v|nNC#<A(hU^}v>z|7UR=PHrFVQnk^z-rwWiG5oRJu>ZPUs{
zg^PGRX;yVwiQJMTy0WP#xtm!1d3Icd9uZgzxJXXJg#yRmVZw-LQ*Oa4y*ZL8K$tC0
zqia41?&k`!3>jIZ8~HXgE5vu3%7&FHW)VL>7f<gSpY><O%^hGO<$W!=55J1`e_ktL
zujQI`t&3CLiimD_Jd&{MOS9BsR=u2Brpky&=i%12ADN=JPTX@z0Nc2lmg46gvq%(i
z66tH^#(6mL!-QO{LvPhuC?|YH8WH_qmSTMc-aA-rXIf{2mwmv~eh$UIxm?cLXTvBw
zwvu5VNu82rgOA70s(O7QLF*aK$##O<CmFmNfOHkO_&nN5j(Im<uy{Vl?*SMO@DPcQ
zrnYh0p;)FUQ{gYF)V|jdLDR0cu;n8=uWit$Ds{nF{AJ3veYkG<gu|IZP0-JWoEnjg
zAXP|67;L=FCEI|5*YUt`4Eb?G|0vYW*zH@F!Li=nP{xd#8|f*<9IV(M_DiogRkL^a
z2;Yw<zX38q67dP73tG%=4hWo!_yuykGnxdS<uFwG^!UxNY;QrS6!p2#1hrHeDu0y%
zT`P~v01fy(;%^B|Aijkq=5icrYO=9HP4C}^&R-{_$lOzf><R(C^RLPI`=fB(UB_X#
zij(E}kwR0k;xZK@#Y_Th&OC*nRKT?sS?spH1J+`UadJ93gH=zjP$OV017mf=ouy9?
zN?o-*T|1iY-E;g^sb-z`snXk}fBSE5?dx#%b0Gy{5PueN4CKhzcnEtyZeIZgB<6Zm
zt13Hl!~Ar}C}zmoFflUR8+DlFns`9@uE+QL?S>wSbo)*+O8wsT_F?9?E(Jq$<mVv(
zp=L!Xx(YWHx1UO-2U)OdWtfvjr*>u2*&j3La7>m6==Ccr=Bll*E5Fi8ZH!*aVEgeR
zaopVHo-z#P1BZ?i_%pYU_Wpd{0`x(Ib;7I*r)ix0r7?(41H^5OPE?lb{1x&Gx6<jN
zTR%G-byY`ITj07&+_3Aua5;s{l>KnwZRM(e@AqeU@CFF>CXdji<}}NWJwoT=38~PA
z-U11LPWcNzH)oB``XctM{Szu-SOXu1Ap!gb*bb}V0r&5!kR`21z*!&^hP=ECDa@{#
zAlMJy2{+`~BmVY!Z@3WWj)EfgqIsR}3Rh>rVKy*-0VsoAN8blZZ*Ej%)BXN@)*anT
zKhMS!1Q{{Y<-H5twf7mjOi$l>Z<^IsUBr&PZPVk_q+%}0OfgYM7W{zbVEx<Vzz;gp
zc;}eT;~2-pK)4z|&wRx;Ia6g}J1x?HmPb~qRyci3P)~6vTM>JbfP!DW(M?xp`rD{N
z9!B}(RNn$F={xCkEBhez7$!-O7|t^XsD|#-X-$V_JlP%`QDX1z$|IS7DN+fm#K@ur
zC#v_5c?rygy2L~O5m63B(%bG|lmag5Gtdeq1H>(+oi7rrGG--oNO3gC{stU}e}C`(
z4^C7zrKR-1-Dj;iI=833a$ur#z@Y7$cLXM>Rm$P-z!N&eY6wK=(Y<)mH7({Vc2&pK
z=#ilcTn7Ua^TE$AJTN#TJ=sQ*Xs9QP74{OV&G`*Ph-`?^;LeOkeD~r9H9D%RB5)|$
z3T5dT(M&D8g@t<Xq&C?0AqgI<#f&4AP4!N1_-8__ZizRv>zGIfJL`>^z;#+75w+%k
z_RMrT-INZ%06~&qn=OW>E!di0C8{(vitCWTX0o8|MG(7rlgCb-Ec$c-$5Olq17Fyp
zvT=7{1y2?QjZo`=zy;<<iSYy{rSHmc>&P!LflN8Pk8!aG)_ssPW@IH@)&;vtpDI%a
zX`MBW`)~RVJUS*_x&zELMg&_ECvJ2tbw*^<xm=&V_Iud~wU+P2XPKN1(YlPczb9vc
zta-uOSRWp%4WTx^-bD($D(re#H_1z>S3|m#jfTp?iMbxt#Ds<<iZ%8S)pSegmd>Ud
z$&H2HrOiPs$eIZR#3)WV1(@CIY3&Q&z?D9Irnc6v7R(x<&~^+ih@y$rRHR|WVlLU1
zl{Tm#?PK*mfUG$warO2BuKl_DZrITxk_iZMD)@FuQ9k{uRX5|Q$@NUegQswF={L`E
z5Ue6Yfs0W1V0fzPZv0Z?eJ<HdNW5XQx^Oeia^ZgNvwb(JS^g2idYylzpf~)N{2Y7w
zmmbEt<qpxYA+D3CN&*9B`)^=mpdS;WSm{@k`&)`&qmE&58r>_-B^OvKLL_g{Ws*z6
zEcX7`{wsjV{c1n{mAOUc(GHlIiO^s?JKX#_f#mDFu^}c2jZ-UNJi=6VUt7+;8y99p
z{BPrkzpz2uC#UvCFqwttfn=A?gGaZGNd^nQf;m9$=5JEtZ7Bp$%u^m<TR4bhD-IHx
zhJ3~)Yz}0#2W>?p2Mm-#a-qQ9ufIW2Rr0afGO~r#33`-M2pF=~r@-xE1z(r+rGUM`
zs{;Ff!<zm1X;#X1nASq>D%<ogH5$fBW`k2{pY~RO<$fK=tixLtd16^KrB{c*cn^Y>
z&07#N_xXk|q(Aw$vgzJUiTnO%7hvTOB6fqwq85hu5mWBgYE?&Jm$2(LH&xw!>!pPP
zamH%FXTeyAL*;=PfgP(kBiKcGe$+9P^FLl{98Wrl(XAy0k8?#%_r;cDz2UimfwlEo
z+V|jHq1zmUylAv?$d=ey0)BwNpYAYhE~m=k0@>lOXWm`V>buS5SpPK3sUVdXQ6GRa
zF@E-cY`DV%*e;KKcj?o^Ne)aS9izL6s%jW(`@lO33?{H~{IMktatw&tcWN(aHJh6k
z<-qF|3y>~Z0NYU`v{J(aq>kBlcSbz1WOBSrA>GmHUlpf=`UPH)Eo~`s`O#Bc5NDN2
zOWrK>R`}>NZpur@d(&S{7gHprPp?-j7E71cq{1<|FE)5i!-&pD|LjXEZwmhJ?MxxN
z)GQI&v0MCY+IGB;Qp-dKY)qk6QSf2yts+HfT=z`_OC3ZStQsO#kb94i_vckxe#@g`
z7!i=qnMZUU%vrJ<SXy~?9~d4H!>_ItnBynHq(=P7z3>Jh=7UZ%qi@E#cq6+cZp`5J
z+p6v$2oG=&3?xnqLe~imu7sNXNZ9E8c+RC7jzbLlpp$Q(r<2d51-VK*#Gn{JQ6rpi
z;q<MO)Ntp3b$h0RG^g}XV1?q*7n%tTb@G*waHUCc<LuX9aa>7g0diH+Wq!PA5flYB
zY8CpAF5plz^hE5NINsUt$ul@!Tfa1aPy+4QOH{?-=2qe&hNZ6Gl!JdkK$@j<WZrC8
zHTdu1b68<&z0OLgyT`TP<TT9y<LD7YY=Owpy?ftL*1lFxjm;STEb<w;*5e)XcPmu;
zYwLrZKAmf*bK@B)i{csQ2Kx_p;QCowQiWDKMzO@n^fqWT`fd2KxJ}*%b4HYpr97;B
z73zm#?$12`N@X|dRNf$-PT(L2_e~o;ysy*deN@0(O_;q->HP)BEt2jr%8VzjW$*8L
z1*suwl|I;LR+2^+Wt*3!&kt^T0b8Ju8hU(-Y3K2qj0b`Xgll6^+=`rQqXlJ#8#>uS
z?44`c7T9}bFY*lHNq!D~NP4;Af38j&k2jTVxhZ9A5^{~A>@I{_w_--XdZQSOpo`z?
zZp=Yjx}>)D8)mtk^KM&kb9{oo6osTJ(##4$nP--TlIrt+-3~Gl^lmx`-^k}qd4XwH
z&9D9Y8pGfkdnzBgifTkQFMU@&so*gP&GoOzZ<?Fuzp)*xDQ|4u9_Y{q77^Vfr@j8S
zPSEEcuW~>AbJF~Dx~Q|qT75rG>}>xSrSQ*kwX7pG{FhDH)oj&EN2BvD?h9y)+Cuba
zA{h`+)rB6LDw+BUys-DoMd&o5XwUUrx-+{ZZ%o0c6q(Onrx&qnV6Rq&_7UdozwUm%
zyLQbbfLUkb5|avzxr2kHV4#pb4fU*q%tf>wRU9=8pW_{_2&>4k2@W21kk5mT%V{8F
zDJoN?<9X&C1{wuQ-=5K<N6_5QXA6=4Y8t38ccb}$EgkQWxr)}HS9AFCIaH-OQucVk
z=g6<LtrlOfe_M=ryeuQj>hIatyOdy-)bR`***m>ei#fpHl+xxyS{(5wXqQdq_%1FX
zbL;-h_HRpvw4pmqiklE6>RWtjeGyP1*TkXiy#}7+ZBe&Reg0l*4E;x}pdmc-P`8pl
z*psbsI^-HuGw2$;>YmT#&fwRPF|NYPEiW3(FBWo#d`<@tlcPNHmNeHr>QJS$R@LKL
z2hLr#UXfDa#JBP7US4MaAlQ*XKDHY#y?A&_%UH+9C~$_~x<#=@UY&@Def3I?ZeL8^
z6#k$jQWbB2y{N8m<Y#p2r(@h3O3L22H*r!E9fyH34w!4S9gtfq^v!=Ql?Qqfi^Z{*
zGo!8qf1SArZ@Uhj1D}vQbkqN~&Z&zo0pE5nJ4I)&U#2HV{GA=!a$p-FuOrQ}-nCat
z_1AZNuRVB$X;d~HZv;a3yC7f=m4A9JsWYft*UgIt6+qOunBn1=-PQt}B4?Ia9{(i6
z#L?VB_{I3}0lHQP<JdPo=i&8;zoxGzJC*qY0k7?s#MJ{>0g%ZM-EVO1zrft0*+xCf
z{4$xn`4%D+0|A&^VOn8IGZ?-U@GWUgi%)5uX1qu+Bq#ck4=&obSoL{$qKtsIaH=sc
zguta4zT?a<JHAJ0c(%WY^7n#^DblHXaCzI6T+GjY-wPBj`NwEnzmEA8w+}6zyUL_T
zGhSeEt%35XrJRO3FJ7FpU8<HUv<1|w{%+H}f>&cVI0MWGZ-;`*0?V~az(nKVC`1oT
zlx0Zm*De-beyejwcJ9sRgK_q8%wPFu&*rjU3p(HI#<N@&l?ZQ{Lt_M{p?)8Ie%!*=
zy*s+FRsj|UvK~=Pnqm+t#D<G6M7EwgbCJt`xN+%_K9N^Rm4y_SIW7j(b2?x9)lt(j
zrf149BPPbFAbeK0HnR^G`&L?%1Me#2`=!K~$@_D8O6nLM@c)7c2^e_Uqx$hIi52}l
zKcn+r)qgy>lXRYOcCf5M)d-dq!*nzsAGGC-opCBF2mLfFCDhxEG}NLPE60s~pO^to
zT$x7KJ}Y{|k8<37H!>6y@j!GxGKSO-4v+I)Wjad}(;V24ocRB=_vZ0Xw(TGAWhqP9
zg{-M0OP1_QBWod4_ClnR?1n5^hGZ$rog&FDS+Z8LGf}k2k{DSBWiZ8*eeFFi_j5n@
z@A<s%|L;FtfA!JKIp=wt$9{Z|<GLF=y)7bMge%DAu$SM~UTyR~`Z_SUIZuYfvI<r3
z8gY8ll&JxFD*#U2IaEIsnL|5y62aA=Ikh2HMsaajjBTk#>jR??nd!9M*<wa??wS6W
zJ;%#BZz$hTaLE`WRnEf6_gtV8qcQ;?p7;`Uak!Ww=kmfjpY0K$9J?4#i&TT+L)Zj;
z9^RHBu*v$+NJX|JL`Y<Zphkjf!3(7$d+(Q0G2B+bFm~w8okaymk$rl~Zss=UWPfGb
zwY>_xlDZNOCOIM*MzWUFNV^vFy$%=Ui`WER3EI3!e~#K$+=Xe#SXuN*X+jk^DcyY^
zc(xdGxwS!VEx-$0Ok|hI1s4qGtAH>K9DbD(aQDjjXY46p;wC{~9oLz%IF?sh)jgq0
zXQ>}PbvmyES(_q#<g^<<o9hjGLe?}~vfOlBOP+&GJVww1m(=X1Q)gpiQS}7~G%abm
zW>oM2_Fn*^kbgLAl^wCa2BK&4tLuaOl6IDRZ~YWe{GC<UwfkK^1AJOOK-ED)U9diO
ztWDuGI<f=T+ofON&X<|~5XHD7S(^F`Tp#COxjhqg>!3wa&<<xIpQjn0?HDk%VZWZ=
z+ovjNr!q_1)%JPuZX{B>T85z`Et3fY+9kzR(6R;)^rGaT>sL^Yxtc<9EH<SgA?@?+
zq7q>$(0q+<Z9}(e=H27csxNY^X6xy&-UIxPw~?y={QiVpE5H7=b#cjyTUr<$xlsbb
zRry^`P8Mwvl}s`&!XJRhzz#fuBa_Am*c-qK?_t58a+J0Tnx(!LYy0gnZMiaa)d4=c
zu8WONm;DdFr*x&BM44~kYj!YE6y>`}5PYkWICjZY5_?cniW(I%3uywpMnr^YrDZsF
zN?iTw;?q*@_J~`D+idIUhm4Q5sYt5|7T@V0=af{!uz(lXVE`-)uD7QgK#*AgLfBW%
zH(~M5AcTRu!MrGfzYoH;lK2<p(x<M)UY@}>7JEv0rNgHvHgxm7sI(E<&i5ZLJL9E(
z>ZMd*S&0l}A>=)O0DjI8Bf;^L`YMv;tExg>3|LxS34?ORrS$e}(6Qh~5k(b6o>a^Q
zKH5p6`EryYr-g%5FqS77drA5C!=9OAf@xKvOJXSHK7SYq98T<fud}UPK3SDfGYx=#
zX%G*aSDri#rVk}u$%K@Fzm>bVB2#R|dHS4|`u9Ni7XCLUBLmuPEb>bx9JUlb!Jqo{
z(cA>#wC~_baSCMG+>`I)1NT_v71K5zNNe*-qek4Iz)C!!P1m2po@;39(Szry3uveJ
zXkPHSA!tnqZNH$eP&9FC_bw|@hFlux>mZoI#eRK$l!~wff+3F_`4wJ&`MJ>8daDg1
zLXiQr5+V|-^e~uE$KWU1dfQ;bfK}j$sa6%Kn~9`CwP29I;DXmJN%D?Z1m$VgN)zh)
z0RxvYq~p2MKKQJ^;HBx??8SilnHNLwoNyV{$*iB`mkcPcv>y`KbfOq0GbfCZVAmF#
z%Z)`o!w~VSw+*-5p^5<aEQ`7C$0?(?`BDId$F%~l$Sgls2CskwDKA72?H@6Zw6{AG
z)6Ky7`&cXONGxB<;Pok)_LH?+j`0RXXMV^!e5|4<^swqJQv-6_N|9jU`*du_vv-6`
z&;n=`4JHFS%1|A(Q+@J!GRd5wy%35#DEY3bUNk=B+Yt38Qr4n*l5}K3k5ypS609WK
zO96r>2CVNw3IP2E`i#JaaJz`YV)Eq0T9M4SyZ$TO$kkyz2E+p7Y>s|Ztx*HOjp(_d
znOQy}hEDs$cn-TIoLsH*vR(1fMhDDM(cVISKk6xkvh+xq+Bt}!xcnRPXw3~Vd<Re;
z#!8K*i${8&HF%LvC%+X;TKt^~^W;b`%0F;wgP=SZ8Q0|k9JuJCkvX}sh+z@wEl7ND
z^hIUJvq);dTUB7n1`~x=2G;{21&{2ydmq7XbdJ}HzbXPF7wE14%~E$DM~wgIQ<QOP
zCh!p*_r$4vvo9+SL^Eq}yhz7;j<6puTdyvE;^MMt1~wS{m!tlLE)9vDZ|Kz*m=G%^
zQy7GRQ)%(jk-N=q1)+8<j3mI_2=H|071dm5qCahG!Ya`CWPMd9Ql+haqS`(zQfBRr
zf*NR&3EoqnrnpxgCYHV70VVoZ10Y7$tvNdx?D<n1p??*<X4}ZFL>xulbsvw@GP_Px
z({u<XtTf^C;!iF?|59@=o8oInlA>e+&p=+^f5bP1UFKOFyYdzl!nI$53O@tIBe0b`
zbbVj`9!^7a*FIid@Op|i=C^=Tvv(`Ykk$iIj8E{_uFd)@cJ1XOi=VGsnLOmCLWnBG
zZ^)IeAG992V5ndRk=@la?t?W~cJ!UNDKM|P7)#^$N|g_>=LA)%uJf1k(L(aElh<WC
z4EkB?d^hU%Y8T)C>j!P*EGgepu42itrN-wQxzHg*y#PWp02)`iJ8Nc&tg9A{ufsry
zB(IN*DeDjKk*GZ5J)BAvM&VK*F^%Vm*nE=R^Zk~vyt&!<OWA|b9k==6o?B%S(wbYZ
zm)v%|Y2s28H1x)>dKG+g?0b;r7g?P-G{(nJeN&1Bp_x!?_9q&nF?}8siHQ!{1^R#$
z)h1YP#VTkn45rtPT-k3V;WbNfO*8Qm^p+|jI4JuyTat_T9o)dPlzX;RN=OVJeMdOM
zl>P)-x_=}A$gMMnQv_cvL!w4b0b&AoX18!Ms&S}a|Bh8~C>;FJ(1p_zh_CCPp{O_h
z=dN5!A3l)t{8(GG+eHE#WCS<O@VHN6m48SW4Km|mD<$k_o%EK3WTo00K*_>h?d4V7
zjIWF?DzZ27xaf+wC)Q$&TiN5btwF!B_VWB%LIS<C6LGTATxE9r%^D+2aZ5_VFE3|(
zbt#A>yMXqtJxUGylmX5$_oN+y69a84qo}#yOi!~#&Z1Q^WDB1%D|xMAL$;9%%U)j<
zQ}41_l=U8B$ig<=DV=HsSLT$@@Do~fJWbSMmjfem)tYgcgQ`#U_j8+!yw@u-%Rm4<
zlmJZGqI_H&JmFy*$NFf`%wcB&J*pq8(v1!1cgm`0Ar0PkUHGdm#SW6zJ%I9Fp*1Bh
zen<R&?J+~Ao!Psc5*cElwjsU5hJ?(2DDw!hZhCkVv>e2;_^J51x7oQLI1~4+X*iYu
z;H+V(l_N=+-|y$qwDUY)beFM8d^6_z>$Ae%^)1u_f)rm3Ao?RE3v-i-rMXlH`66;|
z;vqYNl|jlc0<WIZ672x9@25T2V<jORz4g{VQ0|FU0Xxaw&EmH6RLDrH7fJxRAF3`^
zg!3*?9gQY<Lpp!qb<>U9jb49qNWeLOPPPz*=%9uN<R7fWlY_+&zjskaX}eGCHLcBg
z&MDF{hJ7^<1M79DelQG8!It;}o6>&(^}$1HMqX^+=YMwV8d_-Jo!rV<gv+PH+w4Y)
z(Sbwv_BP+7OG=4gx>eTpNM>a7EaAp0DihHljO=jdBUKB2QFX_LJP9z?>QVp|NR*{P
za4vfhi$nisLx+5abeRP>^8fNF-SVzaVvP;FYV)Q>BG7{I%5~-0aovLJs<oL)xylw+
z&4GrNKA<`_){N1@5KhQnRu1VZ;+&PEE<OX*11zM}&^_^~9y;21P;%p&KT5TNhSI6_
zug8Mir5^n?OEMCW?(mRwi<=t@grKqWDZ3|)Lnd1KLW=bZqu14Ot3vp+-3SHX70Bd%
zd4;%!ED9isPuv|Jf^i_6r;e`#?X1nLs*LxpFYi*Y6zsD>JH&{GN`KBMEMpAM{wk`5
zQB>#d7aLMpsQsp~ci)Mi@*$}8>fWb{fs{vQXV(1XT6tn9tEYT4JoNU)Kg{8j4EU(v
zqpM^L&Uxj@xGh_CJ#Kv@%iPX?xHN|r&jPb!tRB%II;ceC3LiIz@J5lVQ3xA8HKFX=
zz%A&hG&cVbZg0qIf?v;wT<Ydegic`h+F8@^Yk7NrOWjakMDbYihoe|?6eP<nmJTAx
zZ3Fy>KV-g<D|v?FmqxUsHq<5!=yMVjJ1)}aO6;&?$7FVUy3T~yHL<guZ(~X-t$XGR
z72YAA%@gU01~^Av#EOAR2j}}9@QCp{5q}8Q@p4m?qj8@X6}cPtM`PYu-H;nuan9hz
zWOjCKZXAi|JbxOr3^VCS`7GGF)-o!n$~ix25|!{(7y{Wm_x0OCUpYQ7fJzAO9dL#d
zkx@f;Cz^7!Fqp7QI7Dd{%sK;$U;MF=i0%N(@u3Fml^{Lt;m*R*rRwLHoOFe7P@{Nr
z^@R&%>Mf&VVRPG;%6qSQS2-0ls96270qhqateJ6<KIaC0+u{kejVP+KDk?fG_)5q|
zIQe>Hm=I3%ZPwQ~zWu2!G2ioU2=c&6wj9lVHs@86sgR5}hUFy=VT)+A=%1<$A?F&}
zOqdJ_IYQeZR~zFoPB!WHlpp<rj3<pQjxJUtMBg;-e@eU&5<Vt!;KsbA-0fMCyrM?H
zVX%Pi^-^Zx*M2w-xKCNfJRQ6B0XgpSeX&k(N1stSoBb->I)eQKP1pIKk^)t=-|{-n
z?e)>~u#Ca3Z0I7JV)KFV6zaH0*BxEXS3?ZOvgLuDA0S}t*;!{HbMd3Zs|1Vg6}_R3
zoyZE>yqwR0Pd&x1({%0o-q0Qx+SDc-i7%$CZv5?E`w#j^yr=As%HfnhdLms5O^FNE
z8p9s*7=F91ACGrHa8AV@KbFf{Aj>rh>5JnE--T(A=ip@9i_wPVgRI>VroFmhL_QW9
z!mFBhHj16wwsXf7l2SzHEww>!$}a)lNBPFt2SEF)7ai8Dd=e-#=xcF|8D&nL=pFXM
zXZ!CV1zxi|{|=(=?4wsNJh<Yrapbb3-fL(2T&HhVkJ&@RNwpao-JY@ZelLApRi0dR
zjyJFs@|c9$8Bg3km-MrjL!~Crwj%lF;CGNKM}l4jAXyl@gFChZQQRAzaJ;U_p=_GP
zJri@_6*_uyiTRwkkgA<V^GxfR$#u5xFEoeZkMf`IkbZI0<eRcaEO-I2{yz$6d*5?x
zJY2fPUk4Xc=6vrcQk=0#k6W{90~)LVA6O+$NWHY3WBS0M;@iNQCR?p6w*M^Q#A_A$
zhBJv@_{aI<6+|WRl%tUiHTfzXbuSbxtq*|CmbX#U3f`u{zFS?{zLLkUT2Y-|<#!jP
zLk{J^XtFyHW-F@uDDAP$FB7`yUjK3K7>>M>sTUIsxm)R$GD-fr-P#4F<<Bc7+>|@`
z9bwmbr3p&&oL~{(hg?c;W&VCiq(l#$NaZcou?*MN!-@+GC<GBxN$;YvD$=H}IrVLu
zai61%e1bvAHuKE7n~o0QMEtxFGxn7u)|uZ=(9Sl;A$E|~ol=c$cYrx($n5Fj1_qX3
z!u57)<g@rKC*eME2`b7P6=5-&<BQmUGXR$n2P-BC@lYPwk<UHQD2F_;Ah__Y$ZKUJ
z`%wTnQ@Yiaw~;1$Au}o3KMu=1nZti4{pP*-^UvExUsarNanU2$LK=CoHc|B(kM!Fy
z-eP7tP}Jhd%?$>~7DgI5T@_s22FC<|Htr3(PNo^~b_l<o4qNlNR%quNjVdi>)0&=R
zQDaQEw5QL!QK*~SmID%~e8KuASl<nUex-tLtKkp9%i!p}+WH+B@Q|I71fH!KfLst6
z5zrP~ZuB`q%~wdAwA~(jcg}|u>jn$0{%X45V~`L@y2DPAA_;##CnWvs>Oi02KA8&;
zbRP()6f8S?u0OM7;Tgzq(yw|Nq|}3IAid`!8NnZf5W}h-rE^*1;7_ydRZ;4w?v?&k
z7o(B>3Hw^()k5mzU+mib>ORV4c7zkm(r2B`y|x}#tcqJtfR5|=R-SPBDd$fLCS$IV
z8I8{k2v4b}z(8ZZ!0pN{_1!@5X7D=&&0V+7S++XU=Zq?H8R~FORAkJ}JpJuKzcW;k
zpq(MI712Jb;j$E0sk>4iP@C!40hjvZY;&Vy+*x%l<E3=M1XQ1JjD#ZUDI~}*9?2>R
zXgCa@9z~!_5j1#-X)dg{Ix;FjxUuz%`!{xLb~$vWC}b|PlQD%mHGf^@NXPj;0_4^u
z%lf9w17CcIovRl_W5nB!J?9di-(*MWl3K+A%h^){r+l7d6>Lp4m{~iiT4x8c`Dw8t
zZoW0n>&uFtfFfz-6)Q%G`-Ram_FP((x<5$ehkFzCZTMQ(`&o=A6<}2%#KoF;V<{6E
zsE7aO8HRC&xYXTa(gG**vAop!Lo$b=N~=eBnIzCuj8wBKL)BC8&LH#Vh$XDo?|?XG
z;hY&jxP?~y?)@?!Lyv#E@5w9;QI%r_Q0KTOW$+m^)H&`;+bt>@9Wq+o-cl`DMD>FM
zb0O{rC5P>*^K77;2Z;4JLv7#mi2J1VbkSHHHFC|9aoBD3{26K@2Z;f}sw4Pb^oGDy
z{7TzB$N!)<M|&`$(^4BfU6T(?Ji`TE5-B)aJ3l&j*me&ZBJ3$X(GZ<n=HI=S(yKy)
z%)=7q=n{RbyE&H9<M*v?I?6?rPIYmP&6{^`rg4#^S<<2ZJoq-uHTbwNC!_I70I`4P
z?ay{ZBY_3MQsV=sBAJm`1CX5}CWld|ny~3Kg}Tn%W@ci=T-Y<*>FmYavk3+B84=34
zeEFYqB!DSESnA6t%+Y9hA6a>=LX#DYMk}(A^*`MpJayje;?eM_gt$(AIE^oy<^+10
zmhIEjqJ4=aEZ%kQPqjf6*@vBtxIwk?epd&s6MBUD(8xlD3m74FUo=KVbD^y@zjW#}
z!`OT;93`0q#jdO{vIR^9oOSeN9il>#59(HP<H!a?sU=~geHE-U<O8zyhELt&&<PES
zot%)ei#4?p1X6t3OzWGhaAgM1bCSNXPwc|#x=mB%aysNCE2O-nap%GJlTr>vQkJO@
zcgSiTH{HHj%!smrg##5FLLUy1-d6t#kT(POlClgQNrYZFTO^LUj9mM4U!Sk$#=C*~
zwt<O4<?qQblsO8>(Zz3T1pI;NP(IRP<=exOkwreRE3+&Sl#kMbhoG|%F4BO@i@39Y
zGnr`Fk<mHl<`TGP?)ta1_#qa=6P+k`PQ+gEh~~mwe!sg3;Ql!Hq`YCpgwC<s3!YE7
ziMkRG^6u0?IC*s=)dJX7Z|#^mI;V`NPu7QisxvSN@XkCVLPn>d_}>TD*oar_O88U{
zyN-}w`=moVS1;~oFk|L(L6MGOm~hqg)+V{uu8jjy8Do`47WJDpep|=zhr*ze;sduj
z_uk}ArFx()K~rvrR`1Zf&hh8aQRBrMemYkEN<v+Kq>O6p__1QKpvH4hqkLJL+0&Jl
z+@kFS8bGhqg?vf6hzYHsv>&`hQ7IJ(bMBNwikJC`>fjDQ^9t?>Lr&*@hMWWq_gmd#
zA(^T>ME6*yv%@NrX@~GYF>>Bb+Z#3$z1zIf64(-K)N0ZE{1DFNJhJ6k0yDDFMJ8bp
z=#0*(xB;o!knY91QKck7NamHG)G7<00I{N31q;l#CPV$=pr+~WY$eh7X7!s+UsUW5
za|YZlIGfcmY26)&;IB+}q<m+a?fudOw}`BT2*^P~u|w|HN?i1V(gc3Rn?z|_Au;td
z&7k@hH0K@;rp<!`-&A@7cx3e0*Yx{O7*T7bmmH;#v$zi%tAOL_Q0Y)Z>dV+lIH+)}
zhnW`t!2;Axf%|-IDjKs(pgUin1ub|`D17yP#|XkH?4T0zvhLO#@!dQ@f~&z9F42<j
zHLi}@&i8Ilf(_><tv{^Ia1_|3esdm%87cyahYUqw43O@L1?zgcZWe7y=Fc{-_~>F9
zc^A3Z)nzZ1C<jc0#iSAmqA8-N$oH7fhi|c-{Mg2i;E#Dv`FG6Mi&J^NvSUZ)&i0q>
zL}nj@8}c;wz?$S)owUf~dpP31Wv|aEy6)~SH?i#;-k1Bg4mHFM$uwCjfeF;GW0BBI
z!dxI<7Ab7j1P#GQ2vX@7?(kJ7eX+qJFfJsd_BwQC0(=K~dN3Npr@8R3^|_2{$@JPu
zTcP~Y<*c1T20E@$tIX$8NWb3j(*Zb==~e$)P~+s_Vf}4wsPVR?%rs4#WcN7EMbAFw
zyCy<;NajL_Qs{(T7YHe**ZNymBlq>F_y)U15j8jxrg;S8`D%*>WOBq?3?&jPK5#Ru
zX7=>kkiPMPQ9TB}Kw-MetH5{#1h>36xX#;gj-PCm)jH3O*t~C%MsQC-tjn*3Pd!(G
zo2YW1+&ieU{}-#?XawIb$rP{mENN<2ol6a28YZx_KMZRze?RuBF?C9i0pxFdZu3SM
z_FBE2&qsQj3<3;TK}E<N&6&*r!yfVtPK#i05*k&}igHdqK}oxCU)fJX+CcLFcH~_U
zoIds|>(a`#iR{K&P&>FQ-%NfF*QO@JEfM+Y@OwT{o*v$Tcpd?TL9e;6mmx>PE4D45
z=cJ!cMr;3sLDVw$)#C@|Gx#>(oGG`-u``dyG=e=A`5hrYNHCjzKS~Zc=mI)IMhNk2
zSu_X7jd%{su?*TZ2;u6T=o02C6F&0lX6CJllvJXM5-TRZvZix&4>)I9XTrrnwWGIu
z^)2SR=}lkBay>X4@izHQF?0o1ZspS=ls4AeeR($o^Gmr6Z1E|bP$bwrF%qY*2l82m
zF>cu<x+IM2=rl;Xa4In=eCK47k1S|o<W#^BBU>9$)t7)x5VvN)X)<<BA@)oRNbD(K
z?-d<j@AO(b(qLZfy0V7-gQy2Ik16&CEstKQHjTk9&!ce+OW-s(0x}OQ2|_Y4IS|cR
zwNRtY1^6UXOl;7x<Bec(o(2=zCaZhGLi8szXg{s$5<=_MoBN4^cE`0Ouw&~gM+?za
za`~4o*}I}bjaTI=NZZ_xlqN-9&=rF*_7I!nl|UILLE<a>!0i5nn=49xH8MEV_5R$H
zV;{qwOXau)*k_!Rc&Wbqf+Hi;x6iknYone6lj@m!=@<iI188I<1l<cKNoLK0HMpit
z+s0LSI$~Hc8>?plv9mZ$TzgzI34WcuzE&~p3<`D1I$E1W^8n6*k6RiOfnKIP;1UqI
zflJkV8_F+=wCM?Moq4dZbA|cg$^Xzq#$=K*4p(QdVv~#akMILTs1bwoS?aBmW7a~*
z>^*ImTd;yw0c|u{gRevB`CzmiL6$w#Pv1nitNto?`uE@yr?O!Nz1znJ;SeL@&z3da
zJBmNt2F}6f*LUvY48bR;?t#?$-(*RW3ZGgp_fm^O^T2H9g<Dk<9;_@7M|7vG|MIxP
zExqG(=%lCeY`s9zp~((csGuQSvkKqMTX;~X=k%AgUS6*1uZ@bxvO$Y9*$|oofHggD
zF59~i+Qig{lpdH8cF8MxZ`6lIsf@4N`w&k28V_5yjoAEMB+-tyF*2MlSOv9N5Ju+q
zPh(Ep<^Tnh1L#SKFxwlC4rn4Wx7oVx=ys}{TkGfdt1!R)Bt91g(U*F^K0%rLyeQQ4
z<JY7!oAGb${<IxznEbqe-XOTa&rc$PuYp2Vu%+R;j7U|tG2wYgi73-=uW1aIU<i>Z
zTg4t7yQB|%BG5b+_&UGpMip21=4Ua?{)>){4h|8W&X*fMj-ef)r^^WCPP7T&N5J4^
zA5A*U_Ab5qC3YZ<kYR@IIi1kka9RvITPF2Q+uxUQ6w1en^%p<qb*nc%WYTf9^Pl2~
zK)Fn>`eoSqHhVl<e5xSniDIM<Kx}|e(DJi?7pqR^m}Zm8k2;K^7qb@EH)4LpuioD)
zM)sQ$;0Zh~%{Emjk(jhSXFm*VFCX|Z<35%9QW4yYnGUxPQEs_`ipP*QWKSN<yacxk
z#|G|69mYT>&CF2|4SCxv_9Yg!Jv>_O4Rem1aTFB*|CAHTqjoHBpH0Moo|XzA^{0o`
zXD|D|c>i44E&Y743KcFCm7<gJb%$+*20`}>7LviTRG2fO7$Z!+x9#trN7<C>f#S)R
z3CBF2qvCA#T22=4v=rVxQO@~@axpf!;|NRy#dV@uM<zqKz^TPE1X$ROK9!1+!p=4b
zZVC0&=OXd}(759p-uskBLC@eieS`ov$pw<#_}Bd`UR{Q(X~8dFq8ZQa^WC+j?S_#f
zv+}eZNHsblSbSM5D8mfZCT!WX)kffPA>yZtZK((^dY^klsl=qZ-UloNWL%I#0R4d4
zZ+E7Sr4MfKK6(sPf|1hcS3jv?Vl^#ySlRcaEOA{1nU2DfjGcB=NahB*_jK5)_P+N!
zRyT~%a|8In7i1(Rxz;!Mt8!Bq;oL6x^|~Bd&q_;F4J4A)34wSg0L+hH-e3IY>1tsO
zHqQ4jLeT_GnbOZ1{_^1pc;z%3WKXm}uBt#@dkehj71B}%fdqp0{8&co34M(~(c?Wk
zh!V9~1n9?-xSNEp*@;|5-(ApdPrp{9vY?)yec%{Al_Ar#!DI{zC)u*NkMN5Eo4k_K
z!}|gF8NS!5mIP@hq<$T;BKCs+@t|ZD?mZTMah(-UYtdSNq8D;bX&#SvWk98Cqxdk@
z#tZ~oNC9!6V^M&DHdv~_&(HJP-S19}13fV31AMku!0!y21ys6|;XJzN#Y^o5<!-i$
z-qo~J-{8lk38rZmh}%d%PXU&y=1w$PDfa~ZIO;W84=+}*g0?&({jof11P^TqNCvU`
zj$8kwpeLMc`K49UJLL0Us{}P3n@w{nFeRL5q|X%H1H4fkvx#^HfpAJw`4C}3T9N5}
zWI~VxW5{-Z9-<u7X0^3UB@Orm*H7POmM5|{lg!-dGn?M5{`k@H5y-PF22(HtArp^q
zeJ7?euY^fWEq<IX*IOD*yMf?#Zlcyvz0LM(X#zXRI8(jTc}0-9`O)#qpph<J&a!B^
zV}G{V8TKLb<R)2i1s&3da1}vqx(RUmd$6i5g6m692G@;0I!|QKedgE(sp9VeooX>J
zv&p3AdC}ldU7kA2Qf?-)-|kjt;TKj+)e@|iqQ}6p_R6wt2Z|CPnc*@a+B7&r%}<j-
zckQhnLt~Ou>)Mg!OPJpyEpI9&_^I*x!TrHg?;fr)1ikCps2*&Z1*jw+?w!f`yRoKc
z7_ap%j`)LjEps!6mXd%nrF}MSSJ)^$()UJ?HlC_ev+A|qYL)s1DW$XNhsLJU-uT?&
zAw=>G`pSW1$0>a9=@{L9pd(ZIIjW3lVABs?+~=u$VBNb>59(1^IMUvNk=r-|$_&}{
zf=Qa4CL~!4DOa>+@Z3#QbDOnEJAlau9_*wn|A=Zb!Kxu#Q1C*yu1q!lIMVjPYi3Ou
zD(P|(Y!w2{Bx{v|GlUbo9ogH`I1l;4csii05h75n2TcSigjW?^IhB~@q;Y;~jbjx%
zA7s8tMR56$>cs8fRUNMV@`7J1qB|@AvU(W8V%jxZpJP?4kkqB9>oO40{3ue6cTps`
z;T7sB4N^<{@D*J}FP>%kI=`9|2|0vVeANIy?lNz4R36z@M(X74=+BF1;?;&{!n*HA
z`}h7#OJ$OF)*ssmQ%UiO_sgqx&!N~;N$231ha5y+fGInj{#ilXoAdI!qriHxp-`Rk
z%C}T%oHcq!%>sTE0^znW&AdZ*qibw&pFE>Fk(Eui4SNlcp9d~-Xu&=a!-bhZru`@~
z2Y0!|=$0<d#qEApt%~5vddkW7U9m61Gd(VJcRt)d?pFQMbBoH}`?o1neH(?drnI~Q
zmOyoABP>K-k^L{hmjm^`qpQ%Ia6-`T?b)75C>P@gE{WijQAGHzGmuEGwqsl#cX4E)
z-U2J~@f}kp)+8yz&m{Yu03z4LAb^O=((W-NSl-AUd<Kpf#z&8?(J3S=(DuSGADU6?
za@&de0_A`C79y&icK}J!v4|H^>FC_s9UuIHTSsX9;YLgTyw>VCG)<GNTq5t0nV`-E
zCNoK8_M%z@zP`pa0B*}LD*AA3x@ldhpD8`)%!_4X*_ZoDws&cDk1+zN@=j=!s?BOy
zCTBg~ZKU7bkwNC6T3{!w%dEa1&1_mB@v}Xm(=cSz^g#}xtM5^r&G89#)mHkHw?>~E
zg@_j$#mwJ1%)e}AL;%%@yYiLNb(H*#$`e5{N+R~rd)9$o>sZD&)gP=pwyrl>e}ZO8
zB+}{i3H7=BF+#(#4~)&HkzoT9UGSOJQ_@!B{iFVrR!m3O0=rfrbiuQgwWUA8eKy%%
z{7_PPRC{0>03_70(3VC$<d_SKui~4${h13D3d1tT?viE=W4STEXF8=?ys&dlr(hl=
zo)+AlEN^gkqTKU<MT$Au^mznQ&uUT9^riwKbm>wbzGC$hHLdDiN5ogCh7Q~cP8;CA
zkA&EAKO_t!9w-WSKdK=kSS34}F|Rm2>xy4*A_daoy3uzsWrb)L1a#avKYMTgICy)C
zKB+rquu#JT@K_0=TKTul`Pn~c3zuruSWH{|7abmk5ht@#3*k4xbg0bHjNUL0pZq*9
zj2T}2VyIKE?$8+eb<SvPgcUge#Sny7mbUu?tNX~)x8vZwEO~{nmQKYp-<ed-+uNe*
zz~OEKTB6v<>&RQiHZW`IjsoI1X7y%lsBgz6e)(BViGE@8>Rd+YxS`dk$3){Hx%IJm
zH500B_{`OIFYX$0?odEQ`^J(N^Wz7~epc5jSUN5DkDKRX&q1g%zNJ(HV%QZ)VAXQ6
zMOl&(p-_ORQc#2n*eja`V}tiy_m^?lBdq{>4ajI*><pBL@t!QG-q^{cIr*Tpi5z~9
zbdn7a*!w#i7K#tbJY^3j$?Kt4ib#E>cpvR+p&^2%D~W)0tK-<s0N8Ew#_;UmJ@NJ8
zs*sVgzGcS|$C{1j%$6CZvkW=;W1YT@)I?!aJg*YGxCcG&J=I$WTwQ5GPget1ieAjT
zp^zO!4~$to;iU=kX&NKr_0~Voq0o46A9%>1v1_&c73utK7?+n_z`B(tLt%bRrz*1T
zLpynWlR`cc!t&5#PWiXLrYyqJgI+?uL}2e(8=n&i(MN+a@tgdH@=pE^`iy+7aEOgl
zReSq#P@$5m5v8T3BmqV%z?`~TUzVWYB=mtLCts~~_&QbQay~iMEscg;^>*wsE255C
zWQ+0YJ3m;@suk)bEqTqNuNpLOyHcI%7<mPpp6LDvK6zN*Ml^~$rMnT5$A}CEm5=?Y
zCD$1M^Sh6W6uKGgy1+3S7&0vN6`MI;Pe>14Nz3OW6YF>2hk@<wb$xU_v~FYl7J}&s
z&3-;=`PMDFE-!UJP94TYii5*~-whT^Rz##A9*Yb~MRB{oxY|TiksbDrKeB7xdPyVb
z$@1@G!~VOkXmf2N>F|uxw?|?*si#?GeUy%FIrn;Y7?0_J5kwNysv94Bp>W6)|Ard#
zo%WZOuFcFB(Ry;P_x#Xj3Vzu_9U5t>Qy(<{+wH*kk4Oy{$7*U%X%KxYG28A(_q6t<
zFCO-4e3(0^Vnhz~K)?^T_jyx!h)E5lEXM^C=rA+|7Xb1Mp38ynw|pL9$Jt45Hsc(h
z$5Q+R^|oy0$PvF<Myr~}I;n7VsAK&e_WBt1`XVo)OGQGKwo}QW2THm)XTXQ7z}%)s
z0$N1Yl*pPqj4lKIXUm19UFw;6YJKb2bizDkb;&ZMW1*$u-o(q(H(o9wL_zR=aHUgj
zhCfCB67(85%=Y#2^oY_ztht3Ig&$UZIQ(fZ%{3I$h+}XqQ3zxBl3UoR$v_-)d^<$Q
zcu}r^tsPrV77bcjZkqq`zEW%JTW9SOX_%)78A1c$l`M%h@$UzqhRP=E8cj~@pys(#
z+~L`1H1>oM$U84ldcxTUgdN}S(tfk@kW4mkS%wg<EnXPt`$gI{r2cYj1Lh|-{^!|9
z0$(3zpCB>>L+g>154x5M{EC6+suH4CK8x70rW8(lcU)sY-OzGYaQwSa+b#3Ux5r{H
zE+*i576$BrGFfuqp7_@HnqLDXJ_6@EFM50l@Dom20OFpliMX5kx)z#;(<_pA1h?el
ztt4Kd!pwCoc9ysNAe1{0o_C57M{xlMfv@IiVq4&{yd@noug8%H>{Y(nei$0!A)qF{
zDpKkV?5>yHfW)Pm)^dECj*Ogy5_v^<(}R7i@3&WL>?!(y!|vRJS8hw(oL>5#+sL{$
z_Ab*`?o%c>%kDJz2y|l?n6f8^7QMXh)?Ulho)+<`vCB=EJ4V|vc9RIP7?LDra9kUO
zN#W=OwDxhPJaF1Iqi%^zK*M42*Y$=%i)n_?IlXCs!hzrKS&2p>Z{3BOr)*qt^iPzg
zf3?Pxs|4ke4@5m`fz>`w6H}A*&|ryZECRXy$p`nd2CzG21lN1q1|CV5ouhlq*ZNyQ
zTJxRKMnIo1GK9uO)$dq!j%fJ>9eC69Y9acL5$o83u9#Q1$Xh=e!rqRsN9Dp0-y9z{
z=%2`o^8$fd?%L${=USKb1u@MPGM#K$db}ej^1NzrLhv!l75eQCAV~gcTPgSjr)kkA
zj0U@Mg)rtnsHWF!b9<z*F^diMm5);M+5G4s1THU*ms2E*3Qb1i1qfQTnKYefFAkju
zPQUNc3$Zrrg{Awv#iEEhN&w1VC;^l`gcRS_*sfh1vT+~S6%<nPnw!OP<`VM+n>sj-
z_9D!s!G$pV87=GYd=^P-(>8wN=C|UH-%YBVJVy9GT0nW28l)RQT~6PQ(k>Z`H{YBP
zyRbp{5Zbu=R>;ek+6&>H)z1X+5wsA%BUahtswwZ=pfSa!?m8zCNycMNrIT3gYbQR`
z_dmI`71XcEL}i8IH|*uLc{cOlF5-lr@Hu|#3!RIitlfW42?*Q4vAwfzx7klupzb*F
zwu|5xlf4+&*>_tsr|K+njc$z2X}_GI!zY4%Kyvw1l}Hdo^{YGWb$k}VwAX&0KR5(P
zsF;J$1J%pu3UuLo_)q?N*~Zok_Gjtl9q%Qw4;8;&eIp8KZOIPQkxv0U?4YM93vS2M
z+WZHILPqwFbGk#z7J^qBPAfPAr5~ixD>|ARlD1*6leQXKey1U6iC5+?TNXc9d_=6o
z(L<eWv*mOEsK%HnB}mA93N==i&)823OPjzho-b2sT=?x{+Kqgl{YH8ufNPx;VpP<B
zZftn}bg84LQsH~Iz#Q5`40S&mk_loR0#~W*&016r>ApTh+X3_?a=odPPeMVPc%pqU
zy_r4uPTnD5t9{t;YBz(55pJgY)w3gpyxGR9t(`j%C9vPf6@`fI3$+;UetBx5Cig2W
zi4#s$J?cWAZJL1kOMMY7zNai0v#7<s*L@BwrJ7V5QD5(UviY9W(~{(ZGfk_@?;Ptb
zrzKjTwwkyXz+z|~l)E29tTX|{6-~9y?<{@<P&R~;QcP?18hsrC*?j*+wGD3pn$PG9
z1k?%i^QAvMnS&xXcRQ{R{M>xI__6ggjlF>?+@J}hy&=mlr;3c$OkHneP;!E!+Ka#O
zRZ2n=FXgIH)kpAD<p(ojp5DVUw>OQ*U3e+hR)3>5z`@a(kIloflzX;eQoq9kIi(M2
z1A8`b#EGA}eyE4eT+c8A6NDVft&>+cjILEwIV!y0Zk+#M1sCl#=&(1)aEKrkZ(i;B
zROi#_Rob{z?``6bQRzccQbC5w4>v<^i$X+)i8fs5h&u>33V}Snqh68<$qJr(HD5yJ
z%%b9hisu<>&mq}V&UvNGns0j-R62TBBhP(Po7sijL$NkE5!KoKC(`POk*xcV-vj*c
z%`fTz#6>=wZl*Dm42`8mx-weI`<{=`oJ(xR7?z|Z5CdGaI`3BAFqYi4<%;dsdp&)7
zv?^f`a*})rvQUCB2F;r@9n0L6Y%<(oGS&(tLnLFQdL(UuS8-P$z<cc<gMHh4j^(Nm
z3;^yl!DFphUb@UW(ayha`aSY9_y>=M(L#-YBN!MUS{9>fF3o4>l&Y2?G|V!8TRLVf
z!xBb=d@cb&v9uP$C6%|Gom*w}+>mWlpRYU~L%u-&JahWFK<UOQu~M>NlV2>A4EoBY
zEb&a35l?Bg!a*$~f$20B^?p7VRzGoi<;a2HYWc@s#)l#$Z=qX-B<a7k)@`Uh@FJku
z%0MZjE*}%xbb-%yzGkT@?cOheMP%CrK+JiYr{)~mg8dJ(6P@dMfN7D`Uo*U!w@~M&
z+<)#>WHBfHJp_U?^E?!Fk%+GIN#nJ%BaZQT=j)>y^S6Qqqy6ii*{wcudFs>VHO|X~
ztAmIWF;|=I;NE~6p5LoQVpBxByRvk@C}`~ZD(ndZG{n3@V$2z&cT-p2Z<cedgv`(#
zE5}buD&8G)E38$+bN!tQtEHPZ>mD6O>uOsVq#8ziAYBp5GJ&s8D1XvUL=Nn=Tez!L
zg<4f4BNrSXd=J5~tVP4=8?tf+y%S&j(IH;`CxYt_%S3<0Ig07N7V8*(|KL{*CNv|c
z?Oclr^Y)B1<f95OkB(sT6!kqsiCq;GH?DX7FCjzldJ+3Ra+sgB3i0>%VI%KHR-Cx*
zN2T9C_&*kKz9!!rFAoz<YU#A9DkR(Vv?w@ik<F@v+ItSI6pELF49P5cdYJvV!tLPc
zqDqd?OxcCsYN;t#F0Q+z(aZ}c<j1T($>M$_fF!U2v7Yw6nR<}&>qx|XL@5T<MK}o&
zo3AI1F(-ps5^LMo>@Tf6h~$=QEDljk@IQTKCoT7VG|G-U;2~yYl#9irzauK><Dn3)
zk{h8k_CD}_ZTm&Ym_Fd38uszwHlcn*_`#w}0z%=BykWP$L5o;z$bMe$EfcV$QXjn|
zV)cFf(CG)+Jx{IU4?TG(AJTA4fkuO*s^gJKsUDzmxo(2nx`w5(-;J(oT01>eGChUw
z^XU$VxHpfk*;)NI;T)$S=%=y%sm<28s^p#a*Nv*V+zLmIP?-e`kvA4<N98`e9_%^x
zoAfh(y<Vkh^x9m|k^3-p0e2UD6Sq<AsS|S-7Y?NrQ(HW)^4PVm1nYw1kY;HgLI(>M
zFIcVD^_NthLhkffg}?dLxXV{i?zx?Z-1i&6d^HoBYbCJiliR;*=2vne#HNtglMso!
z4v6Tn^mK>qAnTHr(*Wv~tRG9$|C?g^hMj_cIiZjc%%gmzIfQ3r%XhqzdZdlA@nsTe
zUFI#l<+$NClTlt56CLVUTR3I3tUUK^2*|&78T>ZPU}g@JX~-3?Ar-w_Lf>;!S<`7G
zKaN#n>4IU?+fPb&14pZL#-3cIpDQwz1W!kPn8rbYA!T>$ro>KWM$-$p&)ylB&CkK&
zzM-$V|NFsj2r_ja=#UMJY0I5cY^^c2#WCYe=^@P+Qn2fjP(us<C~J00)YK4@4AQ3!
z=ljp&%X2`dN!dpuc?Co3Q76<^k1>NU_Ml&!?b?xZh+QL$KAmaSB-Q=G#Z2mQceus>
zJOynj!fX5nX*7`jPle&Oh<$a)@q;JnB4xgTxBj2U{(6~L#;L~FWEDetc#8-Q{4}Cw
zxDQdHfjAU`>czjGEGf6No3m9`9ljZ5!;U)vRR{-*J^2^&V5Np`&y9vh-v8Y&a=PhL
z@QX9ci<wcgvefuX@aAsBWdF&iMfS9RXaQGlFt}bYJ<!EYnkE4z4PN(-r!5U%IMng8
z$a4?ZanFBOE8pw(nJxEG5HtRJh&5>E)rD~j(@$z`>5gVa{ziKA9aKDo>`=%3|LoMB
zEj7g*v5vt8*-5G)3NjG8+UtQ+FqM9=KlA7*_NL~0i*_D%+luAy?+l%-O;EhJKy^uE
z|HzF{I>byDsDJwIh)=ts)_rEj>4xN1)oIWh|9S8)@2~LRQIh>W@cBHnQxqKpKcT=%
z!(cR1uL&zy96vqz&nKapSrAdck4r+iK=QTM@=sb_X@dmT!Dq6TgZ*LdcK}gwNMA?O
ztcg$V-|s->7iI7A)0Srf`F=VuzBrt2&I(xIf1gt(=>P9;v(=k&dmV+dtT*fpR5~5)
z$pK#Oe;=H64D{Yxe7|-sP$wul_}28P8FBfl&3~^H(*6eMI<gJU)9*O1weFYy_l)3;
zV&F~?f<qu5TZM)$$8WmV7cY*tUUwM$?+?H~tc1}IDM9Laf6maF+`S$Gd8Q_g_2K{B
ztZO{56z-L~k#T~INZl2s48Bg(c>niN+I$8!>D0G7tZRXZYr^{tlw#7y$a{sQ=y1FO
zrS4z<&sx1G9Z$z?iVdvr=Pd(%5Yk-9yrq7>XPW|^`vUw5J@G$}RV>Q#xE;>pWQI|X
z^`3-Ay_hoXC0T~He?J2!IEbF$e9vowFr+a5eOyO0;M@Q6;C7)O{hvo4==VR*@qgw0
zKY#xVW&i(qwtw+|{_<y;f5`ENHvdrK58wPli9a&+4<(TQx1v~cLo28!E1+?D;$Nfw
z-!J_6#~%v(p}-#s{Gq@f3jCqK9}4`T!2cf<SSdTEj3)RI)~_B7%?RLd|2%&v@P`6_
zDDZ~@e<<*W0)Hs*hXQ{n@P`6_DDZ~@e<<+(L4hDDo5YsUijOHUSOY=yb&YjOwd}+H
EABNN>p8x;=

diff --git a/docs_src/source/api.rst b/docs_src/source/api.rst
deleted file mode 100644
index 4dcac337..00000000
--- a/docs_src/source/api.rst
+++ /dev/null
@@ -1,11 +0,0 @@
--------------------------
-CUDA Python API Reference
--------------------------
-
-.. toctree::
-   :maxdepth: 3
-   :caption: CaptionHolder:
-
-   module/driver
-   module/runtime
-   module/nvrtc
diff --git a/docs_src/source/conduct.md b/docs_src/source/conduct.md
deleted file mode 100644
index ae72cd62..00000000
--- a/docs_src/source/conduct.md
+++ /dev/null
@@ -1,82 +0,0 @@
-# Code of Conduct
-
-## Overview
-
-Define the code of conduct followed and enforced for the CUDA Python project.
-
-## Our Pledge
-
-In the interest of fostering an open and welcoming environment, we as
-contributors and maintainers pledge to making participation in our project and
-our community a harassment-free experience for everyone, regardless of age, body
-size, disability, ethnicity, sex characteristics, gender identity and expression,
-level of experience, education, socio-economic status, nationality, personal
-appearance, race, religion, or sexual identity and orientation.
-
-## Our Standards
-
-Examples of behavior that contributes to creating a positive environment
-include:
-
-* Using welcoming and inclusive language
-* Being respectful of differing viewpoints and experiences
-* Gracefully accepting constructive criticism
-* Focusing on what is best for the community
-* Showing empathy towards other community members
-
-Examples of unacceptable behavior by participants include:
-
-* The use of sexualized language or imagery and unwelcome sexual attention or
-  advances
-* Trolling, insulting/derogatory comments, and personal or political attacks
-* Public or private harassment
-* Publishing others' private information, such as a physical or electronic
-  address, without explicit permission
-* Other conduct which could reasonably be considered inappropriate in a
-  professional setting
-
-## Our Responsibilities
-
-Project maintainers are responsible for clarifying the standards of acceptable
-behavior and are expected to take appropriate and fair corrective action in
-response to any instances of unacceptable behavior.
-
-Project maintainers have the right and responsibility to remove, edit, or
-reject comments, commits, code, wiki edits, issues, and other contributions
-that are not aligned to this Code of Conduct, or to ban temporarily or
-permanently any contributor for other behaviors that they deem inappropriate,
-threatening, offensive, or harmful.
-
-## Scope
-
-This Code of Conduct applies both within project spaces and in public spaces
-when an individual is representing the project or its community. Examples of
-representing a project or community include using an official project e-mail
-address, posting via an official social media account, or acting as an appointed
-representative at an online or offline event. Representation of a project may be
-further defined and clarified by project maintainers.
-
-## Enforcement
-
-Instances of abusive, harassing, or otherwise unacceptable behavior may be
-reported by contacting the project team at
-[cuda-python-conduct@nvidia.com](mailto:cuda-python-conduct@nvidia.com) All
-complaints will be reviewed and investigated and will result in a response that
-is deemed necessary and appropriate to the circumstances. The project team is
-obligated to maintain confidentiality with regard to the reporter of an
-incident. Further details of specific enforcement policies may be posted
-separately.
-
-Project maintainers who do not follow or enforce the Code of Conduct in good
-faith may face temporary or permanent repercussions as determined by other
-members of the project's leadership.
-
-## Attribution
-
-This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
-available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
-
-[homepage]: https://www.contributor-covenant.org
-
-For answers to common questions about this code of conduct, see
-https://www.contributor-covenant.org/faq
diff --git a/docs_src/source/conf.py b/docs_src/source/conf.py
deleted file mode 100644
index 62de0de1..00000000
--- a/docs_src/source/conf.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Configuration file for the Sphinx documentation builder.
-#
-# This file only contains a selection of the most common options. For a full
-# list see the documentation:
-# https://www.sphinx-doc.org/en/master/usage/configuration.html
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
-
-
-# -- Project information -----------------------------------------------------
-
-project = 'CUDA Python'
-copyright = '2021-2024, NVIDIA'
-author = 'NVIDIA'
-
-# The full version, including alpha/beta/rc tags
-release = '12.6.1'
-
-
-# -- General configuration ---------------------------------------------------
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
-	'sphinx.ext.autodoc',
-	'sphinx.ext.napoleon',
-	'myst_nb',
-	'enum_tools.autoenum'
-]
-
-jupyter_execute_notebooks = "force"
-numfig=True
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = []
-
-# -- Options for HTML output -------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages.  See the documentation for
-# a list of builtin themes.
-#
-html_baseurl = 'docs'
-html_theme = 'furo'
-# html_theme = 'pydata_sphinx_theme'
-html_theme_options = {
-	"light_logo": "logo-light-mode.png",
-    "dark_logo": "logo-dark-mode.png",
-}
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
diff --git a/docs_src/source/contribute.md b/docs_src/source/contribute.md
deleted file mode 100644
index 7a1bfa62..00000000
--- a/docs_src/source/contribute.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# Contributing
-
-Thank you for your interest in contributing to CUDA Python! Based on the type of contribution, it will fall into two categories:
-
-1. You want to report a bug, feature request, or documentation issue
-    - File an [issue](https://github.com/NVIDIA/cuda-python/issues/new)
-    describing what you encountered or what you want to see changed.
-    - The NVIDIA team will evaluate the issues and triage them, scheduling
-    them for a release. If you believe the issue needs priority attention
-    comment on the issue to notify the team.
-2. You want to implement a feature or bug-fix
-    - At this time we do not accept code contributions.
diff --git a/docs_src/source/index.rst b/docs_src/source/index.rst
deleted file mode 100644
index 278fa165..00000000
--- a/docs_src/source/index.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-.. CUDA Python documentation master file, created by
-   sphinx-quickstart on Wed Jul  7 12:14:05 2021.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
-CUDA Python Manual
-=======================================
-
-.. toctree::
-   :maxdepth: 2
-   :caption: Contents:
-
-   install.md
-   overview.md
-   motivation.md
-   conduct.md
-   contribute.md
-   release.md
-   api.rst
-
-
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
diff --git a/docs_src/source/install.md b/docs_src/source/install.md
deleted file mode 100644
index 6bd30719..00000000
--- a/docs_src/source/install.md
+++ /dev/null
@@ -1,92 +0,0 @@
-# Installation
-
-## Runtime Requirements
-
-CUDA Python is supported on all platforms that CUDA is supported. Specific
-dependencies are as follows:
-
-* Driver: Linux (450.80.02 or later) Windows (456.38 or later)
-* CUDA Toolkit 12.0 to 12.6
-
-```{note} Only the NVRTC redistributable component is required from the CUDA Toolkit. [CUDA Toolkit Documentation](https://docs.nvidia.com/cuda/index.html) Installation Guides can be used for guidance. Note that the NVRTC component in the Toolkit can be obtained via PYPI, Conda or Local Installer.
-```
-
-## Installing from PyPI
-
-```{code-block} shell
-pip install cuda-python
-```
-
-## Installing from Conda
-
-```{code-block} shell
-conda install -c nvidia cuda-python
-```
-
-Conda packages are assigned a dependency to CUDA Toolkit:
-
-* cuda-cudart (Provides CUDA headers to enable writting NVRTC kernels with CUDA types)
-* cuda-nvrtc (Provides NVRTC shared library)
-
-## Installing from Source
-
-### Build Requirements
-
-* CUDA Toolkit headers
-* Cython
-* pyclibrary
-
-Remaining build and test dependencies are outlined in [requirements.txt](https://github.com/NVIDIA/cuda-python/blob/main/requirements.txt)
-
-The version of CUDA Toolkit headers must match the major.minor of CUDA Python. Note that minor version compatibility will still be maintained.
-
-During the build process, environment variable `CUDA_HOME` or `CUDA_PATH` are used to find the location of CUDA headers. In particular, if your headers are located in path `/usr/local/cuda/include`, then you should set `CUDA_HOME` as follows:
-
-```
-export CUDA_HOME=/usr/local/cuda
-```
-
-### In-place
-
-To compile the extension in-place, run:
-
-```{code-block} shell
-python setup.py build_ext --inplace
-```
-
-To compile for debugging the extension modules with gdb, pass the `--debug`
-argument to setup.py.
-
-### Develop
-
-You can use
-
-```{code-block} shell
-pip install -e .
-```
-
-to install the module as editible in your current Python environment (e.g. for
-testing of porting other libraries to use the binding).
-
-## Build the Docs
-
-```{code-block} shell
-conda env create -f docs_src/environment-docs.yml
-conda activate cuda-python-docs
-```
-Then compile and install `cuda-python` following the steps above.
-
-```{code-block} shell
-cd docs_src
-make html
-open build/html/index.html
-```
-
-### Publish the Docs
-
-```{code-block} shell
-git checkout gh-pages
-cd docs_src
-make html
-cp -a build/html/. ../docs/
-```
diff --git a/docs_src/source/module/driver.rst b/docs_src/source/module/driver.rst
deleted file mode 100644
index 694c81c7..00000000
--- a/docs_src/source/module/driver.rst
+++ /dev/null
@@ -1,6792 +0,0 @@
-------
-driver
-------
-
-Data types used by CUDA driver
-------------------------------
-
-
-
-.. autoclass:: cuda.bindings.driver.CUuuid_st
-.. autoclass:: cuda.bindings.driver.CUmemFabricHandle_st
-.. autoclass:: cuda.bindings.driver.CUipcEventHandle_st
-.. autoclass:: cuda.bindings.driver.CUipcMemHandle_st
-.. autoclass:: cuda.bindings.driver.CUstreamBatchMemOpParams_union
-.. autoclass:: cuda.bindings.driver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v1_st
-.. autoclass:: cuda.bindings.driver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v2_st
-.. autoclass:: cuda.bindings.driver.CUasyncNotificationInfo_st
-.. autoclass:: cuda.bindings.driver.CUdevprop_st
-.. autoclass:: cuda.bindings.driver.CUaccessPolicyWindow_st
-.. autoclass:: cuda.bindings.driver.CUDA_KERNEL_NODE_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_KERNEL_NODE_PARAMS_v2_st
-.. autoclass:: cuda.bindings.driver.CUDA_KERNEL_NODE_PARAMS_v3_st
-.. autoclass:: cuda.bindings.driver.CUDA_MEMSET_NODE_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_MEMSET_NODE_PARAMS_v2_st
-.. autoclass:: cuda.bindings.driver.CUDA_HOST_NODE_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_HOST_NODE_PARAMS_v2_st
-.. autoclass:: cuda.bindings.driver.CUDA_CONDITIONAL_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUgraphEdgeData_st
-.. autoclass:: cuda.bindings.driver.CUDA_GRAPH_INSTANTIATE_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUlaunchMemSyncDomainMap_st
-.. autoclass:: cuda.bindings.driver.CUlaunchAttributeValue_union
-.. autoclass:: cuda.bindings.driver.CUlaunchAttribute_st
-.. autoclass:: cuda.bindings.driver.CUlaunchConfig_st
-.. autoclass:: cuda.bindings.driver.CUexecAffinitySmCount_st
-.. autoclass:: cuda.bindings.driver.CUexecAffinityParam_st
-.. autoclass:: cuda.bindings.driver.CUctxCigParam_st
-.. autoclass:: cuda.bindings.driver.CUctxCreateParams_st
-.. autoclass:: cuda.bindings.driver.CUlibraryHostUniversalFunctionAndDataTable_st
-.. autoclass:: cuda.bindings.driver.CUDA_MEMCPY2D_st
-.. autoclass:: cuda.bindings.driver.CUDA_MEMCPY3D_st
-.. autoclass:: cuda.bindings.driver.CUDA_MEMCPY3D_PEER_st
-.. autoclass:: cuda.bindings.driver.CUDA_MEMCPY_NODE_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY_DESCRIPTOR_st
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY3D_DESCRIPTOR_st
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY_SPARSE_PROPERTIES_st
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY_MEMORY_REQUIREMENTS_st
-.. autoclass:: cuda.bindings.driver.CUDA_RESOURCE_DESC_st
-.. autoclass:: cuda.bindings.driver.CUDA_TEXTURE_DESC_st
-.. autoclass:: cuda.bindings.driver.CUDA_RESOURCE_VIEW_DESC_st
-.. autoclass:: cuda.bindings.driver.CUtensorMap_st
-.. autoclass:: cuda.bindings.driver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st
-.. autoclass:: cuda.bindings.driver.CUDA_LAUNCH_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2_st
-.. autoclass:: cuda.bindings.driver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2_st
-.. autoclass:: cuda.bindings.driver.CUarrayMapInfo_st
-.. autoclass:: cuda.bindings.driver.CUmemLocation_st
-.. autoclass:: cuda.bindings.driver.CUmemAllocationProp_st
-.. autoclass:: cuda.bindings.driver.CUmulticastObjectProp_st
-.. autoclass:: cuda.bindings.driver.CUmemAccessDesc_st
-.. autoclass:: cuda.bindings.driver.CUgraphExecUpdateResultInfo_st
-.. autoclass:: cuda.bindings.driver.CUmemPoolProps_st
-.. autoclass:: cuda.bindings.driver.CUmemPoolPtrExportData_st
-.. autoclass:: cuda.bindings.driver.CUDA_MEM_ALLOC_NODE_PARAMS_v1_st
-.. autoclass:: cuda.bindings.driver.CUDA_MEM_ALLOC_NODE_PARAMS_v2_st
-.. autoclass:: cuda.bindings.driver.CUDA_MEM_FREE_NODE_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_CHILD_GRAPH_NODE_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_EVENT_RECORD_NODE_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUDA_EVENT_WAIT_NODE_PARAMS_st
-.. autoclass:: cuda.bindings.driver.CUgraphNodeParams_st
-.. autoclass:: cuda.bindings.driver.CUeglFrame_st
-.. autoclass:: cuda.bindings.driver.CUipcMem_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUipcMem_flags.CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS
-
-
-        Automatically enable peer access between remote devices as needed
-
-.. autoclass:: cuda.bindings.driver.CUmemAttach_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAttach_flags.CU_MEM_ATTACH_GLOBAL
-
-
-        Memory can be accessed by any stream on any device
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAttach_flags.CU_MEM_ATTACH_HOST
-
-
-        Memory cannot be accessed by any stream on any device
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAttach_flags.CU_MEM_ATTACH_SINGLE
-
-
-        Memory can only be accessed by a single stream on the associated device
-
-.. autoclass:: cuda.bindings.driver.CUctx_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_SCHED_AUTO
-
-
-        Automatic scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_SCHED_SPIN
-
-
-        Set spin as default scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_SCHED_YIELD
-
-
-        Set yield as default scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_SCHED_BLOCKING_SYNC
-
-
-        Set blocking synchronization as default scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_BLOCKING_SYNC
-
-
-        Set blocking synchronization as default scheduling [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_SCHED_MASK
-
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_MAP_HOST
-
-
-        [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_LMEM_RESIZE_TO_MAX
-
-
-        Keep local memory allocation after launch
-
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_COREDUMP_ENABLE
-
-
-        Trigger coredumps from exceptions in this context
-
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_USER_COREDUMP_ENABLE
-
-
-        Enable user pipe to trigger coredumps in this context
-
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_SYNC_MEMOPS
-
-
-        Ensure synchronous memory operations on this context will synchronize
-
-
-    .. autoattribute:: cuda.bindings.driver.CUctx_flags.CU_CTX_FLAGS_MASK
-
-.. autoclass:: cuda.bindings.driver.CUevent_sched_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_sched_flags.CU_EVENT_SCHED_AUTO
-
-
-        Automatic scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_sched_flags.CU_EVENT_SCHED_SPIN
-
-
-        Set spin as default scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_sched_flags.CU_EVENT_SCHED_YIELD
-
-
-        Set yield as default scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_sched_flags.CU_EVENT_SCHED_BLOCKING_SYNC
-
-
-        Set blocking synchronization as default scheduling
-
-.. autoclass:: cuda.bindings.driver.cl_event_flags
-
-    .. autoattribute:: cuda.bindings.driver.cl_event_flags.NVCL_EVENT_SCHED_AUTO
-
-
-        Automatic scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.cl_event_flags.NVCL_EVENT_SCHED_SPIN
-
-
-        Set spin as default scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.cl_event_flags.NVCL_EVENT_SCHED_YIELD
-
-
-        Set yield as default scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.cl_event_flags.NVCL_EVENT_SCHED_BLOCKING_SYNC
-
-
-        Set blocking synchronization as default scheduling
-
-.. autoclass:: cuda.bindings.driver.cl_context_flags
-
-    .. autoattribute:: cuda.bindings.driver.cl_context_flags.NVCL_CTX_SCHED_AUTO
-
-
-        Automatic scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.cl_context_flags.NVCL_CTX_SCHED_SPIN
-
-
-        Set spin as default scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.cl_context_flags.NVCL_CTX_SCHED_YIELD
-
-
-        Set yield as default scheduling
-
-
-    .. autoattribute:: cuda.bindings.driver.cl_context_flags.NVCL_CTX_SCHED_BLOCKING_SYNC
-
-
-        Set blocking synchronization as default scheduling
-
-.. autoclass:: cuda.bindings.driver.CUstream_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUstream_flags.CU_STREAM_DEFAULT
-
-
-        Default stream flag
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstream_flags.CU_STREAM_NON_BLOCKING
-
-
-        Stream does not synchronize with stream 0 (the NULL stream)
-
-.. autoclass:: cuda.bindings.driver.CUevent_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_flags.CU_EVENT_DEFAULT
-
-
-        Default event flag
-
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_flags.CU_EVENT_BLOCKING_SYNC
-
-
-        Event uses blocking synchronization
-
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_flags.CU_EVENT_DISABLE_TIMING
-
-
-        Event will not record timing data
-
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_flags.CU_EVENT_INTERPROCESS
-
-
-        Event is suitable for interprocess use. CU_EVENT_DISABLE_TIMING must be set
-
-.. autoclass:: cuda.bindings.driver.CUevent_record_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_record_flags.CU_EVENT_RECORD_DEFAULT
-
-
-        Default event record flag
-
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_record_flags.CU_EVENT_RECORD_EXTERNAL
-
-
-        When using stream capture, create an event record node instead of the default behavior. This flag is invalid when used outside of capture.
-
-.. autoclass:: cuda.bindings.driver.CUevent_wait_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_wait_flags.CU_EVENT_WAIT_DEFAULT
-
-
-        Default event wait flag
-
-
-    .. autoattribute:: cuda.bindings.driver.CUevent_wait_flags.CU_EVENT_WAIT_EXTERNAL
-
-
-        When using stream capture, create an event wait node instead of the default behavior. This flag is invalid when used outside of capture.
-
-.. autoclass:: cuda.bindings.driver.CUstreamWaitValue_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamWaitValue_flags.CU_STREAM_WAIT_VALUE_GEQ
-
-
-        Wait until (int32_t)(*addr - value) >= 0 (or int64_t for 64 bit values). Note this is a cyclic comparison which ignores wraparound. (Default behavior.)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamWaitValue_flags.CU_STREAM_WAIT_VALUE_EQ
-
-
-        Wait until *addr == value.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamWaitValue_flags.CU_STREAM_WAIT_VALUE_AND
-
-
-        Wait until (*addr & value) != 0.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamWaitValue_flags.CU_STREAM_WAIT_VALUE_NOR
-
-
-        Wait until ~(*addr | value) != 0. Support for this operation can be queried with :py:obj:`~.cuDeviceGetAttribute()` and :py:obj:`~.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamWaitValue_flags.CU_STREAM_WAIT_VALUE_FLUSH
-
-
-        Follow the wait operation with a flush of outstanding remote writes. This means that, if a remote write operation is guaranteed to have reached the device before the wait can be satisfied, that write is guaranteed to be visible to downstream device work. The device is permitted to reorder remote writes internally. For example, this flag would be required if two remote writes arrive in a defined order, the wait is satisfied by the second write, and downstream work needs to observe the first write. Support for this operation is restricted to selected platforms and can be queried with :py:obj:`~.CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES`.
-
-.. autoclass:: cuda.bindings.driver.CUstreamWriteValue_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamWriteValue_flags.CU_STREAM_WRITE_VALUE_DEFAULT
-
-
-        Default behavior
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamWriteValue_flags.CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER
-
-
-        Permits the write to be reordered with writes which were issued before it, as a performance optimization. Normally, :py:obj:`~.cuStreamWriteValue32` will provide a memory fence before the write, which has similar semantics to __threadfence_system() but is scoped to the stream rather than a CUDA thread. This flag is not supported in the v2 API.
-
-.. autoclass:: cuda.bindings.driver.CUstreamBatchMemOpType
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamBatchMemOpType.CU_STREAM_MEM_OP_WAIT_VALUE_32
-
-
-        Represents a :py:obj:`~.cuStreamWaitValue32` operation
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamBatchMemOpType.CU_STREAM_MEM_OP_WRITE_VALUE_32
-
-
-        Represents a :py:obj:`~.cuStreamWriteValue32` operation
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamBatchMemOpType.CU_STREAM_MEM_OP_WAIT_VALUE_64
-
-
-        Represents a :py:obj:`~.cuStreamWaitValue64` operation
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamBatchMemOpType.CU_STREAM_MEM_OP_WRITE_VALUE_64
-
-
-        Represents a :py:obj:`~.cuStreamWriteValue64` operation
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamBatchMemOpType.CU_STREAM_MEM_OP_BARRIER
-
-
-        Insert a memory barrier of the specified type
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamBatchMemOpType.CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES
-
-
-        This has the same effect as :py:obj:`~.CU_STREAM_WAIT_VALUE_FLUSH`, but as a standalone operation.
-
-.. autoclass:: cuda.bindings.driver.CUstreamMemoryBarrier_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamMemoryBarrier_flags.CU_STREAM_MEMORY_BARRIER_TYPE_SYS
-
-
-        System-wide memory barrier.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamMemoryBarrier_flags.CU_STREAM_MEMORY_BARRIER_TYPE_GPU
-
-
-        Limit memory barrier scope to the GPU.
-
-.. autoclass:: cuda.bindings.driver.CUoccupancy_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUoccupancy_flags.CU_OCCUPANCY_DEFAULT
-
-
-        Default behavior
-
-
-    .. autoattribute:: cuda.bindings.driver.CUoccupancy_flags.CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE
-
-
-        Assume global caching is enabled and cannot be automatically turned off
-
-.. autoclass:: cuda.bindings.driver.CUstreamUpdateCaptureDependencies_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamUpdateCaptureDependencies_flags.CU_STREAM_ADD_CAPTURE_DEPENDENCIES
-
-
-        Add new nodes to the dependency set
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamUpdateCaptureDependencies_flags.CU_STREAM_SET_CAPTURE_DEPENDENCIES
-
-
-        Replace the dependency set with the new nodes
-
-.. autoclass:: cuda.bindings.driver.CUasyncNotificationType
-
-    .. autoattribute:: cuda.bindings.driver.CUasyncNotificationType.CU_ASYNC_NOTIFICATION_TYPE_OVER_BUDGET
-
-.. autoclass:: cuda.bindings.driver.CUarray_format
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_UNSIGNED_INT8
-
-
-        Unsigned 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_UNSIGNED_INT16
-
-
-        Unsigned 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_UNSIGNED_INT32
-
-
-        Unsigned 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_SIGNED_INT8
-
-
-        Signed 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_SIGNED_INT16
-
-
-        Signed 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_SIGNED_INT32
-
-
-        Signed 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_HALF
-
-
-        16-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_FLOAT
-
-
-        32-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_NV12
-
-
-        8-bit YUV planar format, with 4:2:0 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_UNORM_INT8X1
-
-
-        1 channel unsigned 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_UNORM_INT8X2
-
-
-        2 channel unsigned 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_UNORM_INT8X4
-
-
-        4 channel unsigned 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_UNORM_INT16X1
-
-
-        1 channel unsigned 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_UNORM_INT16X2
-
-
-        2 channel unsigned 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_UNORM_INT16X4
-
-
-        4 channel unsigned 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_SNORM_INT8X1
-
-
-        1 channel signed 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_SNORM_INT8X2
-
-
-        2 channel signed 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_SNORM_INT8X4
-
-
-        4 channel signed 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_SNORM_INT16X1
-
-
-        1 channel signed 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_SNORM_INT16X2
-
-
-        2 channel signed 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_SNORM_INT16X4
-
-
-        4 channel signed 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC1_UNORM
-
-
-        4 channel unsigned normalized block-compressed (BC1 compression) format
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC1_UNORM_SRGB
-
-
-        4 channel unsigned normalized block-compressed (BC1 compression) format with sRGB encoding
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC2_UNORM
-
-
-        4 channel unsigned normalized block-compressed (BC2 compression) format
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC2_UNORM_SRGB
-
-
-        4 channel unsigned normalized block-compressed (BC2 compression) format with sRGB encoding
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC3_UNORM
-
-
-        4 channel unsigned normalized block-compressed (BC3 compression) format
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC3_UNORM_SRGB
-
-
-        4 channel unsigned normalized block-compressed (BC3 compression) format with sRGB encoding
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC4_UNORM
-
-
-        1 channel unsigned normalized block-compressed (BC4 compression) format
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC4_SNORM
-
-
-        1 channel signed normalized block-compressed (BC4 compression) format
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC5_UNORM
-
-
-        2 channel unsigned normalized block-compressed (BC5 compression) format
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC5_SNORM
-
-
-        2 channel signed normalized block-compressed (BC5 compression) format
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC6H_UF16
-
-
-        3 channel unsigned half-float block-compressed (BC6H compression) format
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC6H_SF16
-
-
-        3 channel signed half-float block-compressed (BC6H compression) format
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC7_UNORM
-
-
-        4 channel unsigned normalized block-compressed (BC7 compression) format
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_BC7_UNORM_SRGB
-
-
-        4 channel unsigned normalized block-compressed (BC7 compression) format with sRGB encoding
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_P010
-
-
-        10-bit YUV planar format, with 4:2:0 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_P016
-
-
-        16-bit YUV planar format, with 4:2:0 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_NV16
-
-
-        8-bit YUV planar format, with 4:2:2 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_P210
-
-
-        10-bit YUV planar format, with 4:2:2 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_P216
-
-
-        16-bit YUV planar format, with 4:2:2 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_YUY2
-
-
-        2 channel, 8-bit YUV packed planar format, with 4:2:2 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_Y210
-
-
-        2 channel, 10-bit YUV packed planar format, with 4:2:2 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_Y216
-
-
-        2 channel, 16-bit YUV packed planar format, with 4:2:2 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_AYUV
-
-
-        4 channel, 8-bit YUV packed planar format, with 4:4:4 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_Y410
-
-
-        10-bit YUV packed planar format, with 4:4:4 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_Y416
-
-
-        4 channel, 12-bit YUV packed planar format, with 4:4:4 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_Y444_PLANAR8
-
-
-        3 channel 8-bit YUV planar format, with 4:4:4 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_Y444_PLANAR10
-
-
-        3 channel 10-bit YUV planar format, with 4:4:4 sampling
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_format.CU_AD_FORMAT_MAX
-
-.. autoclass:: cuda.bindings.driver.CUaddress_mode
-
-    .. autoattribute:: cuda.bindings.driver.CUaddress_mode.CU_TR_ADDRESS_MODE_WRAP
-
-
-        Wrapping address mode
-
-
-    .. autoattribute:: cuda.bindings.driver.CUaddress_mode.CU_TR_ADDRESS_MODE_CLAMP
-
-
-        Clamp to edge address mode
-
-
-    .. autoattribute:: cuda.bindings.driver.CUaddress_mode.CU_TR_ADDRESS_MODE_MIRROR
-
-
-        Mirror address mode
-
-
-    .. autoattribute:: cuda.bindings.driver.CUaddress_mode.CU_TR_ADDRESS_MODE_BORDER
-
-
-        Border address mode
-
-.. autoclass:: cuda.bindings.driver.CUfilter_mode
-
-    .. autoattribute:: cuda.bindings.driver.CUfilter_mode.CU_TR_FILTER_MODE_POINT
-
-
-        Point filter mode
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfilter_mode.CU_TR_FILTER_MODE_LINEAR
-
-
-        Linear filter mode
-
-.. autoclass:: cuda.bindings.driver.CUdevice_attribute
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK
-
-
-        Maximum number of threads per block
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X
-
-
-        Maximum block dimension X
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y
-
-
-        Maximum block dimension Y
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z
-
-
-        Maximum block dimension Z
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X
-
-
-        Maximum grid dimension X
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y
-
-
-        Maximum grid dimension Y
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z
-
-
-        Maximum grid dimension Z
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK
-
-
-        Maximum shared memory available per block in bytes
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK
-
-
-        Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY
-
-
-        Memory available on device for constant variables in a CUDA C kernel in bytes
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE
-
-
-        Warp size in threads
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH
-
-
-        Maximum pitch in bytes allowed by memory copies
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK
-
-
-        Maximum number of 32-bit registers available per block
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK
-
-
-        Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE
-
-
-        Typical clock frequency in kilohertz
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT
-
-
-        Alignment requirement for textures
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_OVERLAP
-
-
-        Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
-
-
-        Number of multiprocessors on device
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT
-
-
-        Specifies whether there is a run time limit on kernels
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_INTEGRATED
-
-
-        Device is integrated with host memory
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY
-
-
-        Device can map host memory into CUDA address space
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE
-
-
-        Compute mode (See :py:obj:`~.CUcomputemode` for details)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH
-
-
-        Maximum 1D texture width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH
-
-
-        Maximum 2D texture width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT
-
-
-        Maximum 2D texture height
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH
-
-
-        Maximum 3D texture width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT
-
-
-        Maximum 3D texture height
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH
-
-
-        Maximum 3D texture depth
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH
-
-
-        Maximum 2D layered texture width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT
-
-
-        Maximum 2D layered texture height
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS
-
-
-        Maximum layers in a 2D layered texture
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH
-
-
-        Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT
-
-
-        Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES
-
-
-        Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT
-
-
-        Alignment requirement for surfaces
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS
-
-
-        Device can possibly execute multiple kernels concurrently
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED
-
-
-        Device has ECC support enabled
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID
-
-
-        PCI bus ID of the device
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID
-
-
-        PCI device ID of the device
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER
-
-
-        Device is using TCC driver model
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE
-
-
-        Peak memory clock frequency in kilohertz
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH
-
-
-        Global memory bus width in bits
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE
-
-
-        Size of L2 cache in bytes
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR
-
-
-        Maximum resident threads per multiprocessor
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT
-
-
-        Number of asynchronous engines
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING
-
-
-        Device shares a unified address space with the host
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH
-
-
-        Maximum 1D layered texture width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS
-
-
-        Maximum layers in a 1D layered texture
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER
-
-
-        Deprecated, do not use.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH
-
-
-        Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT
-
-
-        Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE
-
-
-        Alternate maximum 3D texture width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE
-
-
-        Alternate maximum 3D texture height
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE
-
-
-        Alternate maximum 3D texture depth
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID
-
-
-        PCI domain ID of the device
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT
-
-
-        Pitch alignment requirement for textures
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH
-
-
-        Maximum cubemap texture width/height
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH
-
-
-        Maximum cubemap layered texture width/height
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS
-
-
-        Maximum layers in a cubemap layered texture
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH
-
-
-        Maximum 1D surface width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH
-
-
-        Maximum 2D surface width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT
-
-
-        Maximum 2D surface height
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH
-
-
-        Maximum 3D surface width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT
-
-
-        Maximum 3D surface height
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH
-
-
-        Maximum 3D surface depth
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH
-
-
-        Maximum 1D layered surface width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS
-
-
-        Maximum layers in a 1D layered surface
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH
-
-
-        Maximum 2D layered surface width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT
-
-
-        Maximum 2D layered surface height
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS
-
-
-        Maximum layers in a 2D layered surface
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH
-
-
-        Maximum cubemap surface width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH
-
-
-        Maximum cubemap layered surface width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS
-
-
-        Maximum layers in a cubemap layered surface
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH
-
-
-        Deprecated, do not use. Use cudaDeviceGetTexture1DLinearMaxWidth() or :py:obj:`~.cuDeviceGetTexture1DLinearMaxWidth()` instead.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH
-
-
-        Maximum 2D linear texture width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT
-
-
-        Maximum 2D linear texture height
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH
-
-
-        Maximum 2D linear texture pitch in bytes
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH
-
-
-        Maximum mipmapped 2D texture width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT
-
-
-        Maximum mipmapped 2D texture height
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR
-
-
-        Major compute capability version number
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR
-
-
-        Minor compute capability version number
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH
-
-
-        Maximum mipmapped 1D texture width
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED
-
-
-        Device supports stream priorities
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED
-
-
-        Device supports caching globals in L1
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED
-
-
-        Device supports caching locals in L1
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR
-
-
-        Maximum shared memory available per multiprocessor in bytes
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR
-
-
-        Maximum number of 32-bit registers available per multiprocessor
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY
-
-
-        Device can allocate managed memory on this system
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD
-
-
-        Device is on a multi-GPU board
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID
-
-
-        Unique id for a group of devices on the same multi-GPU board
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED
-
-
-        Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO
-
-
-        Ratio of single precision performance (in floating-point operations per second) to double precision performance
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS
-
-
-        Device supports coherently accessing pageable memory without calling cudaHostRegister on it
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS
-
-
-        Device can coherently access managed memory concurrently with the CPU
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED
-
-
-        Device supports compute preemption.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM
-
-
-        Device can access host registered memory at the same virtual address as the CPU
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS_V1
-
-
-        Deprecated, along with v1 MemOps API, :py:obj:`~.cuStreamBatchMemOp` and related APIs are supported.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V1
-
-
-        Deprecated, along with v1 MemOps API, 64-bit operations are supported in :py:obj:`~.cuStreamBatchMemOp` and related APIs.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1
-
-
-        Deprecated, along with v1 MemOps API, :py:obj:`~.CU_STREAM_WAIT_VALUE_NOR` is supported.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH
-
-
-        Device supports launching cooperative kernels via :py:obj:`~.cuLaunchCooperativeKernel`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH
-
-
-        Deprecated, :py:obj:`~.cuLaunchCooperativeKernelMultiDevice` is deprecated.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
-
-
-        Maximum optin shared memory per block
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES
-
-
-        The :py:obj:`~.CU_STREAM_WAIT_VALUE_FLUSH` flag and the :py:obj:`~.CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES` MemOp are supported on the device. See :py:obj:`~.Stream Memory Operations` for additional details.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED
-
-
-        Device supports host memory registration via :py:obj:`~.cudaHostRegister`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES
-
-
-        Device accesses pageable memory via the host's page tables.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST
-
-
-        The host can directly access managed memory on the device without migration.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED
-
-
-        Deprecated, Use CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED
-
-
-        Device supports virtual memory management APIs like :py:obj:`~.cuMemAddressReserve`, :py:obj:`~.cuMemCreate`, :py:obj:`~.cuMemMap` and related APIs
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED
-
-
-        Device supports exporting memory to a posix file descriptor with :py:obj:`~.cuMemExportToShareableHandle`, if requested via :py:obj:`~.cuMemCreate`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED
-
-
-        Device supports exporting memory to a Win32 NT handle with :py:obj:`~.cuMemExportToShareableHandle`, if requested via :py:obj:`~.cuMemCreate`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED
-
-
-        Device supports exporting memory to a Win32 KMT handle with :py:obj:`~.cuMemExportToShareableHandle`, if requested via :py:obj:`~.cuMemCreate`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR
-
-
-        Maximum number of blocks per multiprocessor
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED
-
-
-        Device supports compression of memory
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE
-
-
-        Maximum L2 persisting lines capacity setting in bytes.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE
-
-
-        Maximum value of :py:obj:`~.CUaccessPolicyWindow.num_bytes`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED
-
-
-        Device supports specifying the GPUDirect RDMA flag with :py:obj:`~.cuMemCreate`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK
-
-
-        Shared memory reserved by CUDA driver per block in bytes
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED
-
-
-        Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED
-
-
-        Device supports using the :py:obj:`~.cuMemHostRegister` flag :py:obj:`~.CU_MEMHOSTERGISTER_READ_ONLY` to register memory that must be mapped as read-only to the GPU
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED
-
-
-        External timeline semaphore interop is supported on the device
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED
-
-
-        Device supports using the :py:obj:`~.cuMemAllocAsync` and :py:obj:`~.cuMemPool` family of APIs
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED
-
-
-        Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS
-
-
-        The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the :py:obj:`~.CUflushGPUDirectRDMAWritesOptions` enum
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING
-
-
-        GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See :py:obj:`~.CUGPUDirectRDMAWritesOrdering` for the numerical values returned here.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES
-
-
-        Handle types supported with mempool based IPC
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH
-
-
-        Indicates device supports cluster launch
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED
-
-
-        Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS
-
-
-        64-bit operations are supported in :py:obj:`~.cuStreamBatchMemOp` and related MemOp APIs.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR
-
-
-        :py:obj:`~.CU_STREAM_WAIT_VALUE_NOR` is supported by MemOp APIs.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED
-
-
-        Device supports buffer sharing with dma_buf mechanism.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED
-
-
-        Device supports IPC Events.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT
-
-
-        Number of memory domains the device supports.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED
-
-
-        Device supports accessing memory using Tensor Map.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED
-
-
-        Device supports exporting memory to a fabric handle with :py:obj:`~.cuMemExportToShareableHandle()` or requested with :py:obj:`~.cuMemCreate()`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS
-
-
-        Device supports unified function pointers.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_CONFIG
-
-
-        NUMA configuration of a device: value is of type :py:obj:`~.CUdeviceNumaConfig` enum
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_NUMA_ID
-
-
-        NUMA node ID of the GPU memory
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED
-
-
-        Device supports switch multicast and reduction operations.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MPS_ENABLED
-
-
-        Indicates if contexts created on this device will be shared via MPS
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID
-
-
-        NUMA ID of the host node closest to the device. Returns -1 when system does not support NUMA.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED
-
-
-        Device supports CIG with D3D12.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX
-
-.. autoclass:: cuda.bindings.driver.CUpointer_attribute
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_CONTEXT
-
-
-        The :py:obj:`~.CUcontext` on which a pointer was allocated or registered
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMORY_TYPE
-
-
-        The :py:obj:`~.CUmemorytype` describing the physical location of a pointer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_POINTER
-
-
-        The address at which a pointer's memory may be accessed on the device
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_HOST_POINTER
-
-
-        The address at which a pointer's memory may be accessed on the host
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_P2P_TOKENS
-
-
-        A pair of tokens for use with the nv-p2p.h Linux kernel interface
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_SYNC_MEMOPS
-
-
-        Synchronize every synchronous memory operation initiated on this region
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_BUFFER_ID
-
-
-        A process-wide unique ID for an allocated memory region
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_MANAGED
-
-
-        Indicates if the pointer points to managed memory
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL
-
-
-        A device ordinal of a device on which a pointer was allocated or registered
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE
-
-
-        1 if this pointer maps to an allocation that is suitable for :py:obj:`~.cudaIpcGetMemHandle`, 0 otherwise
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_RANGE_START_ADDR
-
-
-        Starting address for this requested pointer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_RANGE_SIZE
-
-
-        Size of the address range for this requested pointer
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MAPPED
-
-
-        1 if this pointer is in a valid address range that is mapped to a backing allocation, 0 otherwise
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES
-
-
-        Bitmask of allowed :py:obj:`~.CUmemAllocationHandleType` for this allocation
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE
-
-
-        1 if the memory this pointer is referencing can be used with the GPUDirect RDMA API
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_ACCESS_FLAGS
-
-
-        Returns the access flags the device associated with the current context has on the corresponding memory referenced by the pointer given
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE
-
-
-        Returns the mempool handle for the allocation if it was allocated from a mempool. Otherwise returns NULL.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MAPPING_SIZE
-
-
-        Size of the actual underlying mapping that the pointer belongs to
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR
-
-
-        The start address of the mapping that the pointer belongs to
-
-
-    .. autoattribute:: cuda.bindings.driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID
-
-
-        A process-wide unique id corresponding to the physical allocation the pointer belongs to
-
-.. autoclass:: cuda.bindings.driver.CUfunction_attribute
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK
-
-
-        The maximum number of threads per block, beyond which a launch of the function would fail. This number depends on both the function and the device on which the function is currently loaded.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES
-
-
-        The size in bytes of statically-allocated shared memory required by this function. This does not include dynamically-allocated shared memory requested by the user at runtime.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES
-
-
-        The size in bytes of user-allocated constant memory required by this function.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES
-
-
-        The size in bytes of local memory used by each thread of this function.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_NUM_REGS
-
-
-        The number of registers used by each thread of this function.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_PTX_VERSION
-
-
-        The PTX virtual architecture version for which the function was compiled. This value is the major PTX version * 10 + the minor PTX version, so a PTX version 1.3 function would return the value 13. Note that this may return the undefined value of 0 for cubins compiled prior to CUDA 3.0.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_BINARY_VERSION
-
-
-        The binary architecture version for which the function was compiled. This value is the major binary version * 10 + the minor binary version, so a binary version 1.3 function would return the value 13. Note that this will return a value of 10 for legacy cubins that do not have a properly-encoded binary architecture version.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CACHE_MODE_CA
-
-
-        The attribute to indicate whether the function has been compiled with user specified option "-Xptxas --dlcm=ca" set .
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES
-
-
-        The maximum size in bytes of dynamically-allocated shared memory that can be used by this function. If the user-specified dynamic shared memory size is larger than this value, the launch will fail. See :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
-
-
-        On devices where the L1 cache and shared memory use the same hardware resources, this sets the shared memory carveout preference, in percent of the total shared memory. Refer to :py:obj:`~.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR`. This is only a hint, and the driver can choose a different ratio if required to execute the function. See :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET
-
-
-        If this attribute is set, the kernel must launch with a valid cluster size specified. See :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH
-
-
-        The required cluster width in blocks. The values must either all be 0 or all be positive. The validity of the cluster dimensions is otherwise checked at launch time.
-
-
-
-        If the value is set during compile time, it cannot be set at runtime. Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED. See :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT
-
-
-        The required cluster height in blocks. The values must either all be 0 or all be positive. The validity of the cluster dimensions is otherwise checked at launch time.
-
-
-
-        If the value is set during compile time, it cannot be set at runtime. Setting it at runtime should return CUDA_ERROR_NOT_PERMITTED. See :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH
-
-
-        The required cluster depth in blocks. The values must either all be 0 or all be positive. The validity of the cluster dimensions is otherwise checked at launch time.
-
-
-
-        If the value is set during compile time, it cannot be set at runtime. Setting it at runtime should return CUDA_ERROR_NOT_PERMITTED. See :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED
-
-
-        Whether the function can be launched with non-portable cluster size. 1 is allowed, 0 is disallowed. A non-portable cluster size may only function on the specific SKUs the program is tested on. The launch might fail if the program is run on a different hardware platform.
-
-
-
-        CUDA API provides cudaOccupancyMaxActiveClusters to assist with checking whether the desired size can be launched on the current device.
-
-
-
-        Portable Cluster Size
-
-
-
-        A portable cluster size is guaranteed to be functional on all compute capabilities higher than the target compute capability. The portable cluster size for sm_90 is 8 blocks per cluster. This value may increase for future compute capabilities.
-
-
-
-        The specific hardware unit may support higher cluster sizes that’s not guaranteed to be portable. See :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
-
-
-        The block scheduling policy of a function. The value type is CUclusterSchedulingPolicy / cudaClusterSchedulingPolicy. See :py:obj:`~.cuFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunction_attribute.CU_FUNC_ATTRIBUTE_MAX
-
-.. autoclass:: cuda.bindings.driver.CUfunc_cache
-
-    .. autoattribute:: cuda.bindings.driver.CUfunc_cache.CU_FUNC_CACHE_PREFER_NONE
-
-
-        no preference for shared memory or L1 (default)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunc_cache.CU_FUNC_CACHE_PREFER_SHARED
-
-
-        prefer larger shared memory and smaller L1 cache
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunc_cache.CU_FUNC_CACHE_PREFER_L1
-
-
-        prefer larger L1 cache and smaller shared memory
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunc_cache.CU_FUNC_CACHE_PREFER_EQUAL
-
-
-        prefer equal sized L1 cache and shared memory
-
-.. autoclass:: cuda.bindings.driver.CUsharedconfig
-
-    .. autoattribute:: cuda.bindings.driver.CUsharedconfig.CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE
-
-
-        set default shared memory bank size
-
-
-    .. autoattribute:: cuda.bindings.driver.CUsharedconfig.CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE
-
-
-        set shared memory bank width to four bytes
-
-
-    .. autoattribute:: cuda.bindings.driver.CUsharedconfig.CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE
-
-
-        set shared memory bank width to eight bytes
-
-.. autoclass:: cuda.bindings.driver.CUshared_carveout
-
-    .. autoattribute:: cuda.bindings.driver.CUshared_carveout.CU_SHAREDMEM_CARVEOUT_DEFAULT
-
-
-        No preference for shared memory or L1 (default)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUshared_carveout.CU_SHAREDMEM_CARVEOUT_MAX_SHARED
-
-
-        Prefer maximum available shared memory, minimum L1 cache
-
-
-    .. autoattribute:: cuda.bindings.driver.CUshared_carveout.CU_SHAREDMEM_CARVEOUT_MAX_L1
-
-
-        Prefer maximum available L1 cache, minimum shared memory
-
-.. autoclass:: cuda.bindings.driver.CUmemorytype
-
-    .. autoattribute:: cuda.bindings.driver.CUmemorytype.CU_MEMORYTYPE_HOST
-
-
-        Host memory
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemorytype.CU_MEMORYTYPE_DEVICE
-
-
-        Device memory
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemorytype.CU_MEMORYTYPE_ARRAY
-
-
-        Array memory
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemorytype.CU_MEMORYTYPE_UNIFIED
-
-
-        Unified device or host memory
-
-.. autoclass:: cuda.bindings.driver.CUcomputemode
-
-    .. autoattribute:: cuda.bindings.driver.CUcomputemode.CU_COMPUTEMODE_DEFAULT
-
-
-        Default compute mode (Multiple contexts allowed per device)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUcomputemode.CU_COMPUTEMODE_PROHIBITED
-
-
-        Compute-prohibited mode (No contexts can be created on this device at this time)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUcomputemode.CU_COMPUTEMODE_EXCLUSIVE_PROCESS
-
-
-        Compute-exclusive-process mode (Only one context used by a single process can be present on this device at a time)
-
-.. autoclass:: cuda.bindings.driver.CUmem_advise
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_advise.CU_MEM_ADVISE_SET_READ_MOSTLY
-
-
-        Data will mostly be read and only occasionally be written to
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_advise.CU_MEM_ADVISE_UNSET_READ_MOSTLY
-
-
-        Undo the effect of :py:obj:`~.CU_MEM_ADVISE_SET_READ_MOSTLY`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_advise.CU_MEM_ADVISE_SET_PREFERRED_LOCATION
-
-
-        Set the preferred location for the data as the specified device
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_advise.CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION
-
-
-        Clear the preferred location for the data
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_advise.CU_MEM_ADVISE_SET_ACCESSED_BY
-
-
-        Data will be accessed by the specified device, so prevent page faults as much as possible
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_advise.CU_MEM_ADVISE_UNSET_ACCESSED_BY
-
-
-        Let the Unified Memory subsystem decide on the page faulting policy for the specified device
-
-.. autoclass:: cuda.bindings.driver.CUmem_range_attribute
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY
-
-
-        Whether the range will mostly be read and only occasionally be written to
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION
-
-
-        The preferred location of the range
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY
-
-
-        Memory range has :py:obj:`~.CU_MEM_ADVISE_SET_ACCESSED_BY` set for specified device
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION
-
-
-        The last location to which the range was prefetched
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_TYPE
-
-
-        The preferred location type of the range
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION_ID
-
-
-        The preferred location id of the range
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_TYPE
-
-
-        The last location type to which the range was prefetched
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmem_range_attribute.CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION_ID
-
-
-        The last location id to which the range was prefetched
-
-.. autoclass:: cuda.bindings.driver.CUjit_option
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_MAX_REGISTERS
-
-
-        Max number of registers that a thread may use.
-
-        Option type: unsigned int
-
-        Applies to: compiler only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_THREADS_PER_BLOCK
-
-
-        IN: Specifies minimum number of threads per block to target compilation for
-
-        OUT: Returns the number of threads the compiler actually targeted. This restricts the resource utilization of the compiler (e.g. max registers) such that a block with the given number of threads should be able to launch based on register limitations. Note, this option does not currently take into account any other resource limitations, such as shared memory utilization.
-
-        Cannot be combined with :py:obj:`~.CU_JIT_TARGET`.
-
-        Option type: unsigned int
-
-        Applies to: compiler only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_WALL_TIME
-
-
-        Overwrites the option value with the total wall clock time, in milliseconds, spent in the compiler and linker
-
-        Option type: float
-
-        Applies to: compiler and linker
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_INFO_LOG_BUFFER
-
-
-        Pointer to a buffer in which to print any log messages that are informational in nature (the buffer size is specified via option :py:obj:`~.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`)
-
-        Option type: char *
-
-        Applies to: compiler and linker
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
-
-
-        IN: Log buffer size in bytes. Log messages will be capped at this size (including null terminator)
-
-        OUT: Amount of log buffer filled with messages
-
-        Option type: unsigned int
-
-        Applies to: compiler and linker
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_ERROR_LOG_BUFFER
-
-
-        Pointer to a buffer in which to print any log messages that reflect errors (the buffer size is specified via option :py:obj:`~.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES`)
-
-        Option type: char *
-
-        Applies to: compiler and linker
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
-
-
-        IN: Log buffer size in bytes. Log messages will be capped at this size (including null terminator)
-
-        OUT: Amount of log buffer filled with messages
-
-        Option type: unsigned int
-
-        Applies to: compiler and linker
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_OPTIMIZATION_LEVEL
-
-
-        Level of optimizations to apply to generated code (0 - 4), with 4 being the default and highest level of optimizations.
-
-        Option type: unsigned int
-
-        Applies to: compiler only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_TARGET_FROM_CUCONTEXT
-
-
-        No option value required. Determines the target based on the current attached context (default)
-
-        Option type: No option value needed
-
-        Applies to: compiler and linker
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_TARGET
-
-
-        Target is chosen based on supplied :py:obj:`~.CUjit_target`. Cannot be combined with :py:obj:`~.CU_JIT_THREADS_PER_BLOCK`.
-
-        Option type: unsigned int for enumerated type :py:obj:`~.CUjit_target`
-
-        Applies to: compiler and linker
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_FALLBACK_STRATEGY
-
-
-        Specifies choice of fallback strategy if matching cubin is not found. Choice is based on supplied :py:obj:`~.CUjit_fallback`. This option cannot be used with cuLink* APIs as the linker requires exact matches.
-
-        Option type: unsigned int for enumerated type :py:obj:`~.CUjit_fallback`
-
-        Applies to: compiler only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_GENERATE_DEBUG_INFO
-
-
-        Specifies whether to create debug information in output (-g) (0: false, default)
-
-        Option type: int
-
-        Applies to: compiler and linker
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_LOG_VERBOSE
-
-
-        Generate verbose log messages (0: false, default)
-
-        Option type: int
-
-        Applies to: compiler and linker
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_GENERATE_LINE_INFO
-
-
-        Generate line number information (-lineinfo) (0: false, default)
-
-        Option type: int
-
-        Applies to: compiler only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_CACHE_MODE
-
-
-        Specifies whether to enable caching explicitly (-dlcm) 
-
-        Choice is based on supplied :py:obj:`~.CUjit_cacheMode_enum`.
-
-        Option type: unsigned int for enumerated type :py:obj:`~.CUjit_cacheMode_enum`
-
-        Applies to: compiler only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_NEW_SM3X_OPT
-
-
-        [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_FAST_COMPILE
-
-
-        This jit option is used for internal purpose only.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_GLOBAL_SYMBOL_NAMES
-
-
-        Array of device symbol names that will be relocated to the corresponding host addresses stored in :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_ADDRESSES`.
-
-        Must contain :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_COUNT` entries.
-
-        When loading a device module, driver will relocate all encountered unresolved symbols to the host addresses.
-
-        It is only allowed to register symbols that correspond to unresolved global variables.
-
-        It is illegal to register the same device symbol at multiple addresses.
-
-        Option type: const char **
-
-        Applies to: dynamic linker only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_GLOBAL_SYMBOL_ADDRESSES
-
-
-        Array of host addresses that will be used to relocate corresponding device symbols stored in :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_NAMES`.
-
-        Must contain :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_COUNT` entries.
-
-        Option type: void **
-
-        Applies to: dynamic linker only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_GLOBAL_SYMBOL_COUNT
-
-
-        Number of entries in :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_NAMES` and :py:obj:`~.CU_JIT_GLOBAL_SYMBOL_ADDRESSES` arrays.
-
-        Option type: unsigned int
-
-        Applies to: dynamic linker only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_LTO
-
-
-        [Deprecated]
-
-
-
-        Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_FTZ
-
-
-        [Deprecated]
-
-
-
-        Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_PREC_DIV
-
-
-        [Deprecated]
-
-
-
-        Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_PREC_SQRT
-
-
-        [Deprecated]
-
-
-
-        Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_FMA
-
-
-        [Deprecated]
-
-
-
-        Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_REFERENCED_KERNEL_NAMES
-
-
-        [Deprecated]
-
-
-
-        Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_REFERENCED_KERNEL_COUNT
-
-
-        [Deprecated]
-
-
-
-        Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_REFERENCED_VARIABLE_NAMES
-
-
-        [Deprecated]
-
-
-
-        Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_REFERENCED_VARIABLE_COUNT
-
-
-        [Deprecated]
-
-
-
-        Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES
-
-
-        [Deprecated]
-
-
-
-        Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_POSITION_INDEPENDENT_CODE
-
-
-        Generate position independent code (0: false)
-
-        Option type: int
-
-        Applies to: compiler only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_MIN_CTA_PER_SM
-
-
-        This option hints to the JIT compiler the minimum number of CTAs from the kernel’s grid to be mapped to a SM. This option is ignored when used together with :py:obj:`~.CU_JIT_MAX_REGISTERS` or :py:obj:`~.CU_JIT_THREADS_PER_BLOCK`. Optimizations based on this option need :py:obj:`~.CU_JIT_MAX_THREADS_PER_BLOCK` to be specified as well. For kernels already using PTX directive .minnctapersm, this option will be ignored by default. Use :py:obj:`~.CU_JIT_OVERRIDE_DIRECTIVE_VALUES` to let this option take precedence over the PTX directive. Option type: unsigned int
-
-        Applies to: compiler only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_MAX_THREADS_PER_BLOCK
-
-
-        Maximum number of threads in a thread block, computed as the product of the maximum extent specified for each dimension of the block. This limit is guaranteed not to be exceeded in any invocation of the kernel. Exceeding the maximum number of threads results in runtime error or kernel launch failure. For kernels already using PTX directive .maxntid, this option will be ignored by default. Use :py:obj:`~.CU_JIT_OVERRIDE_DIRECTIVE_VALUES` to let this option take precedence over the PTX directive. Option type: int
-
-        Applies to: compiler only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_OVERRIDE_DIRECTIVE_VALUES
-
-
-        This option lets the values specified using :py:obj:`~.CU_JIT_MAX_REGISTERS`, :py:obj:`~.CU_JIT_THREADS_PER_BLOCK`, :py:obj:`~.CU_JIT_MAX_THREADS_PER_BLOCK` and :py:obj:`~.CU_JIT_MIN_CTA_PER_SM` take precedence over any PTX directives. (0: Disable, default; 1: Enable) Option type: int
-
-        Applies to: compiler only
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_option.CU_JIT_NUM_OPTIONS
-
-.. autoclass:: cuda.bindings.driver.CUjit_target
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_30
-
-
-        Compute device class 3.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_32
-
-
-        Compute device class 3.2
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_35
-
-
-        Compute device class 3.5
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_37
-
-
-        Compute device class 3.7
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_50
-
-
-        Compute device class 5.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_52
-
-
-        Compute device class 5.2
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_53
-
-
-        Compute device class 5.3
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_60
-
-
-        Compute device class 6.0.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_61
-
-
-        Compute device class 6.1.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_62
-
-
-        Compute device class 6.2.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_70
-
-
-        Compute device class 7.0.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_72
-
-
-        Compute device class 7.2.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_75
-
-
-        Compute device class 7.5.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_80
-
-
-        Compute device class 8.0.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_86
-
-
-        Compute device class 8.6.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_87
-
-
-        Compute device class 8.7.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_89
-
-
-        Compute device class 8.9.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_90
-
-
-        Compute device class 9.0.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_target.CU_TARGET_COMPUTE_90A
-
-
-        Compute device class 9.0 with accelerated features.
-
-.. autoclass:: cuda.bindings.driver.CUjit_fallback
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_fallback.CU_PREFER_PTX
-
-
-        Prefer to compile ptx if exact binary match not found
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_fallback.CU_PREFER_BINARY
-
-
-        Prefer to fall back to compatible binary code if exact match not found
-
-.. autoclass:: cuda.bindings.driver.CUjit_cacheMode
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_cacheMode.CU_JIT_CACHE_OPTION_NONE
-
-
-        Compile with no -dlcm flag specified
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_cacheMode.CU_JIT_CACHE_OPTION_CG
-
-
-        Compile with L1 cache disabled
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjit_cacheMode.CU_JIT_CACHE_OPTION_CA
-
-
-        Compile with L1 cache enabled
-
-.. autoclass:: cuda.bindings.driver.CUjitInputType
-
-    .. autoattribute:: cuda.bindings.driver.CUjitInputType.CU_JIT_INPUT_CUBIN
-
-
-        Compiled device-class-specific device code
-
-        Applicable options: none
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjitInputType.CU_JIT_INPUT_PTX
-
-
-        PTX source code
-
-        Applicable options: PTX compiler options
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjitInputType.CU_JIT_INPUT_FATBINARY
-
-
-        Bundle of multiple cubins and/or PTX of some device code
-
-        Applicable options: PTX compiler options, :py:obj:`~.CU_JIT_FALLBACK_STRATEGY`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjitInputType.CU_JIT_INPUT_OBJECT
-
-
-        Host object with embedded device code
-
-        Applicable options: PTX compiler options, :py:obj:`~.CU_JIT_FALLBACK_STRATEGY`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjitInputType.CU_JIT_INPUT_LIBRARY
-
-
-        Archive of host objects with embedded device code
-
-        Applicable options: PTX compiler options, :py:obj:`~.CU_JIT_FALLBACK_STRATEGY`
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjitInputType.CU_JIT_INPUT_NVVM
-
-
-        [Deprecated]
-
-
-
-        Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
-
-
-    .. autoattribute:: cuda.bindings.driver.CUjitInputType.CU_JIT_NUM_INPUT_TYPES
-
-.. autoclass:: cuda.bindings.driver.CUgraphicsRegisterFlags
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphicsRegisterFlags.CU_GRAPHICS_REGISTER_FLAGS_NONE
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphicsRegisterFlags.CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphicsRegisterFlags.CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphicsRegisterFlags.CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphicsRegisterFlags.CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER
-
-.. autoclass:: cuda.bindings.driver.CUgraphicsMapResourceFlags
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphicsMapResourceFlags.CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphicsMapResourceFlags.CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphicsMapResourceFlags.CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD
-
-.. autoclass:: cuda.bindings.driver.CUarray_cubemap_face
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_cubemap_face.CU_CUBEMAP_FACE_POSITIVE_X
-
-
-        Positive X face of cubemap
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_cubemap_face.CU_CUBEMAP_FACE_NEGATIVE_X
-
-
-        Negative X face of cubemap
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_cubemap_face.CU_CUBEMAP_FACE_POSITIVE_Y
-
-
-        Positive Y face of cubemap
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_cubemap_face.CU_CUBEMAP_FACE_NEGATIVE_Y
-
-
-        Negative Y face of cubemap
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_cubemap_face.CU_CUBEMAP_FACE_POSITIVE_Z
-
-
-        Positive Z face of cubemap
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarray_cubemap_face.CU_CUBEMAP_FACE_NEGATIVE_Z
-
-
-        Negative Z face of cubemap
-
-.. autoclass:: cuda.bindings.driver.CUlimit
-
-    .. autoattribute:: cuda.bindings.driver.CUlimit.CU_LIMIT_STACK_SIZE
-
-
-        GPU thread stack size
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlimit.CU_LIMIT_PRINTF_FIFO_SIZE
-
-
-        GPU printf FIFO size
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlimit.CU_LIMIT_MALLOC_HEAP_SIZE
-
-
-        GPU malloc heap size
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlimit.CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH
-
-
-        GPU device runtime launch synchronize depth
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlimit.CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT
-
-
-        GPU device runtime pending launch count
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlimit.CU_LIMIT_MAX_L2_FETCH_GRANULARITY
-
-
-        A value between 0 and 128 that indicates the maximum fetch granularity of L2 (in Bytes). This is a hint
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlimit.CU_LIMIT_PERSISTING_L2_CACHE_SIZE
-
-
-        A size in bytes for L2 persisting lines cache size
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlimit.CU_LIMIT_SHMEM_SIZE
-
-
-        A maximum size in bytes of shared memory available to CUDA kernels on a CIG context. Can only be queried, cannot be set
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlimit.CU_LIMIT_CIG_ENABLED
-
-
-        A non-zero value indicates this CUDA context is a CIG-enabled context. Can only be queried, cannot be set
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlimit.CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED
-
-
-        When set to a non-zero value, CUDA will fail to launch a kernel on a CIG context, instead of using the fallback path, if the kernel uses more shared memory than available
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlimit.CU_LIMIT_MAX
-
-.. autoclass:: cuda.bindings.driver.CUresourcetype
-
-    .. autoattribute:: cuda.bindings.driver.CUresourcetype.CU_RESOURCE_TYPE_ARRAY
-
-
-        Array resource
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourcetype.CU_RESOURCE_TYPE_MIPMAPPED_ARRAY
-
-
-        Mipmapped array resource
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourcetype.CU_RESOURCE_TYPE_LINEAR
-
-
-        Linear resource
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourcetype.CU_RESOURCE_TYPE_PITCH2D
-
-
-        Pitch 2D resource
-
-.. autoclass:: cuda.bindings.driver.CUaccessProperty
-
-    .. autoattribute:: cuda.bindings.driver.CUaccessProperty.CU_ACCESS_PROPERTY_NORMAL
-
-
-        Normal cache persistence.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUaccessProperty.CU_ACCESS_PROPERTY_STREAMING
-
-
-        Streaming access is less likely to persist in cache.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUaccessProperty.CU_ACCESS_PROPERTY_PERSISTING
-
-
-        Persisting access is more likely to persist in cache.
-
-.. autoclass:: cuda.bindings.driver.CUgraphConditionalNodeType
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphConditionalNodeType.CU_GRAPH_COND_TYPE_IF
-
-
-        Conditional 'if' Node. Body executed once if condition value is non-zero.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphConditionalNodeType.CU_GRAPH_COND_TYPE_WHILE
-
-
-        Conditional 'while' Node. Body executed repeatedly while condition value is non-zero.
-
-.. autoclass:: cuda.bindings.driver.CUgraphNodeType
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_KERNEL
-
-
-        GPU kernel node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_MEMCPY
-
-
-        Memcpy node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_MEMSET
-
-
-        Memset node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_HOST
-
-
-        Host (executable) node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_GRAPH
-
-
-        Node which executes an embedded graph
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_EMPTY
-
-
-        Empty (no-op) node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_WAIT_EVENT
-
-
-        External event wait node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_EVENT_RECORD
-
-
-        External event record node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL
-
-
-        External semaphore signal node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT
-
-
-        External semaphore wait node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_MEM_ALLOC
-
-
-        Memory Allocation Node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_MEM_FREE
-
-
-        Memory Free Node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_BATCH_MEM_OP
-
-
-        Batch MemOp Node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_CONDITIONAL
-
-
-        Conditional Node. May be used to implement a conditional execution path or loop inside of a graph. The graph(s) contained within the body of the conditional node can be selectively executed or iterated upon based on the value of a conditional variable.
-
-        Handles must be created in advance of creating the node using :py:obj:`~.cuGraphConditionalHandleCreate`.
-
-        The following restrictions apply to graphs which contain conditional nodes:
-
-        - The graph cannot be used in a child node.
-
-        - Only one instantiation of the graph may exist at any point in time.
-
-        - The graph cannot be cloned.
-
-        To set the control value, supply a default value when creating the handle and/or call :py:obj:`~.cudaGraphSetConditional` from device code.
-
-.. autoclass:: cuda.bindings.driver.CUgraphDependencyType
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDependencyType.CU_GRAPH_DEPENDENCY_TYPE_DEFAULT
-
-
-        This is an ordinary dependency.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDependencyType.CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC
-
-
-        This dependency type allows the downstream node to use `cudaGridDependencySynchronize()`. It may only be used between kernel nodes, and must be used with either the :py:obj:`~.CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC` or :py:obj:`~.CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER` outgoing port.
-
-.. autoclass:: cuda.bindings.driver.CUgraphInstantiateResult
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphInstantiateResult.CUDA_GRAPH_INSTANTIATE_SUCCESS
-
-
-        Instantiation succeeded
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphInstantiateResult.CUDA_GRAPH_INSTANTIATE_ERROR
-
-
-        Instantiation failed for an unexpected reason which is described in the return value of the function
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphInstantiateResult.CUDA_GRAPH_INSTANTIATE_INVALID_STRUCTURE
-
-
-        Instantiation failed due to invalid structure, such as cycles
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphInstantiateResult.CUDA_GRAPH_INSTANTIATE_NODE_OPERATION_NOT_SUPPORTED
-
-
-        Instantiation for device launch failed because the graph contained an unsupported operation
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphInstantiateResult.CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED
-
-
-        Instantiation for device launch failed due to the nodes belonging to different contexts
-
-.. autoclass:: cuda.bindings.driver.CUsynchronizationPolicy
-
-    .. autoattribute:: cuda.bindings.driver.CUsynchronizationPolicy.CU_SYNC_POLICY_AUTO
-
-
-    .. autoattribute:: cuda.bindings.driver.CUsynchronizationPolicy.CU_SYNC_POLICY_SPIN
-
-
-    .. autoattribute:: cuda.bindings.driver.CUsynchronizationPolicy.CU_SYNC_POLICY_YIELD
-
-
-    .. autoattribute:: cuda.bindings.driver.CUsynchronizationPolicy.CU_SYNC_POLICY_BLOCKING_SYNC
-
-.. autoclass:: cuda.bindings.driver.CUclusterSchedulingPolicy
-
-    .. autoattribute:: cuda.bindings.driver.CUclusterSchedulingPolicy.CU_CLUSTER_SCHEDULING_POLICY_DEFAULT
-
-
-        the default policy
-
-
-    .. autoattribute:: cuda.bindings.driver.CUclusterSchedulingPolicy.CU_CLUSTER_SCHEDULING_POLICY_SPREAD
-
-
-        spread the blocks within a cluster to the SMs
-
-
-    .. autoattribute:: cuda.bindings.driver.CUclusterSchedulingPolicy.CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING
-
-
-        allow the hardware to load-balance the blocks in a cluster to the SMs
-
-.. autoclass:: cuda.bindings.driver.CUlaunchMemSyncDomain
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchMemSyncDomain.CU_LAUNCH_MEM_SYNC_DOMAIN_DEFAULT
-
-
-        Launch kernels in the default domain
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchMemSyncDomain.CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE
-
-
-        Launch kernels in the remote domain
-
-.. autoclass:: cuda.bindings.driver.CUlaunchAttributeID
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_IGNORE
-
-
-        Ignored entry, for convenient composition
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW
-
-
-        Valid for streams, graph nodes, launches. See :py:obj:`~.CUlaunchAttributeValue.accessPolicyWindow`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_COOPERATIVE
-
-
-        Valid for graph nodes, launches. See :py:obj:`~.CUlaunchAttributeValue.cooperative`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY
-
-
-        Valid for streams. See :py:obj:`~.CUlaunchAttributeValue.syncPolicy`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
-
-
-        Valid for graph nodes, launches. See :py:obj:`~.CUlaunchAttributeValue.clusterDim`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
-
-
-        Valid for graph nodes, launches. See :py:obj:`~.CUlaunchAttributeValue.clusterSchedulingPolicyPreference`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION
-
-
-        Valid for launches. Setting :py:obj:`~.CUlaunchAttributeValue.programmaticStreamSerializationAllowed` to non-0 signals that the kernel will use programmatic means to resolve its stream dependency, so that the CUDA runtime should opportunistically allow the grid's execution to overlap with the previous kernel in the stream, if that kernel requests the overlap. The dependent launches can choose to wait on the dependency using the programmatic sync (cudaGridDependencySynchronize() or equivalent PTX instructions).
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT
-
-
-        Valid for launches. Set :py:obj:`~.CUlaunchAttributeValue.programmaticEvent` to record the event. An event recorded through this launch attribute is guaranteed to trigger only after all blocks in the associated kernel trigger the event. A block can trigger the event through PTX launchdep.release or CUDA builtin function cudaTriggerProgrammaticLaunchCompletion(). A trigger can also be inserted at the beginning of each block's execution if triggerAtBlockStart is set to non-0. The dependent launches can choose to wait on the dependency using the programmatic sync (cudaGridDependencySynchronize() or equivalent PTX instructions). Note that dependents (including the CPU thread calling :py:obj:`~.cuEventSynchronize()`) are not guaranteed to observe the release precisely when it is released. For example, :py:obj:`~.cuEventSynchronize()` may only observe the event trigger long after the associated kernel has completed. This recording type is primarily meant for establishing programmatic dependency between device tasks. Note also this type of dependency allows, but does not guarantee, concurrent execution of tasks.
-
-        The event supplied must not be an interprocess or interop event. The event must disable timing (i.e. must be created with the :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag set).
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_PRIORITY
-
-
-        Valid for streams, graph nodes, launches. See :py:obj:`~.CUlaunchAttributeValue.priority`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP
-
-
-        Valid for streams, graph nodes, launches. See :py:obj:`~.CUlaunchAttributeValue.memSyncDomainMap`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN
-
-
-        Valid for streams, graph nodes, launches. See :py:obj:`~.CUlaunchAttributeValue.memSyncDomain`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT
-
-
-        Valid for launches. Set :py:obj:`~.CUlaunchAttributeValue.launchCompletionEvent` to record the event. 
-
-         Nominally, the event is triggered once all blocks of the kernel have begun execution. Currently this is a best effort. If a kernel B has a launch completion dependency on a kernel A, B may wait until A is complete. Alternatively, blocks of B may begin before all blocks of A have begun, for example if B can claim execution resources unavailable to A (e.g. they run on different GPUs) or if B is a higher priority than A. Exercise caution if such an ordering inversion could lead to deadlock. 
-
-         A launch completion event is nominally similar to a programmatic event with `triggerAtBlockStart` set except that it is not visible to `cudaGridDependencySynchronize()` and can be used with compute capability less than 9.0. 
-
-         The event supplied must not be an interprocess or interop event. The event must disable timing (i.e. must be created with the :py:obj:`~.CU_EVENT_DISABLE_TIMING` flag set).
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE
-
-
-        Valid for graph nodes, launches. This attribute is graphs-only, and passing it to a launch in a non-capturing stream will result in an error. 
-
-         :py:obj:`~.CUlaunchAttributeValue`::deviceUpdatableKernelNode::deviceUpdatable can only be set to 0 or 1. Setting the field to 1 indicates that the corresponding kernel node should be device-updatable. On success, a handle will be returned via :py:obj:`~.CUlaunchAttributeValue`::deviceUpdatableKernelNode::devNode which can be passed to the various device-side update functions to update the node's kernel parameters from within another kernel. For more information on the types of device updates that can be made, as well as the relevant limitations thereof, see :py:obj:`~.cudaGraphKernelNodeUpdatesApply`. 
-
-         Nodes which are device-updatable have additional restrictions compared to regular kernel nodes. Firstly, device-updatable nodes cannot be removed from their graph via :py:obj:`~.cuGraphDestroyNode`. Additionally, once opted-in to this functionality, a node cannot opt out, and any attempt to set the deviceUpdatable attribute to 0 will result in an error. Device-updatable kernel nodes also cannot have their attributes copied to/from another kernel node via :py:obj:`~.cuGraphKernelNodeCopyAttributes`. Graphs containing one or more device-updatable nodes also do not allow multiple instantiation, and neither the graph nor its instantiated version can be passed to :py:obj:`~.cuGraphExecUpdate`. 
-
-         If a graph contains device-updatable nodes and updates those nodes from the device from within the graph, the graph must be uploaded with :py:obj:`~.cuGraphUpload` before it is launched. For such a graph, if host-side executable graph updates are made to the device-updatable nodes, the graph must be uploaded before it is launched again.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
-
-
-        Valid for launches. On devices where the L1 cache and shared memory use the same hardware resources, setting :py:obj:`~.CUlaunchAttributeValue.sharedMemCarveout` to a percentage between 0-100 signals the CUDA driver to set the shared memory carveout preference, in percent of the total shared memory for that kernel launch. This attribute takes precedence over :py:obj:`~.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT`. This is only a hint, and the CUDA driver can choose a different configuration if required for the launch.
-
-.. autoclass:: cuda.bindings.driver.CUstreamCaptureStatus
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamCaptureStatus.CU_STREAM_CAPTURE_STATUS_NONE
-
-
-        Stream is not capturing
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamCaptureStatus.CU_STREAM_CAPTURE_STATUS_ACTIVE
-
-
-        Stream is actively capturing
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamCaptureStatus.CU_STREAM_CAPTURE_STATUS_INVALIDATED
-
-
-        Stream is part of a capture sequence that has been invalidated, but not terminated
-
-.. autoclass:: cuda.bindings.driver.CUstreamCaptureMode
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamCaptureMode.CU_STREAM_CAPTURE_MODE_GLOBAL
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamCaptureMode.CU_STREAM_CAPTURE_MODE_THREAD_LOCAL
-
-
-    .. autoattribute:: cuda.bindings.driver.CUstreamCaptureMode.CU_STREAM_CAPTURE_MODE_RELAXED
-
-.. autoclass:: cuda.bindings.driver.CUdriverProcAddress_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUdriverProcAddress_flags.CU_GET_PROC_ADDRESS_DEFAULT
-
-
-        Default search mode for driver symbols.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdriverProcAddress_flags.CU_GET_PROC_ADDRESS_LEGACY_STREAM
-
-
-        Search for legacy versions of driver symbols.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdriverProcAddress_flags.CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM
-
-
-        Search for per-thread versions of driver symbols.
-
-.. autoclass:: cuda.bindings.driver.CUdriverProcAddressQueryResult
-
-    .. autoattribute:: cuda.bindings.driver.CUdriverProcAddressQueryResult.CU_GET_PROC_ADDRESS_SUCCESS
-
-
-        Symbol was successfully found
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdriverProcAddressQueryResult.CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND
-
-
-        Symbol was not found in search
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdriverProcAddressQueryResult.CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT
-
-
-        Symbol was found but version supplied was not sufficient
-
-.. autoclass:: cuda.bindings.driver.CUexecAffinityType
-
-    .. autoattribute:: cuda.bindings.driver.CUexecAffinityType.CU_EXEC_AFFINITY_TYPE_SM_COUNT
-
-
-        Create a context with limited SMs.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexecAffinityType.CU_EXEC_AFFINITY_TYPE_MAX
-
-.. autoclass:: cuda.bindings.driver.CUcigDataType
-
-    .. autoattribute:: cuda.bindings.driver.CUcigDataType.CIG_DATA_TYPE_D3D12_COMMAND_QUEUE
-
-.. autoclass:: cuda.bindings.driver.CUlibraryOption
-
-    .. autoattribute:: cuda.bindings.driver.CUlibraryOption.CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlibraryOption.CU_LIBRARY_BINARY_IS_PRESERVED
-
-
-        Specifies that the argument `code` passed to :py:obj:`~.cuLibraryLoadData()` will be preserved. Specifying this option will let the driver know that `code` can be accessed at any point until :py:obj:`~.cuLibraryUnload()`. The default behavior is for the driver to allocate and maintain its own copy of `code`. Note that this is only a memory usage optimization hint and the driver can choose to ignore it if required. Specifying this option with :py:obj:`~.cuLibraryLoadFromFile()` is invalid and will return :py:obj:`~.CUDA_ERROR_INVALID_VALUE`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUlibraryOption.CU_LIBRARY_NUM_OPTIONS
-
-.. autoclass:: cuda.bindings.driver.CUresult
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_SUCCESS
-
-
-        The API call returned with no errors. In the case of query calls, this also means that the operation being queried is complete (see :py:obj:`~.cuEventQuery()` and :py:obj:`~.cuStreamQuery()`).
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_VALUE
-
-
-        This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_OUT_OF_MEMORY
-
-
-        The API call failed because it was unable to allocate enough memory or other resources to perform the requested operation.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_NOT_INITIALIZED
-
-
-        This indicates that the CUDA driver has not been initialized with :py:obj:`~.cuInit()` or that initialization has failed.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_DEINITIALIZED
-
-
-        This indicates that the CUDA driver is in the process of shutting down.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_PROFILER_DISABLED
-
-
-        This indicates that the profiler is not initialized for this run. This can happen when the application is running with external profiling tools like visual profiler.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_PROFILER_NOT_INITIALIZED
-
-
-        [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_PROFILER_ALREADY_STARTED
-
-
-        [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_PROFILER_ALREADY_STOPPED
-
-
-        [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_STUB_LIBRARY
-
-
-        This indicates that the CUDA driver that the application has loaded is a stub library. Applications that run with the stub rather than a real driver loaded will result in CUDA API returning this error.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_DEVICE_UNAVAILABLE
-
-
-        This indicates that requested CUDA device is unavailable at the current time. Devices are often unavailable due to use of :py:obj:`~.CU_COMPUTEMODE_EXCLUSIVE_PROCESS` or :py:obj:`~.CU_COMPUTEMODE_PROHIBITED`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_NO_DEVICE
-
-
-        This indicates that no CUDA-capable devices were detected by the installed CUDA driver.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_DEVICE
-
-
-        This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA device or that the action requested is invalid for the specified device.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_DEVICE_NOT_LICENSED
-
-
-        This error indicates that the Grid license is not applied.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_IMAGE
-
-
-        This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_CONTEXT
-
-
-        This most frequently indicates that there is no context bound to the current thread. This can also be returned if the context passed to an API call is not a valid handle (such as a context that has had :py:obj:`~.cuCtxDestroy()` invoked on it). This can also be returned if a user mixes different API versions (i.e. 3010 context with 3020 API calls). See :py:obj:`~.cuCtxGetApiVersion()` for more details. This can also be returned if the green context passed to an API call was not converted to a :py:obj:`~.CUcontext` using :py:obj:`~.cuCtxFromGreenCtx` API.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_CONTEXT_ALREADY_CURRENT
-
-
-        This indicates that the context being supplied as a parameter to the API call was already the active context. [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_MAP_FAILED
-
-
-        This indicates that a map or register operation has failed.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_UNMAP_FAILED
-
-
-        This indicates that an unmap or unregister operation has failed.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_ARRAY_IS_MAPPED
-
-
-        This indicates that the specified array is currently mapped and thus cannot be destroyed.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_ALREADY_MAPPED
-
-
-        This indicates that the resource is already mapped.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_NO_BINARY_FOR_GPU
-
-
-        This indicates that there is no kernel image available that is suitable for the device. This can occur when a user specifies code generation options for a particular CUDA source file that do not include the corresponding device configuration.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_ALREADY_ACQUIRED
-
-
-        This indicates that a resource has already been acquired.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_NOT_MAPPED
-
-
-        This indicates that a resource is not mapped.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_NOT_MAPPED_AS_ARRAY
-
-
-        This indicates that a mapped resource is not available for access as an array.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_NOT_MAPPED_AS_POINTER
-
-
-        This indicates that a mapped resource is not available for access as a pointer.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_ECC_UNCORRECTABLE
-
-
-        This indicates that an uncorrectable ECC error was detected during execution.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_UNSUPPORTED_LIMIT
-
-
-        This indicates that the :py:obj:`~.CUlimit` passed to the API call is not supported by the active device.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_CONTEXT_ALREADY_IN_USE
-
-
-        This indicates that the :py:obj:`~.CUcontext` passed to the API call can only be bound to a single CPU thread at a time but is already bound to a CPU thread.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_PEER_ACCESS_UNSUPPORTED
-
-
-        This indicates that peer access is not supported across the given devices.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_PTX
-
-
-        This indicates that a PTX JIT compilation failed.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_GRAPHICS_CONTEXT
-
-
-        This indicates an error with OpenGL or DirectX context.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_NVLINK_UNCORRECTABLE
-
-
-        This indicates that an uncorrectable NVLink error was detected during the execution.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_JIT_COMPILER_NOT_FOUND
-
-
-        This indicates that the PTX JIT compiler library was not found.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_UNSUPPORTED_PTX_VERSION
-
-
-        This indicates that the provided PTX was compiled with an unsupported toolchain.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_JIT_COMPILATION_DISABLED
-
-
-        This indicates that the PTX JIT compilation was disabled.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY
-
-
-        This indicates that the :py:obj:`~.CUexecAffinityType` passed to the API call is not supported by the active device.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC
-
-
-        This indicates that the code to be compiled by the PTX JIT contains an unsupported call to cudaDeviceSynchronize.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_SOURCE
-
-
-        This indicates that the device kernel source is invalid. This includes compilation/linker errors encountered in device code or user error.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_FILE_NOT_FOUND
-
-
-        This indicates that the file specified was not found.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND
-
-
-        This indicates that a link to a shared object failed to resolve.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_SHARED_OBJECT_INIT_FAILED
-
-
-        This indicates that initialization of a shared object failed.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_OPERATING_SYSTEM
-
-
-        This indicates that an OS call failed.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_HANDLE
-
-
-        This indicates that a resource handle passed to the API call was not valid. Resource handles are opaque types like :py:obj:`~.CUstream` and :py:obj:`~.CUevent`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_ILLEGAL_STATE
-
-
-        This indicates that a resource required by the API call is not in a valid state to perform the requested operation.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_LOSSY_QUERY
-
-
-        This indicates an attempt was made to introspect an object in a way that would discard semantically important information. This is either due to the object using functionality newer than the API version used to introspect it or omission of optional return arguments.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_NOT_FOUND
-
-
-        This indicates that a named symbol was not found. Examples of symbols are global/constant variable names, driver function names, texture names, and surface names.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_NOT_READY
-
-
-        This indicates that asynchronous operations issued previously have not completed yet. This result is not actually an error, but must be indicated differently than :py:obj:`~.CUDA_SUCCESS` (which indicates completion). Calls that may return this value include :py:obj:`~.cuEventQuery()` and :py:obj:`~.cuStreamQuery()`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_ILLEGAL_ADDRESS
-
-
-        While executing a kernel, the device encountered a load or store instruction on an invalid memory address. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES
-
-
-        This indicates that a launch did not occur because it did not have appropriate resources. This error usually indicates that the user has attempted to pass too many arguments to the device kernel, or the kernel launch specifies too many threads for the kernel's register count. Passing arguments of the wrong size (i.e. a 64-bit pointer when a 32-bit int is expected) is equivalent to passing too many arguments and can also result in this error.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_LAUNCH_TIMEOUT
-
-
-        This indicates that the device kernel took too long to execute. This can only occur if timeouts are enabled - see the device attribute :py:obj:`~.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT` for more information. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING
-
-
-        This error indicates a kernel launch that uses an incompatible texturing mode.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED
-
-
-        This error indicates that a call to :py:obj:`~.cuCtxEnablePeerAccess()` is trying to re-enable peer access to a context which has already had peer access to it enabled.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_PEER_ACCESS_NOT_ENABLED
-
-
-        This error indicates that :py:obj:`~.cuCtxDisablePeerAccess()` is trying to disable peer access which has not been enabled yet via :py:obj:`~.cuCtxEnablePeerAccess()`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE
-
-
-        This error indicates that the primary context for the specified device has already been initialized.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_CONTEXT_IS_DESTROYED
-
-
-        This error indicates that the context current to the calling thread has been destroyed using :py:obj:`~.cuCtxDestroy`, or is a primary context which has not yet been initialized.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_ASSERT
-
-
-        A device-side assert triggered during kernel execution. The context cannot be used anymore, and must be destroyed. All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_TOO_MANY_PEERS
-
-
-        This error indicates that the hardware resources required to enable peer access have been exhausted for one or more of the devices passed to :py:obj:`~.cuCtxEnablePeerAccess()`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED
-
-
-        This error indicates that the memory range passed to :py:obj:`~.cuMemHostRegister()` has already been registered.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED
-
-
-        This error indicates that the pointer passed to :py:obj:`~.cuMemHostUnregister()` does not correspond to any currently registered memory region.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_HARDWARE_STACK_ERROR
-
-
-        While executing a kernel, the device encountered a stack error. This can be due to stack corruption or exceeding the stack size limit. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_ILLEGAL_INSTRUCTION
-
-
-        While executing a kernel, the device encountered an illegal instruction. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_MISALIGNED_ADDRESS
-
-
-        While executing a kernel, the device encountered a load or store instruction on a memory address which is not aligned. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_ADDRESS_SPACE
-
-
-        While executing a kernel, the device encountered an instruction which can only operate on memory locations in certain address spaces (global, shared, or local), but was supplied a memory address not belonging to an allowed address space. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_PC
-
-
-        While executing a kernel, the device program counter wrapped its address space. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_LAUNCH_FAILED
-
-
-        An exception occurred on the device while executing a kernel. Common causes include dereferencing an invalid device pointer and accessing out of bounds shared memory. Less common cases can be system specific - more information about these cases can be found in the system specific user guide. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE
-
-
-        This error indicates that the number of blocks launched per grid for a kernel that was launched via either :py:obj:`~.cuLaunchCooperativeKernel` or :py:obj:`~.cuLaunchCooperativeKernelMultiDevice` exceeds the maximum number of blocks as allowed by :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessor` or :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` times the number of multiprocessors as specified by the device attribute :py:obj:`~.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_NOT_PERMITTED
-
-
-        This error indicates that the attempted operation is not permitted.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_NOT_SUPPORTED
-
-
-        This error indicates that the attempted operation is not supported on the current system or device.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_SYSTEM_NOT_READY
-
-
-        This error indicates that the system is not yet ready to start any CUDA work. To continue using CUDA, verify the system configuration is in a valid state and all required driver daemons are actively running. More information about this error can be found in the system specific user guide.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH
-
-
-        This error indicates that there is a mismatch between the versions of the display driver and the CUDA driver. Refer to the compatibility documentation for supported versions.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE
-
-
-        This error indicates that the system was upgraded to run with forward compatibility but the visible hardware detected by CUDA does not support this configuration. Refer to the compatibility documentation for the supported hardware matrix or ensure that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES environment variable.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_MPS_CONNECTION_FAILED
-
-
-        This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_MPS_RPC_FAILURE
-
-
-        This error indicates that the remote procedural call between the MPS server and the MPS client failed.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_MPS_SERVER_NOT_READY
-
-
-        This error indicates that the MPS server is not ready to accept new MPS client requests. This error can be returned when the MPS server is in the process of recovering from a fatal failure.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_MPS_MAX_CLIENTS_REACHED
-
-
-        This error indicates that the hardware resources required to create MPS client have been exhausted.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED
-
-
-        This error indicates that the hardware resources required to support device connections have been exhausted.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_MPS_CLIENT_TERMINATED
-
-
-        This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_CDP_NOT_SUPPORTED
-
-
-        This error indicates that the module is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_CDP_VERSION_MISMATCH
-
-
-        This error indicates that a module contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED
-
-
-        This error indicates that the operation is not permitted when the stream is capturing.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_STREAM_CAPTURE_INVALIDATED
-
-
-        This error indicates that the current capture sequence on the stream has been invalidated due to a previous error.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_STREAM_CAPTURE_MERGE
-
-
-        This error indicates that the operation would have resulted in a merge of two independent capture sequences.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_STREAM_CAPTURE_UNMATCHED
-
-
-        This error indicates that the capture was not initiated in this stream.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_STREAM_CAPTURE_UNJOINED
-
-
-        This error indicates that the capture sequence contains a fork that was not joined to the primary stream.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_STREAM_CAPTURE_ISOLATION
-
-
-        This error indicates that a dependency would have been created which crosses the capture sequence boundary. Only implicit in-stream ordering dependencies are allowed to cross the boundary.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_STREAM_CAPTURE_IMPLICIT
-
-
-        This error indicates a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_CAPTURED_EVENT
-
-
-        This error indicates that the operation is not permitted on an event which was last recorded in a capturing stream.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD
-
-
-        A stream capture sequence not initiated with the :py:obj:`~.CU_STREAM_CAPTURE_MODE_RELAXED` argument to :py:obj:`~.cuStreamBeginCapture` was passed to :py:obj:`~.cuStreamEndCapture` in a different thread.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_TIMEOUT
-
-
-        This error indicates that the timeout specified for the wait operation has lapsed.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE
-
-
-        This error indicates that the graph update was not performed because it included changes which violated constraints specific to instantiated graph update.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_EXTERNAL_DEVICE
-
-
-        This indicates that an async error has occurred in a device outside of CUDA. If CUDA was waiting for an external device's signal before consuming shared data, the external device signaled an error indicating that the data is not valid for consumption. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_CLUSTER_SIZE
-
-
-        Indicates a kernel launch error due to cluster misconfiguration.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_FUNCTION_NOT_LOADED
-
-
-        Indicates a function handle is not loaded when calling an API that requires a loaded function.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_RESOURCE_TYPE
-
-
-        This error indicates one or more resources passed in are not valid resource types for the operation.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION
-
-
-        This error indicates one or more resources are insufficient or non-applicable for the operation.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresult.CUDA_ERROR_UNKNOWN
-
-
-        This indicates that an unknown internal error has occurred.
-
-.. autoclass:: cuda.bindings.driver.CUdevice_P2PAttribute
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_P2PAttribute.CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK
-
-
-        A relative value indicating the performance of the link between two devices
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_P2PAttribute.CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED
-
-
-        P2P access is enabled
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_P2PAttribute.CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED
-
-
-        Atomic operation over the link supported
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_P2PAttribute.CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED
-
-
-        [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevice_P2PAttribute.CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED
-
-
-        Accessing CUDA arrays over the link supported
-
-.. autoclass:: cuda.bindings.driver.CUresourceViewFormat
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_NONE
-
-
-        No resource view format (use underlying resource format)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UINT_1X8
-
-
-        1 channel unsigned 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UINT_2X8
-
-
-        2 channel unsigned 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UINT_4X8
-
-
-        4 channel unsigned 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SINT_1X8
-
-
-        1 channel signed 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SINT_2X8
-
-
-        2 channel signed 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SINT_4X8
-
-
-        4 channel signed 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UINT_1X16
-
-
-        1 channel unsigned 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UINT_2X16
-
-
-        2 channel unsigned 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UINT_4X16
-
-
-        4 channel unsigned 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SINT_1X16
-
-
-        1 channel signed 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SINT_2X16
-
-
-        2 channel signed 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SINT_4X16
-
-
-        4 channel signed 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UINT_1X32
-
-
-        1 channel unsigned 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UINT_2X32
-
-
-        2 channel unsigned 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UINT_4X32
-
-
-        4 channel unsigned 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SINT_1X32
-
-
-        1 channel signed 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SINT_2X32
-
-
-        2 channel signed 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SINT_4X32
-
-
-        4 channel signed 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_FLOAT_1X16
-
-
-        1 channel 16-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_FLOAT_2X16
-
-
-        2 channel 16-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_FLOAT_4X16
-
-
-        4 channel 16-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_FLOAT_1X32
-
-
-        1 channel 32-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_FLOAT_2X32
-
-
-        2 channel 32-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_FLOAT_4X32
-
-
-        4 channel 32-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UNSIGNED_BC1
-
-
-        Block compressed 1
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UNSIGNED_BC2
-
-
-        Block compressed 2
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UNSIGNED_BC3
-
-
-        Block compressed 3
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UNSIGNED_BC4
-
-
-        Block compressed 4 unsigned
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SIGNED_BC4
-
-
-        Block compressed 4 signed
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UNSIGNED_BC5
-
-
-        Block compressed 5 unsigned
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SIGNED_BC5
-
-
-        Block compressed 5 signed
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UNSIGNED_BC6H
-
-
-        Block compressed 6 unsigned half-float
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_SIGNED_BC6H
-
-
-        Block compressed 6 signed half-float
-
-
-    .. autoattribute:: cuda.bindings.driver.CUresourceViewFormat.CU_RES_VIEW_FORMAT_UNSIGNED_BC7
-
-
-        Block compressed 7
-
-.. autoclass:: cuda.bindings.driver.CUtensorMapDataType
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_UINT8
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_UINT16
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_UINT32
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_INT32
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_UINT64
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_INT64
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_FLOAT16
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_FLOAT32
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_FLOAT64
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_BFLOAT16
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_TFLOAT32
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapDataType.CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ
-
-.. autoclass:: cuda.bindings.driver.CUtensorMapInterleave
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapInterleave.CU_TENSOR_MAP_INTERLEAVE_NONE
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapInterleave.CU_TENSOR_MAP_INTERLEAVE_16B
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapInterleave.CU_TENSOR_MAP_INTERLEAVE_32B
-
-.. autoclass:: cuda.bindings.driver.CUtensorMapSwizzle
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapSwizzle.CU_TENSOR_MAP_SWIZZLE_NONE
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapSwizzle.CU_TENSOR_MAP_SWIZZLE_32B
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapSwizzle.CU_TENSOR_MAP_SWIZZLE_64B
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapSwizzle.CU_TENSOR_MAP_SWIZZLE_128B
-
-.. autoclass:: cuda.bindings.driver.CUtensorMapL2promotion
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapL2promotion.CU_TENSOR_MAP_L2_PROMOTION_NONE
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapL2promotion.CU_TENSOR_MAP_L2_PROMOTION_L2_64B
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapL2promotion.CU_TENSOR_MAP_L2_PROMOTION_L2_128B
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapL2promotion.CU_TENSOR_MAP_L2_PROMOTION_L2_256B
-
-.. autoclass:: cuda.bindings.driver.CUtensorMapFloatOOBfill
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapFloatOOBfill.CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE
-
-
-    .. autoattribute:: cuda.bindings.driver.CUtensorMapFloatOOBfill.CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA
-
-.. autoclass:: cuda.bindings.driver.CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS
-
-    .. autoattribute:: cuda.bindings.driver.CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS.CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE
-
-
-        No access, meaning the device cannot access this memory at all, thus must be staged through accessible memory in order to complete certain operations
-
-
-    .. autoattribute:: cuda.bindings.driver.CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS.CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ
-
-
-        Read-only access, meaning writes to this memory are considered invalid accesses and thus return error in that case.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS.CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE
-
-
-        Read-write access, the device has full read-write access to the memory
-
-.. autoclass:: cuda.bindings.driver.CUexternalMemoryHandleType
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalMemoryHandleType.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD
-
-
-        Handle is an opaque file descriptor
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalMemoryHandleType.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
-
-
-        Handle is an opaque shared NT handle
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalMemoryHandleType.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT
-
-
-        Handle is an opaque, globally shared handle
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalMemoryHandleType.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP
-
-
-        Handle is a D3D12 heap object
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalMemoryHandleType.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE
-
-
-        Handle is a D3D12 committed resource
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalMemoryHandleType.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE
-
-
-        Handle is a shared NT handle to a D3D11 resource
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalMemoryHandleType.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT
-
-
-        Handle is a globally shared handle to a D3D11 resource
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalMemoryHandleType.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF
-
-
-        Handle is an NvSciBuf object
-
-.. autoclass:: cuda.bindings.driver.CUexternalSemaphoreHandleType
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalSemaphoreHandleType.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD
-
-
-        Handle is an opaque file descriptor
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalSemaphoreHandleType.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32
-
-
-        Handle is an opaque shared NT handle
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalSemaphoreHandleType.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT
-
-
-        Handle is an opaque, globally shared handle
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalSemaphoreHandleType.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE
-
-
-        Handle is a shared NT handle referencing a D3D12 fence object
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalSemaphoreHandleType.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE
-
-
-        Handle is a shared NT handle referencing a D3D11 fence object
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalSemaphoreHandleType.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC
-
-
-        Opaque handle to NvSciSync Object
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalSemaphoreHandleType.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX
-
-
-        Handle is a shared NT handle referencing a D3D11 keyed mutex object
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalSemaphoreHandleType.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT
-
-
-        Handle is a globally shared handle referencing a D3D11 keyed mutex object
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalSemaphoreHandleType.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD
-
-
-        Handle is an opaque file descriptor referencing a timeline semaphore
-
-
-    .. autoattribute:: cuda.bindings.driver.CUexternalSemaphoreHandleType.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32
-
-
-        Handle is an opaque shared NT handle referencing a timeline semaphore
-
-.. autoclass:: cuda.bindings.driver.CUmemAllocationHandleType
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_NONE
-
-
-        Does not allow any export mechanism.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR
-
-
-        Allows a file descriptor to be used for exporting. Permitted only on POSIX systems. (int)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_WIN32
-
-
-        Allows a Win32 NT handle to be used for exporting. (HANDLE)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_WIN32_KMT
-
-
-        Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_FABRIC
-
-
-        Allows a fabric handle to be used for exporting. (CUmemFabricHandle)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_MAX
-
-.. autoclass:: cuda.bindings.driver.CUmemAccess_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_NONE
-
-
-        Default, make the address range not accessible
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READ
-
-
-        Make the address range read accessible
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE
-
-
-        Make the address range read-write accessible
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_MAX
-
-.. autoclass:: cuda.bindings.driver.CUmemLocationType
-
-    .. autoattribute:: cuda.bindings.driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_INVALID
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
-
-
-        Location is a device location, thus id is a device ordinal
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST
-
-
-        Location is host, id is ignored
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA
-
-
-        Location is a host NUMA node, thus id is a host NUMA node id
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT
-
-
-        Location is a host NUMA node of the current thread, id is ignored
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemLocationType.CU_MEM_LOCATION_TYPE_MAX
-
-.. autoclass:: cuda.bindings.driver.CUmemAllocationType
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_INVALID
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED
-
-
-        This allocation type is 'pinned', i.e. cannot migrate from its current location while the application is actively using it
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MAX
-
-.. autoclass:: cuda.bindings.driver.CUmemAllocationGranularity_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationGranularity_flags.CU_MEM_ALLOC_GRANULARITY_MINIMUM
-
-
-        Minimum required granularity for allocation
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationGranularity_flags.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED
-
-
-        Recommended granularity for allocation for best performance
-
-.. autoclass:: cuda.bindings.driver.CUmemRangeHandleType
-
-    .. autoattribute:: cuda.bindings.driver.CUmemRangeHandleType.CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemRangeHandleType.CU_MEM_RANGE_HANDLE_TYPE_MAX
-
-.. autoclass:: cuda.bindings.driver.CUarraySparseSubresourceType
-
-    .. autoattribute:: cuda.bindings.driver.CUarraySparseSubresourceType.CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL
-
-
-    .. autoattribute:: cuda.bindings.driver.CUarraySparseSubresourceType.CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL
-
-.. autoclass:: cuda.bindings.driver.CUmemOperationType
-
-    .. autoattribute:: cuda.bindings.driver.CUmemOperationType.CU_MEM_OPERATION_TYPE_MAP
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemOperationType.CU_MEM_OPERATION_TYPE_UNMAP
-
-.. autoclass:: cuda.bindings.driver.CUmemHandleType
-
-    .. autoattribute:: cuda.bindings.driver.CUmemHandleType.CU_MEM_HANDLE_TYPE_GENERIC
-
-.. autoclass:: cuda.bindings.driver.CUmemAllocationCompType
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationCompType.CU_MEM_ALLOCATION_COMP_NONE
-
-
-        Allocating non-compressible memory
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemAllocationCompType.CU_MEM_ALLOCATION_COMP_GENERIC
-
-
-        Allocating compressible memory
-
-.. autoclass:: cuda.bindings.driver.CUmulticastGranularity_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUmulticastGranularity_flags.CU_MULTICAST_GRANULARITY_MINIMUM
-
-
-        Minimum required granularity
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmulticastGranularity_flags.CU_MULTICAST_GRANULARITY_RECOMMENDED
-
-
-        Recommended granularity for best performance
-
-.. autoclass:: cuda.bindings.driver.CUgraphExecUpdateResult
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphExecUpdateResult.CU_GRAPH_EXEC_UPDATE_SUCCESS
-
-
-        The update succeeded
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphExecUpdateResult.CU_GRAPH_EXEC_UPDATE_ERROR
-
-
-        The update failed for an unexpected reason which is described in the return value of the function
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphExecUpdateResult.CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED
-
-
-        The update failed because the topology changed
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphExecUpdateResult.CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED
-
-
-        The update failed because a node type changed
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphExecUpdateResult.CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED
-
-
-        The update failed because the function of a kernel node changed (CUDA driver < 11.2)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphExecUpdateResult.CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED
-
-
-        The update failed because the parameters changed in a way that is not supported
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphExecUpdateResult.CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED
-
-
-        The update failed because something about the node is not supported
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphExecUpdateResult.CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE
-
-
-        The update failed because the function of a kernel node changed in an unsupported way
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphExecUpdateResult.CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED
-
-
-        The update failed because the node attributes changed in a way that is not supported
-
-.. autoclass:: cuda.bindings.driver.CUmemPool_attribute
-
-    .. autoattribute:: cuda.bindings.driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES
-
-
-        (value type = int) Allow cuMemAllocAsync to use memory asynchronously freed in another stream as long as a stream ordering dependency of the allocating stream on the free action exists. CUDA events and null stream interactions can create the required stream ordered dependencies. (default enabled)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC
-
-
-        (value type = int) Allow reuse of already completed frees when there is no dependency between the free and allocation. (default enabled)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES
-
-
-        (value type = int) Allow cuMemAllocAsync to insert new stream dependencies in order to establish the stream ordering required to reuse a piece of memory released by cuMemFreeAsync (default enabled).
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RELEASE_THRESHOLD
-
-
-        (value type = cuuint64_t) Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS. When more than the release threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the next call to stream, event or context synchronize. (default 0)
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT
-
-
-        (value type = cuuint64_t) Amount of backing memory currently allocated for the mempool.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH
-
-
-        (value type = cuuint64_t) High watermark of backing memory allocated for the mempool since the last time it was reset. High watermark can only be reset to zero.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_USED_MEM_CURRENT
-
-
-        (value type = cuuint64_t) Amount of memory from the pool that is currently in use by the application.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmemPool_attribute.CU_MEMPOOL_ATTR_USED_MEM_HIGH
-
-
-        (value type = cuuint64_t) High watermark of the amount of memory from the pool that was in use by the application since the last time it was reset. High watermark can only be reset to zero.
-
-.. autoclass:: cuda.bindings.driver.CUgraphMem_attribute
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphMem_attribute.CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT
-
-
-        (value type = cuuint64_t) Amount of memory, in bytes, currently associated with graphs
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphMem_attribute.CU_GRAPH_MEM_ATTR_USED_MEM_HIGH
-
-
-        (value type = cuuint64_t) High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can only be reset to zero.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphMem_attribute.CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT
-
-
-        (value type = cuuint64_t) Amount of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphMem_attribute.CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH
-
-
-        (value type = cuuint64_t) High watermark of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
-
-.. autoclass:: cuda.bindings.driver.CUflushGPUDirectRDMAWritesOptions
-
-    .. autoattribute:: cuda.bindings.driver.CUflushGPUDirectRDMAWritesOptions.CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST
-
-
-        :py:obj:`~.cuFlushGPUDirectRDMAWrites()` and its CUDA Runtime API counterpart are supported on the device.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUflushGPUDirectRDMAWritesOptions.CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS
-
-
-        The :py:obj:`~.CU_STREAM_WAIT_VALUE_FLUSH` flag and the :py:obj:`~.CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES` MemOp are supported on the device.
-
-.. autoclass:: cuda.bindings.driver.CUGPUDirectRDMAWritesOrdering
-
-    .. autoattribute:: cuda.bindings.driver.CUGPUDirectRDMAWritesOrdering.CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE
-
-
-        The device does not natively support ordering of remote writes. :py:obj:`~.cuFlushGPUDirectRDMAWrites()` can be leveraged if supported.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUGPUDirectRDMAWritesOrdering.CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER
-
-
-        Natively, the device can consistently consume remote writes, although other CUDA devices may not.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUGPUDirectRDMAWritesOrdering.CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES
-
-
-        Any CUDA device in the system can consistently consume remote writes to this device.
-
-.. autoclass:: cuda.bindings.driver.CUflushGPUDirectRDMAWritesScope
-
-    .. autoattribute:: cuda.bindings.driver.CUflushGPUDirectRDMAWritesScope.CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER
-
-
-        Blocks until remote writes are visible to the CUDA device context owning the data.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUflushGPUDirectRDMAWritesScope.CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES
-
-
-        Blocks until remote writes are visible to all CUDA device contexts.
-
-.. autoclass:: cuda.bindings.driver.CUflushGPUDirectRDMAWritesTarget
-
-    .. autoattribute:: cuda.bindings.driver.CUflushGPUDirectRDMAWritesTarget.CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX
-
-
-        Sets the target for :py:obj:`~.cuFlushGPUDirectRDMAWrites()` to the currently active CUDA device context.
-
-.. autoclass:: cuda.bindings.driver.CUgraphDebugDot_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE
-
-
-        Output all debug data as if every debug flag is enabled
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES
-
-
-        Use CUDA Runtime structures for output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS
-
-
-        Adds CUDA_KERNEL_NODE_PARAMS values to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS
-
-
-        Adds CUDA_MEMCPY3D values to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS
-
-
-        Adds CUDA_MEMSET_NODE_PARAMS values to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS
-
-
-        Adds CUDA_HOST_NODE_PARAMS values to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS
-
-
-        Adds CUevent handle from record and wait nodes to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS
-
-
-        Adds CUDA_EXT_SEM_SIGNAL_NODE_PARAMS values to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS
-
-
-        Adds CUDA_EXT_SEM_WAIT_NODE_PARAMS values to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES
-
-
-        Adds CUkernelNodeAttrValue values to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES
-
-
-        Adds node handles and every kernel function handle to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS
-
-
-        Adds memory alloc node parameters to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS
-
-
-        Adds memory free node parameters to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS
-
-
-        Adds batch mem op node parameters to output
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_EXTRA_TOPO_INFO
-
-
-        Adds edge numbering information
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphDebugDot_flags.CU_GRAPH_DEBUG_DOT_FLAGS_CONDITIONAL_NODE_PARAMS
-
-
-        Adds conditional node parameters to output
-
-.. autoclass:: cuda.bindings.driver.CUuserObject_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUuserObject_flags.CU_USER_OBJECT_NO_DESTRUCTOR_SYNC
-
-
-        Indicates the destructor execution is not synchronized by any CUDA handle.
-
-.. autoclass:: cuda.bindings.driver.CUuserObjectRetain_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUuserObjectRetain_flags.CU_GRAPH_USER_OBJECT_MOVE
-
-
-        Transfer references from the caller rather than creating new references.
-
-.. autoclass:: cuda.bindings.driver.CUgraphInstantiate_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphInstantiate_flags.CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH
-
-
-        Automatically free memory allocated in a graph before relaunching.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphInstantiate_flags.CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD
-
-
-        Automatically upload the graph after instantiation. Only supported by :py:obj:`~.cuGraphInstantiateWithParams`. The upload will be performed using the stream provided in `instantiateParams`.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphInstantiate_flags.CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH
-
-
-        Instantiate the graph to be launchable from the device. This flag can only be used on platforms which support unified addressing. This flag cannot be used in conjunction with CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUgraphInstantiate_flags.CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY
-
-
-        Run the graph using the per-node priority attributes rather than the priority of the stream it is launched into.
-
-.. autoclass:: cuda.bindings.driver.CUdeviceNumaConfig
-
-    .. autoattribute:: cuda.bindings.driver.CUdeviceNumaConfig.CU_DEVICE_NUMA_CONFIG_NONE
-
-
-        The GPU is not a NUMA node
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdeviceNumaConfig.CU_DEVICE_NUMA_CONFIG_NUMA_NODE
-
-
-        The GPU is a NUMA node, CU_DEVICE_ATTRIBUTE_NUMA_ID contains its NUMA ID
-
-.. autoclass:: cuda.bindings.driver.CUeglFrameType
-
-    .. autoattribute:: cuda.bindings.driver.CUeglFrameType.CU_EGL_FRAME_TYPE_ARRAY
-
-
-        Frame type CUDA array
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglFrameType.CU_EGL_FRAME_TYPE_PITCH
-
-
-        Frame type pointer
-
-.. autoclass:: cuda.bindings.driver.CUeglResourceLocationFlags
-
-    .. autoattribute:: cuda.bindings.driver.CUeglResourceLocationFlags.CU_EGL_RESOURCE_LOCATION_SYSMEM
-
-
-        Resource location sysmem
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglResourceLocationFlags.CU_EGL_RESOURCE_LOCATION_VIDMEM
-
-
-        Resource location vidmem
-
-.. autoclass:: cuda.bindings.driver.CUeglColorFormat
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV420_PLANAR
-
-
-        Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR
-
-
-        Y, UV in two surfaces (UV as one surface) with VU byte ordering, width, height ratio same as YUV420Planar.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV422_PLANAR
-
-
-        Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR
-
-
-        Y, UV in two surfaces with VU byte ordering, width, height ratio same as YUV422Planar.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_RGB
-
-
-        R/G/B three channels in one surface with BGR byte ordering. Only pitch linear format supported.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BGR
-
-
-        R/G/B three channels in one surface with RGB byte ordering. Only pitch linear format supported.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_ARGB
-
-
-        R/G/B/A four channels in one surface with BGRA byte ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_RGBA
-
-
-        R/G/B/A four channels in one surface with ABGR byte ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_L
-
-
-        single luminance channel in one surface.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_R
-
-
-        single color channel in one surface.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV444_PLANAR
-
-
-        Y, U, V in three surfaces, each in a separate surface, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR
-
-
-        Y, UV in two surfaces (UV as one surface) with VU byte ordering, width, height ratio same as YUV444Planar.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUYV_422
-
-
-        Y, U, V in one surface, interleaved as UYVY in one channel.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_UYVY_422
-
-
-        Y, U, V in one surface, interleaved as YUYV in one channel.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_ABGR
-
-
-        R/G/B/A four channels in one surface with RGBA byte ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BGRA
-
-
-        R/G/B/A four channels in one surface with ARGB byte ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_A
-
-
-        Alpha color format - one channel in one surface.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_RG
-
-
-        R/G color format - two channels in one surface with GR byte ordering
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_AYUV
-
-
-        Y, U, V, A four channels in one surface, interleaved as VUYA.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR
-
-
-        Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR
-
-
-        Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR
-
-
-        Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR
-
-
-        Y12, V12U12 in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR
-
-
-        Y12, V12U12 in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_VYUY_ER
-
-
-        Extended Range Y, U, V in one surface, interleaved as YVYU in one channel.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_UYVY_ER
-
-
-        Extended Range Y, U, V in one surface, interleaved as YUYV in one channel.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUYV_ER
-
-
-        Extended Range Y, U, V in one surface, interleaved as UYVY in one channel.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVYU_ER
-
-
-        Extended Range Y, U, V in one surface, interleaved as VYUY in one channel.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV_ER
-
-
-        Extended Range Y, U, V three channels in one surface, interleaved as VUY. Only pitch linear format supported.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUVA_ER
-
-
-        Extended Range Y, U, V, A four channels in one surface, interleaved as AVUY.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_AYUV_ER
-
-
-        Extended Range Y, U, V, A four channels in one surface, interleaved as VUYA.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER
-
-
-        Extended Range Y, U, V in three surfaces, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER
-
-
-        Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER
-
-
-        Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER
-
-
-        Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER
-
-
-        Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER
-
-
-        Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER
-
-
-        Extended Range Y, V, U in three surfaces, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER
-
-
-        Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER
-
-
-        Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER
-
-
-        Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER
-
-
-        Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER
-
-
-        Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_RGGB
-
-
-        Bayer format - one channel in one surface with interleaved RGGB ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_BGGR
-
-
-        Bayer format - one channel in one surface with interleaved BGGR ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_GRBG
-
-
-        Bayer format - one channel in one surface with interleaved GRBG ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_GBRG
-
-
-        Bayer format - one channel in one surface with interleaved GBRG ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER10_RGGB
-
-
-        Bayer10 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER10_BGGR
-
-
-        Bayer10 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER10_GRBG
-
-
-        Bayer10 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER10_GBRG
-
-
-        Bayer10 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER12_RGGB
-
-
-        Bayer12 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER12_BGGR
-
-
-        Bayer12 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER12_GRBG
-
-
-        Bayer12 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER12_GBRG
-
-
-        Bayer12 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER14_RGGB
-
-
-        Bayer14 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER14_BGGR
-
-
-        Bayer14 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER14_GRBG
-
-
-        Bayer14 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER14_GBRG
-
-
-        Bayer14 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER20_RGGB
-
-
-        Bayer20 format - one channel in one surface with interleaved RGGB ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER20_BGGR
-
-
-        Bayer20 format - one channel in one surface with interleaved BGGR ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER20_GRBG
-
-
-        Bayer20 format - one channel in one surface with interleaved GRBG ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER20_GBRG
-
-
-        Bayer20 format - one channel in one surface with interleaved GBRG ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU444_PLANAR
-
-
-        Y, V, U in three surfaces, each in a separate surface, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU422_PLANAR
-
-
-        Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU420_PLANAR
-
-
-        Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_ISP_RGGB
-
-
-        Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved RGGB ordering and mapped to opaque integer datatype.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_ISP_BGGR
-
-
-        Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved BGGR ordering and mapped to opaque integer datatype.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_ISP_GRBG
-
-
-        Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved GRBG ordering and mapped to opaque integer datatype.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_ISP_GBRG
-
-
-        Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved GBRG ordering and mapped to opaque integer datatype.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_BCCR
-
-
-        Bayer format - one channel in one surface with interleaved BCCR ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_RCCB
-
-
-        Bayer format - one channel in one surface with interleaved RCCB ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_CRBC
-
-
-        Bayer format - one channel in one surface with interleaved CRBC ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER_CBRC
-
-
-        Bayer format - one channel in one surface with interleaved CBRC ordering.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER10_CCCC
-
-
-        Bayer10 format - one channel in one surface with interleaved CCCC ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER12_BCCR
-
-
-        Bayer12 format - one channel in one surface with interleaved BCCR ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER12_RCCB
-
-
-        Bayer12 format - one channel in one surface with interleaved RCCB ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER12_CRBC
-
-
-        Bayer12 format - one channel in one surface with interleaved CRBC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER12_CBRC
-
-
-        Bayer12 format - one channel in one surface with interleaved CBRC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_BAYER12_CCCC
-
-
-        Bayer12 format - one channel in one surface with interleaved CCCC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y
-
-
-        Color format for single Y plane.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_2020
-
-
-        Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_2020
-
-
-        Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_2020
-
-
-        Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_2020
-
-
-        Y, V, U each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_709
-
-
-        Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_709
-
-
-        Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV420_PLANAR_709
-
-
-        Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVU420_PLANAR_709
-
-
-        Y, V, U each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709
-
-
-        Y10, V10U10 in two surfaces (VU as one surface), U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_2020
-
-
-        Y10, V10U10 in two surfaces (VU as one surface), U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_2020
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_709
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y_ER
-
-
-        Extended Range Color format for single Y plane.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y_709_ER
-
-
-        Extended Range Color format for single Y plane.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10_ER
-
-
-        Extended Range Color format for single Y10 plane.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10_709_ER
-
-
-        Extended Range Color format for single Y10 plane.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y12_ER
-
-
-        Extended Range Color format for single Y12 plane.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y12_709_ER
-
-
-        Extended Range Color format for single Y12 plane.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUVA
-
-
-        Y, U, V, A four channels in one surface, interleaved as AVUY.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YUV
-
-
-        Y, U, V three channels in one surface, interleaved as VUY. Only pitch linear format supported.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_YVYU
-
-
-        Y, U, V in one surface, interleaved as YVYU in one channel.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_VYUY
-
-
-        Y, U, V in one surface, interleaved as VYUY in one channel.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_ER
-
-
-        Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709_ER
-
-
-        Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_ER
-
-
-        Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_709_ER
-
-
-        Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_ER
-
-
-        Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_709_ER
-
-
-        Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_ER
-
-
-        Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_709_ER
-
-
-        Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.driver.CUeglColorFormat.CU_EGL_COLOR_FORMAT_MAX
-
-.. autoclass:: cuda.bindings.driver.CUdeviceptr_v2
-.. autoclass:: cuda.bindings.driver.CUdeviceptr
-.. autoclass:: cuda.bindings.driver.CUdevice_v1
-.. autoclass:: cuda.bindings.driver.CUdevice
-.. autoclass:: cuda.bindings.driver.CUcontext
-.. autoclass:: cuda.bindings.driver.CUmodule
-.. autoclass:: cuda.bindings.driver.CUfunction
-.. autoclass:: cuda.bindings.driver.CUlibrary
-.. autoclass:: cuda.bindings.driver.CUkernel
-.. autoclass:: cuda.bindings.driver.CUarray
-.. autoclass:: cuda.bindings.driver.CUmipmappedArray
-.. autoclass:: cuda.bindings.driver.CUtexref
-.. autoclass:: cuda.bindings.driver.CUsurfref
-.. autoclass:: cuda.bindings.driver.CUevent
-.. autoclass:: cuda.bindings.driver.CUstream
-.. autoclass:: cuda.bindings.driver.CUgraphicsResource
-.. autoclass:: cuda.bindings.driver.CUtexObject_v1
-.. autoclass:: cuda.bindings.driver.CUtexObject
-.. autoclass:: cuda.bindings.driver.CUsurfObject_v1
-.. autoclass:: cuda.bindings.driver.CUsurfObject
-.. autoclass:: cuda.bindings.driver.CUexternalMemory
-.. autoclass:: cuda.bindings.driver.CUexternalSemaphore
-.. autoclass:: cuda.bindings.driver.CUgraph
-.. autoclass:: cuda.bindings.driver.CUgraphNode
-.. autoclass:: cuda.bindings.driver.CUgraphExec
-.. autoclass:: cuda.bindings.driver.CUmemoryPool
-.. autoclass:: cuda.bindings.driver.CUuserObject
-.. autoclass:: cuda.bindings.driver.CUgraphConditionalHandle
-.. autoclass:: cuda.bindings.driver.CUgraphDeviceNode
-.. autoclass:: cuda.bindings.driver.CUasyncCallbackHandle
-.. autoclass:: cuda.bindings.driver.CUgreenCtx
-.. autoclass:: cuda.bindings.driver.CUuuid
-.. autoclass:: cuda.bindings.driver.CUmemFabricHandle_v1
-.. autoclass:: cuda.bindings.driver.CUmemFabricHandle
-.. autoclass:: cuda.bindings.driver.CUipcEventHandle_v1
-.. autoclass:: cuda.bindings.driver.CUipcEventHandle
-.. autoclass:: cuda.bindings.driver.CUipcMemHandle_v1
-.. autoclass:: cuda.bindings.driver.CUipcMemHandle
-.. autoclass:: cuda.bindings.driver.CUstreamBatchMemOpParams_v1
-.. autoclass:: cuda.bindings.driver.CUstreamBatchMemOpParams
-.. autoclass:: cuda.bindings.driver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_BATCH_MEM_OP_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_BATCH_MEM_OP_NODE_PARAMS_v2
-.. autoclass:: cuda.bindings.driver.CUasyncNotificationInfo
-.. autoclass:: cuda.bindings.driver.CUasyncCallback
-.. autoclass:: cuda.bindings.driver.CUdevprop_v1
-.. autoclass:: cuda.bindings.driver.CUdevprop
-.. autoclass:: cuda.bindings.driver.CUlinkState
-.. autoclass:: cuda.bindings.driver.CUhostFn
-.. autoclass:: cuda.bindings.driver.CUaccessPolicyWindow_v1
-.. autoclass:: cuda.bindings.driver.CUaccessPolicyWindow
-.. autoclass:: cuda.bindings.driver.CUDA_KERNEL_NODE_PARAMS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_KERNEL_NODE_PARAMS_v2
-.. autoclass:: cuda.bindings.driver.CUDA_KERNEL_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_KERNEL_NODE_PARAMS_v3
-.. autoclass:: cuda.bindings.driver.CUDA_MEMSET_NODE_PARAMS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_MEMSET_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_MEMSET_NODE_PARAMS_v2
-.. autoclass:: cuda.bindings.driver.CUDA_HOST_NODE_PARAMS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_HOST_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_HOST_NODE_PARAMS_v2
-.. autoclass:: cuda.bindings.driver.CUDA_CONDITIONAL_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUgraphEdgeData
-.. autoclass:: cuda.bindings.driver.CUDA_GRAPH_INSTANTIATE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUlaunchMemSyncDomainMap
-.. autoclass:: cuda.bindings.driver.CUlaunchAttributeValue
-.. autoclass:: cuda.bindings.driver.CUlaunchAttribute
-.. autoclass:: cuda.bindings.driver.CUlaunchConfig
-.. autoclass:: cuda.bindings.driver.CUkernelNodeAttrID
-.. autoclass:: cuda.bindings.driver.CUkernelNodeAttrValue_v1
-.. autoclass:: cuda.bindings.driver.CUkernelNodeAttrValue
-.. autoclass:: cuda.bindings.driver.CUstreamAttrID
-.. autoclass:: cuda.bindings.driver.CUstreamAttrValue_v1
-.. autoclass:: cuda.bindings.driver.CUstreamAttrValue
-.. autoclass:: cuda.bindings.driver.CUexecAffinitySmCount_v1
-.. autoclass:: cuda.bindings.driver.CUexecAffinitySmCount
-.. autoclass:: cuda.bindings.driver.CUexecAffinityParam_v1
-.. autoclass:: cuda.bindings.driver.CUexecAffinityParam
-.. autoclass:: cuda.bindings.driver.CUctxCigParam
-.. autoclass:: cuda.bindings.driver.CUctxCreateParams
-.. autoclass:: cuda.bindings.driver.CUlibraryHostUniversalFunctionAndDataTable
-.. autoclass:: cuda.bindings.driver.CUstreamCallback
-.. autoclass:: cuda.bindings.driver.CUoccupancyB2DSize
-.. autoclass:: cuda.bindings.driver.CUDA_MEMCPY2D_v2
-.. autoclass:: cuda.bindings.driver.CUDA_MEMCPY2D
-.. autoclass:: cuda.bindings.driver.CUDA_MEMCPY3D_v2
-.. autoclass:: cuda.bindings.driver.CUDA_MEMCPY3D
-.. autoclass:: cuda.bindings.driver.CUDA_MEMCPY3D_PEER_v1
-.. autoclass:: cuda.bindings.driver.CUDA_MEMCPY3D_PEER
-.. autoclass:: cuda.bindings.driver.CUDA_MEMCPY_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY_DESCRIPTOR_v2
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY_DESCRIPTOR
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY3D_DESCRIPTOR_v2
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY3D_DESCRIPTOR
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY_SPARSE_PROPERTIES_v1
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY_SPARSE_PROPERTIES
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY_MEMORY_REQUIREMENTS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_ARRAY_MEMORY_REQUIREMENTS
-.. autoclass:: cuda.bindings.driver.CUDA_RESOURCE_DESC_v1
-.. autoclass:: cuda.bindings.driver.CUDA_RESOURCE_DESC
-.. autoclass:: cuda.bindings.driver.CUDA_TEXTURE_DESC_v1
-.. autoclass:: cuda.bindings.driver.CUDA_TEXTURE_DESC
-.. autoclass:: cuda.bindings.driver.CUDA_RESOURCE_VIEW_DESC_v1
-.. autoclass:: cuda.bindings.driver.CUDA_RESOURCE_VIEW_DESC
-.. autoclass:: cuda.bindings.driver.CUtensorMap
-.. autoclass:: cuda.bindings.driver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_POINTER_ATTRIBUTE_P2P_TOKENS
-.. autoclass:: cuda.bindings.driver.CUDA_LAUNCH_PARAMS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_LAUNCH_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v2
-.. autoclass:: cuda.bindings.driver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_EXT_SEM_WAIT_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_EXT_SEM_WAIT_NODE_PARAMS_v2
-.. autoclass:: cuda.bindings.driver.CUmemGenericAllocationHandle_v1
-.. autoclass:: cuda.bindings.driver.CUmemGenericAllocationHandle
-.. autoclass:: cuda.bindings.driver.CUarrayMapInfo_v1
-.. autoclass:: cuda.bindings.driver.CUarrayMapInfo
-.. autoclass:: cuda.bindings.driver.CUmemLocation_v1
-.. autoclass:: cuda.bindings.driver.CUmemLocation
-.. autoclass:: cuda.bindings.driver.CUmemAllocationProp_v1
-.. autoclass:: cuda.bindings.driver.CUmemAllocationProp
-.. autoclass:: cuda.bindings.driver.CUmulticastObjectProp_v1
-.. autoclass:: cuda.bindings.driver.CUmulticastObjectProp
-.. autoclass:: cuda.bindings.driver.CUmemAccessDesc_v1
-.. autoclass:: cuda.bindings.driver.CUmemAccessDesc
-.. autoclass:: cuda.bindings.driver.CUgraphExecUpdateResultInfo_v1
-.. autoclass:: cuda.bindings.driver.CUgraphExecUpdateResultInfo
-.. autoclass:: cuda.bindings.driver.CUmemPoolProps_v1
-.. autoclass:: cuda.bindings.driver.CUmemPoolProps
-.. autoclass:: cuda.bindings.driver.CUmemPoolPtrExportData_v1
-.. autoclass:: cuda.bindings.driver.CUmemPoolPtrExportData
-.. autoclass:: cuda.bindings.driver.CUDA_MEM_ALLOC_NODE_PARAMS_v1
-.. autoclass:: cuda.bindings.driver.CUDA_MEM_ALLOC_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_MEM_ALLOC_NODE_PARAMS_v2
-.. autoclass:: cuda.bindings.driver.CUDA_MEM_FREE_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_CHILD_GRAPH_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_EVENT_RECORD_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUDA_EVENT_WAIT_NODE_PARAMS
-.. autoclass:: cuda.bindings.driver.CUgraphNodeParams
-.. autoclass:: cuda.bindings.driver.CUeglFrame_v1
-.. autoclass:: cuda.bindings.driver.CUeglFrame
-.. autoclass:: cuda.bindings.driver.CUeglStreamConnection
-.. autoattribute:: cuda.bindings.driver.CUDA_VERSION
-
-    CUDA API version number
-
-.. autoattribute:: cuda.bindings.driver.CU_UUID_HAS_BEEN_DEFINED
-
-    CUDA UUID types
-
-.. autoattribute:: cuda.bindings.driver.CU_IPC_HANDLE_SIZE
-
-    CUDA IPC handle size
-
-.. autoattribute:: cuda.bindings.driver.CU_STREAM_LEGACY
-
-    Legacy stream handle
-
-
-
-    Stream handle that can be passed as a CUstream to use an implicit stream with legacy synchronization behavior.
-
-
-
-    See details of the synchronization behavior.
-
-.. autoattribute:: cuda.bindings.driver.CU_STREAM_PER_THREAD
-
-    Per-thread stream handle
-
-
-
-    Stream handle that can be passed as a CUstream to use an implicit stream with per-thread synchronization behavior.
-
-
-
-    See details of the synchronization behavior.
-
-.. autoattribute:: cuda.bindings.driver.CU_COMPUTE_ACCELERATED_TARGET_BASE
-.. autoattribute:: cuda.bindings.driver.CUDA_CB
-.. autoattribute:: cuda.bindings.driver.CU_GRAPH_COND_ASSIGN_DEFAULT
-
-    Conditional node handle flags. Default value is applied when graph is launched.
-
-.. autoattribute:: cuda.bindings.driver.CU_GRAPH_KERNEL_NODE_PORT_DEFAULT
-
-    This port activates when the kernel has finished executing.
-
-.. autoattribute:: cuda.bindings.driver.CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC
-
-    This port activates when all blocks of the kernel have performed cudaTriggerProgrammaticLaunchCompletion() or have terminated. It must be used with edge type :py:obj:`~.CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC`. See also :py:obj:`~.CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT`.
-
-.. autoattribute:: cuda.bindings.driver.CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER
-
-    This port activates when all blocks of the kernel have begun execution. See also :py:obj:`~.CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT`.
-
-.. autoattribute:: cuda.bindings.driver.CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW
-.. autoattribute:: cuda.bindings.driver.CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE
-.. autoattribute:: cuda.bindings.driver.CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_DIMENSION
-.. autoattribute:: cuda.bindings.driver.CU_KERNEL_NODE_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE
-.. autoattribute:: cuda.bindings.driver.CU_KERNEL_NODE_ATTRIBUTE_PRIORITY
-.. autoattribute:: cuda.bindings.driver.CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP
-.. autoattribute:: cuda.bindings.driver.CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN
-.. autoattribute:: cuda.bindings.driver.CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE
-.. autoattribute:: cuda.bindings.driver.CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
-.. autoattribute:: cuda.bindings.driver.CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW
-.. autoattribute:: cuda.bindings.driver.CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY
-.. autoattribute:: cuda.bindings.driver.CU_STREAM_ATTRIBUTE_PRIORITY
-.. autoattribute:: cuda.bindings.driver.CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP
-.. autoattribute:: cuda.bindings.driver.CU_STREAM_ATTRIBUTE_MEM_SYNC_DOMAIN
-.. autoattribute:: cuda.bindings.driver.CU_MEMHOSTALLOC_PORTABLE
-
-    If set, host memory is portable between CUDA contexts. Flag for :py:obj:`~.cuMemHostAlloc()`
-
-.. autoattribute:: cuda.bindings.driver.CU_MEMHOSTALLOC_DEVICEMAP
-
-    If set, host memory is mapped into CUDA address space and :py:obj:`~.cuMemHostGetDevicePointer()` may be called on the host pointer. Flag for :py:obj:`~.cuMemHostAlloc()`
-
-.. autoattribute:: cuda.bindings.driver.CU_MEMHOSTALLOC_WRITECOMBINED
-
-    If set, host memory is allocated as write-combined - fast to write, faster to DMA, slow to read except via SSE4 streaming load instruction (MOVNTDQA). Flag for :py:obj:`~.cuMemHostAlloc()`
-
-.. autoattribute:: cuda.bindings.driver.CU_MEMHOSTREGISTER_PORTABLE
-
-    If set, host memory is portable between CUDA contexts. Flag for :py:obj:`~.cuMemHostRegister()`
-
-.. autoattribute:: cuda.bindings.driver.CU_MEMHOSTREGISTER_DEVICEMAP
-
-    If set, host memory is mapped into CUDA address space and :py:obj:`~.cuMemHostGetDevicePointer()` may be called on the host pointer. Flag for :py:obj:`~.cuMemHostRegister()`
-
-.. autoattribute:: cuda.bindings.driver.CU_MEMHOSTREGISTER_IOMEMORY
-
-    If set, the passed memory pointer is treated as pointing to some memory-mapped I/O space, e.g. belonging to a third-party PCIe device. On Windows the flag is a no-op. On Linux that memory is marked as non cache-coherent for the GPU and is expected to be physically contiguous. It may return :py:obj:`~.CUDA_ERROR_NOT_PERMITTED` if run as an unprivileged user, or :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` on older Linux kernel versions. On all other platforms, it is not supported and :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` is returned. Flag for :py:obj:`~.cuMemHostRegister()`
-
-.. autoattribute:: cuda.bindings.driver.CU_MEMHOSTREGISTER_READ_ONLY
-
-    If set, the passed memory pointer is treated as pointing to memory that is considered read-only by the device. On platforms without :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES`, this flag is required in order to register memory mapped to the CPU as read-only. Support for the use of this flag can be queried from the device attribute :py:obj:`~.CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED`. Using this flag with a current context associated with a device that does not have this attribute set will cause :py:obj:`~.cuMemHostRegister` to error with :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED`.
-
-.. autoattribute:: cuda.bindings.driver.CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL
-
-    Indicates that the layered sparse CUDA array or CUDA mipmapped array has a single mip tail region for all layers
-
-.. autoattribute:: cuda.bindings.driver.CU_TENSOR_MAP_NUM_QWORDS
-
-    Size of tensor map descriptor
-
-.. autoattribute:: cuda.bindings.driver.CUDA_EXTERNAL_MEMORY_DEDICATED
-
-    Indicates that the external memory object is a dedicated resource
-
-.. autoattribute:: cuda.bindings.driver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC
-
-    When the `flags` parameter of :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS` contains this flag, it indicates that signaling an external semaphore object should skip performing appropriate memory synchronization operations over all the external memory objects that are imported as :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF`, which otherwise are performed by default to ensure data coherency with other importers of the same NvSciBuf memory objects.
-
-.. autoattribute:: cuda.bindings.driver.CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC
-
-    When the `flags` parameter of :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS` contains this flag, it indicates that waiting on an external semaphore object should skip performing appropriate memory synchronization operations over all the external memory objects that are imported as :py:obj:`~.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF`, which otherwise are performed by default to ensure data coherency with other importers of the same NvSciBuf memory objects.
-
-.. autoattribute:: cuda.bindings.driver.CUDA_NVSCISYNC_ATTR_SIGNAL
-
-    When `flags` of :py:obj:`~.cuDeviceGetNvSciSyncAttributes` is set to this, it indicates that the application needs signaler-specific NvSciSyncAttr to be filled by :py:obj:`~.cuDeviceGetNvSciSyncAttributes`.
-
-.. autoattribute:: cuda.bindings.driver.CUDA_NVSCISYNC_ATTR_WAIT
-
-    When `flags` of :py:obj:`~.cuDeviceGetNvSciSyncAttributes` is set to this, it indicates that the application needs waiter-specific NvSciSyncAttr to be filled by :py:obj:`~.cuDeviceGetNvSciSyncAttributes`.
-
-.. autoattribute:: cuda.bindings.driver.CU_MEM_CREATE_USAGE_TILE_POOL
-
-    This flag if set indicates that the memory will be used as a tile pool.
-
-.. autoattribute:: cuda.bindings.driver.CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC
-
-    If set, each kernel launched as part of :py:obj:`~.cuLaunchCooperativeKernelMultiDevice` only waits for prior work in the stream corresponding to that GPU to complete before the kernel begins execution.
-
-.. autoattribute:: cuda.bindings.driver.CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC
-
-    If set, any subsequent work pushed in a stream that participated in a call to :py:obj:`~.cuLaunchCooperativeKernelMultiDevice` will only wait for the kernel launched on the GPU corresponding to that stream to complete before it begins execution.
-
-.. autoattribute:: cuda.bindings.driver.CUDA_ARRAY3D_LAYERED
-
-    If set, the CUDA array is a collection of layers, where each layer is either a 1D or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number of layers, not the depth of a 3D array.
-
-.. autoattribute:: cuda.bindings.driver.CUDA_ARRAY3D_2DARRAY
-
-    Deprecated, use CUDA_ARRAY3D_LAYERED
-
-.. autoattribute:: cuda.bindings.driver.CUDA_ARRAY3D_SURFACE_LDST
-
-    This flag must be set in order to bind a surface reference to the CUDA array
-
-.. autoattribute:: cuda.bindings.driver.CUDA_ARRAY3D_CUBEMAP
-
-    If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The width of such a CUDA array must be equal to its height, and Depth must be six. If :py:obj:`~.CUDA_ARRAY3D_LAYERED` flag is also set, then the CUDA array is a collection of cubemaps and Depth must be a multiple of six.
-
-.. autoattribute:: cuda.bindings.driver.CUDA_ARRAY3D_TEXTURE_GATHER
-
-    This flag must be set in order to perform texture gather operations on a CUDA array.
-
-.. autoattribute:: cuda.bindings.driver.CUDA_ARRAY3D_DEPTH_TEXTURE
-
-    This flag if set indicates that the CUDA array is a DEPTH_TEXTURE.
-
-.. autoattribute:: cuda.bindings.driver.CUDA_ARRAY3D_COLOR_ATTACHMENT
-
-    This flag indicates that the CUDA array may be bound as a color target in an external graphics API
-
-.. autoattribute:: cuda.bindings.driver.CUDA_ARRAY3D_SPARSE
-
-    This flag if set indicates that the CUDA array or CUDA mipmapped array is a sparse CUDA array or CUDA mipmapped array respectively
-
-.. autoattribute:: cuda.bindings.driver.CUDA_ARRAY3D_DEFERRED_MAPPING
-
-    This flag if set indicates that the CUDA array or CUDA mipmapped array will allow deferred memory mapping
-
-.. autoattribute:: cuda.bindings.driver.CUDA_ARRAY3D_VIDEO_ENCODE_DECODE
-
-    This flag indicates that the CUDA array will be used for hardware accelerated video encode/decode operations.
-
-.. autoattribute:: cuda.bindings.driver.CU_TRSA_OVERRIDE_FORMAT
-
-    Override the texref format with a format inferred from the array. Flag for :py:obj:`~.cuTexRefSetArray()`
-
-.. autoattribute:: cuda.bindings.driver.CU_TRSF_READ_AS_INTEGER
-
-    Read the texture as integers rather than promoting the values to floats in the range [0,1]. Flag for :py:obj:`~.cuTexRefSetFlags()` and :py:obj:`~.cuTexObjectCreate()`
-
-.. autoattribute:: cuda.bindings.driver.CU_TRSF_NORMALIZED_COORDINATES
-
-    Use normalized texture coordinates in the range [0,1) instead of [0,dim). Flag for :py:obj:`~.cuTexRefSetFlags()` and :py:obj:`~.cuTexObjectCreate()`
-
-.. autoattribute:: cuda.bindings.driver.CU_TRSF_SRGB
-
-    Perform sRGB->linear conversion during texture read. Flag for :py:obj:`~.cuTexRefSetFlags()` and :py:obj:`~.cuTexObjectCreate()`
-
-.. autoattribute:: cuda.bindings.driver.CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION
-
-    Disable any trilinear filtering optimizations. Flag for :py:obj:`~.cuTexRefSetFlags()` and :py:obj:`~.cuTexObjectCreate()`
-
-.. autoattribute:: cuda.bindings.driver.CU_TRSF_SEAMLESS_CUBEMAP
-
-    Enable seamless cube map filtering. Flag for :py:obj:`~.cuTexObjectCreate()`
-
-.. autoattribute:: cuda.bindings.driver.CU_LAUNCH_PARAM_END_AS_INT
-
-    C++ compile time constant for CU_LAUNCH_PARAM_END
-
-.. autoattribute:: cuda.bindings.driver.CU_LAUNCH_PARAM_END
-
-    End of array terminator for the `extra` parameter to :py:obj:`~.cuLaunchKernel`
-
-.. autoattribute:: cuda.bindings.driver.CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT
-
-    C++ compile time constant for CU_LAUNCH_PARAM_BUFFER_POINTER
-
-.. autoattribute:: cuda.bindings.driver.CU_LAUNCH_PARAM_BUFFER_POINTER
-
-    Indicator that the next value in the `extra` parameter to :py:obj:`~.cuLaunchKernel` will be a pointer to a buffer containing all kernel parameters used for launching kernel `f`. This buffer needs to honor all alignment/padding requirements of the individual parameters. If :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_SIZE` is not also specified in the `extra` array, then :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER` will have no effect.
-
-.. autoattribute:: cuda.bindings.driver.CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT
-
-    C++ compile time constant for CU_LAUNCH_PARAM_BUFFER_SIZE
-
-.. autoattribute:: cuda.bindings.driver.CU_LAUNCH_PARAM_BUFFER_SIZE
-
-    Indicator that the next value in the `extra` parameter to :py:obj:`~.cuLaunchKernel` will be a pointer to a size_t which contains the size of the buffer specified with :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`. It is required that :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER` also be specified in the `extra` array if the value associated with :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_SIZE` is not zero.
-
-.. autoattribute:: cuda.bindings.driver.CU_PARAM_TR_DEFAULT
-
-    For texture references loaded into the module, use default texunit from texture reference.
-
-.. autoattribute:: cuda.bindings.driver.CU_DEVICE_CPU
-
-    Device that represents the CPU
-
-.. autoattribute:: cuda.bindings.driver.CU_DEVICE_INVALID
-
-    Device that represents an invalid device
-
-.. autoattribute:: cuda.bindings.driver.MAX_PLANES
-
-    Maximum number of planes per frame
-
-.. autoattribute:: cuda.bindings.driver.CUDA_EGL_INFINITE_TIMEOUT
-
-    Indicates that timeout for :py:obj:`~.cuEGLStreamConsumerAcquireFrame` is infinite.
-
-
-Error Handling
---------------
-
-This section describes the error handling functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuGetErrorString
-.. autofunction:: cuda.bindings.driver.cuGetErrorName
-
-Initialization
---------------
-
-This section describes the initialization functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuInit
-
-Version Management
-------------------
-
-This section describes the version management functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuDriverGetVersion
-
-Device Management
------------------
-
-This section describes the device management functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuDeviceGet
-.. autofunction:: cuda.bindings.driver.cuDeviceGetCount
-.. autofunction:: cuda.bindings.driver.cuDeviceGetName
-.. autofunction:: cuda.bindings.driver.cuDeviceGetUuid
-.. autofunction:: cuda.bindings.driver.cuDeviceGetUuid_v2
-.. autofunction:: cuda.bindings.driver.cuDeviceGetLuid
-.. autofunction:: cuda.bindings.driver.cuDeviceTotalMem
-.. autofunction:: cuda.bindings.driver.cuDeviceGetTexture1DLinearMaxWidth
-.. autofunction:: cuda.bindings.driver.cuDeviceGetAttribute
-.. autofunction:: cuda.bindings.driver.cuDeviceGetNvSciSyncAttributes
-.. autofunction:: cuda.bindings.driver.cuDeviceSetMemPool
-.. autofunction:: cuda.bindings.driver.cuDeviceGetMemPool
-.. autofunction:: cuda.bindings.driver.cuDeviceGetDefaultMemPool
-.. autofunction:: cuda.bindings.driver.cuDeviceGetExecAffinitySupport
-.. autofunction:: cuda.bindings.driver.cuFlushGPUDirectRDMAWrites
-
-Primary Context Management
---------------------------
-
-This section describes the primary context management functions of the low-level CUDA driver application programming interface.
-
-
-
-The primary context is unique per device and shared with the CUDA runtime API. These functions allow integration with other libraries using CUDA.
-
-.. autofunction:: cuda.bindings.driver.cuDevicePrimaryCtxRetain
-.. autofunction:: cuda.bindings.driver.cuDevicePrimaryCtxRelease
-.. autofunction:: cuda.bindings.driver.cuDevicePrimaryCtxSetFlags
-.. autofunction:: cuda.bindings.driver.cuDevicePrimaryCtxGetState
-.. autofunction:: cuda.bindings.driver.cuDevicePrimaryCtxReset
-
-Context Management
-------------------
-
-This section describes the context management functions of the low-level CUDA driver application programming interface.
-
-
-
-Please note that some functions are described in the Primary Context Management section.
-
-.. autofunction:: cuda.bindings.driver.cuCtxCreate
-.. autofunction:: cuda.bindings.driver.cuCtxCreate_v3
-.. autofunction:: cuda.bindings.driver.cuCtxCreate_v4
-.. autofunction:: cuda.bindings.driver.cuCtxDestroy
-.. autofunction:: cuda.bindings.driver.cuCtxPushCurrent
-.. autofunction:: cuda.bindings.driver.cuCtxPopCurrent
-.. autofunction:: cuda.bindings.driver.cuCtxSetCurrent
-.. autofunction:: cuda.bindings.driver.cuCtxGetCurrent
-.. autofunction:: cuda.bindings.driver.cuCtxGetDevice
-.. autofunction:: cuda.bindings.driver.cuCtxGetFlags
-.. autofunction:: cuda.bindings.driver.cuCtxSetFlags
-.. autofunction:: cuda.bindings.driver.cuCtxGetId
-.. autofunction:: cuda.bindings.driver.cuCtxSynchronize
-.. autofunction:: cuda.bindings.driver.cuCtxSetLimit
-.. autofunction:: cuda.bindings.driver.cuCtxGetLimit
-.. autofunction:: cuda.bindings.driver.cuCtxGetCacheConfig
-.. autofunction:: cuda.bindings.driver.cuCtxSetCacheConfig
-.. autofunction:: cuda.bindings.driver.cuCtxGetApiVersion
-.. autofunction:: cuda.bindings.driver.cuCtxGetStreamPriorityRange
-.. autofunction:: cuda.bindings.driver.cuCtxResetPersistingL2Cache
-.. autofunction:: cuda.bindings.driver.cuCtxGetExecAffinity
-.. autofunction:: cuda.bindings.driver.cuCtxRecordEvent
-.. autofunction:: cuda.bindings.driver.cuCtxWaitEvent
-
-Module Management
------------------
-
-This section describes the module management functions of the low-level CUDA driver application programming interface.
-
-.. autoclass:: cuda.bindings.driver.CUmoduleLoadingMode
-
-    .. autoattribute:: cuda.bindings.driver.CUmoduleLoadingMode.CU_MODULE_EAGER_LOADING
-
-
-        Lazy Kernel Loading is not enabled
-
-
-    .. autoattribute:: cuda.bindings.driver.CUmoduleLoadingMode.CU_MODULE_LAZY_LOADING
-
-
-        Lazy Kernel Loading is enabled
-
-.. autofunction:: cuda.bindings.driver.cuModuleLoad
-.. autofunction:: cuda.bindings.driver.cuModuleLoadData
-.. autofunction:: cuda.bindings.driver.cuModuleLoadDataEx
-.. autofunction:: cuda.bindings.driver.cuModuleLoadFatBinary
-.. autofunction:: cuda.bindings.driver.cuModuleUnload
-.. autofunction:: cuda.bindings.driver.cuModuleGetLoadingMode
-.. autofunction:: cuda.bindings.driver.cuModuleGetFunction
-.. autofunction:: cuda.bindings.driver.cuModuleGetFunctionCount
-.. autofunction:: cuda.bindings.driver.cuModuleEnumerateFunctions
-.. autofunction:: cuda.bindings.driver.cuModuleGetGlobal
-.. autofunction:: cuda.bindings.driver.cuLinkCreate
-.. autofunction:: cuda.bindings.driver.cuLinkAddData
-.. autofunction:: cuda.bindings.driver.cuLinkAddFile
-.. autofunction:: cuda.bindings.driver.cuLinkComplete
-.. autofunction:: cuda.bindings.driver.cuLinkDestroy
-
-Library Management
-------------------
-
-This section describes the library management functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuLibraryLoadData
-.. autofunction:: cuda.bindings.driver.cuLibraryLoadFromFile
-.. autofunction:: cuda.bindings.driver.cuLibraryUnload
-.. autofunction:: cuda.bindings.driver.cuLibraryGetKernel
-.. autofunction:: cuda.bindings.driver.cuLibraryGetKernelCount
-.. autofunction:: cuda.bindings.driver.cuLibraryEnumerateKernels
-.. autofunction:: cuda.bindings.driver.cuLibraryGetModule
-.. autofunction:: cuda.bindings.driver.cuKernelGetFunction
-.. autofunction:: cuda.bindings.driver.cuKernelGetLibrary
-.. autofunction:: cuda.bindings.driver.cuLibraryGetGlobal
-.. autofunction:: cuda.bindings.driver.cuLibraryGetManaged
-.. autofunction:: cuda.bindings.driver.cuLibraryGetUnifiedFunction
-.. autofunction:: cuda.bindings.driver.cuKernelGetAttribute
-.. autofunction:: cuda.bindings.driver.cuKernelSetAttribute
-.. autofunction:: cuda.bindings.driver.cuKernelSetCacheConfig
-.. autofunction:: cuda.bindings.driver.cuKernelGetName
-.. autofunction:: cuda.bindings.driver.cuKernelGetParamInfo
-
-Memory Management
------------------
-
-This section describes the memory management functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuMemGetInfo
-.. autofunction:: cuda.bindings.driver.cuMemAlloc
-.. autofunction:: cuda.bindings.driver.cuMemAllocPitch
-.. autofunction:: cuda.bindings.driver.cuMemFree
-.. autofunction:: cuda.bindings.driver.cuMemGetAddressRange
-.. autofunction:: cuda.bindings.driver.cuMemAllocHost
-.. autofunction:: cuda.bindings.driver.cuMemFreeHost
-.. autofunction:: cuda.bindings.driver.cuMemHostAlloc
-.. autofunction:: cuda.bindings.driver.cuMemHostGetDevicePointer
-.. autofunction:: cuda.bindings.driver.cuMemHostGetFlags
-.. autofunction:: cuda.bindings.driver.cuMemAllocManaged
-.. autofunction:: cuda.bindings.driver.cuDeviceRegisterAsyncNotification
-.. autofunction:: cuda.bindings.driver.cuDeviceUnregisterAsyncNotification
-.. autofunction:: cuda.bindings.driver.cuDeviceGetByPCIBusId
-.. autofunction:: cuda.bindings.driver.cuDeviceGetPCIBusId
-.. autofunction:: cuda.bindings.driver.cuIpcGetEventHandle
-.. autofunction:: cuda.bindings.driver.cuIpcOpenEventHandle
-.. autofunction:: cuda.bindings.driver.cuIpcGetMemHandle
-.. autofunction:: cuda.bindings.driver.cuIpcOpenMemHandle
-.. autofunction:: cuda.bindings.driver.cuIpcCloseMemHandle
-.. autofunction:: cuda.bindings.driver.cuMemHostRegister
-.. autofunction:: cuda.bindings.driver.cuMemHostUnregister
-.. autofunction:: cuda.bindings.driver.cuMemcpy
-.. autofunction:: cuda.bindings.driver.cuMemcpyPeer
-.. autofunction:: cuda.bindings.driver.cuMemcpyHtoD
-.. autofunction:: cuda.bindings.driver.cuMemcpyDtoH
-.. autofunction:: cuda.bindings.driver.cuMemcpyDtoD
-.. autofunction:: cuda.bindings.driver.cuMemcpyDtoA
-.. autofunction:: cuda.bindings.driver.cuMemcpyAtoD
-.. autofunction:: cuda.bindings.driver.cuMemcpyHtoA
-.. autofunction:: cuda.bindings.driver.cuMemcpyAtoH
-.. autofunction:: cuda.bindings.driver.cuMemcpyAtoA
-.. autofunction:: cuda.bindings.driver.cuMemcpy2D
-.. autofunction:: cuda.bindings.driver.cuMemcpy2DUnaligned
-.. autofunction:: cuda.bindings.driver.cuMemcpy3D
-.. autofunction:: cuda.bindings.driver.cuMemcpy3DPeer
-.. autofunction:: cuda.bindings.driver.cuMemcpyAsync
-.. autofunction:: cuda.bindings.driver.cuMemcpyPeerAsync
-.. autofunction:: cuda.bindings.driver.cuMemcpyHtoDAsync
-.. autofunction:: cuda.bindings.driver.cuMemcpyDtoHAsync
-.. autofunction:: cuda.bindings.driver.cuMemcpyDtoDAsync
-.. autofunction:: cuda.bindings.driver.cuMemcpyHtoAAsync
-.. autofunction:: cuda.bindings.driver.cuMemcpyAtoHAsync
-.. autofunction:: cuda.bindings.driver.cuMemcpy2DAsync
-.. autofunction:: cuda.bindings.driver.cuMemcpy3DAsync
-.. autofunction:: cuda.bindings.driver.cuMemcpy3DPeerAsync
-.. autofunction:: cuda.bindings.driver.cuMemsetD8
-.. autofunction:: cuda.bindings.driver.cuMemsetD16
-.. autofunction:: cuda.bindings.driver.cuMemsetD32
-.. autofunction:: cuda.bindings.driver.cuMemsetD2D8
-.. autofunction:: cuda.bindings.driver.cuMemsetD2D16
-.. autofunction:: cuda.bindings.driver.cuMemsetD2D32
-.. autofunction:: cuda.bindings.driver.cuMemsetD8Async
-.. autofunction:: cuda.bindings.driver.cuMemsetD16Async
-.. autofunction:: cuda.bindings.driver.cuMemsetD32Async
-.. autofunction:: cuda.bindings.driver.cuMemsetD2D8Async
-.. autofunction:: cuda.bindings.driver.cuMemsetD2D16Async
-.. autofunction:: cuda.bindings.driver.cuMemsetD2D32Async
-.. autofunction:: cuda.bindings.driver.cuArrayCreate
-.. autofunction:: cuda.bindings.driver.cuArrayGetDescriptor
-.. autofunction:: cuda.bindings.driver.cuArrayGetSparseProperties
-.. autofunction:: cuda.bindings.driver.cuMipmappedArrayGetSparseProperties
-.. autofunction:: cuda.bindings.driver.cuArrayGetMemoryRequirements
-.. autofunction:: cuda.bindings.driver.cuMipmappedArrayGetMemoryRequirements
-.. autofunction:: cuda.bindings.driver.cuArrayGetPlane
-.. autofunction:: cuda.bindings.driver.cuArrayDestroy
-.. autofunction:: cuda.bindings.driver.cuArray3DCreate
-.. autofunction:: cuda.bindings.driver.cuArray3DGetDescriptor
-.. autofunction:: cuda.bindings.driver.cuMipmappedArrayCreate
-.. autofunction:: cuda.bindings.driver.cuMipmappedArrayGetLevel
-.. autofunction:: cuda.bindings.driver.cuMipmappedArrayDestroy
-.. autofunction:: cuda.bindings.driver.cuMemGetHandleForAddressRange
-
-Virtual Memory Management
--------------------------
-
-This section describes the virtual memory management functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuMemAddressReserve
-.. autofunction:: cuda.bindings.driver.cuMemAddressFree
-.. autofunction:: cuda.bindings.driver.cuMemCreate
-.. autofunction:: cuda.bindings.driver.cuMemRelease
-.. autofunction:: cuda.bindings.driver.cuMemMap
-.. autofunction:: cuda.bindings.driver.cuMemMapArrayAsync
-.. autofunction:: cuda.bindings.driver.cuMemUnmap
-.. autofunction:: cuda.bindings.driver.cuMemSetAccess
-.. autofunction:: cuda.bindings.driver.cuMemGetAccess
-.. autofunction:: cuda.bindings.driver.cuMemExportToShareableHandle
-.. autofunction:: cuda.bindings.driver.cuMemImportFromShareableHandle
-.. autofunction:: cuda.bindings.driver.cuMemGetAllocationGranularity
-.. autofunction:: cuda.bindings.driver.cuMemGetAllocationPropertiesFromHandle
-.. autofunction:: cuda.bindings.driver.cuMemRetainAllocationHandle
-
-Stream Ordered Memory Allocator
--------------------------------
-
-This section describes the stream ordered memory allocator exposed by the low-level CUDA driver application programming interface.
-
-
-
-
-
-**Overview**
-
-
-
-The asynchronous allocator allows the user to allocate and free in stream order. All asynchronous accesses of the allocation must happen between the stream executions of the allocation and the free. If the memory is accessed outside of the promised stream order, a use before allocation / use after free error will cause undefined behavior.
-
-The allocator is free to reallocate the memory as long as it can guarantee that compliant memory accesses will not overlap temporally. The allocator may refer to internal stream ordering as well as inter-stream dependencies (such as CUDA events and null stream dependencies) when establishing the temporal guarantee. The allocator may also insert inter-stream dependencies to establish the temporal guarantee.
-
-
-
-
-
-**Supported Platforms**
-
-
-
-Whether or not a device supports the integrated stream ordered memory allocator may be queried by calling cuDeviceGetAttribute() with the device attribute CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED.
-
-.. autofunction:: cuda.bindings.driver.cuMemFreeAsync
-.. autofunction:: cuda.bindings.driver.cuMemAllocAsync
-.. autofunction:: cuda.bindings.driver.cuMemPoolTrimTo
-.. autofunction:: cuda.bindings.driver.cuMemPoolSetAttribute
-.. autofunction:: cuda.bindings.driver.cuMemPoolGetAttribute
-.. autofunction:: cuda.bindings.driver.cuMemPoolSetAccess
-.. autofunction:: cuda.bindings.driver.cuMemPoolGetAccess
-.. autofunction:: cuda.bindings.driver.cuMemPoolCreate
-.. autofunction:: cuda.bindings.driver.cuMemPoolDestroy
-.. autofunction:: cuda.bindings.driver.cuMemAllocFromPoolAsync
-.. autofunction:: cuda.bindings.driver.cuMemPoolExportToShareableHandle
-.. autofunction:: cuda.bindings.driver.cuMemPoolImportFromShareableHandle
-.. autofunction:: cuda.bindings.driver.cuMemPoolExportPointer
-.. autofunction:: cuda.bindings.driver.cuMemPoolImportPointer
-
-Multicast Object Management
----------------------------
-
-This section describes the CUDA multicast object operations exposed by the low-level CUDA driver application programming interface.
-
-
-
-
-
-**Overview**
-
-
-
-A multicast object created via cuMulticastCreate enables certain memory operations to be broadcast to a team of devices. Devices can be added to a multicast object via cuMulticastAddDevice. Memory can be bound on each participating device via either cuMulticastBindMem or cuMulticastBindAddr. Multicast objects can be mapped into a device's virtual address space using the virtual memory management APIs (see cuMemMap and cuMemSetAccess).
-
-
-
-
-
-**Supported Platforms**
-
-
-
-Support for multicast on a specific device can be queried using the device attribute CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED.
-
-.. autofunction:: cuda.bindings.driver.cuMulticastCreate
-.. autofunction:: cuda.bindings.driver.cuMulticastAddDevice
-.. autofunction:: cuda.bindings.driver.cuMulticastBindMem
-.. autofunction:: cuda.bindings.driver.cuMulticastBindAddr
-.. autofunction:: cuda.bindings.driver.cuMulticastUnbind
-.. autofunction:: cuda.bindings.driver.cuMulticastGetGranularity
-
-Unified Addressing
-------------------
-
-This section describes the unified addressing functions of the low-level CUDA driver application programming interface.
-
-
-
-
-
-**Overview**
-
-
-
-CUDA devices can share a unified address space with the host. For these devices there is no distinction between a device pointer and a host pointer -- the same pointer value may be used to access memory from the host program and from a kernel running on the device (with exceptions enumerated below).
-
-
-
-
-
-**Supported Platforms**
-
-
-
-Whether or not a device supports unified addressing may be queried by calling cuDeviceGetAttribute() with the device attribute CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING.
-
-Unified addressing is automatically enabled in 64-bit processes.
-
-
-
-
-
-**Looking Up Information from Pointer Values**
-
-
-
-It is possible to look up information about the memory which backs a pointer value. For instance, one may want to know if a pointer points to host or device memory. As another example, in the case of device memory, one may want to know on which CUDA device the memory resides. These properties may be queried using the function cuPointerGetAttribute().
-
-Since pointers are unique, it is not necessary to specify information about the pointers specified to the various copy functions in the CUDA API. The function cuMemcpy() may be used to perform a copy between two pointers, ignoring whether they point to host or device memory (making cuMemcpyHtoD(), cuMemcpyDtoD(), and cuMemcpyDtoH() unnecessary for devices supporting unified addressing). For multidimensional copies, the memory type CU_MEMORYTYPE_UNIFIED may be used to specify that the CUDA driver should infer the location of the pointer from its value.
-
-
-
-
-
-**Automatic Mapping of Host Allocated Host Memory**
-
-
-
-All host memory allocated in all contexts using cuMemAllocHost() and cuMemHostAlloc() is always directly accessible from all contexts on all devices that support unified addressing. This is the case regardless of whether or not the flags CU_MEMHOSTALLOC_PORTABLE and CU_MEMHOSTALLOC_DEVICEMAP are specified.
-
-The pointer value through which allocated host memory may be accessed in kernels on all devices that support unified addressing is the same as the pointer value through which that memory is accessed on the host, so it is not necessary to call cuMemHostGetDevicePointer() to get the device pointer for these allocations.
-
-Note that this is not the case for memory allocated using the flag CU_MEMHOSTALLOC_WRITECOMBINED, as discussed below.
-
-
-
-
-
-**Automatic Registration of Peer Memory**
-
-
-
-Upon enabling direct access from a context that supports unified addressing to another peer context that supports unified addressing using cuCtxEnablePeerAccess() all memory allocated in the peer context using cuMemAlloc() and cuMemAllocPitch() will immediately be accessible by the current context. The device pointer value through which any peer memory may be accessed in the current context is the same pointer value through which that memory may be accessed in the peer context.
-
-
-
-
-
-**Exceptions, Disjoint Addressing**
-
-
-
-Not all memory may be accessed on devices through the same pointer value through which it is accessed on the host. These exceptions are host memory registered using cuMemHostRegister() and host memory allocated using the flag CU_MEMHOSTALLOC_WRITECOMBINED. For these exceptions, there exists a distinct host and device address for the memory. The device address is guaranteed to not overlap any valid host pointer range and is guaranteed to have the same value across all contexts that support unified addressing.
-
-This device address may be queried using cuMemHostGetDevicePointer() when a context using unified addressing is current. Either the host or the unified device pointer value may be used to refer to this memory through cuMemcpy() and similar functions using the CU_MEMORYTYPE_UNIFIED memory type.
-
-.. autofunction:: cuda.bindings.driver.cuPointerGetAttribute
-.. autofunction:: cuda.bindings.driver.cuMemPrefetchAsync
-.. autofunction:: cuda.bindings.driver.cuMemPrefetchAsync_v2
-.. autofunction:: cuda.bindings.driver.cuMemAdvise
-.. autofunction:: cuda.bindings.driver.cuMemAdvise_v2
-.. autofunction:: cuda.bindings.driver.cuMemRangeGetAttribute
-.. autofunction:: cuda.bindings.driver.cuMemRangeGetAttributes
-.. autofunction:: cuda.bindings.driver.cuPointerSetAttribute
-.. autofunction:: cuda.bindings.driver.cuPointerGetAttributes
-
-Stream Management
------------------
-
-This section describes the stream management functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuStreamCreate
-.. autofunction:: cuda.bindings.driver.cuStreamCreateWithPriority
-.. autofunction:: cuda.bindings.driver.cuStreamGetPriority
-.. autofunction:: cuda.bindings.driver.cuStreamGetFlags
-.. autofunction:: cuda.bindings.driver.cuStreamGetId
-.. autofunction:: cuda.bindings.driver.cuStreamGetCtx
-.. autofunction:: cuda.bindings.driver.cuStreamGetCtx_v2
-.. autofunction:: cuda.bindings.driver.cuStreamWaitEvent
-.. autofunction:: cuda.bindings.driver.cuStreamAddCallback
-.. autofunction:: cuda.bindings.driver.cuStreamBeginCapture
-.. autofunction:: cuda.bindings.driver.cuStreamBeginCaptureToGraph
-.. autofunction:: cuda.bindings.driver.cuThreadExchangeStreamCaptureMode
-.. autofunction:: cuda.bindings.driver.cuStreamEndCapture
-.. autofunction:: cuda.bindings.driver.cuStreamIsCapturing
-.. autofunction:: cuda.bindings.driver.cuStreamGetCaptureInfo
-.. autofunction:: cuda.bindings.driver.cuStreamGetCaptureInfo_v3
-.. autofunction:: cuda.bindings.driver.cuStreamUpdateCaptureDependencies
-.. autofunction:: cuda.bindings.driver.cuStreamUpdateCaptureDependencies_v2
-.. autofunction:: cuda.bindings.driver.cuStreamAttachMemAsync
-.. autofunction:: cuda.bindings.driver.cuStreamQuery
-.. autofunction:: cuda.bindings.driver.cuStreamSynchronize
-.. autofunction:: cuda.bindings.driver.cuStreamDestroy
-.. autofunction:: cuda.bindings.driver.cuStreamCopyAttributes
-.. autofunction:: cuda.bindings.driver.cuStreamGetAttribute
-.. autofunction:: cuda.bindings.driver.cuStreamSetAttribute
-
-Event Management
-----------------
-
-This section describes the event management functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuEventCreate
-.. autofunction:: cuda.bindings.driver.cuEventRecord
-.. autofunction:: cuda.bindings.driver.cuEventRecordWithFlags
-.. autofunction:: cuda.bindings.driver.cuEventQuery
-.. autofunction:: cuda.bindings.driver.cuEventSynchronize
-.. autofunction:: cuda.bindings.driver.cuEventDestroy
-.. autofunction:: cuda.bindings.driver.cuEventElapsedTime
-
-External Resource Interoperability
-----------------------------------
-
-This section describes the external resource interoperability functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuImportExternalMemory
-.. autofunction:: cuda.bindings.driver.cuExternalMemoryGetMappedBuffer
-.. autofunction:: cuda.bindings.driver.cuExternalMemoryGetMappedMipmappedArray
-.. autofunction:: cuda.bindings.driver.cuDestroyExternalMemory
-.. autofunction:: cuda.bindings.driver.cuImportExternalSemaphore
-.. autofunction:: cuda.bindings.driver.cuSignalExternalSemaphoresAsync
-.. autofunction:: cuda.bindings.driver.cuWaitExternalSemaphoresAsync
-.. autofunction:: cuda.bindings.driver.cuDestroyExternalSemaphore
-
-Stream Memory Operations
-------------------------
-
-This section describes the stream memory operations of the low-level CUDA driver application programming interface.
-
-
-
-Support for the CU_STREAM_WAIT_VALUE_NOR flag can be queried with CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2.
-
-
-
-Support for the cuStreamWriteValue64() and cuStreamWaitValue64() functions, as well as for the CU_STREAM_MEM_OP_WAIT_VALUE_64 and CU_STREAM_MEM_OP_WRITE_VALUE_64 flags, can be queried with CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS.
-
-
-
-Support for both CU_STREAM_WAIT_VALUE_FLUSH and CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES requires dedicated platform hardware features and can be queried with cuDeviceGetAttribute() and CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES.
-
-
-
-Note that all memory pointers passed as parameters to these operations are device pointers. Where necessary a device pointer should be obtained, for example with cuMemHostGetDevicePointer().
-
-
-
-None of the operations accepts pointers to managed memory buffers (cuMemAllocManaged).
-
-
-
-Warning: Improper use of these APIs may deadlock the application. Synchronization ordering established through these APIs is not visible to CUDA. CUDA tasks that are (even indirectly) ordered by these APIs should also have that order expressed with CUDA-visible dependencies such as events. This ensures that the scheduler does not serialize them in an improper order.
-
-.. autofunction:: cuda.bindings.driver.cuStreamWaitValue32
-.. autofunction:: cuda.bindings.driver.cuStreamWaitValue64
-.. autofunction:: cuda.bindings.driver.cuStreamWriteValue32
-.. autofunction:: cuda.bindings.driver.cuStreamWriteValue64
-.. autofunction:: cuda.bindings.driver.cuStreamBatchMemOp
-
-Execution Control
------------------
-
-This section describes the execution control functions of the low-level CUDA driver application programming interface.
-
-.. autoclass:: cuda.bindings.driver.CUfunctionLoadingState
-
-    .. autoattribute:: cuda.bindings.driver.CUfunctionLoadingState.CU_FUNCTION_LOADING_STATE_UNLOADED
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunctionLoadingState.CU_FUNCTION_LOADING_STATE_LOADED
-
-
-    .. autoattribute:: cuda.bindings.driver.CUfunctionLoadingState.CU_FUNCTION_LOADING_STATE_MAX
-
-.. autofunction:: cuda.bindings.driver.cuFuncGetAttribute
-.. autofunction:: cuda.bindings.driver.cuFuncSetAttribute
-.. autofunction:: cuda.bindings.driver.cuFuncSetCacheConfig
-.. autofunction:: cuda.bindings.driver.cuFuncGetModule
-.. autofunction:: cuda.bindings.driver.cuFuncGetName
-.. autofunction:: cuda.bindings.driver.cuFuncGetParamInfo
-.. autofunction:: cuda.bindings.driver.cuFuncIsLoaded
-.. autofunction:: cuda.bindings.driver.cuFuncLoad
-.. autofunction:: cuda.bindings.driver.cuLaunchKernel
-.. autofunction:: cuda.bindings.driver.cuLaunchKernelEx
-.. autofunction:: cuda.bindings.driver.cuLaunchCooperativeKernel
-.. autofunction:: cuda.bindings.driver.cuLaunchCooperativeKernelMultiDevice
-.. autofunction:: cuda.bindings.driver.cuLaunchHostFunc
-
-Graph Management
-----------------
-
-This section describes the graph management functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuGraphCreate
-.. autofunction:: cuda.bindings.driver.cuGraphAddKernelNode
-.. autofunction:: cuda.bindings.driver.cuGraphKernelNodeGetParams
-.. autofunction:: cuda.bindings.driver.cuGraphKernelNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphAddMemcpyNode
-.. autofunction:: cuda.bindings.driver.cuGraphMemcpyNodeGetParams
-.. autofunction:: cuda.bindings.driver.cuGraphMemcpyNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphAddMemsetNode
-.. autofunction:: cuda.bindings.driver.cuGraphMemsetNodeGetParams
-.. autofunction:: cuda.bindings.driver.cuGraphMemsetNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphAddHostNode
-.. autofunction:: cuda.bindings.driver.cuGraphHostNodeGetParams
-.. autofunction:: cuda.bindings.driver.cuGraphHostNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphAddChildGraphNode
-.. autofunction:: cuda.bindings.driver.cuGraphChildGraphNodeGetGraph
-.. autofunction:: cuda.bindings.driver.cuGraphAddEmptyNode
-.. autofunction:: cuda.bindings.driver.cuGraphAddEventRecordNode
-.. autofunction:: cuda.bindings.driver.cuGraphEventRecordNodeGetEvent
-.. autofunction:: cuda.bindings.driver.cuGraphEventRecordNodeSetEvent
-.. autofunction:: cuda.bindings.driver.cuGraphAddEventWaitNode
-.. autofunction:: cuda.bindings.driver.cuGraphEventWaitNodeGetEvent
-.. autofunction:: cuda.bindings.driver.cuGraphEventWaitNodeSetEvent
-.. autofunction:: cuda.bindings.driver.cuGraphAddExternalSemaphoresSignalNode
-.. autofunction:: cuda.bindings.driver.cuGraphExternalSemaphoresSignalNodeGetParams
-.. autofunction:: cuda.bindings.driver.cuGraphExternalSemaphoresSignalNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphAddExternalSemaphoresWaitNode
-.. autofunction:: cuda.bindings.driver.cuGraphExternalSemaphoresWaitNodeGetParams
-.. autofunction:: cuda.bindings.driver.cuGraphExternalSemaphoresWaitNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphAddBatchMemOpNode
-.. autofunction:: cuda.bindings.driver.cuGraphBatchMemOpNodeGetParams
-.. autofunction:: cuda.bindings.driver.cuGraphBatchMemOpNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphExecBatchMemOpNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphAddMemAllocNode
-.. autofunction:: cuda.bindings.driver.cuGraphMemAllocNodeGetParams
-.. autofunction:: cuda.bindings.driver.cuGraphAddMemFreeNode
-.. autofunction:: cuda.bindings.driver.cuGraphMemFreeNodeGetParams
-.. autofunction:: cuda.bindings.driver.cuDeviceGraphMemTrim
-.. autofunction:: cuda.bindings.driver.cuDeviceGetGraphMemAttribute
-.. autofunction:: cuda.bindings.driver.cuDeviceSetGraphMemAttribute
-.. autofunction:: cuda.bindings.driver.cuGraphClone
-.. autofunction:: cuda.bindings.driver.cuGraphNodeFindInClone
-.. autofunction:: cuda.bindings.driver.cuGraphNodeGetType
-.. autofunction:: cuda.bindings.driver.cuGraphGetNodes
-.. autofunction:: cuda.bindings.driver.cuGraphGetRootNodes
-.. autofunction:: cuda.bindings.driver.cuGraphGetEdges
-.. autofunction:: cuda.bindings.driver.cuGraphGetEdges_v2
-.. autofunction:: cuda.bindings.driver.cuGraphNodeGetDependencies
-.. autofunction:: cuda.bindings.driver.cuGraphNodeGetDependencies_v2
-.. autofunction:: cuda.bindings.driver.cuGraphNodeGetDependentNodes
-.. autofunction:: cuda.bindings.driver.cuGraphNodeGetDependentNodes_v2
-.. autofunction:: cuda.bindings.driver.cuGraphAddDependencies
-.. autofunction:: cuda.bindings.driver.cuGraphAddDependencies_v2
-.. autofunction:: cuda.bindings.driver.cuGraphRemoveDependencies
-.. autofunction:: cuda.bindings.driver.cuGraphRemoveDependencies_v2
-.. autofunction:: cuda.bindings.driver.cuGraphDestroyNode
-.. autofunction:: cuda.bindings.driver.cuGraphInstantiate
-.. autofunction:: cuda.bindings.driver.cuGraphInstantiateWithParams
-.. autofunction:: cuda.bindings.driver.cuGraphExecGetFlags
-.. autofunction:: cuda.bindings.driver.cuGraphExecKernelNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphExecMemcpyNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphExecMemsetNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphExecHostNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphExecChildGraphNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphExecEventRecordNodeSetEvent
-.. autofunction:: cuda.bindings.driver.cuGraphExecEventWaitNodeSetEvent
-.. autofunction:: cuda.bindings.driver.cuGraphExecExternalSemaphoresSignalNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphExecExternalSemaphoresWaitNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphNodeSetEnabled
-.. autofunction:: cuda.bindings.driver.cuGraphNodeGetEnabled
-.. autofunction:: cuda.bindings.driver.cuGraphUpload
-.. autofunction:: cuda.bindings.driver.cuGraphLaunch
-.. autofunction:: cuda.bindings.driver.cuGraphExecDestroy
-.. autofunction:: cuda.bindings.driver.cuGraphDestroy
-.. autofunction:: cuda.bindings.driver.cuGraphExecUpdate
-.. autofunction:: cuda.bindings.driver.cuGraphKernelNodeCopyAttributes
-.. autofunction:: cuda.bindings.driver.cuGraphKernelNodeGetAttribute
-.. autofunction:: cuda.bindings.driver.cuGraphKernelNodeSetAttribute
-.. autofunction:: cuda.bindings.driver.cuGraphDebugDotPrint
-.. autofunction:: cuda.bindings.driver.cuUserObjectCreate
-.. autofunction:: cuda.bindings.driver.cuUserObjectRetain
-.. autofunction:: cuda.bindings.driver.cuUserObjectRelease
-.. autofunction:: cuda.bindings.driver.cuGraphRetainUserObject
-.. autofunction:: cuda.bindings.driver.cuGraphReleaseUserObject
-.. autofunction:: cuda.bindings.driver.cuGraphAddNode
-.. autofunction:: cuda.bindings.driver.cuGraphAddNode_v2
-.. autofunction:: cuda.bindings.driver.cuGraphNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphExecNodeSetParams
-.. autofunction:: cuda.bindings.driver.cuGraphConditionalHandleCreate
-
-Occupancy
----------
-
-This section describes the occupancy calculation functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuOccupancyMaxActiveBlocksPerMultiprocessor
-.. autofunction:: cuda.bindings.driver.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
-.. autofunction:: cuda.bindings.driver.cuOccupancyMaxPotentialBlockSize
-.. autofunction:: cuda.bindings.driver.cuOccupancyMaxPotentialBlockSizeWithFlags
-.. autofunction:: cuda.bindings.driver.cuOccupancyAvailableDynamicSMemPerBlock
-.. autofunction:: cuda.bindings.driver.cuOccupancyMaxPotentialClusterSize
-.. autofunction:: cuda.bindings.driver.cuOccupancyMaxActiveClusters
-
-Texture Object Management
--------------------------
-
-This section describes the texture object management functions of the low-level CUDA driver application programming interface. The texture object API is only supported on devices of compute capability 3.0 or higher.
-
-.. autofunction:: cuda.bindings.driver.cuTexObjectCreate
-.. autofunction:: cuda.bindings.driver.cuTexObjectDestroy
-.. autofunction:: cuda.bindings.driver.cuTexObjectGetResourceDesc
-.. autofunction:: cuda.bindings.driver.cuTexObjectGetTextureDesc
-.. autofunction:: cuda.bindings.driver.cuTexObjectGetResourceViewDesc
-
-Surface Object Management
--------------------------
-
-This section describes the surface object management functions of the low-level CUDA driver application programming interface. The surface object API is only supported on devices of compute capability 3.0 or higher.
-
-.. autofunction:: cuda.bindings.driver.cuSurfObjectCreate
-.. autofunction:: cuda.bindings.driver.cuSurfObjectDestroy
-.. autofunction:: cuda.bindings.driver.cuSurfObjectGetResourceDesc
-
-Tensor Map Object Management
-----------------------------
-
-This section describes the tensor map object management functions of the low-level CUDA driver application programming interface. The tensor core API is only supported on devices of compute capability 9.0 or higher.
-
-.. autofunction:: cuda.bindings.driver.cuTensorMapEncodeTiled
-.. autofunction:: cuda.bindings.driver.cuTensorMapEncodeIm2col
-.. autofunction:: cuda.bindings.driver.cuTensorMapReplaceAddress
-
-Peer Context Memory Access
---------------------------
-
-This section describes the direct peer context memory access functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuDeviceCanAccessPeer
-.. autofunction:: cuda.bindings.driver.cuCtxEnablePeerAccess
-.. autofunction:: cuda.bindings.driver.cuCtxDisablePeerAccess
-.. autofunction:: cuda.bindings.driver.cuDeviceGetP2PAttribute
-
-Graphics Interoperability
--------------------------
-
-This section describes the graphics interoperability functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuGraphicsUnregisterResource
-.. autofunction:: cuda.bindings.driver.cuGraphicsSubResourceGetMappedArray
-.. autofunction:: cuda.bindings.driver.cuGraphicsResourceGetMappedMipmappedArray
-.. autofunction:: cuda.bindings.driver.cuGraphicsResourceGetMappedPointer
-.. autofunction:: cuda.bindings.driver.cuGraphicsResourceSetMapFlags
-.. autofunction:: cuda.bindings.driver.cuGraphicsMapResources
-.. autofunction:: cuda.bindings.driver.cuGraphicsUnmapResources
-
-Driver Entry Point Access
--------------------------
-
-This section describes the driver entry point access functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuGetProcAddress
-
-Coredump Attributes Control API
--------------------------------
-
-This section describes the coredump attribute control functions of the low-level CUDA driver application programming interface.
-
-.. autoclass:: cuda.bindings.driver.CUcoredumpSettings
-
-    .. autoattribute:: cuda.bindings.driver.CUcoredumpSettings.CU_COREDUMP_ENABLE_ON_EXCEPTION
-
-
-    .. autoattribute:: cuda.bindings.driver.CUcoredumpSettings.CU_COREDUMP_TRIGGER_HOST
-
-
-    .. autoattribute:: cuda.bindings.driver.CUcoredumpSettings.CU_COREDUMP_LIGHTWEIGHT
-
-
-    .. autoattribute:: cuda.bindings.driver.CUcoredumpSettings.CU_COREDUMP_ENABLE_USER_TRIGGER
-
-
-    .. autoattribute:: cuda.bindings.driver.CUcoredumpSettings.CU_COREDUMP_FILE
-
-
-    .. autoattribute:: cuda.bindings.driver.CUcoredumpSettings.CU_COREDUMP_PIPE
-
-
-    .. autoattribute:: cuda.bindings.driver.CUcoredumpSettings.CU_COREDUMP_GENERATION_FLAGS
-
-
-    .. autoattribute:: cuda.bindings.driver.CUcoredumpSettings.CU_COREDUMP_MAX
-
-.. autoclass:: cuda.bindings.driver.CUCoredumpGenerationFlags
-
-    .. autoattribute:: cuda.bindings.driver.CUCoredumpGenerationFlags.CU_COREDUMP_DEFAULT_FLAGS
-
-
-    .. autoattribute:: cuda.bindings.driver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES
-
-
-    .. autoattribute:: cuda.bindings.driver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_GLOBAL_MEMORY
-
-
-    .. autoattribute:: cuda.bindings.driver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_SHARED_MEMORY
-
-
-    .. autoattribute:: cuda.bindings.driver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_LOCAL_MEMORY
-
-
-    .. autoattribute:: cuda.bindings.driver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_ABORT
-
-
-    .. autoattribute:: cuda.bindings.driver.CUCoredumpGenerationFlags.CU_COREDUMP_SKIP_CONSTBANK_MEMORY
-
-
-    .. autoattribute:: cuda.bindings.driver.CUCoredumpGenerationFlags.CU_COREDUMP_LIGHTWEIGHT_FLAGS
-
-.. autofunction:: cuda.bindings.driver.cuCoredumpGetAttribute
-.. autofunction:: cuda.bindings.driver.cuCoredumpGetAttributeGlobal
-.. autofunction:: cuda.bindings.driver.cuCoredumpSetAttribute
-.. autofunction:: cuda.bindings.driver.cuCoredumpSetAttributeGlobal
-
-Green Contexts
---------------
-
-This section describes the APIs for creation and manipulation of green contexts in the CUDA driver. Green contexts are a lightweight alternative to traditional contexts, with the ability to pass in a set of resources that they should be initialized with. This allows the developer to represent distinct spatial partitions of the GPU, provision resources for them, and target them via the same programming model that CUDA exposes (streams, kernel launches, etc.).
-
-
-
-There are 4 main steps to using this new set of APIs.
-
-- (1) Start with an initial set of resources, for example via cuDeviceGetDevResource. Only SM type is supported today.
-
-
-
-
-
-
-
-- (2) Partition this set of resources by providing them as input to a partition API, for example: cuDevSmResourceSplitByCount.
-
-
-
-
-
-
-
-- (3) Finalize the specification of resources by creating a descriptor via cuDevResourceGenerateDesc.
-
-
-
-
-
-
-
-- (4) Provision the resources and create a green context via cuGreenCtxCreate.
-
-
-
-
-
-
-
-
-
-
-
-For ``CU_DEV_RESOURCE_TYPE_SM``\ , the partitions created have minimum SM count requirements, often rounding up and aligning the minCount provided to cuDevSmResourceSplitByCount. The following is a guideline for each architecture and may be subject to change:
-
-- On Compute Architecture 6.X: The minimum count is 1 SM.
-
-
-
-
-
-
-
-- On Compute Architecture 7.X: The minimum count is 2 SMs and must be a multiple of 2.
-
-
-
-
-
-
-
-- On Compute Architecture 8.X: The minimum count is 4 SMs and must be a multiple of 2.
-
-
-
-
-
-
-
-- On Compute Architecture 9.0+: The minimum count is 8 SMs and must be a multiple of 8.
-
-
-
-
-
-
-
-
-
-
-
-In the future, flags can be provided to trade off functional and performance characteristics versus finer grained SM partitions.
-
-
-
-Even if the green contexts have disjoint SM partitions, it is not guaranteed that the kernels launched in them will run concurrently or have forward progress guarantees. This is due to other resources (like HW connections, see CUDA_DEVICE_MAX_CONNECTIONS) that could cause a dependency. Additionally, in certain scenarios, it is possible for the workload to run on more SMs than were provisioned (but never fewer). The following are two scenarios which can exhibit this behavior:
-
-- On Volta+ MPS: When ``CUDA_MPS_ACTIVE_THREAD_PERCENTAGE``\  is used, the set of SMs that are used for running kernels can be scaled up to the value of SMs used for the MPS client.
-
-
-
-
-
-
-
-- On Compute Architecture 9.x: When a module with dynamic parallelism (CDP) is loaded, all future kernels running under green contexts may use and share an additional set of 2 SMs.
-
-.. autoclass:: cuda.bindings.driver.CUdevSmResource_st
-.. autoclass:: cuda.bindings.driver.CUdevResource_st
-.. autoclass:: cuda.bindings.driver.CUdevSmResource
-.. autoclass:: cuda.bindings.driver.CUdevResource
-.. autoclass:: cuda.bindings.driver.CUgreenCtxCreate_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUgreenCtxCreate_flags.CU_GREEN_CTX_DEFAULT_STREAM
-
-
-        Required. Creates a default stream to use inside the green context
-
-.. autoclass:: cuda.bindings.driver.CUdevSmResourceSplit_flags
-
-    .. autoattribute:: cuda.bindings.driver.CUdevSmResourceSplit_flags.CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevSmResourceSplit_flags.CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE
-
-.. autoclass:: cuda.bindings.driver.CUdevResourceType
-
-    .. autoattribute:: cuda.bindings.driver.CUdevResourceType.CU_DEV_RESOURCE_TYPE_INVALID
-
-
-    .. autoattribute:: cuda.bindings.driver.CUdevResourceType.CU_DEV_RESOURCE_TYPE_SM
-
-
-        Streaming multiprocessors related information
-
-.. autoclass:: cuda.bindings.driver.CUdevResourceDesc
-.. autoclass:: cuda.bindings.driver.CUdevSmResource
-.. autofunction:: cuda.bindings.driver._CONCAT_OUTER
-.. autofunction:: cuda.bindings.driver.cuGreenCtxCreate
-.. autofunction:: cuda.bindings.driver.cuGreenCtxDestroy
-.. autofunction:: cuda.bindings.driver.cuCtxFromGreenCtx
-.. autofunction:: cuda.bindings.driver.cuDeviceGetDevResource
-.. autofunction:: cuda.bindings.driver.cuCtxGetDevResource
-.. autofunction:: cuda.bindings.driver.cuGreenCtxGetDevResource
-.. autofunction:: cuda.bindings.driver.cuDevSmResourceSplitByCount
-.. autofunction:: cuda.bindings.driver.cuDevResourceGenerateDesc
-.. autofunction:: cuda.bindings.driver.cuGreenCtxRecordEvent
-.. autofunction:: cuda.bindings.driver.cuGreenCtxWaitEvent
-.. autofunction:: cuda.bindings.driver.cuStreamGetGreenCtx
-.. autofunction:: cuda.bindings.driver.cuGreenCtxStreamCreate
-.. autoattribute:: cuda.bindings.driver.RESOURCE_ABI_VERSION
-.. autoattribute:: cuda.bindings.driver.RESOURCE_ABI_EXTERNAL_BYTES
-.. autoattribute:: cuda.bindings.driver._CONCAT_INNER
-.. autoattribute:: cuda.bindings.driver._CONCAT_OUTER
-
-EGL Interoperability
---------------------
-
-This section describes the EGL interoperability functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuGraphicsEGLRegisterImage
-.. autofunction:: cuda.bindings.driver.cuEGLStreamConsumerConnect
-.. autofunction:: cuda.bindings.driver.cuEGLStreamConsumerConnectWithFlags
-.. autofunction:: cuda.bindings.driver.cuEGLStreamConsumerDisconnect
-.. autofunction:: cuda.bindings.driver.cuEGLStreamConsumerAcquireFrame
-.. autofunction:: cuda.bindings.driver.cuEGLStreamConsumerReleaseFrame
-.. autofunction:: cuda.bindings.driver.cuEGLStreamProducerConnect
-.. autofunction:: cuda.bindings.driver.cuEGLStreamProducerDisconnect
-.. autofunction:: cuda.bindings.driver.cuEGLStreamProducerPresentFrame
-.. autofunction:: cuda.bindings.driver.cuEGLStreamProducerReturnFrame
-.. autofunction:: cuda.bindings.driver.cuGraphicsResourceGetMappedEglFrame
-.. autofunction:: cuda.bindings.driver.cuEventCreateFromEGLSync
-
-OpenGL Interoperability
------------------------
-
-This section describes the OpenGL interoperability functions of the low-level CUDA driver application programming interface. Note that mapping of OpenGL resources is performed with the graphics API agnostic, resource mapping interface described in Graphics Interoperability.
-
-.. autoclass:: cuda.bindings.driver.CUGLDeviceList
-
-    .. autoattribute:: cuda.bindings.driver.CUGLDeviceList.CU_GL_DEVICE_LIST_ALL
-
-
-        The CUDA devices for all GPUs used by the current OpenGL context
-
-
-    .. autoattribute:: cuda.bindings.driver.CUGLDeviceList.CU_GL_DEVICE_LIST_CURRENT_FRAME
-
-
-        The CUDA devices for the GPUs used by the current OpenGL context in its currently rendering frame
-
-
-    .. autoattribute:: cuda.bindings.driver.CUGLDeviceList.CU_GL_DEVICE_LIST_NEXT_FRAME
-
-
-        The CUDA devices for the GPUs to be used by the current OpenGL context in the next frame
-
-.. autofunction:: cuda.bindings.driver.cuGraphicsGLRegisterBuffer
-.. autofunction:: cuda.bindings.driver.cuGraphicsGLRegisterImage
-.. autofunction:: cuda.bindings.driver.cuGLGetDevices
-
-Profiler Control
-----------------
-
-This section describes the profiler control functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuProfilerStart
-.. autofunction:: cuda.bindings.driver.cuProfilerStop
-
-VDPAU Interoperability
-----------------------
-
-This section describes the VDPAU interoperability functions of the low-level CUDA driver application programming interface.
-
-.. autofunction:: cuda.bindings.driver.cuVDPAUGetDevice
-.. autofunction:: cuda.bindings.driver.cuVDPAUCtxCreate
-.. autofunction:: cuda.bindings.driver.cuGraphicsVDPAURegisterVideoSurface
-.. autofunction:: cuda.bindings.driver.cuGraphicsVDPAURegisterOutputSurface
diff --git a/docs_src/source/module/nvrtc.rst b/docs_src/source/module/nvrtc.rst
deleted file mode 100644
index 2a1297c1..00000000
--- a/docs_src/source/module/nvrtc.rst
+++ /dev/null
@@ -1,1119 +0,0 @@
------
-nvrtc
------
-
-Error Handling
---------------
-
-NVRTC defines the following enumeration type and function for API call error handling.
-
-.. autoclass:: cuda.bindings.nvrtc.nvrtcResult
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_SUCCESS
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_OUT_OF_MEMORY
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_PROGRAM_CREATION_FAILURE
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_INVALID_INPUT
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_INVALID_PROGRAM
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_INVALID_OPTION
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_COMPILATION
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_BUILTIN_OPERATION_FAILURE
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_INTERNAL_ERROR
-
-
-    .. autoattribute:: cuda.bindings.nvrtc.nvrtcResult.NVRTC_ERROR_TIME_FILE_WRITE_FAILED
-
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetErrorString
-
-General Information Query
--------------------------
-
-NVRTC defines the following function for general information query.
-
-.. autofunction:: cuda.bindings.nvrtc.nvrtcVersion
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetNumSupportedArchs
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetSupportedArchs
-
-Compilation
------------
-
-NVRTC defines the following type and functions for actual compilation.
-
-.. autoclass:: cuda.bindings.nvrtc.nvrtcProgram
-.. autofunction:: cuda.bindings.nvrtc.nvrtcCreateProgram
-.. autofunction:: cuda.bindings.nvrtc.nvrtcDestroyProgram
-.. autofunction:: cuda.bindings.nvrtc.nvrtcCompileProgram
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetPTXSize
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetPTX
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetCUBINSize
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetCUBIN
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetNVVMSize
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetNVVM
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetLTOIRSize
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetLTOIR
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetOptiXIRSize
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetOptiXIR
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetProgramLogSize
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetProgramLog
-.. autofunction:: cuda.bindings.nvrtc.nvrtcAddNameExpression
-.. autofunction:: cuda.bindings.nvrtc.nvrtcGetLoweredName
-
-Supported Compile Options
--------------------------
-
-NVRTC supports the compile options below. Option names with two preceding dashes (``--``\ ) are long option names and option names with one preceding dash (``-``\ ) are short option names. Short option names can be used instead of long option names. When a compile option takes an argument, an assignment operator (``=``\ ) is used to separate the compile option argument from the compile option name, e.g., ``"--gpu-architecture=compute_60"``\ . Alternatively, the compile option name and the argument can be specified in separate strings without an assignment operator, e.g., ``"--gpu-architecture"``\  ``"compute_60"``\ . Single-character short option names, such as ``-D``\ , ``-U``\ , and ``-I``\ , do not require an assignment operator, and the compile option name and the argument can be present in the same string with or without spaces between them. For instance, ``"-D=<def>"``\ , ``"-D<def>"``\ , and ``"-D <def>"``\  are all supported.
-
-
-
-The valid compiler options are:
-
-
-
-
-
-- Compilation targets
-
-
-
-
-
-  - ``--gpu-architecture=<arch>``\  (``-arch``\ )
-
-
-
-    Specify the name of the class of GPU architectures for which the input must be compiled.
-
-
-
-
-
-
-
-    - Valid ``<arch>``\ s:
-
-
-
-
-
-      - ``compute_50``\  
-
-
-
-
-
-
-
-      - ``compute_52``\  
-
-
-
-
-
-
-
-      - ``compute_53``\  
-
-
-
-
-
-
-
-      - ``compute_60``\  
-
-
-
-
-
-
-
-      - ``compute_61``\  
-
-
-
-
-
-
-
-      - ``compute_62``\  
-
-
-
-
-
-
-
-      - ``compute_70``\  
-
-
-
-
-
-
-
-      - ``compute_72``\  
-
-
-
-
-
-
-
-      - ``compute_75``\  
-
-
-
-
-
-
-
-      - ``compute_80``\  
-
-
-
-
-
-
-
-      - ``compute_87``\  
-
-
-
-
-
-
-
-      - ``compute_89``\  
-
-
-
-
-
-
-
-      - ``compute_90``\  
-
-
-
-
-
-
-
-      - ``compute_90a``\  
-
-
-
-
-
-
-
-      - ``sm_50``\  
-
-
-
-
-
-
-
-      - ``sm_52``\  
-
-
-
-
-
-
-
-      - ``sm_53``\  
-
-
-
-
-
-
-
-      - ``sm_60``\  
-
-
-
-
-
-
-
-      - ``sm_61``\  
-
-
-
-
-
-
-
-      - ``sm_62``\  
-
-
-
-
-
-
-
-      - ``sm_70``\  
-
-
-
-
-
-
-
-      - ``sm_72``\  
-
-
-
-
-
-
-
-      - ``sm_75``\  
-
-
-
-
-
-
-
-      - ``sm_80``\  
-
-
-
-
-
-
-
-      - ``sm_87``\  
-
-
-
-
-
-
-
-      - ``sm_89``\  
-
-
-
-
-
-
-
-      - ``sm_90``\  
-
-
-
-
-
-
-
-      - ``sm_90a``\  
-
-
-
-
-
-
-
-
-
-    - Default: ``compute_52``\  
-
-
-
-
-
-
-
-
-
-
-
-- Separate compilation / whole-program compilation
-
-
-
-
-
-  - ``--device-c``\  (``-dc``\ )
-
-
-
-    Generate relocatable code that can be linked with other relocatable device code. It is equivalent to ``--relocatable-device-code=true``\ .
-
-
-
-
-
-
-
-  - ``--device-w``\  (``-dw``\ )
-
-
-
-    Generate non-relocatable code. It is equivalent to ``--relocatable-device-code=false``\ .
-
-
-
-
-
-
-
-  - ``--relocatable-device-code={true|false}``\  (``-rdc``\ )
-
-
-
-    Enable (disable) the generation of relocatable device code.
-
-
-
-
-
-    - Default: ``false``\  
-
-
-
-
-
-
-
-
-
-  - ``--extensible-whole-program``\  (``-ewp``\ )
-
-
-
-    Do extensible whole program compilation of device code.
-
-
-
-
-
-    - Default: ``false``\  
-
-
-
-
-
-
-
-
-
-
-
-- Debugging support
-
-
-
-
-
-  - ``--device-debug``\  (``-G``\ )
-
-
-
-    Generate debug information. If ``--dopt``\  is not specified, this turns off all optimizations.
-
-
-
-
-
-
-
-  - ``--generate-line-info``\  (``-lineinfo``\ )
-
-
-
-    Generate line-number information.
-
-
-
-
-
-
-
-
-
-- Code generation
-
-
-
-
-
-  - ``--dopt``\  on (``-dopt``\ )
-
-
-
-
-
-
-
-
-
-  - ``--dopt=on``\  
-
-
-
-    Enable device code optimization. When specified along with '-G', enables limited debug information generation for optimized device code (currently, only line number information). When '-G' is not specified, '-dopt=on' is implicit.
-
-
-
-
-
-
-
-  - ``--ptxas-options``\  <options> (``-Xptxas``\ )
-
-
-
-
-
-
-
-
-
-  - ``--ptxas-options=<options>``\  
-
-
-
-    Specify options directly to ptxas, the PTX optimizing assembler.
-
-
-
-
-
-
-
-  - ``--maxrregcount=<N>``\  (``-maxrregcount``\ )
-
-
-
-    Specify the maximum amount of registers that GPU functions can use. Up to a function-specific limit, a higher value will generally increase the performance of individual GPU threads that execute this function. However, because thread registers are allocated from a global register pool on each GPU, a higher value of this option will also reduce the maximum thread block size, thereby reducing the amount of thread parallelism. Hence, a good maxrregcount value is the result of a trade-off. If this option is not specified, then no maximum is assumed. A value less than the minimum number of registers required by the ABI will be bumped up by the compiler to the ABI minimum limit.
-
-
-
-
-
-
-
-  - ``--ftz={true|false}``\  (``-ftz``\ )
-
-
-
-    When performing single-precision floating-point operations, flush denormal values to zero or preserve denormal values. ``--use_fast_math``\  implies ``--ftz=true``\ .
-
-
-
-
-
-    - Default: ``false``\  
-
-
-
-
-
-
-
-
-
-  - ``--prec-sqrt={true|false}``\  (``-prec-sqrt``\ )
-
-
-
-    For single-precision floating-point square root, use IEEE round-to-nearest mode or use a faster approximation. ``--use_fast_math``\  implies ``--prec-sqrt=false``\ .
-
-
-
-
-
-    - Default: ``true``\  
-
-
-
-
-
-
-
-
-
-  - ``--prec-div={true|false}``\  (``-prec-div``\ )
-
-
-
-    For single-precision floating-point division and reciprocals, use IEEE round-to-nearest mode or use a faster approximation. ``--use_fast_math``\  implies ``--prec-div=false``\ .
-
-
-
-
-
-    - Default: ``true``\  
-
-
-
-
-
-
-
-
-
-  - ``--fmad={true|false}``\  (``-fmad``\ )
-
-
-
-    Enables (disables) the contraction of floating-point multiplies and adds/subtracts into floating-point multiply-add operations (FMAD, FFMA, or DFMA). ``--use_fast_math``\  implies ``--fmad=true``\ .
-
-
-
-
-
-    - Default: ``true``\  
-
-
-
-
-
-
-
-
-
-  - ``--use_fast_math``\  (``-use_fast_math``\ )
-
-
-
-    Make use of fast math operations. ``--use_fast_math``\  implies ``--ftz=true``\  ``--prec-div=false``\  ``--prec-sqrt=false``\  ``--fmad=true``\ .
-
-
-
-
-
-
-
-  - ``--extra-device-vectorization``\  (``-extra-device-vectorization``\ )
-
-
-
-    Enables more aggressive device code vectorization in the NVVM optimizer.
-
-
-
-
-
-
-
-  - ``--modify-stack-limit={true|false}``\  (``-modify-stack-limit``\ )
-
-
-
-    On Linux, during compilation, use ``setrlimit()``\  to increase stack size to maximum allowed. The limit is reset to the previous value at the end of compilation. Note: ``setrlimit()``\  changes the value for the entire process.
-
-
-
-
-
-    - Default: ``true``\  
-
-
-
-
-
-
-
-
-
-  - ``--dlink-time-opt``\  (``-dlto``\ )
-
-
-
-    Generate intermediate code for later link-time optimization. It implies ``-rdc=true``\ . Note: when this option is used the nvrtcGetLTOIR API should be used, as PTX or Cubin will not be generated.
-
-
-
-
-
-
-
-  - ``--gen-opt-lto``\  (``-gen-opt-lto``\ )
-
-
-
-    Run the optimizer passes before generating the LTO IR.
-
-
-
-
-
-
-
-  - ``--optix-ir``\  (``-optix-ir``\ )
-
-
-
-    Generate OptiX IR. The OptiX IR is only intended for consumption by OptiX through appropriate APIs. This feature is not supported with link-time-optimization (``-dlto``\ ). Note: when this option is used the nvrtcGetOptiXIR API should be used, as PTX or Cubin will not be generated.
-
-
-
-
-
-
-
-
-
-  - ``--jump-table-density=``\ [0-101] (``-jtd``\ )
-
-
-
-    Specify the case density percentage in switch statements, and use it as a minimal threshold to determine whether a jump table (brx.idx instruction) will be used to implement a switch statement. Default value is 101. The percentage ranges from 0 to 101 inclusively.
-
-
-
-
-
-
-
-  - ``--device-stack-protector={true|false}``\  (``-device-stack-protector``\ )
-
-
-
-    Enable (disable) the generation of stack canaries in device code.
-
-
-
-
-
-
-
-    - Default: ``false``\  
-
-
-
-
-
-
-
-
-
-
-
-- Preprocessing
-
-
-
-
-
-  - ``--define-macro=<def>``\  (``-D``\ )
-
-
-
-    ``<def>``\  can be either ``<name>``\  or ``<name=definitions>``\ .
-
-
-
-
-
-    - ``<name>``\  
-
-
-
-      Predefine ``<name>``\  as a macro with definition ``1``\ .
-
-
-
-
-
-
-
-    - ``<name>=<definition>``\  
-
-
-
-      The contents of ``<definition>``\  are tokenized and preprocessed as if they appeared during translation phase three in a ``#define``\  directive. In particular, the definition will be truncated by embedded new line characters.
-
-
-
-
-
-
-
-
-
-  - ``--undefine-macro=<def>``\  (``-U``\ )
-
-
-
-    Cancel any previous definition of ``<def>``\ .
-
-
-
-
-
-
-
-  - ``--include-path=<dir>``\  (``-I``\ )
-
-
-
-    Add the directory ``<dir>``\  to the list of directories to be searched for headers. These paths are searched after the list of headers given to nvrtcCreateProgram.
-
-
-
-
-
-
-
-  - ``--pre-include=<header>``\  (``-include``\ )
-
-
-
-    Preinclude ``<header>``\  during preprocessing.
-
-
-
-
-
-
-
-  - ``--no-source-include``\  (``-no-source-include``\ ) The preprocessor by default adds the directory of each input source to the include path. This option disables this feature and only considers the paths specified explicitly.
-
-
-
-
-
-
-
-
-
-- Language Dialect
-
-
-
-
-
-  - ``--std={c++03|c++11|c++14|c++17|c++20}``\  (``-std={c++11|c++14|c++17|c++20}``\ )
-
-
-
-    Set language dialect to C++03, C++11, C++14, C++17 or C++20
-
-
-
-
-
-    - Default: ``c++17``\  
-
-
-
-
-
-
-
-
-
-  - ``--builtin-move-forward={true|false}``\  (``-builtin-move-forward``\ )
-
-
-
-    Provide builtin definitions of ``std::move``\  and ``std::forward``\ , when C++11 or later language dialect is selected.
-
-
-
-
-
-    - Default: ``true``\  
-
-
-
-
-
-
-
-
-
-  - ``--builtin-initializer-list={true|false}``\  (``-builtin-initializer-list``\ )
-
-
-
-    Provide builtin definitions of ``std::initializer_list``\  class and member functions when C++11 or later language dialect is selected.
-
-
-
-
-
-    - Default: ``true``\  
-
-
-
-
-
-
-
-
-
-
-
-- Misc.
-
-
-
-
-
-  - ``--disable-warnings``\  (``-w``\ )
-
-
-
-    Inhibit all warning messages.
-
-
-
-
-
-
-
-  - ``--restrict``\  (``-restrict``\ )
-
-
-
-    Programmer assertion that all kernel pointer parameters are restrict pointers.
-
-
-
-
-
-
-
-  - ``--device-as-default-execution-space``\  (``-default-device``\ )
-
-
-
-    Treat entities with no execution space annotation as ``device``\  entities.
-
-
-
-
-
-
-
-  - ``--device-int128``\  (``-device-int128``\ )
-
-
-
-    Allow the ``__int128``\  type in device code. Also causes the macro ``CUDACC_RTC_INT128``\  to be defined.
-
-
-
-
-
-
-
-  - ``--optimization-info=<kind>``\  (``-opt-info``\ )
-
-
-
-    Provide optimization reports for the specified kind of optimization. The following kind tags are supported:
-
-
-
-
-
-    - ``inline``\  : emit a remark when a function is inlined.
-
-
-
-
-
-
-
-
-
-  - ``--display-error-number``\  (``-err-no``\ )
-
-
-
-    Display diagnostic number for warning messages. (Default)
-
-
-
-
-
-
-
-  - ``--no-display-error-number``\  (``-no-err-no``\ )
-
-
-
-    Disables the display of a diagnostic number for warning messages.
-
-
-
-
-
-
-
-  - ``--diag-error=<error-number>``\ ,... (``-diag-error``\ )
-
-
-
-    Emit error for specified diagnostic message number(s). Message numbers can be separated by comma.
-
-
-
-
-
-
-
-  - ``--diag-suppress=<error-number>``\ ,... (``-diag-suppress``\ )
-
-
-
-    Suppress specified diagnostic message number(s). Message numbers can be separated by comma.
-
-
-
-
-
-
-
-  - ``--diag-warn=<error-number>``\ ,... (``-diag-warn``\ )
-
-
-
-    Emit warning for specified diagnostic message number(s). Message numbers can be separated by comma.
-
-
-
-
-
-
-
-  - ``--brief-diagnostics={true|false}``\  (``-brief-diag``\ )
-
-
-
-    This option disables or enables showing source line and column info in a diagnostic. Setting ``--brief-diagnostics=true``\  will not show the source line and column info.
-
-
-
-
-
-    - Default: ``false``\  
-
-
-
-
-
-
-
-
-
-  - ``--time=<file-name>``\  (``-time``\ )
-
-
-
-    Generate a comma separated value table with the time taken by each compilation phase, and append it at the end of the file given as the option argument. If the file does not exist, the column headings are generated in the first row of the table. If the file name is '-', the timing data is written to the compilation log.
-
-
-
-
-
-
-
-  - ``--split-compile=``\  <number of threads> (``-split-compile=``\  <number of threads>)
-
-
-
-    Perform compiler optimizations in parallel. Split compilation attempts to reduce compile time by enabling the compiler to run certain optimization passes concurrently. This option accepts a numerical value that specifies the maximum number of threads the compiler can use. One can also allow the compiler to use the maximum threads available on the system by setting --split-compile=0. Setting --split-compile=1 will cause this option to be ignored.
-
-
-
-
-
-
-
-  - ``--fdevice-syntax-only``\  (``-fdevice-syntax-only``\ )
-
-
-
-    Ends device compilation after front-end syntax checking. This option does not generate valid device code.
-
-
-
-
-
-
-
-  - ``--minimal``\  (``-minimal``\ )
-
-
-
-    Omit certain language features to reduce compile time for small programs. In particular, the following are omitted:
-
-
-
-
-
-    - Texture and surface functions and associated types, e.g., ``cudaTextureObject_t``\ .
-
-
-
-
-
-
-
-    - CUDA Runtime Functions that are provided by the cudadevrt device code library, typically named with prefix "cuda", e.g., ``cudaMalloc``\ .
-
-
-
-
-
-
-
-    - Kernel launch from device code.
-
-
-
-
-
-
-
-    - Types and macros associated with CUDA Runtime and Driver APIs, provided by cuda/tools/cudart/driver_types.h, typically named with prefix "cuda", e.g., ``cudaError_t``\ .
-
-
-
-
-
-
-
-
-
-  - ``--device-stack-protector``\  (``-device-stack-protector``\ )
-
-
-
-    Enable stack canaries in device code. Stack canaries make it more difficult to exploit certain types of memory safety bugs involving stack-local variables. The compiler uses heuristics to assess the risk of such a bug in each function. Only those functions which are deemed high-risk make use of a stack canary.
-
diff --git a/docs_src/source/module/runtime.rst b/docs_src/source/module/runtime.rst
deleted file mode 100644
index 55687b68..00000000
--- a/docs_src/source/module/runtime.rst
+++ /dev/null
@@ -1,5274 +0,0 @@
--------
-runtime
--------
-
-Profiler Control
-----------------
-
-This section describes the profiler control functions of the CUDA runtime application programming interface.
-
-.. autofunction:: cuda.bindings.runtime.cudaProfilerStart
-.. autofunction:: cuda.bindings.runtime.cudaProfilerStop
-
-Device Management
------------------
-
-
-
-
-
-
-
-
-
-This section describes the device management functions of the CUDA runtime application programming interface.
-
-.. autofunction:: cuda.bindings.runtime.cudaDeviceReset
-.. autofunction:: cuda.bindings.runtime.cudaDeviceSynchronize
-.. autofunction:: cuda.bindings.runtime.cudaDeviceSetLimit
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetLimit
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetTexture1DLinearMaxWidth
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetCacheConfig
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetStreamPriorityRange
-.. autofunction:: cuda.bindings.runtime.cudaDeviceSetCacheConfig
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetByPCIBusId
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetPCIBusId
-.. autofunction:: cuda.bindings.runtime.cudaIpcGetEventHandle
-.. autofunction:: cuda.bindings.runtime.cudaIpcOpenEventHandle
-.. autofunction:: cuda.bindings.runtime.cudaIpcGetMemHandle
-.. autofunction:: cuda.bindings.runtime.cudaIpcOpenMemHandle
-.. autofunction:: cuda.bindings.runtime.cudaIpcCloseMemHandle
-.. autofunction:: cuda.bindings.runtime.cudaDeviceFlushGPUDirectRDMAWrites
-.. autofunction:: cuda.bindings.runtime.cudaDeviceRegisterAsyncNotification
-.. autofunction:: cuda.bindings.runtime.cudaDeviceUnregisterAsyncNotification
-.. autofunction:: cuda.bindings.runtime.cudaGetDeviceCount
-.. autofunction:: cuda.bindings.runtime.cudaGetDeviceProperties
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetAttribute
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetDefaultMemPool
-.. autofunction:: cuda.bindings.runtime.cudaDeviceSetMemPool
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetMemPool
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetNvSciSyncAttributes
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetP2PAttribute
-.. autofunction:: cuda.bindings.runtime.cudaChooseDevice
-.. autofunction:: cuda.bindings.runtime.cudaInitDevice
-.. autofunction:: cuda.bindings.runtime.cudaSetDevice
-.. autofunction:: cuda.bindings.runtime.cudaGetDevice
-.. autofunction:: cuda.bindings.runtime.cudaSetDeviceFlags
-.. autofunction:: cuda.bindings.runtime.cudaGetDeviceFlags
-
-Error Handling
---------------
-
-This section describes the error handling functions of the CUDA runtime application programming interface.
-
-.. autofunction:: cuda.bindings.runtime.cudaGetLastError
-.. autofunction:: cuda.bindings.runtime.cudaPeekAtLastError
-.. autofunction:: cuda.bindings.runtime.cudaGetErrorName
-.. autofunction:: cuda.bindings.runtime.cudaGetErrorString
-
-Stream Management
------------------
-
-This section describes the stream management functions of the CUDA runtime application programming interface.
-
-.. autoclass:: cuda.bindings.runtime.cudaStreamCallback_t
-.. autofunction:: cuda.bindings.runtime.cudaStreamCreate
-.. autofunction:: cuda.bindings.runtime.cudaStreamCreateWithFlags
-.. autofunction:: cuda.bindings.runtime.cudaStreamCreateWithPriority
-.. autofunction:: cuda.bindings.runtime.cudaStreamGetPriority
-.. autofunction:: cuda.bindings.runtime.cudaStreamGetFlags
-.. autofunction:: cuda.bindings.runtime.cudaStreamGetId
-.. autofunction:: cuda.bindings.runtime.cudaCtxResetPersistingL2Cache
-.. autofunction:: cuda.bindings.runtime.cudaStreamCopyAttributes
-.. autofunction:: cuda.bindings.runtime.cudaStreamGetAttribute
-.. autofunction:: cuda.bindings.runtime.cudaStreamSetAttribute
-.. autofunction:: cuda.bindings.runtime.cudaStreamDestroy
-.. autofunction:: cuda.bindings.runtime.cudaStreamWaitEvent
-.. autofunction:: cuda.bindings.runtime.cudaStreamAddCallback
-.. autofunction:: cuda.bindings.runtime.cudaStreamSynchronize
-.. autofunction:: cuda.bindings.runtime.cudaStreamQuery
-.. autofunction:: cuda.bindings.runtime.cudaStreamAttachMemAsync
-.. autofunction:: cuda.bindings.runtime.cudaStreamBeginCapture
-.. autofunction:: cuda.bindings.runtime.cudaStreamBeginCaptureToGraph
-.. autofunction:: cuda.bindings.runtime.cudaThreadExchangeStreamCaptureMode
-.. autofunction:: cuda.bindings.runtime.cudaStreamEndCapture
-.. autofunction:: cuda.bindings.runtime.cudaStreamIsCapturing
-.. autofunction:: cuda.bindings.runtime.cudaStreamGetCaptureInfo
-.. autofunction:: cuda.bindings.runtime.cudaStreamGetCaptureInfo_v3
-.. autofunction:: cuda.bindings.runtime.cudaStreamUpdateCaptureDependencies
-.. autofunction:: cuda.bindings.runtime.cudaStreamUpdateCaptureDependencies_v2
-
-Event Management
-----------------
-
-This section describes the event management functions of the CUDA runtime application programming interface.
-
-.. autofunction:: cuda.bindings.runtime.cudaEventCreate
-.. autofunction:: cuda.bindings.runtime.cudaEventCreateWithFlags
-.. autofunction:: cuda.bindings.runtime.cudaEventRecord
-.. autofunction:: cuda.bindings.runtime.cudaEventRecordWithFlags
-.. autofunction:: cuda.bindings.runtime.cudaEventQuery
-.. autofunction:: cuda.bindings.runtime.cudaEventSynchronize
-.. autofunction:: cuda.bindings.runtime.cudaEventDestroy
-.. autofunction:: cuda.bindings.runtime.cudaEventElapsedTime
-
-External Resource Interoperability
-----------------------------------
-
-This section describes the external resource interoperability functions of the CUDA runtime application programming interface.
-
-.. autofunction:: cuda.bindings.runtime.cudaImportExternalMemory
-.. autofunction:: cuda.bindings.runtime.cudaExternalMemoryGetMappedBuffer
-.. autofunction:: cuda.bindings.runtime.cudaExternalMemoryGetMappedMipmappedArray
-.. autofunction:: cuda.bindings.runtime.cudaDestroyExternalMemory
-.. autofunction:: cuda.bindings.runtime.cudaImportExternalSemaphore
-.. autofunction:: cuda.bindings.runtime.cudaSignalExternalSemaphoresAsync
-.. autofunction:: cuda.bindings.runtime.cudaWaitExternalSemaphoresAsync
-.. autofunction:: cuda.bindings.runtime.cudaDestroyExternalSemaphore
-
-Execution Control
------------------
-
-This section describes the execution control functions of the CUDA runtime application programming interface.
-
-
-
-Some functions have overloaded C++ API template versions documented separately in the C++ API Routines module.
-
-.. autofunction:: cuda.bindings.runtime.cudaFuncSetCacheConfig
-.. autofunction:: cuda.bindings.runtime.cudaFuncGetAttributes
-.. autofunction:: cuda.bindings.runtime.cudaFuncSetAttribute
-.. autofunction:: cuda.bindings.runtime.cudaLaunchHostFunc
-
-Occupancy
----------
-
-This section describes the occupancy calculation functions of the CUDA runtime application programming interface.
-
-
-
-Besides the occupancy calculator functions (cudaOccupancyMaxActiveBlocksPerMultiprocessor and cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags), there are also C++ only occupancy-based launch configuration functions documented in C++ API Routines module.
-
-
-
-See cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), and cudaOccupancyAvailableDynamicSMemPerBlock (C++ API).
-
-.. autofunction:: cuda.bindings.runtime.cudaOccupancyMaxActiveBlocksPerMultiprocessor
-.. autofunction:: cuda.bindings.runtime.cudaOccupancyAvailableDynamicSMemPerBlock
-.. autofunction:: cuda.bindings.runtime.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
-
-Memory Management
------------------
-
-This section describes the memory management functions of the CUDA runtime application programming interface.
-
-
-
-Some functions have overloaded C++ API template versions documented separately in the C++ API Routines module.
-
-.. autofunction:: cuda.bindings.runtime.cudaMallocManaged
-.. autofunction:: cuda.bindings.runtime.cudaMalloc
-.. autofunction:: cuda.bindings.runtime.cudaMallocHost
-.. autofunction:: cuda.bindings.runtime.cudaMallocPitch
-.. autofunction:: cuda.bindings.runtime.cudaMallocArray
-.. autofunction:: cuda.bindings.runtime.cudaFree
-.. autofunction:: cuda.bindings.runtime.cudaFreeHost
-.. autofunction:: cuda.bindings.runtime.cudaFreeArray
-.. autofunction:: cuda.bindings.runtime.cudaFreeMipmappedArray
-.. autofunction:: cuda.bindings.runtime.cudaHostAlloc
-.. autofunction:: cuda.bindings.runtime.cudaHostRegister
-.. autofunction:: cuda.bindings.runtime.cudaHostUnregister
-.. autofunction:: cuda.bindings.runtime.cudaHostGetDevicePointer
-.. autofunction:: cuda.bindings.runtime.cudaHostGetFlags
-.. autofunction:: cuda.bindings.runtime.cudaMalloc3D
-.. autofunction:: cuda.bindings.runtime.cudaMalloc3DArray
-.. autofunction:: cuda.bindings.runtime.cudaMallocMipmappedArray
-.. autofunction:: cuda.bindings.runtime.cudaGetMipmappedArrayLevel
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy3D
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy3DPeer
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy3DAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy3DPeerAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemGetInfo
-.. autofunction:: cuda.bindings.runtime.cudaArrayGetInfo
-.. autofunction:: cuda.bindings.runtime.cudaArrayGetPlane
-.. autofunction:: cuda.bindings.runtime.cudaArrayGetMemoryRequirements
-.. autofunction:: cuda.bindings.runtime.cudaMipmappedArrayGetMemoryRequirements
-.. autofunction:: cuda.bindings.runtime.cudaArrayGetSparseProperties
-.. autofunction:: cuda.bindings.runtime.cudaMipmappedArrayGetSparseProperties
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy
-.. autofunction:: cuda.bindings.runtime.cudaMemcpyPeer
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy2D
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy2DToArray
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy2DFromArray
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy2DArrayToArray
-.. autofunction:: cuda.bindings.runtime.cudaMemcpyAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemcpyPeerAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy2DAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy2DToArrayAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemcpy2DFromArrayAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemset
-.. autofunction:: cuda.bindings.runtime.cudaMemset2D
-.. autofunction:: cuda.bindings.runtime.cudaMemset3D
-.. autofunction:: cuda.bindings.runtime.cudaMemsetAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemset2DAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemset3DAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemPrefetchAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemPrefetchAsync_v2
-.. autofunction:: cuda.bindings.runtime.cudaMemAdvise
-.. autofunction:: cuda.bindings.runtime.cudaMemAdvise_v2
-.. autofunction:: cuda.bindings.runtime.cudaMemRangeGetAttribute
-.. autofunction:: cuda.bindings.runtime.cudaMemRangeGetAttributes
-.. autofunction:: cuda.bindings.runtime.make_cudaPitchedPtr
-.. autofunction:: cuda.bindings.runtime.make_cudaPos
-.. autofunction:: cuda.bindings.runtime.make_cudaExtent
-
-Stream Ordered Memory Allocator
--------------------------------
-
-**Overview**
-
-
-
-The asynchronous allocator allows the user to allocate and free in stream order. All asynchronous accesses of the allocation must happen between the stream executions of the allocation and the free. If the memory is accessed outside of the promised stream order, a use before allocation / use after free error will cause undefined behavior.
-
-The allocator is free to reallocate the memory as long as it can guarantee that compliant memory accesses will not overlap temporally. The allocator may refer to internal stream ordering as well as inter-stream dependencies (such as CUDA events and null stream dependencies) when establishing the temporal guarantee. The allocator may also insert inter-stream dependencies to establish the temporal guarantee.
-
-
-
-
-
-**Supported Platforms**
-
-
-
-Whether or not a device supports the integrated stream ordered memory allocator may be queried by calling cudaDeviceGetAttribute() with the device attribute cudaDevAttrMemoryPoolsSupported.
-
-.. autofunction:: cuda.bindings.runtime.cudaMallocAsync
-.. autofunction:: cuda.bindings.runtime.cudaFreeAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemPoolTrimTo
-.. autofunction:: cuda.bindings.runtime.cudaMemPoolSetAttribute
-.. autofunction:: cuda.bindings.runtime.cudaMemPoolGetAttribute
-.. autofunction:: cuda.bindings.runtime.cudaMemPoolSetAccess
-.. autofunction:: cuda.bindings.runtime.cudaMemPoolGetAccess
-.. autofunction:: cuda.bindings.runtime.cudaMemPoolCreate
-.. autofunction:: cuda.bindings.runtime.cudaMemPoolDestroy
-.. autofunction:: cuda.bindings.runtime.cudaMallocFromPoolAsync
-.. autofunction:: cuda.bindings.runtime.cudaMemPoolExportToShareableHandle
-.. autofunction:: cuda.bindings.runtime.cudaMemPoolImportFromShareableHandle
-.. autofunction:: cuda.bindings.runtime.cudaMemPoolExportPointer
-.. autofunction:: cuda.bindings.runtime.cudaMemPoolImportPointer
-
-Unified Addressing
-------------------
-
-This section describes the unified addressing functions of the CUDA runtime application programming interface.
-
-
-
-
-
-**Overview**
-
-
-
-CUDA devices can share a unified address space with the host. For these devices there is no distinction between a device pointer and a host pointer -- the same pointer value may be used to access memory from the host program and from a kernel running on the device (with exceptions enumerated below).
-
-
-
-
-
-
-
-**Supported Platforms**
-
-
-
-Whether or not a device supports unified addressing may be queried by calling cudaGetDeviceProperties() with the device property cudaDeviceProp::unifiedAddressing.
-
-Unified addressing is automatically enabled in 64-bit processes.
-
-
-
-
-
-**Looking Up Information from Pointer Values**
-
-
-
-It is possible to look up information about the memory which backs a pointer value. For instance, one may want to know if a pointer points to host or device memory. As another example, in the case of device memory, one may want to know on which CUDA device the memory resides. These properties may be queried using the function cudaPointerGetAttributes().
-
-Since pointers are unique, it is not necessary to specify information about the pointers specified to cudaMemcpy() and other copy functions. The copy direction cudaMemcpyDefault may be used to specify that the CUDA runtime should infer the location of the pointer from its value.
-
-
-
-
-
-
-
-**Automatic Mapping of Host Allocated Host Memory**
-
-
-
-All host memory allocated through all devices using cudaMallocHost() and cudaHostAlloc() is always directly accessible from all devices that support unified addressing. This is the case regardless of whether or not the flags cudaHostAllocPortable and cudaHostAllocMapped are specified.
-
-The pointer value through which allocated host memory may be accessed in kernels on all devices that support unified addressing is the same as the pointer value through which that memory is accessed on the host. It is not necessary to call cudaHostGetDevicePointer() to get the device pointer for these allocations. 
-
-
-
-Note that this is not the case for memory allocated using the flag cudaHostAllocWriteCombined, as discussed below.
-
-
-
-
-
-**Direct Access of Peer Memory**
-
-
-
-Upon enabling direct access from a device that supports unified addressing to another peer device that supports unified addressing using cudaDeviceEnablePeerAccess() all memory allocated in the peer device using cudaMalloc() and cudaMallocPitch() will immediately be accessible by the current device. The device pointer value through which any peer's memory may be accessed in the current device is the same pointer value through which that memory may be accessed from the peer device.
-
-
-
-
-
-**Exceptions, Disjoint Addressing**
-
-
-
-Not all memory may be accessed on devices through the same pointer value through which they are accessed on the host. These exceptions are host memory registered using cudaHostRegister() and host memory allocated using the flag cudaHostAllocWriteCombined. For these exceptions, there exists a distinct host and device address for the memory. The device address is guaranteed to not overlap any valid host pointer range and is guaranteed to have the same value across all devices that support unified addressing. 
-
-
-
-This device address may be queried using cudaHostGetDevicePointer() when a device using unified addressing is current. Either the host or the unified device pointer value may be used to refer to this memory in cudaMemcpy() and similar functions using the cudaMemcpyDefault memory direction.
-
-.. autofunction:: cuda.bindings.runtime.cudaPointerGetAttributes
-
-Peer Device Memory Access
--------------------------
-
-This section describes the peer device memory access functions of the CUDA runtime application programming interface.
-
-.. autofunction:: cuda.bindings.runtime.cudaDeviceCanAccessPeer
-.. autofunction:: cuda.bindings.runtime.cudaDeviceEnablePeerAccess
-.. autofunction:: cuda.bindings.runtime.cudaDeviceDisablePeerAccess
-
-OpenGL Interoperability
------------------------
-
-impl_private
-
-
-
-This section describes the OpenGL interoperability functions of the CUDA runtime application programming interface. Note that mapping of OpenGL resources is performed with the graphics API agnostic, resource mapping interface described in Graphics Interoperability.
-
-.. autoclass:: cuda.bindings.runtime.cudaGLDeviceList
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGLDeviceList.cudaGLDeviceListAll
-
-
-        The CUDA devices for all GPUs used by the current OpenGL context
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGLDeviceList.cudaGLDeviceListCurrentFrame
-
-
-        The CUDA devices for the GPUs used by the current OpenGL context in its currently rendering frame
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGLDeviceList.cudaGLDeviceListNextFrame
-
-
-        The CUDA devices for the GPUs to be used by the current OpenGL context in the next frame
-
-.. autofunction:: cuda.bindings.runtime.cudaGLGetDevices
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsGLRegisterImage
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsGLRegisterBuffer
-
-Direct3D 9 Interoperability
----------------------------
-
-
-
-
-Direct3D 10 Interoperability
-----------------------------
-
-
-
-
-Direct3D 11 Interoperability
-----------------------------
-
-
-
-
-VDPAU Interoperability
-----------------------
-
-This section describes the VDPAU interoperability functions of the CUDA runtime application programming interface.
-
-.. autofunction:: cuda.bindings.runtime.cudaVDPAUGetDevice
-.. autofunction:: cuda.bindings.runtime.cudaVDPAUSetVDPAUDevice
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsVDPAURegisterVideoSurface
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsVDPAURegisterOutputSurface
-
-EGL Interoperability
---------------------
-
-This section describes the EGL interoperability functions of the CUDA runtime application programming interface.
-
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsEGLRegisterImage
-.. autofunction:: cuda.bindings.runtime.cudaEGLStreamConsumerConnect
-.. autofunction:: cuda.bindings.runtime.cudaEGLStreamConsumerConnectWithFlags
-.. autofunction:: cuda.bindings.runtime.cudaEGLStreamConsumerDisconnect
-.. autofunction:: cuda.bindings.runtime.cudaEGLStreamConsumerAcquireFrame
-.. autofunction:: cuda.bindings.runtime.cudaEGLStreamConsumerReleaseFrame
-.. autofunction:: cuda.bindings.runtime.cudaEGLStreamProducerConnect
-.. autofunction:: cuda.bindings.runtime.cudaEGLStreamProducerDisconnect
-.. autofunction:: cuda.bindings.runtime.cudaEGLStreamProducerPresentFrame
-.. autofunction:: cuda.bindings.runtime.cudaEGLStreamProducerReturnFrame
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsResourceGetMappedEglFrame
-.. autofunction:: cuda.bindings.runtime.cudaEventCreateFromEGLSync
-
-Graphics Interoperability
--------------------------
-
-This section describes the graphics interoperability functions of the CUDA runtime application programming interface.
-
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsUnregisterResource
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsResourceSetMapFlags
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsMapResources
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsUnmapResources
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsResourceGetMappedPointer
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsSubResourceGetMappedArray
-.. autofunction:: cuda.bindings.runtime.cudaGraphicsResourceGetMappedMipmappedArray
-
-Texture Object Management
--------------------------
-
-This section describes the low level texture object management functions of the CUDA runtime application programming interface. The texture object API is only supported on devices of compute capability 3.0 or higher.
-
-.. autofunction:: cuda.bindings.runtime.cudaGetChannelDesc
-.. autofunction:: cuda.bindings.runtime.cudaCreateChannelDesc
-.. autofunction:: cuda.bindings.runtime.cudaCreateTextureObject
-.. autofunction:: cuda.bindings.runtime.cudaDestroyTextureObject
-.. autofunction:: cuda.bindings.runtime.cudaGetTextureObjectResourceDesc
-.. autofunction:: cuda.bindings.runtime.cudaGetTextureObjectTextureDesc
-.. autofunction:: cuda.bindings.runtime.cudaGetTextureObjectResourceViewDesc
-
-Surface Object Management
--------------------------
-
-This section describes the low level surface object management functions of the CUDA runtime application programming interface. The surface object API is only supported on devices of compute capability 3.0 or higher.
-
-.. autofunction:: cuda.bindings.runtime.cudaCreateSurfaceObject
-.. autofunction:: cuda.bindings.runtime.cudaDestroySurfaceObject
-.. autofunction:: cuda.bindings.runtime.cudaGetSurfaceObjectResourceDesc
-
-Version Management
-------------------
-
-
-
-.. autofunction:: cuda.bindings.runtime.cudaDriverGetVersion
-.. autofunction:: cuda.bindings.runtime.cudaRuntimeGetVersion
-.. autofunction:: cuda.bindings.runtime.getLocalRuntimeVersion
-
-Graph Management
-----------------
-
-This section describes the graph management functions of the CUDA runtime application programming interface.
-
-.. autofunction:: cuda.bindings.runtime.cudaGraphCreate
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddKernelNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphKernelNodeGetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphKernelNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphKernelNodeCopyAttributes
-.. autofunction:: cuda.bindings.runtime.cudaGraphKernelNodeGetAttribute
-.. autofunction:: cuda.bindings.runtime.cudaGraphKernelNodeSetAttribute
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddMemcpyNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddMemcpyNode1D
-.. autofunction:: cuda.bindings.runtime.cudaGraphMemcpyNodeGetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphMemcpyNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphMemcpyNodeSetParams1D
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddMemsetNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphMemsetNodeGetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphMemsetNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddHostNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphHostNodeGetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphHostNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddChildGraphNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphChildGraphNodeGetGraph
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddEmptyNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddEventRecordNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphEventRecordNodeGetEvent
-.. autofunction:: cuda.bindings.runtime.cudaGraphEventRecordNodeSetEvent
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddEventWaitNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphEventWaitNodeGetEvent
-.. autofunction:: cuda.bindings.runtime.cudaGraphEventWaitNodeSetEvent
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddExternalSemaphoresSignalNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphExternalSemaphoresSignalNodeGetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphExternalSemaphoresSignalNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddExternalSemaphoresWaitNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphExternalSemaphoresWaitNodeGetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphExternalSemaphoresWaitNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddMemAllocNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphMemAllocNodeGetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddMemFreeNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphMemFreeNodeGetParams
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGraphMemTrim
-.. autofunction:: cuda.bindings.runtime.cudaDeviceGetGraphMemAttribute
-.. autofunction:: cuda.bindings.runtime.cudaDeviceSetGraphMemAttribute
-.. autofunction:: cuda.bindings.runtime.cudaGraphClone
-.. autofunction:: cuda.bindings.runtime.cudaGraphNodeFindInClone
-.. autofunction:: cuda.bindings.runtime.cudaGraphNodeGetType
-.. autofunction:: cuda.bindings.runtime.cudaGraphGetNodes
-.. autofunction:: cuda.bindings.runtime.cudaGraphGetRootNodes
-.. autofunction:: cuda.bindings.runtime.cudaGraphGetEdges
-.. autofunction:: cuda.bindings.runtime.cudaGraphGetEdges_v2
-.. autofunction:: cuda.bindings.runtime.cudaGraphNodeGetDependencies
-.. autofunction:: cuda.bindings.runtime.cudaGraphNodeGetDependencies_v2
-.. autofunction:: cuda.bindings.runtime.cudaGraphNodeGetDependentNodes
-.. autofunction:: cuda.bindings.runtime.cudaGraphNodeGetDependentNodes_v2
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddDependencies
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddDependencies_v2
-.. autofunction:: cuda.bindings.runtime.cudaGraphRemoveDependencies
-.. autofunction:: cuda.bindings.runtime.cudaGraphRemoveDependencies_v2
-.. autofunction:: cuda.bindings.runtime.cudaGraphDestroyNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphInstantiate
-.. autofunction:: cuda.bindings.runtime.cudaGraphInstantiateWithFlags
-.. autofunction:: cuda.bindings.runtime.cudaGraphInstantiateWithParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecGetFlags
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecKernelNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecMemcpyNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecMemcpyNodeSetParams1D
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecMemsetNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecHostNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecChildGraphNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecEventRecordNodeSetEvent
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecEventWaitNodeSetEvent
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecExternalSemaphoresSignalNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecExternalSemaphoresWaitNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphNodeSetEnabled
-.. autofunction:: cuda.bindings.runtime.cudaGraphNodeGetEnabled
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecUpdate
-.. autofunction:: cuda.bindings.runtime.cudaGraphUpload
-.. autofunction:: cuda.bindings.runtime.cudaGraphLaunch
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecDestroy
-.. autofunction:: cuda.bindings.runtime.cudaGraphDestroy
-.. autofunction:: cuda.bindings.runtime.cudaGraphDebugDotPrint
-.. autofunction:: cuda.bindings.runtime.cudaUserObjectCreate
-.. autofunction:: cuda.bindings.runtime.cudaUserObjectRetain
-.. autofunction:: cuda.bindings.runtime.cudaUserObjectRelease
-.. autofunction:: cuda.bindings.runtime.cudaGraphRetainUserObject
-.. autofunction:: cuda.bindings.runtime.cudaGraphReleaseUserObject
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddNode
-.. autofunction:: cuda.bindings.runtime.cudaGraphAddNode_v2
-.. autofunction:: cuda.bindings.runtime.cudaGraphNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphExecNodeSetParams
-.. autofunction:: cuda.bindings.runtime.cudaGraphConditionalHandleCreate
-
-Driver Entry Point Access
--------------------------
-
-This section describes the driver entry point access functions of the CUDA runtime application programming interface.
-
-.. autofunction:: cuda.bindings.runtime.cudaGetDriverEntryPoint
-.. autofunction:: cuda.bindings.runtime.cudaGetDriverEntryPointByVersion
-
-C++ API Routines
-----------------
-C++-style interface built on top of CUDA runtime API.
-impl_private
-
-
-
-
-
-
-
-This section describes the C++ high level API functions of the CUDA runtime application programming interface. To use these functions, your application needs to be compiled with the ``nvcc``\  compiler.
-
-
-Interactions with the CUDA Driver API
--------------------------------------
-
-This section describes the interactions between the CUDA Driver API and the CUDA Runtime API
-
-
-
-
-
-**Primary Contexts**
-
-
-
-There exists a one to one relationship between CUDA devices in the CUDA Runtime API and ::CUcontext s in the CUDA Driver API within a process. The specific context which the CUDA Runtime API uses for a device is called the device's primary context. From the perspective of the CUDA Runtime API, a device and its primary context are synonymous.
-
-
-
-
-
-**Initialization and Tear-Down**
-
-
-
-CUDA Runtime API calls operate on the CUDA Driver API ::CUcontext which is current to the calling host thread.
-
-The function cudaInitDevice() ensures that the primary context is initialized for the requested device but does not make it current to the calling thread.
-
-The function cudaSetDevice() initializes the primary context for the specified device and makes it current to the calling thread by calling ::cuCtxSetCurrent().
-
-The CUDA Runtime API will automatically initialize the primary context for a device at the first CUDA Runtime API call which requires an active context. If no ::CUcontext is current to the calling thread when a CUDA Runtime API call which requires an active context is made, then the primary context for a device will be selected, made current to the calling thread, and initialized.
-
-The context which the CUDA Runtime API initializes will be initialized using the parameters specified by the CUDA Runtime API functions cudaSetDeviceFlags(), ::cudaD3D9SetDirect3DDevice(), ::cudaD3D10SetDirect3DDevice(), ::cudaD3D11SetDirect3DDevice(), cudaGLSetGLDevice(), and cudaVDPAUSetVDPAUDevice(). Note that these functions will fail with cudaErrorSetOnActiveProcess if they are called when the primary context for the specified device has already been initialized (or if the current device has already been initialized, in the case of cudaSetDeviceFlags()).
-
-Primary contexts will remain active until they are explicitly deinitialized using cudaDeviceReset(). The function cudaDeviceReset() will deinitialize the primary context for the calling thread's current device immediately. The context will remain current to all of the threads that it was current to. The next CUDA Runtime API call on any thread which requires an active context will trigger the reinitialization of that device's primary context.
-
-Note that primary contexts are shared resources. It is recommended that the primary context not be reset except just before exit or to recover from an unspecified launch failure.
-
-
-
-
-
-**Context Interoperability**
-
-
-
-Note that the use of multiple ::CUcontext s per device within a single process will substantially degrade performance and is strongly discouraged. Instead, it is highly recommended that the implicit one-to-one device-to-context mapping for the process provided by the CUDA Runtime API be used.
-
-If a non-primary ::CUcontext created by the CUDA Driver API is current to a thread then the CUDA Runtime API calls to that thread will operate on that ::CUcontext, with some exceptions listed below. Interoperability between data types is discussed in the following sections.
-
-The function cudaPointerGetAttributes() will return the error cudaErrorIncompatibleDriverContext if the pointer being queried was allocated by a non-primary context. The function cudaDeviceEnablePeerAccess() and the rest of the peer access API may not be called when a non-primary ::CUcontext is current. 
-
- To use the pointer query and peer access APIs with a context created using the CUDA Driver API, it is necessary that the CUDA Driver API be used to access these features.
-
-All CUDA Runtime API state (e.g., global variables' addresses and values) travels with its underlying ::CUcontext. In particular, if a ::CUcontext is moved from one thread to another then all CUDA Runtime API state will move to that thread as well.
-
-Please note that attaching to legacy contexts (those with a version of 3010 as returned by ::cuCtxGetApiVersion()) is not possible. The CUDA Runtime will return cudaErrorIncompatibleDriverContext in such cases.
-
-
-
-
-
-**Interactions between CUstream and cudaStream_t**
-
-
-
-The types ::CUstream and cudaStream_t are identical and may be used interchangeably.
-
-
-
-
-
-**Interactions between CUevent and cudaEvent_t**
-
-
-
-The types ::CUevent and cudaEvent_t are identical and may be used interchangeably.
-
-
-
-
-
-**Interactions between CUarray and cudaArray_t**
-
-
-
-The types ::CUarray and struct ::cudaArray * represent the same data type and may be used interchangeably by casting the two types between each other.
-
-In order to use a ::CUarray in a CUDA Runtime API function which takes a struct ::cudaArray *, it is necessary to explicitly cast the ::CUarray to a struct ::cudaArray *.
-
-In order to use a struct ::cudaArray * in a CUDA Driver API function which takes a ::CUarray, it is necessary to explicitly cast the struct ::cudaArray * to a ::CUarray.
-
-
-
-
-
-**Interactions between CUgraphicsResource and cudaGraphicsResource_t**
-
-
-
-The types ::CUgraphicsResource and cudaGraphicsResource_t represent the same data type and may be used interchangeably by casting the two types between each other.
-
-In order to use a ::CUgraphicsResource in a CUDA Runtime API function which takes a cudaGraphicsResource_t, it is necessary to explicitly cast the ::CUgraphicsResource to a cudaGraphicsResource_t.
-
-In order to use a cudaGraphicsResource_t in a CUDA Driver API function which takes a ::CUgraphicsResource, it is necessary to explicitly cast the cudaGraphicsResource_t to a ::CUgraphicsResource.
-
-
-
-
-
-**Interactions between CUtexObject and cudaTextureObject_t**
-
-
-
-The types ::CUtexObject and cudaTextureObject_t represent the same data type and may be used interchangeably by casting the two types between each other.
-
-In order to use a ::CUtexObject in a CUDA Runtime API function which takes a cudaTextureObject_t, it is necessary to explicitly cast the ::CUtexObject to a cudaTextureObject_t.
-
-In order to use a cudaTextureObject_t in a CUDA Driver API function which takes a ::CUtexObject, it is necessary to explicitly cast the cudaTextureObject_t to a ::CUtexObject.
-
-
-
-
-
-**Interactions between CUsurfObject and cudaSurfaceObject_t**
-
-
-
-The types ::CUsurfObject and cudaSurfaceObject_t represent the same data type and may be used interchangeably by casting the two types between each other.
-
-In order to use a ::CUsurfObject in a CUDA Runtime API function which takes a cudaSurfaceObject_t, it is necessary to explicitly cast the ::CUsurfObject to a cudaSurfaceObject_t.
-
-In order to use a cudaSurfaceObject_t in a CUDA Driver API function which takes a ::CUsurfObject, it is necessary to explicitly cast the cudaSurfaceObject_t to a ::CUsurfObject.
-
-
-
-
-
-**Interactions between CUfunction and cudaFunction_t**
-
-
-
-The types ::CUfunction and cudaFunction_t represent the same data type and may be used interchangeably by casting the two types between each other.
-
-In order to use a cudaFunction_t in a CUDA Driver API function which takes a ::CUfunction, it is necessary to explicitly cast the cudaFunction_t to a ::CUfunction.
-
-.. autofunction:: cuda.bindings.runtime.cudaGetKernel
-
-Data types used by CUDA Runtime
--------------------------------
-
-
-
-.. autoclass:: cuda.bindings.runtime.cudaEglPlaneDesc_st
-.. autoclass:: cuda.bindings.runtime.cudaEglFrame_st
-.. autoclass:: cuda.bindings.runtime.cudaChannelFormatDesc
-.. autoclass:: cuda.bindings.runtime.cudaArraySparseProperties
-.. autoclass:: cuda.bindings.runtime.cudaArrayMemoryRequirements
-.. autoclass:: cuda.bindings.runtime.cudaPitchedPtr
-.. autoclass:: cuda.bindings.runtime.cudaExtent
-.. autoclass:: cuda.bindings.runtime.cudaPos
-.. autoclass:: cuda.bindings.runtime.cudaMemcpy3DParms
-.. autoclass:: cuda.bindings.runtime.cudaMemcpyNodeParams
-.. autoclass:: cuda.bindings.runtime.cudaMemcpy3DPeerParms
-.. autoclass:: cuda.bindings.runtime.cudaMemsetParams
-.. autoclass:: cuda.bindings.runtime.cudaMemsetParamsV2
-.. autoclass:: cuda.bindings.runtime.cudaAccessPolicyWindow
-.. autoclass:: cuda.bindings.runtime.cudaHostNodeParams
-.. autoclass:: cuda.bindings.runtime.cudaHostNodeParamsV2
-.. autoclass:: cuda.bindings.runtime.cudaResourceDesc
-.. autoclass:: cuda.bindings.runtime.cudaResourceViewDesc
-.. autoclass:: cuda.bindings.runtime.cudaPointerAttributes
-.. autoclass:: cuda.bindings.runtime.cudaFuncAttributes
-.. autoclass:: cuda.bindings.runtime.cudaMemLocation
-.. autoclass:: cuda.bindings.runtime.cudaMemAccessDesc
-.. autoclass:: cuda.bindings.runtime.cudaMemPoolProps
-.. autoclass:: cuda.bindings.runtime.cudaMemPoolPtrExportData
-.. autoclass:: cuda.bindings.runtime.cudaMemAllocNodeParams
-.. autoclass:: cuda.bindings.runtime.cudaMemAllocNodeParamsV2
-.. autoclass:: cuda.bindings.runtime.cudaMemFreeNodeParams
-.. autoclass:: cuda.bindings.runtime.CUuuid_st
-.. autoclass:: cuda.bindings.runtime.cudaDeviceProp
-.. autoclass:: cuda.bindings.runtime.cudaIpcEventHandle_st
-.. autoclass:: cuda.bindings.runtime.cudaIpcMemHandle_st
-.. autoclass:: cuda.bindings.runtime.cudaMemFabricHandle_st
-.. autoclass:: cuda.bindings.runtime.cudaExternalMemoryHandleDesc
-.. autoclass:: cuda.bindings.runtime.cudaExternalMemoryBufferDesc
-.. autoclass:: cuda.bindings.runtime.cudaExternalMemoryMipmappedArrayDesc
-.. autoclass:: cuda.bindings.runtime.cudaExternalSemaphoreHandleDesc
-.. autoclass:: cuda.bindings.runtime.cudaExternalSemaphoreSignalParams
-.. autoclass:: cuda.bindings.runtime.cudaExternalSemaphoreWaitParams
-.. autoclass:: cuda.bindings.runtime.cudaKernelNodeParams
-.. autoclass:: cuda.bindings.runtime.cudaKernelNodeParamsV2
-.. autoclass:: cuda.bindings.runtime.cudaExternalSemaphoreSignalNodeParams
-.. autoclass:: cuda.bindings.runtime.cudaExternalSemaphoreSignalNodeParamsV2
-.. autoclass:: cuda.bindings.runtime.cudaExternalSemaphoreWaitNodeParams
-.. autoclass:: cuda.bindings.runtime.cudaExternalSemaphoreWaitNodeParamsV2
-.. autoclass:: cuda.bindings.runtime.cudaConditionalNodeParams
-.. autoclass:: cuda.bindings.runtime.cudaChildGraphNodeParams
-.. autoclass:: cuda.bindings.runtime.cudaEventRecordNodeParams
-.. autoclass:: cuda.bindings.runtime.cudaEventWaitNodeParams
-.. autoclass:: cuda.bindings.runtime.cudaGraphNodeParams
-.. autoclass:: cuda.bindings.runtime.cudaGraphEdgeData_st
-.. autoclass:: cuda.bindings.runtime.cudaGraphInstantiateParams_st
-.. autoclass:: cuda.bindings.runtime.cudaGraphExecUpdateResultInfo_st
-.. autoclass:: cuda.bindings.runtime.cudaGraphKernelNodeUpdate
-.. autoclass:: cuda.bindings.runtime.cudaLaunchMemSyncDomainMap_st
-.. autoclass:: cuda.bindings.runtime.cudaLaunchAttributeValue
-.. autoclass:: cuda.bindings.runtime.cudaLaunchAttribute_st
-.. autoclass:: cuda.bindings.runtime.cudaAsyncNotificationInfo
-.. autoclass:: cuda.bindings.runtime.cudaTextureDesc
-.. autoclass:: cuda.bindings.runtime.cudaEglFrameType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglFrameType.cudaEglFrameTypeArray
-
-
-        Frame type CUDA array
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglFrameType.cudaEglFrameTypePitch
-
-
-        Frame type CUDA pointer
-
-.. autoclass:: cuda.bindings.runtime.cudaEglResourceLocationFlags
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglResourceLocationFlags.cudaEglResourceLocationSysmem
-
-
-        Resource location sysmem
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglResourceLocationFlags.cudaEglResourceLocationVidmem
-
-
-        Resource location vidmem
-
-.. autoclass:: cuda.bindings.runtime.cudaEglColorFormat
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV420Planar
-
-
-        Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV420SemiPlanar
-
-
-        Y, UV in two surfaces (UV as one surface) with VU byte ordering, width, height ratio same as YUV420Planar.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV422Planar
-
-
-        Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV422SemiPlanar
-
-
-        Y, UV in two surfaces with VU byte ordering, width, height ratio same as YUV422Planar.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatARGB
-
-
-        R/G/B/A four channels in one surface with BGRA byte ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatRGBA
-
-
-        R/G/B/A four channels in one surface with ABGR byte ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatL
-
-
-        single luminance channel in one surface.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatR
-
-
-        single color channel in one surface.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV444Planar
-
-
-        Y, U, V in three surfaces, each in a separate surface, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV444SemiPlanar
-
-
-        Y, UV in two surfaces (UV as one surface) with VU byte ordering, width, height ratio same as YUV444Planar.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUYV422
-
-
-        Y, U, V in one surface, interleaved as UYVY in one channel.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatUYVY422
-
-
-        Y, U, V in one surface, interleaved as YUYV in one channel.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatABGR
-
-
-        R/G/B/A four channels in one surface with RGBA byte ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBGRA
-
-
-        R/G/B/A four channels in one surface with ARGB byte ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatA
-
-
-        Alpha color format - one channel in one surface.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatRG
-
-
-        R/G color format - two channels in one surface with GR byte ordering
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatAYUV
-
-
-        Y, U, V, A four channels in one surface, interleaved as VUYA.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU444SemiPlanar
-
-
-        Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU422SemiPlanar
-
-
-        Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU420SemiPlanar
-
-
-        Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10V10U10_444SemiPlanar
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10V10U10_420SemiPlanar
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY12V12U12_444SemiPlanar
-
-
-        Y12, V12U12 in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY12V12U12_420SemiPlanar
-
-
-        Y12, V12U12 in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatVYUY_ER
-
-
-        Extended Range Y, U, V in one surface, interleaved as YVYU in one channel.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatUYVY_ER
-
-
-        Extended Range Y, U, V in one surface, interleaved as YUYV in one channel.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUYV_ER
-
-
-        Extended Range Y, U, V in one surface, interleaved as UYVY in one channel.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVYU_ER
-
-
-        Extended Range Y, U, V in one surface, interleaved as VYUY in one channel.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUVA_ER
-
-
-        Extended Range Y, U, V, A four channels in one surface, interleaved as AVUY.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatAYUV_ER
-
-
-        Extended Range Y, U, V, A four channels in one surface, interleaved as VUYA.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV444Planar_ER
-
-
-        Extended Range Y, U, V in three surfaces, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV422Planar_ER
-
-
-        Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV420Planar_ER
-
-
-        Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV444SemiPlanar_ER
-
-
-        Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV422SemiPlanar_ER
-
-
-        Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV420SemiPlanar_ER
-
-
-        Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU444Planar_ER
-
-
-        Extended Range Y, V, U in three surfaces, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU422Planar_ER
-
-
-        Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU420Planar_ER
-
-
-        Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU444SemiPlanar_ER
-
-
-        Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU422SemiPlanar_ER
-
-
-        Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU420SemiPlanar_ER
-
-
-        Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerRGGB
-
-
-        Bayer format - one channel in one surface with interleaved RGGB ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerBGGR
-
-
-        Bayer format - one channel in one surface with interleaved BGGR ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerGRBG
-
-
-        Bayer format - one channel in one surface with interleaved GRBG ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerGBRG
-
-
-        Bayer format - one channel in one surface with interleaved GBRG ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer10RGGB
-
-
-        Bayer10 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer10BGGR
-
-
-        Bayer10 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer10GRBG
-
-
-        Bayer10 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer10GBRG
-
-
-        Bayer10 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer12RGGB
-
-
-        Bayer12 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer12BGGR
-
-
-        Bayer12 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer12GRBG
-
-
-        Bayer12 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer12GBRG
-
-
-        Bayer12 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer14RGGB
-
-
-        Bayer14 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer14BGGR
-
-
-        Bayer14 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer14GRBG
-
-
-        Bayer14 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer14GBRG
-
-
-        Bayer14 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 14 bits used 2 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer20RGGB
-
-
-        Bayer20 format - one channel in one surface with interleaved RGGB ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer20BGGR
-
-
-        Bayer20 format - one channel in one surface with interleaved BGGR ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer20GRBG
-
-
-        Bayer20 format - one channel in one surface with interleaved GRBG ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer20GBRG
-
-
-        Bayer20 format - one channel in one surface with interleaved GBRG ordering. Out of 32 bits, 20 bits used 12 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU444Planar
-
-
-        Y, V, U in three surfaces, each in a separate surface, U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU422Planar
-
-
-        Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU420Planar
-
-
-        Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerIspRGGB
-
-
-        Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved RGGB ordering and mapped to opaque integer datatype.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerIspBGGR
-
-
-        Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved BGGR ordering and mapped to opaque integer datatype.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerIspGRBG
-
-
-        Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved GRBG ordering and mapped to opaque integer datatype.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerIspGBRG
-
-
-        Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved GBRG ordering and mapped to opaque integer datatype.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerBCCR
-
-
-        Bayer format - one channel in one surface with interleaved BCCR ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerRCCB
-
-
-        Bayer format - one channel in one surface with interleaved RCCB ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerCRBC
-
-
-        Bayer format - one channel in one surface with interleaved CRBC ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayerCBRC
-
-
-        Bayer format - one channel in one surface with interleaved CBRC ordering.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer10CCCC
-
-
-        Bayer10 format - one channel in one surface with interleaved CCCC ordering. Out of 16 bits, 10 bits used 6 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer12BCCR
-
-
-        Bayer12 format - one channel in one surface with interleaved BCCR ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer12RCCB
-
-
-        Bayer12 format - one channel in one surface with interleaved RCCB ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer12CRBC
-
-
-        Bayer12 format - one channel in one surface with interleaved CRBC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer12CBRC
-
-
-        Bayer12 format - one channel in one surface with interleaved CBRC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatBayer12CCCC
-
-
-        Bayer12 format - one channel in one surface with interleaved CCCC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY
-
-
-        Color format for single Y plane.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV420SemiPlanar_2020
-
-
-        Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU420SemiPlanar_2020
-
-
-        Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV420Planar_2020
-
-
-        Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU420Planar_2020
-
-
-        Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV420SemiPlanar_709
-
-
-        Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU420SemiPlanar_709
-
-
-        Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUV420Planar_709
-
-
-        Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVU420Planar_709
-
-
-        Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10V10U10_420SemiPlanar_709
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10V10U10_420SemiPlanar_2020
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10V10U10_422SemiPlanar_2020
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10V10U10_422SemiPlanar
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10V10U10_422SemiPlanar_709
-
-
-        Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY_ER
-
-
-        Extended Range Color format for single Y plane.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY_709_ER
-
-
-        Extended Range Color format for single Y plane.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10_ER
-
-
-        Extended Range Color format for single Y10 plane.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10_709_ER
-
-
-        Extended Range Color format for single Y10 plane.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY12_ER
-
-
-        Extended Range Color format for single Y12 plane.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY12_709_ER
-
-
-        Extended Range Color format for single Y12 plane.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYUVA
-
-
-        Y, U, V, A four channels in one surface, interleaved as AVUY.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatYVYU
-
-
-        Y, U, V in one surface, interleaved as YVYU in one channel.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatVYUY
-
-
-        Y, U, V in one surface, interleaved as VYUY in one channel.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10V10U10_420SemiPlanar_ER
-
-
-        Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER
-
-
-        Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10V10U10_444SemiPlanar_ER
-
-
-        Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER
-
-
-        Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY12V12U12_420SemiPlanar_ER
-
-
-        Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER
-
-
-        Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY12V12U12_444SemiPlanar_ER
-
-
-        Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaEglColorFormat.cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER
-
-
-        Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
-
-.. autoclass:: cuda.bindings.runtime.cudaError_t
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaSuccess
-
-
-        The API call returned with no errors. In the case of query calls, this also means that the operation being queried is complete (see :py:obj:`~.cudaEventQuery()` and :py:obj:`~.cudaStreamQuery()`).
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidValue
-
-
-        This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMemoryAllocation
-
-
-        The API call failed because it was unable to allocate enough memory or other resources to perform the requested operation.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInitializationError
-
-
-        The API call failed because the CUDA driver and runtime could not be initialized.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorCudartUnloading
-
-
-        This indicates that a CUDA Runtime API call cannot be executed because it is being called during process shut down, at a point in time after CUDA driver has been unloaded.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorProfilerDisabled
-
-
-        This indicates profiler is not initialized for this run. This can happen when the application is running with external profiling tools like visual profiler.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorProfilerNotInitialized
-
-
-        [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorProfilerAlreadyStarted
-
-
-        [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorProfilerAlreadyStopped
-
-
-        [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidConfiguration
-
-
-        This indicates that a kernel launch is requesting resources that can never be satisfied by the current device. Requesting more shared memory per block than the device supports will trigger this error, as will requesting too many threads or blocks. See :py:obj:`~.cudaDeviceProp` for more device limitations.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidPitchValue
-
-
-        This indicates that one or more of the pitch-related parameters passed to the API call is not within the acceptable range for pitch.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidSymbol
-
-
-        This indicates that the symbol name/identifier passed to the API call is not a valid name or identifier.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidHostPointer
-
-
-        This indicates that at least one host pointer passed to the API call is not a valid host pointer. [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidDevicePointer
-
-
-        This indicates that at least one device pointer passed to the API call is not a valid device pointer. [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidTexture
-
-
-        This indicates that the texture passed to the API call is not a valid texture.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidTextureBinding
-
-
-        This indicates that the texture binding is not valid. This occurs if you call :py:obj:`~.cudaGetTextureAlignmentOffset()` with an unbound texture.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidChannelDescriptor
-
-
-        This indicates that the channel descriptor passed to the API call is not valid. This occurs if the format is not one of the formats specified by :py:obj:`~.cudaChannelFormatKind`, or if one of the dimensions is invalid.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidMemcpyDirection
-
-
-        This indicates that the direction of the memcpy passed to the API call is not one of the types specified by :py:obj:`~.cudaMemcpyKind`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorAddressOfConstant
-
-
-        This indicated that the user has taken the address of a constant variable, which was forbidden up until the CUDA 3.1 release. [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorTextureFetchFailed
-
-
-        This indicated that a texture fetch was not able to be performed. This was previously used for device emulation of texture operations. [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorTextureNotBound
-
-
-        This indicated that a texture was not bound for access. This was previously used for device emulation of texture operations. [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorSynchronizationError
-
-
-        This indicated that a synchronization operation had failed. This was previously used for some device emulation functions. [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidFilterSetting
-
-
-        This indicates that a non-float texture was being accessed with linear filtering. This is not supported by CUDA.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidNormSetting
-
-
-        This indicates that an attempt was made to read a non-float texture as a normalized float. This is not supported by CUDA.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMixedDeviceExecution
-
-
-        Mixing of device and device emulation code was not allowed. [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorNotYetImplemented
-
-
-        This indicates that the API call is not yet implemented. Production releases of CUDA will never return this error. [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMemoryValueTooLarge
-
-
-        This indicated that an emulated device pointer exceeded the 32-bit address range. [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorStubLibrary
-
-
-        This indicates that the CUDA driver that the application has loaded is a stub library. Applications that run with the stub rather than a real driver loaded will result in CUDA API returning this error.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInsufficientDriver
-
-
-        This indicates that the installed NVIDIA CUDA driver is older than the CUDA runtime library. This is not a supported configuration. Users should install an updated NVIDIA display driver to allow the application to run.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorCallRequiresNewerDriver
-
-
-        This indicates that the API call requires a newer CUDA driver than the one currently installed. Users should install an updated NVIDIA CUDA driver to allow the API call to succeed.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidSurface
-
-
-        This indicates that the surface passed to the API call is not a valid surface.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorDuplicateVariableName
-
-
-        This indicates that multiple global or constant variables (across separate CUDA source files in the application) share the same string name.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorDuplicateTextureName
-
-
-        This indicates that multiple textures (across separate CUDA source files in the application) share the same string name.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorDuplicateSurfaceName
-
-
-        This indicates that multiple surfaces (across separate CUDA source files in the application) share the same string name.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorDevicesUnavailable
-
-
-        This indicates that all CUDA devices are busy or unavailable at the current time. Devices are often busy/unavailable due to use of :py:obj:`~.cudaComputeModeProhibited`, :py:obj:`~.cudaComputeModeExclusiveProcess`, or when long running CUDA kernels have filled up the GPU and are blocking new work from starting. They can also be unavailable due to memory constraints on a device that already has active CUDA work being performed.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorIncompatibleDriverContext
-
-
-        This indicates that the current context is not compatible with the CUDA Runtime. This can only occur if you are using CUDA Runtime/Driver interoperability and have created an existing Driver context using the driver API. The Driver context may be incompatible either because the Driver context was created using an older version of the API, because the Runtime API call expects a primary driver context and the Driver context is not primary, or because the Driver context has been destroyed. Please see "Interactions with the CUDA Driver API" for more information.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMissingConfiguration
-
-
-        The device function being invoked (usually via :py:obj:`~.cudaLaunchKernel()`) was not previously configured via the :py:obj:`~.cudaConfigureCall()` function.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorPriorLaunchFailure
-
-
-        This indicated that a previous kernel launch failed. This was previously used for device emulation of kernel launches. [Deprecated]
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorLaunchMaxDepthExceeded
-
-
-        This error indicates that a device runtime grid launch did not occur because the depth of the child grid would exceed the maximum supported number of nested grid launches.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorLaunchFileScopedTex
-
-
-        This error indicates that a grid launch did not occur because the kernel uses file-scoped textures which are unsupported by the device runtime. Kernels launched via the device runtime only support textures created with the Texture Object API's.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorLaunchFileScopedSurf
-
-
-        This error indicates that a grid launch did not occur because the kernel uses file-scoped surfaces which are unsupported by the device runtime. Kernels launched via the device runtime only support surfaces created with the Surface Object API's.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorSyncDepthExceeded
-
-
-        This error indicates that a call to :py:obj:`~.cudaDeviceSynchronize` made from the device runtime failed because the call was made at grid depth greater than either the default (2 levels of grids) or user specified device limit :py:obj:`~.cudaLimitDevRuntimeSyncDepth`. To be able to synchronize on launched grids at a greater depth successfully, the maximum nested depth at which :py:obj:`~.cudaDeviceSynchronize` will be called must be specified with the :py:obj:`~.cudaLimitDevRuntimeSyncDepth` limit to the :py:obj:`~.cudaDeviceSetLimit` API before the host-side launch of a kernel using the device runtime. Keep in mind that additional levels of sync depth require the runtime to reserve large amounts of device memory that cannot be used for user allocations. Note that :py:obj:`~.cudaDeviceSynchronize` made from device runtime is only supported on devices of compute capability < 9.0.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorLaunchPendingCountExceeded
-
-
-        This error indicates that a device runtime grid launch failed because the launch would exceed the limit :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount`. For this launch to proceed successfully, :py:obj:`~.cudaDeviceSetLimit` must be called to set the :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount` to be higher than the upper bound of outstanding launches that can be issued to the device runtime. Keep in mind that raising the limit of pending device runtime launches will require the runtime to reserve device memory that cannot be used for user allocations.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidDeviceFunction
-
-
-        The requested device function does not exist or is not compiled for the proper device architecture.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorNoDevice
-
-
-        This indicates that no CUDA-capable devices were detected by the installed CUDA driver.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidDevice
-
-
-        This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA device or that the action requested is invalid for the specified device.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorDeviceNotLicensed
-
-
-        This indicates that the device doesn't have a valid Grid License.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorSoftwareValidityNotEstablished
-
-
-        By default, the CUDA runtime may perform a minimal set of self-tests, as well as CUDA driver tests, to establish the validity of both. Introduced in CUDA 11.2, this error return indicates that at least one of these tests has failed and the validity of either the runtime or the driver could not be established.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorStartupFailure
-
-
-        This indicates an internal startup failure in the CUDA runtime.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidKernelImage
-
-
-        This indicates that the device kernel image is invalid.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorDeviceUninitialized
-
-
-        This most frequently indicates that there is no context bound to the current thread. This can also be returned if the context passed to an API call is not a valid handle (such as a context that has had :py:obj:`~.cuCtxDestroy()` invoked on it). This can also be returned if a user mixes different API versions (i.e. 3010 context with 3020 API calls). See :py:obj:`~.cuCtxGetApiVersion()` for more details.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMapBufferObjectFailed
-
-
-        This indicates that the buffer object could not be mapped.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorUnmapBufferObjectFailed
-
-
-        This indicates that the buffer object could not be unmapped.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorArrayIsMapped
-
-
-        This indicates that the specified array is currently mapped and thus cannot be destroyed.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorAlreadyMapped
-
-
-        This indicates that the resource is already mapped.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorNoKernelImageForDevice
-
-
-        This indicates that there is no kernel image available that is suitable for the device. This can occur when a user specifies code generation options for a particular CUDA source file that do not include the corresponding device configuration.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorAlreadyAcquired
-
-
-        This indicates that a resource has already been acquired.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorNotMapped
-
-
-        This indicates that a resource is not mapped.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorNotMappedAsArray
-
-
-        This indicates that a mapped resource is not available for access as an array.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorNotMappedAsPointer
-
-
-        This indicates that a mapped resource is not available for access as a pointer.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorECCUncorrectable
-
-
-        This indicates that an uncorrectable ECC error was detected during execution.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorUnsupportedLimit
-
-
-        This indicates that the :py:obj:`~.cudaLimit` passed to the API call is not supported by the active device.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorDeviceAlreadyInUse
-
-
-        This indicates that a call tried to access an exclusive-thread device that is already in use by a different thread.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorPeerAccessUnsupported
-
-
-        This error indicates that P2P access is not supported across the given devices.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidPtx
-
-
-        A PTX compilation failed. The runtime may fall back to compiling PTX if an application does not contain a suitable binary for the current device.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidGraphicsContext
-
-
-        This indicates an error with the OpenGL or DirectX context.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorNvlinkUncorrectable
-
-
-        This indicates that an uncorrectable NVLink error was detected during the execution.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorJitCompilerNotFound
-
-
-        This indicates that the PTX JIT compiler library was not found. The JIT Compiler library is used for PTX compilation. The runtime may fall back to compiling PTX if an application does not contain a suitable binary for the current device.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorUnsupportedPtxVersion
-
-
-        This indicates that the provided PTX was compiled with an unsupported toolchain. The most common reason for this is that the PTX was generated by a compiler newer than what is supported by the CUDA driver and PTX JIT compiler.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorJitCompilationDisabled
-
-
-        This indicates that the JIT compilation was disabled. The JIT compilation compiles PTX. The runtime may fall back to compiling PTX if an application does not contain a suitable binary for the current device.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorUnsupportedExecAffinity
-
-
-        This indicates that the provided execution affinity is not supported by the device.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorUnsupportedDevSideSync
-
-
-        This indicates that the code to be compiled by the PTX JIT contains an unsupported call to cudaDeviceSynchronize.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidSource
-
-
-        This indicates that the device kernel source is invalid.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorFileNotFound
-
-
-        This indicates that the file specified was not found.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorSharedObjectSymbolNotFound
-
-
-        This indicates that a link to a shared object failed to resolve.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorSharedObjectInitFailed
-
-
-        This indicates that initialization of a shared object failed.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorOperatingSystem
-
-
-        This error indicates that an OS call failed.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidResourceHandle
-
-
-        This indicates that a resource handle passed to the API call was not valid. Resource handles are opaque types like :py:obj:`~.cudaStream_t` and :py:obj:`~.cudaEvent_t`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorIllegalState
-
-
-        This indicates that a resource required by the API call is not in a valid state to perform the requested operation.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorLossyQuery
-
-
-        This indicates an attempt was made to introspect an object in a way that would discard semantically important information. This is either due to the object using functionality newer than the API version used to introspect it or omission of optional return arguments.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorSymbolNotFound
-
-
-        This indicates that a named symbol was not found. Examples of symbols are global/constant variable names, driver function names, texture names, and surface names.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorNotReady
-
-
-        This indicates that asynchronous operations issued previously have not completed yet. This result is not actually an error, but must be indicated differently than :py:obj:`~.cudaSuccess` (which indicates completion). Calls that may return this value include :py:obj:`~.cudaEventQuery()` and :py:obj:`~.cudaStreamQuery()`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorIllegalAddress
-
-
-        The device encountered a load or store instruction on an invalid memory address. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorLaunchOutOfResources
-
-
-        This indicates that a launch did not occur because it did not have appropriate resources. Although this error is similar to :py:obj:`~.cudaErrorInvalidConfiguration`, this error usually indicates that the user has attempted to pass too many arguments to the device kernel, or the kernel launch specifies too many threads for the kernel's register count.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorLaunchTimeout
-
-
-        This indicates that the device kernel took too long to execute. This can only occur if timeouts are enabled - see the device property :py:obj:`~.kernelExecTimeoutEnabled` for more information. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorLaunchIncompatibleTexturing
-
-
-        This error indicates a kernel launch that uses an incompatible texturing mode.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorPeerAccessAlreadyEnabled
-
-
-        This error indicates that a call to :py:obj:`~.cudaDeviceEnablePeerAccess()` is trying to re-enable peer addressing from a context which has already had peer addressing enabled.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorPeerAccessNotEnabled
-
-
-        This error indicates that :py:obj:`~.cudaDeviceDisablePeerAccess()` is trying to disable peer addressing which has not been enabled yet via :py:obj:`~.cudaDeviceEnablePeerAccess()`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorSetOnActiveProcess
-
-
-        This indicates that the user has called :py:obj:`~.cudaSetValidDevices()`, :py:obj:`~.cudaSetDeviceFlags()`, :py:obj:`~.cudaD3D9SetDirect3DDevice()`, :py:obj:`~.cudaD3D10SetDirect3DDevice`, :py:obj:`~.cudaD3D11SetDirect3DDevice()`, or :py:obj:`~.cudaVDPAUSetVDPAUDevice()` after initializing the CUDA runtime by calling non-device management operations (allocating memory and launching kernels are examples of non-device management operations). This error can also be returned if using runtime/driver interoperability and there is an existing :py:obj:`~.CUcontext` active on the host thread.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorContextIsDestroyed
-
-
-        This error indicates that the context current to the calling thread has been destroyed using :py:obj:`~.cuCtxDestroy`, or is a primary context which has not yet been initialized.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorAssert
-
-
-        An assert triggered in device code during kernel execution. The device cannot be used again. All existing allocations are invalid. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorTooManyPeers
-
-
-        This error indicates that the hardware resources required to enable peer access have been exhausted for one or more of the devices passed to :py:obj:`~.cudaEnablePeerAccess()`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorHostMemoryAlreadyRegistered
-
-
-        This error indicates that the memory range passed to :py:obj:`~.cudaHostRegister()` has already been registered.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorHostMemoryNotRegistered
-
-
-        This error indicates that the pointer passed to :py:obj:`~.cudaHostUnregister()` does not correspond to any currently registered memory region.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorHardwareStackError
-
-
-        Device encountered an error in the call stack during kernel execution, possibly due to stack corruption or exceeding the stack size limit. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorIllegalInstruction
-
-
-        The device encountered an illegal instruction during kernel execution. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMisalignedAddress
-
-
-        The device encountered a load or store instruction on a memory address which is not aligned. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidAddressSpace
-
-
-        While executing a kernel, the device encountered an instruction which can only operate on memory locations in certain address spaces (global, shared, or local), but was supplied a memory address not belonging to an allowed address space. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidPc
-
-
-        The device encountered an invalid program counter. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorLaunchFailure
-
-
-        An exception occurred on the device while executing a kernel. Common causes include dereferencing an invalid device pointer and accessing out of bounds shared memory. Less common cases can be system specific - more information about these cases can be found in the system specific user guide. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorCooperativeLaunchTooLarge
-
-
-        This error indicates that the number of blocks launched per grid for a kernel that was launched via either :py:obj:`~.cudaLaunchCooperativeKernel` or :py:obj:`~.cudaLaunchCooperativeKernelMultiDevice` exceeds the maximum number of blocks as allowed by :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor` or :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` times the number of multiprocessors as specified by the device attribute :py:obj:`~.cudaDevAttrMultiProcessorCount`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorNotPermitted
-
-
-        This error indicates the attempted operation is not permitted.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorNotSupported
-
-
-        This error indicates the attempted operation is not supported on the current system or device.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorSystemNotReady
-
-
-        This error indicates that the system is not yet ready to start any CUDA work. To continue using CUDA, verify the system configuration is in a valid state and all required driver daemons are actively running. More information about this error can be found in the system specific user guide.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorSystemDriverMismatch
-
-
-        This error indicates that there is a mismatch between the versions of the display driver and the CUDA driver. Refer to the compatibility documentation for supported versions.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorCompatNotSupportedOnDevice
-
-
-        This error indicates that the system was upgraded to run with forward compatibility but the visible hardware detected by CUDA does not support this configuration. Refer to the compatibility documentation for the supported hardware matrix or ensure that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES environment variable.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMpsConnectionFailed
-
-
-        This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMpsRpcFailure
-
-
-        This error indicates that the remote procedure call between the MPS server and the MPS client failed.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMpsServerNotReady
-
-
-        This error indicates that the MPS server is not ready to accept new MPS client requests. This error can be returned when the MPS server is in the process of recovering from a fatal failure.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMpsMaxClientsReached
-
-
-        This error indicates that the hardware resources required to create an MPS client have been exhausted.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMpsMaxConnectionsReached
-
-
-        This error indicates that the hardware resources required to support device connections have been exhausted.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorMpsClientTerminated
-
-
-        This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorCdpNotSupported
-
-
-        This error indicates that the program is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorCdpVersionMismatch
-
-
-        This error indicates that the program contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorStreamCaptureUnsupported
-
-
-        The operation is not permitted when the stream is capturing.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorStreamCaptureInvalidated
-
-
-        The current capture sequence on the stream has been invalidated due to a previous error.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorStreamCaptureMerge
-
-
-        The operation would have resulted in a merge of two independent capture sequences.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorStreamCaptureUnmatched
-
-
-        The capture was not initiated in this stream.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorStreamCaptureUnjoined
-
-
-        The capture sequence contains a fork that was not joined to the primary stream.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorStreamCaptureIsolation
-
-
-        A dependency would have been created which crosses the capture sequence boundary. Only implicit in-stream ordering dependencies are allowed to cross the boundary.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorStreamCaptureImplicit
-
-
-        The operation would have resulted in a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorCapturedEvent
-
-
-        The operation is not permitted on an event which was last recorded in a capturing stream.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorStreamCaptureWrongThread
-
-
-        A stream capture sequence not initiated with the :py:obj:`~.cudaStreamCaptureModeRelaxed` argument to :py:obj:`~.cudaStreamBeginCapture` was passed to :py:obj:`~.cudaStreamEndCapture` in a different thread.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorTimeout
-
-
-        This indicates that the wait operation has timed out.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorGraphExecUpdateFailure
-
-
-        This error indicates that the graph update was not performed because it included changes which violated constraints specific to instantiated graph update.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorExternalDevice
-
-
-        This indicates that an async error has occurred in a device outside of CUDA. If CUDA was waiting for an external device's signal before consuming shared data, the external device signaled an error indicating that the data is not valid for consumption. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidClusterSize
-
-
-        This indicates that a kernel launch error has occurred due to cluster misconfiguration.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorFunctionNotLoaded
-
-
-        Indicates a function handle is not loaded when calling an API that requires a loaded function.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidResourceType
-
-
-        This error indicates one or more resources passed in are not valid resource types for the operation.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorInvalidResourceConfiguration
-
-
-        This error indicates one or more resources are insufficient or non-applicable for the operation.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorUnknown
-
-
-        This indicates that an unknown internal error has occurred.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaError_t.cudaErrorApiFailureBase
-
-.. autoclass:: cuda.bindings.runtime.cudaChannelFormatKind
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindSigned
-
-
-        Signed channel format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsigned
-
-
-        Unsigned channel format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindFloat
-
-
-        Float channel format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindNone
-
-
-        No channel format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindNV12
-
-
-        Unsigned 8-bit integers, planar 4:2:0 YUV format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X1
-
-
-        1 channel unsigned 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X2
-
-
-        2 channel unsigned 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X4
-
-
-        4 channel unsigned 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X1
-
-
-        1 channel unsigned 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X2
-
-
-        2 channel unsigned 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X4
-
-
-        4 channel unsigned 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X1
-
-
-        1 channel signed 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X2
-
-
-        2 channel signed 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X4
-
-
-        4 channel signed 8-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X1
-
-
-        1 channel signed 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X2
-
-
-        2 channel signed 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X4
-
-
-        4 channel signed 16-bit normalized integer
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1
-
-
-        4 channel unsigned normalized block-compressed (BC1 compression) format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1SRGB
-
-
-        4 channel unsigned normalized block-compressed (BC1 compression) format with sRGB encoding
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2
-
-
-        4 channel unsigned normalized block-compressed (BC2 compression) format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2SRGB
-
-
-        4 channel unsigned normalized block-compressed (BC2 compression) format with sRGB encoding
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3
-
-
-        4 channel unsigned normalized block-compressed (BC3 compression) format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3SRGB
-
-
-        4 channel unsigned normalized block-compressed (BC3 compression) format with sRGB encoding
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed4
-
-
-        1 channel unsigned normalized block-compressed (BC4 compression) format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed4
-
-
-        1 channel signed normalized block-compressed (BC4 compression) format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed5
-
-
-        2 channel unsigned normalized block-compressed (BC5 compression) format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed5
-
-
-        2 channel signed normalized block-compressed (BC5 compression) format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed6H
-
-
-        3 channel unsigned half-float block-compressed (BC6H compression) format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed6H
-
-
-        3 channel signed half-float block-compressed (BC6H compression) format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7
-
-
-        4 channel unsigned normalized block-compressed (BC7 compression) format
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7SRGB
-
-
-        4 channel unsigned normalized block-compressed (BC7 compression) format with sRGB encoding
-
-.. autoclass:: cuda.bindings.runtime.cudaMemoryType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemoryType.cudaMemoryTypeUnregistered
-
-
-        Unregistered memory
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemoryType.cudaMemoryTypeHost
-
-
-        Host memory
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemoryType.cudaMemoryTypeDevice
-
-
-        Device memory
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemoryType.cudaMemoryTypeManaged
-
-
-        Managed memory
-
-.. autoclass:: cuda.bindings.runtime.cudaMemcpyKind
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemcpyKind.cudaMemcpyHostToHost
-
-
-        Host -> Host
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice
-
-
-        Host -> Device
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost
-
-
-        Device -> Host
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice
-
-
-        Device -> Device
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemcpyKind.cudaMemcpyDefault
-
-
-        Direction of the transfer is inferred from the pointer values. Requires unified virtual addressing
-
-.. autoclass:: cuda.bindings.runtime.cudaAccessProperty
-
-    .. autoattribute:: cuda.bindings.runtime.cudaAccessProperty.cudaAccessPropertyNormal
-
-
-        Normal cache persistence.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaAccessProperty.cudaAccessPropertyStreaming
-
-
-        Streaming access is less likely to persist from cache.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaAccessProperty.cudaAccessPropertyPersisting
-
-
-        Persisting access is more likely to persist in cache.
-
-.. autoclass:: cuda.bindings.runtime.cudaStreamCaptureStatus
-
-    .. autoattribute:: cuda.bindings.runtime.cudaStreamCaptureStatus.cudaStreamCaptureStatusNone
-
-
-        Stream is not capturing
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaStreamCaptureStatus.cudaStreamCaptureStatusActive
-
-
-        Stream is actively capturing
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaStreamCaptureStatus.cudaStreamCaptureStatusInvalidated
-
-
-        Stream is part of a capture sequence that has been invalidated, but not terminated
-
-.. autoclass:: cuda.bindings.runtime.cudaStreamCaptureMode
-
-    .. autoattribute:: cuda.bindings.runtime.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaStreamCaptureMode.cudaStreamCaptureModeThreadLocal
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaStreamCaptureMode.cudaStreamCaptureModeRelaxed
-
-.. autoclass:: cuda.bindings.runtime.cudaSynchronizationPolicy
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSynchronizationPolicy.cudaSyncPolicyAuto
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSynchronizationPolicy.cudaSyncPolicySpin
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSynchronizationPolicy.cudaSyncPolicyYield
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSynchronizationPolicy.cudaSyncPolicyBlockingSync
-
-.. autoclass:: cuda.bindings.runtime.cudaClusterSchedulingPolicy
-
-    .. autoattribute:: cuda.bindings.runtime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicyDefault
-
-
-        the default policy
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicySpread
-
-
-        spread the blocks within a cluster to the SMs
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicyLoadBalancing
-
-
-        allow the hardware to load-balance the blocks in a cluster to the SMs
-
-.. autoclass:: cuda.bindings.runtime.cudaStreamUpdateCaptureDependenciesFlags
-
-    .. autoattribute:: cuda.bindings.runtime.cudaStreamUpdateCaptureDependenciesFlags.cudaStreamAddCaptureDependencies
-
-
-        Add new nodes to the dependency set
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaStreamUpdateCaptureDependenciesFlags.cudaStreamSetCaptureDependencies
-
-
-        Replace the dependency set with the new nodes
-
-.. autoclass:: cuda.bindings.runtime.cudaUserObjectFlags
-
-    .. autoattribute:: cuda.bindings.runtime.cudaUserObjectFlags.cudaUserObjectNoDestructorSync
-
-
-        Indicates the destructor execution is not synchronized by any CUDA handle.
-
-.. autoclass:: cuda.bindings.runtime.cudaUserObjectRetainFlags
-
-    .. autoattribute:: cuda.bindings.runtime.cudaUserObjectRetainFlags.cudaGraphUserObjectMove
-
-
-        Transfer references from the caller rather than creating new references.
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphicsRegisterFlags
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsNone
-
-
-        Default
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsReadOnly
-
-
-        CUDA will not write to this resource
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsWriteDiscard
-
-
-        CUDA will only write to and will not read from this resource
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsSurfaceLoadStore
-
-
-        CUDA will bind this resource to a surface reference
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsTextureGather
-
-
-        CUDA will perform texture gather operations on this resource
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphicsMapFlags
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsNone
-
-
-        Default; Assume resource can be read/written
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsReadOnly
-
-
-        CUDA will not write to this resource
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsWriteDiscard
-
-
-        CUDA will only write to and will not read from this resource
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphicsCubeFace
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveX
-
-
-        Positive X face of cubemap
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeX
-
-
-        Negative X face of cubemap
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveY
-
-
-        Positive Y face of cubemap
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeY
-
-
-        Negative Y face of cubemap
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveZ
-
-
-        Positive Z face of cubemap
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeZ
-
-
-        Negative Z face of cubemap
-
-.. autoclass:: cuda.bindings.runtime.cudaResourceType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceType.cudaResourceTypeArray
-
-
-        Array resource
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceType.cudaResourceTypeMipmappedArray
-
-
-        Mipmapped array resource
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceType.cudaResourceTypeLinear
-
-
-        Linear resource
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceType.cudaResourceTypePitch2D
-
-
-        Pitch 2D resource
-
-.. autoclass:: cuda.bindings.runtime.cudaResourceViewFormat
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatNone
-
-
-        No resource view format (use underlying resource format)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar1
-
-
-        1 channel unsigned 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar2
-
-
-        2 channel unsigned 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar4
-
-
-        4 channel unsigned 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedChar1
-
-
-        1 channel signed 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedChar2
-
-
-        2 channel signed 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedChar4
-
-
-        4 channel signed 8-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort1
-
-
-        1 channel unsigned 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort2
-
-
-        2 channel unsigned 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort4
-
-
-        4 channel unsigned 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedShort1
-
-
-        1 channel signed 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedShort2
-
-
-        2 channel signed 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedShort4
-
-
-        4 channel signed 16-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt1
-
-
-        1 channel unsigned 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt2
-
-
-        2 channel unsigned 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt4
-
-
-        4 channel unsigned 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedInt1
-
-
-        1 channel signed 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedInt2
-
-
-        2 channel signed 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedInt4
-
-
-        4 channel signed 32-bit integers
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatHalf1
-
-
-        1 channel 16-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatHalf2
-
-
-        2 channel 16-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatHalf4
-
-
-        4 channel 16-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatFloat1
-
-
-        1 channel 32-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatFloat2
-
-
-        2 channel 32-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatFloat4
-
-
-        4 channel 32-bit floating point
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed1
-
-
-        Block compressed 1
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed2
-
-
-        Block compressed 2
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed3
-
-
-        Block compressed 3
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed4
-
-
-        Block compressed 4 unsigned
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed4
-
-
-        Block compressed 4 signed
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed5
-
-
-        Block compressed 5 unsigned
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed5
-
-
-        Block compressed 5 signed
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed6H
-
-
-        Block compressed 6 unsigned half-float
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed6H
-
-
-        Block compressed 6 signed half-float
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed7
-
-
-        Block compressed 7
-
-.. autoclass:: cuda.bindings.runtime.cudaFuncAttribute
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncAttribute.cudaFuncAttributeMaxDynamicSharedMemorySize
-
-
-        Maximum dynamic shared memory size
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncAttribute.cudaFuncAttributePreferredSharedMemoryCarveout
-
-
-        Preferred shared memory-L1 cache split
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncAttribute.cudaFuncAttributeClusterDimMustBeSet
-
-
-        Indicator to enforce valid cluster dimension specification on kernel launch
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterWidth
-
-
-        Required cluster width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterHeight
-
-
-        Required cluster height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterDepth
-
-
-        Required cluster depth
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncAttribute.cudaFuncAttributeNonPortableClusterSizeAllowed
-
-
-        Whether non-portable cluster scheduling policy is supported
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncAttribute.cudaFuncAttributeClusterSchedulingPolicyPreference
-
-
-        Required cluster scheduling policy preference
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncAttribute.cudaFuncAttributeMax
-
-.. autoclass:: cuda.bindings.runtime.cudaFuncCache
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncCache.cudaFuncCachePreferNone
-
-
-        Default function cache configuration, no preference
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncCache.cudaFuncCachePreferShared
-
-
-        Prefer larger shared memory and smaller L1 cache
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncCache.cudaFuncCachePreferL1
-
-
-        Prefer larger L1 cache and smaller shared memory
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFuncCache.cudaFuncCachePreferEqual
-
-
-        Prefer equal size L1 cache and shared memory
-
-.. autoclass:: cuda.bindings.runtime.cudaSharedMemConfig
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSharedMemConfig.cudaSharedMemBankSizeDefault
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSharedMemConfig.cudaSharedMemBankSizeFourByte
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSharedMemConfig.cudaSharedMemBankSizeEightByte
-
-.. autoclass:: cuda.bindings.runtime.cudaSharedCarveout
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSharedCarveout.cudaSharedmemCarveoutDefault
-
-
-        No preference for shared memory or L1 (default)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSharedCarveout.cudaSharedmemCarveoutMaxShared
-
-
-        Prefer maximum available shared memory, minimum L1 cache
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSharedCarveout.cudaSharedmemCarveoutMaxL1
-
-
-        Prefer maximum available L1 cache, minimum shared memory
-
-.. autoclass:: cuda.bindings.runtime.cudaComputeMode
-
-    .. autoattribute:: cuda.bindings.runtime.cudaComputeMode.cudaComputeModeDefault
-
-
-        Default compute mode (Multiple threads can use :py:obj:`~.cudaSetDevice()` with this device)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaComputeMode.cudaComputeModeExclusive
-
-
-        Compute-exclusive-thread mode (Only one thread in one process will be able to use :py:obj:`~.cudaSetDevice()` with this device)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaComputeMode.cudaComputeModeProhibited
-
-
-        Compute-prohibited mode (No threads can use :py:obj:`~.cudaSetDevice()` with this device)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaComputeMode.cudaComputeModeExclusiveProcess
-
-
-        Compute-exclusive-process mode (Many threads in one process will be able to use :py:obj:`~.cudaSetDevice()` with this device)
-
-.. autoclass:: cuda.bindings.runtime.cudaLimit
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLimit.cudaLimitStackSize
-
-
-        GPU thread stack size
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLimit.cudaLimitPrintfFifoSize
-
-
-        GPU printf FIFO size
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLimit.cudaLimitMallocHeapSize
-
-
-        GPU malloc heap size
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLimit.cudaLimitDevRuntimeSyncDepth
-
-
-        GPU device runtime synchronize depth
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLimit.cudaLimitDevRuntimePendingLaunchCount
-
-
-        GPU device runtime pending launch count
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLimit.cudaLimitMaxL2FetchGranularity
-
-
-        A value between 0 and 128 that indicates the maximum fetch granularity of L2 (in Bytes). This is a hint
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLimit.cudaLimitPersistingL2CacheSize
-
-
-        A size in bytes for L2 persisting lines cache size
-
-.. autoclass:: cuda.bindings.runtime.cudaMemoryAdvise
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemoryAdvise.cudaMemAdviseSetReadMostly
-
-
-        Data will mostly be read and only occasionally be written to
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemoryAdvise.cudaMemAdviseUnsetReadMostly
-
-
-        Undo the effect of :py:obj:`~.cudaMemAdviseSetReadMostly`
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemoryAdvise.cudaMemAdviseSetPreferredLocation
-
-
-        Set the preferred location for the data as the specified device
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemoryAdvise.cudaMemAdviseUnsetPreferredLocation
-
-
-        Clear the preferred location for the data
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemoryAdvise.cudaMemAdviseSetAccessedBy
-
-
-        Data will be accessed by the specified device, so prevent page faults as much as possible
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemoryAdvise.cudaMemAdviseUnsetAccessedBy
-
-
-        Let the Unified Memory subsystem decide on the page faulting policy for the specified device
-
-.. autoclass:: cuda.bindings.runtime.cudaMemRangeAttribute
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemRangeAttribute.cudaMemRangeAttributeReadMostly
-
-
-        Whether the range will mostly be read and only occasionally be written to
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocation
-
-
-        The preferred location of the range
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemRangeAttribute.cudaMemRangeAttributeAccessedBy
-
-
-        Memory range has :py:obj:`~.cudaMemAdviseSetAccessedBy` set for specified device
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocation
-
-
-        The last location to which the range was prefetched
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocationType
-
-
-        The preferred location type of the range
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocationId
-
-
-        The preferred location id of the range
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocationType
-
-
-        The last location type to which the range was prefetched
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocationId
-
-
-        The last location id to which the range was prefetched
-
-.. autoclass:: cuda.bindings.runtime.cudaFlushGPUDirectRDMAWritesOptions
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFlushGPUDirectRDMAWritesOptions.cudaFlushGPUDirectRDMAWritesOptionHost
-
-
-        :py:obj:`~.cudaDeviceFlushGPUDirectRDMAWrites()` and its CUDA Driver API counterpart are supported on the device.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFlushGPUDirectRDMAWritesOptions.cudaFlushGPUDirectRDMAWritesOptionMemOps
-
-
-        The :py:obj:`~.CU_STREAM_WAIT_VALUE_FLUSH` flag and the :py:obj:`~.CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES` MemOp are supported on the CUDA device.
-
-.. autoclass:: cuda.bindings.runtime.cudaGPUDirectRDMAWritesOrdering
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingNone
-
-
-        The device does not natively support ordering of GPUDirect RDMA writes. :py:obj:`~.cudaDeviceFlushGPUDirectRDMAWrites()` can be leveraged if supported.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingOwner
-
-
-        Natively, the device can consistently consume GPUDirect RDMA writes, although other CUDA devices may not.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingAllDevices
-
-
-        Any CUDA device in the system can consistently consume GPUDirect RDMA writes to this device.
-
-.. autoclass:: cuda.bindings.runtime.cudaFlushGPUDirectRDMAWritesScope
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFlushGPUDirectRDMAWritesScope.cudaFlushGPUDirectRDMAWritesToOwner
-
-
-        Blocks until remote writes are visible to the CUDA device context owning the data.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFlushGPUDirectRDMAWritesScope.cudaFlushGPUDirectRDMAWritesToAllDevices
-
-
-        Blocks until remote writes are visible to all CUDA device contexts.
-
-.. autoclass:: cuda.bindings.runtime.cudaFlushGPUDirectRDMAWritesTarget
-
-    .. autoattribute:: cuda.bindings.runtime.cudaFlushGPUDirectRDMAWritesTarget.cudaFlushGPUDirectRDMAWritesTargetCurrentDevice
-
-
-        Sets the target for :py:obj:`~.cudaDeviceFlushGPUDirectRDMAWrites()` to the currently active CUDA device context.
-
-.. autoclass:: cuda.bindings.runtime.cudaDeviceAttr
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxThreadsPerBlock
-
-
-        Maximum number of threads per block
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxBlockDimX
-
-
-        Maximum block dimension X
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxBlockDimY
-
-
-        Maximum block dimension Y
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxBlockDimZ
-
-
-        Maximum block dimension Z
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxGridDimX
-
-
-        Maximum grid dimension X
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxGridDimY
-
-
-        Maximum grid dimension Y
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxGridDimZ
-
-
-        Maximum grid dimension Z
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerBlock
-
-
-        Maximum shared memory available per block in bytes
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrTotalConstantMemory
-
-
-        Memory available on device for constant variables in a CUDA C kernel in bytes
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrWarpSize
-
-
-        Warp size in threads
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxPitch
-
-
-        Maximum pitch in bytes allowed by memory copies
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxRegistersPerBlock
-
-
-        Maximum number of 32-bit registers available per block
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrClockRate
-
-
-        Peak clock frequency in kilohertz
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrTextureAlignment
-
-
-        Alignment requirement for textures
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrGpuOverlap
-
-
-        Device can possibly copy memory and execute a kernel concurrently
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMultiProcessorCount
-
-
-        Number of multiprocessors on device
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrKernelExecTimeout
-
-
-        Specifies whether there is a run time limit on kernels
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrIntegrated
-
-
-        Device is integrated with host memory
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrCanMapHostMemory
-
-
-        Device can map host memory into CUDA address space
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrComputeMode
-
-
-        Compute mode (See :py:obj:`~.cudaComputeMode` for details)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture1DWidth
-
-
-        Maximum 1D texture width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DWidth
-
-
-        Maximum 2D texture width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DHeight
-
-
-        Maximum 2D texture height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture3DWidth
-
-
-        Maximum 3D texture width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture3DHeight
-
-
-        Maximum 3D texture height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture3DDepth
-
-
-        Maximum 3D texture depth
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredWidth
-
-
-        Maximum 2D layered texture width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredHeight
-
-
-        Maximum 2D layered texture height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredLayers
-
-
-        Maximum layers in a 2D layered texture
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrSurfaceAlignment
-
-
-        Alignment requirement for surfaces
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrConcurrentKernels
-
-
-        Device can possibly execute multiple kernels concurrently
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrEccEnabled
-
-
-        Device has ECC support enabled
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrPciBusId
-
-
-        PCI bus ID of the device
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrPciDeviceId
-
-
-        PCI device ID of the device
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrTccDriver
-
-
-        Device is using TCC driver model
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMemoryClockRate
-
-
-        Peak memory clock frequency in kilohertz
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrGlobalMemoryBusWidth
-
-
-        Global memory bus width in bits
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrL2CacheSize
-
-
-        Size of L2 cache in bytes
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxThreadsPerMultiProcessor
-
-
-        Maximum resident threads per multiprocessor
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrAsyncEngineCount
-
-
-        Number of asynchronous engines
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrUnifiedAddressing
-
-
-        Device shares a unified address space with the host
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLayeredWidth
-
-
-        Maximum 1D layered texture width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLayeredLayers
-
-
-        Maximum layers in a 1D layered texture
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DGatherWidth
-
-
-        Maximum 2D texture width if cudaArrayTextureGather is set
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DGatherHeight
-
-
-        Maximum 2D texture height if cudaArrayTextureGather is set
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture3DWidthAlt
-
-
-        Alternate maximum 3D texture width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture3DHeightAlt
-
-
-        Alternate maximum 3D texture height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture3DDepthAlt
-
-
-        Alternate maximum 3D texture depth
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrPciDomainId
-
-
-        PCI domain ID of the device
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrTexturePitchAlignment
-
-
-        Pitch alignment requirement for textures
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapWidth
-
-
-        Maximum cubemap texture width/height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapLayeredWidth
-
-
-        Maximum cubemap layered texture width/height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapLayeredLayers
-
-
-        Maximum layers in a cubemap layered texture
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurface1DWidth
-
-
-        Maximum 1D surface width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurface2DWidth
-
-
-        Maximum 2D surface width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurface2DHeight
-
-
-        Maximum 2D surface height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurface3DWidth
-
-
-        Maximum 3D surface width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurface3DHeight
-
-
-        Maximum 3D surface height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurface3DDepth
-
-
-        Maximum 3D surface depth
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurface1DLayeredWidth
-
-
-        Maximum 1D layered surface width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurface1DLayeredLayers
-
-
-        Maximum layers in a 1D layered surface
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredWidth
-
-
-        Maximum 2D layered surface width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredHeight
-
-
-        Maximum 2D layered surface height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredLayers
-
-
-        Maximum layers in a 2D layered surface
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapWidth
-
-
-        Maximum cubemap surface width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapLayeredWidth
-
-
-        Maximum cubemap layered surface width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapLayeredLayers
-
-
-        Maximum layers in a cubemap layered surface
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLinearWidth
-
-
-        Maximum 1D linear texture width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearWidth
-
-
-        Maximum 2D linear texture width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearHeight
-
-
-        Maximum 2D linear texture height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearPitch
-
-
-        Maximum 2D linear texture pitch in bytes
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DMipmappedWidth
-
-
-        Maximum mipmapped 2D texture width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture2DMipmappedHeight
-
-
-        Maximum mipmapped 2D texture height
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor
-
-
-        Major compute capability version number
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor
-
-
-        Minor compute capability version number
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTexture1DMipmappedWidth
-
-
-        Maximum mipmapped 1D texture width
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrStreamPrioritiesSupported
-
-
-        Device supports stream priorities
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrGlobalL1CacheSupported
-
-
-        Device supports caching globals in L1
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrLocalL1CacheSupported
-
-
-        Device supports caching locals in L1
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerMultiprocessor
-
-
-        Maximum shared memory available per multiprocessor in bytes
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxRegistersPerMultiprocessor
-
-
-        Maximum number of 32-bit registers available per multiprocessor
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrManagedMemory
-
-
-        Device can allocate managed memory on this system
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrIsMultiGpuBoard
-
-
-        Device is on a multi-GPU board
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMultiGpuBoardGroupID
-
-
-        Unique identifier for a group of devices on the same multi-GPU board
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrHostNativeAtomicSupported
-
-
-        Link between the device and the host supports native atomic operations
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrSingleToDoublePrecisionPerfRatio
-
-
-        Ratio of single precision performance (in floating-point operations per second) to double precision performance
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess
-
-
-        Device supports coherently accessing pageable memory without calling cudaHostRegister on it
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrConcurrentManagedAccess
-
-
-        Device can coherently access managed memory concurrently with the CPU
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrComputePreemptionSupported
-
-
-        Device supports Compute Preemption
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrCanUseHostPointerForRegisteredMem
-
-
-        Device can access host registered memory at the same virtual address as the CPU
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrReserved92
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrReserved93
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrReserved94
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrCooperativeLaunch
-
-
-        Device supports launching cooperative kernels via :py:obj:`~.cudaLaunchCooperativeKernel`
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrCooperativeMultiDeviceLaunch
-
-
-        Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerBlockOptin
-
-
-        The maximum opt-in shared memory per block. This value may vary by chip. See :py:obj:`~.cudaFuncSetAttribute`
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrCanFlushRemoteWrites
-
-
-        Device supports flushing of outstanding remote writes.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrHostRegisterSupported
-
-
-        Device supports host memory registration via :py:obj:`~.cudaHostRegister`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccessUsesHostPageTables
-
-
-        Device accesses pageable memory via the host's page tables.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrDirectManagedMemAccessFromHost
-
-
-        Host can directly access managed memory on the device without migration.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxBlocksPerMultiprocessor
-
-
-        Maximum number of blocks per multiprocessor
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxPersistingL2CacheSize
-
-
-        Maximum L2 persisting lines capacity setting in bytes.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxAccessPolicyWindowSize
-
-
-        Maximum value of :py:obj:`~.cudaAccessPolicyWindow.num_bytes`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrReservedSharedMemoryPerBlock
-
-
-        Shared memory reserved by CUDA driver per block in bytes
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrSparseCudaArraySupported
-
-
-        Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrHostRegisterReadOnlySupported
-
-
-        Device supports using the :py:obj:`~.cudaHostRegister` flag cudaHostRegisterReadOnly to register memory that must be mapped as read-only to the GPU
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrTimelineSemaphoreInteropSupported
-
-
-        External timeline semaphore interop is supported on the device
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMaxTimelineSemaphoreInteropSupported
-
-
-        Deprecated, External timeline semaphore interop is supported on the device
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMemoryPoolsSupported
-
-
-        Device supports using the :py:obj:`~.cudaMallocAsync` and :py:obj:`~.cudaMemPool` family of APIs
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMASupported
-
-
-        Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMAFlushWritesOptions
-
-
-        The returned attribute shall be interpreted as a bitmask, where the individual bits are listed in the :py:obj:`~.cudaFlushGPUDirectRDMAWritesOptions` enum
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMAWritesOrdering
-
-
-        GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See :py:obj:`~.cudaGPUDirectRDMAWritesOrdering` for the numerical values returned here.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMemoryPoolSupportedHandleTypes
-
-
-        Handle types supported with mempool based IPC
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrClusterLaunch
-
-
-        Indicates device supports cluster launch
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrDeferredMappingCudaArraySupported
-
-
-        Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrReserved122
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrReserved123
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrReserved124
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrIpcEventSupport
-
-
-        Device supports IPC Events.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMemSyncDomainCount
-
-
-        Number of memory synchronization domains the device supports.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrReserved127
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrReserved128
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrReserved129
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrNumaConfig
-
-
-        NUMA configuration of a device: value is of type :py:obj:`~.cudaDeviceNumaConfig` enum
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrNumaId
-
-
-        NUMA node ID of the GPU memory
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrReserved132
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMpsEnabled
-
-
-        Contexts created on this device will be shared via MPS
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrHostNumaId
-
-
-        NUMA ID of the host node closest to the device. Returns -1 when system does not support NUMA.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrD3D12CigSupported
-
-
-        Device supports CIG with D3D12.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceAttr.cudaDevAttrMax
-
-.. autoclass:: cuda.bindings.runtime.cudaMemPoolAttr
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemPoolAttr.cudaMemPoolReuseFollowEventDependencies
-
-
-        (value type = int) Allow :py:obj:`~.cudaMallocAsync` to use memory asynchronously freed in another stream as long as a stream ordering dependency of the allocating stream on the free action exists. CUDA events and null stream interactions can create the required stream ordered dependencies. (default enabled)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemPoolAttr.cudaMemPoolReuseAllowOpportunistic
-
-
-        (value type = int) Allow reuse of already completed frees when there is no dependency between the free and allocation. (default enabled)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemPoolAttr.cudaMemPoolReuseAllowInternalDependencies
-
-
-        (value type = int) Allow :py:obj:`~.cudaMallocAsync` to insert new stream dependencies in order to establish the stream ordering required to reuse a piece of memory released by :py:obj:`~.cudaFreeAsync` (default enabled).
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold
-
-
-        (value type = cuuint64_t) Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS. When more than the release threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the next call to stream, event or context synchronize. (default 0)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemPoolAttr.cudaMemPoolAttrReservedMemCurrent
-
-
-        (value type = cuuint64_t) Amount of backing memory currently allocated for the mempool.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemPoolAttr.cudaMemPoolAttrReservedMemHigh
-
-
-        (value type = cuuint64_t) High watermark of backing memory allocated for the mempool since the last time it was reset. High watermark can only be reset to zero.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemPoolAttr.cudaMemPoolAttrUsedMemCurrent
-
-
-        (value type = cuuint64_t) Amount of memory from the pool that is currently in use by the application.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemPoolAttr.cudaMemPoolAttrUsedMemHigh
-
-
-        (value type = cuuint64_t) High watermark of the amount of memory from the pool that was in use by the application since the last time it was reset. High watermark can only be reset to zero.
-
-.. autoclass:: cuda.bindings.runtime.cudaMemLocationType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemLocationType.cudaMemLocationTypeInvalid
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemLocationType.cudaMemLocationTypeDevice
-
-
-        Location is a device location, thus id is a device ordinal
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemLocationType.cudaMemLocationTypeHost
-
-
-        Location is host, id is ignored
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemLocationType.cudaMemLocationTypeHostNuma
-
-
-        Location is a host NUMA node, thus id is a host NUMA node id
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemLocationType.cudaMemLocationTypeHostNumaCurrent
-
-
-        Location is the host NUMA node closest to the current thread's CPU, id is ignored
-
-.. autoclass:: cuda.bindings.runtime.cudaMemAccessFlags
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemAccessFlags.cudaMemAccessFlagsProtNone
-
-
-        Default, make the address range not accessible
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemAccessFlags.cudaMemAccessFlagsProtRead
-
-
-        Make the address range read accessible
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemAccessFlags.cudaMemAccessFlagsProtReadWrite
-
-
-        Make the address range read-write accessible
-
-.. autoclass:: cuda.bindings.runtime.cudaMemAllocationType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemAllocationType.cudaMemAllocationTypeInvalid
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemAllocationType.cudaMemAllocationTypePinned
-
-
-        This allocation type is 'pinned', i.e. cannot migrate from its current location while the application is actively using it
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemAllocationType.cudaMemAllocationTypeMax
-
-.. autoclass:: cuda.bindings.runtime.cudaMemAllocationHandleType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemAllocationHandleType.cudaMemHandleTypeNone
-
-
-        Does not allow any export mechanism.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemAllocationHandleType.cudaMemHandleTypePosixFileDescriptor
-
-
-        Allows a file descriptor to be used for exporting. Permitted only on POSIX systems. (int)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemAllocationHandleType.cudaMemHandleTypeWin32
-
-
-        Allows a Win32 NT handle to be used for exporting. (HANDLE)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemAllocationHandleType.cudaMemHandleTypeWin32Kmt
-
-
-        Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaMemAllocationHandleType.cudaMemHandleTypeFabric
-
-
-        Allows a fabric handle to be used for exporting. (cudaMemFabricHandle_t)
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphMemAttributeType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemCurrent
-
-
-        (value type = cuuint64_t) Amount of memory, in bytes, currently associated with graphs.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemHigh
-
-
-        (value type = cuuint64_t) High watermark of memory, in bytes, associated with graphs since the last time it was reset. High watermark can only be reset to zero.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphMemAttributeType.cudaGraphMemAttrReservedMemCurrent
-
-
-        (value type = cuuint64_t) Amount of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphMemAttributeType.cudaGraphMemAttrReservedMemHigh
-
-
-        (value type = cuuint64_t) High watermark of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator.
-
-.. autoclass:: cuda.bindings.runtime.cudaDeviceP2PAttr
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceP2PAttr.cudaDevP2PAttrPerformanceRank
-
-
-        A relative value indicating the performance of the link between two devices
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceP2PAttr.cudaDevP2PAttrAccessSupported
-
-
-        Peer access is enabled
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceP2PAttr.cudaDevP2PAttrNativeAtomicSupported
-
-
-        Native atomic operation over the link supported
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceP2PAttr.cudaDevP2PAttrCudaArrayAccessSupported
-
-
-        Accessing CUDA arrays over the link supported
-
-.. autoclass:: cuda.bindings.runtime.cudaExternalMemoryHandleType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueFd
-
-
-        Handle is an opaque file descriptor
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32
-
-
-        Handle is an opaque shared NT handle
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32Kmt
-
-
-        Handle is an opaque, globally shared handle
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Heap
-
-
-        Handle is a D3D12 heap object
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Resource
-
-
-        Handle is a D3D12 committed resource
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11Resource
-
-
-        Handle is a shared NT handle to a D3D11 resource
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11ResourceKmt
-
-
-        Handle is a globally shared handle to a D3D11 resource
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeNvSciBuf
-
-
-        Handle is an NvSciBuf object
-
-.. autoclass:: cuda.bindings.runtime.cudaExternalSemaphoreHandleType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueFd
-
-
-        Handle is an opaque file descriptor
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32
-
-
-        Handle is an opaque shared NT handle
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt
-
-
-        Handle is an opaque, globally shared handle
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D12Fence
-
-
-        Handle is a shared NT handle referencing a D3D12 fence object
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D11Fence
-
-
-        Handle is a shared NT handle referencing a D3D11 fence object
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeNvSciSync
-
-
-        Opaque handle to NvSciSync Object
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutex
-
-
-        Handle is a shared NT handle referencing a D3D11 keyed mutex object
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutexKmt
-
-
-        Handle is a shared KMT handle referencing a D3D11 keyed mutex object
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd
-
-
-        Handle is an opaque handle file descriptor referencing a timeline semaphore
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32
-
-
-        Handle is an opaque shared NT handle referencing a timeline semaphore
-
-.. autoclass:: cuda.bindings.runtime.cudaCGScope
-
-    .. autoattribute:: cuda.bindings.runtime.cudaCGScope.cudaCGScopeInvalid
-
-
-        Invalid cooperative group scope
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaCGScope.cudaCGScopeGrid
-
-
-        Scope represented by a grid_group
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaCGScope.cudaCGScopeMultiGrid
-
-
-        Scope represented by a multi_grid_group
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphConditionalHandleFlags
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphConditionalHandleFlags.cudaGraphCondAssignDefault
-
-
-        Apply default handle value when graph is launched.
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphConditionalNodeType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphConditionalNodeType.cudaGraphCondTypeIf
-
-
-        Conditional 'if' Node. Body executed once if condition value is non-zero.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphConditionalNodeType.cudaGraphCondTypeWhile
-
-
-        Conditional 'while' Node. Body executed repeatedly while condition value is non-zero.
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphNodeType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeKernel
-
-
-        GPU kernel node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeMemcpy
-
-
-        Memcpy node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeMemset
-
-
-        Memset node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeHost
-
-
-        Host (executable) node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeGraph
-
-
-        Node which executes an embedded graph
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeEmpty
-
-
-        Empty (no-op) node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeWaitEvent
-
-
-        External event wait node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeEventRecord
-
-
-        External event record node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeExtSemaphoreSignal
-
-
-        External semaphore signal node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeExtSemaphoreWait
-
-
-        External semaphore wait node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeMemAlloc
-
-
-        Memory allocation node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeMemFree
-
-
-        Memory free node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeConditional
-
-
-        Conditional node. May be used to implement a conditional execution path or loop inside of a graph. The graph(s) contained within the body of the conditional node can be selectively executed or iterated upon based on the value of a conditional variable.
-
-        Handles must be created in advance of creating the node using :py:obj:`~.cudaGraphConditionalHandleCreate`.
-
-        The following restrictions apply to graphs which contain conditional nodes:
-
-        - The graph cannot be used in a child node.
-        - Only one instantiation of the graph may exist at any point in time.
-        - The graph cannot be cloned.
-
-        To set the control value, supply a default value when creating the handle and/or call :py:obj:`~.cudaGraphSetConditional` from device code.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphNodeType.cudaGraphNodeTypeCount
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphDependencyType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDependencyType.cudaGraphDependencyTypeDefault
-
-
-        This is an ordinary dependency.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDependencyType.cudaGraphDependencyTypeProgrammatic
-
-
-        This dependency type allows the downstream node to use `cudaGridDependencySynchronize()`. It may only be used between kernel nodes, and must be used with either the :py:obj:`~.cudaGraphKernelNodePortProgrammatic` or :py:obj:`~.cudaGraphKernelNodePortLaunchCompletion` outgoing port.
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphExecUpdateResult
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphExecUpdateResult.cudaGraphExecUpdateSuccess
-
-
-        The update succeeded
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphExecUpdateResult.cudaGraphExecUpdateError
-
-
-        The update failed for an unexpected reason which is described in the return value of the function
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorTopologyChanged
-
-
-        The update failed because the topology changed
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorNodeTypeChanged
-
-
-        The update failed because a node type changed
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorFunctionChanged
-
-
-        The update failed because the function of a kernel node changed (CUDA driver < 11.2)
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorParametersChanged
-
-
-        The update failed because the parameters changed in a way that is not supported
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorNotSupported
-
-
-        The update failed because something about the node is not supported
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorUnsupportedFunctionChange
-
-
-        The update failed because the function of a kernel node changed in an unsupported way
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorAttributesChanged
-
-
-        The update failed because the node attributes changed in a way that is not supported
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphInstantiateResult
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphInstantiateResult.cudaGraphInstantiateSuccess
-
-
-        Instantiation succeeded
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphInstantiateResult.cudaGraphInstantiateError
-
-
-        Instantiation failed for an unexpected reason which is described in the return value of the function
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphInstantiateResult.cudaGraphInstantiateInvalidStructure
-
-
-        Instantiation failed due to invalid structure, such as cycles
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphInstantiateResult.cudaGraphInstantiateNodeOperationNotSupported
-
-
-        Instantiation for device launch failed because the graph contained an unsupported operation
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphInstantiateResult.cudaGraphInstantiateMultipleDevicesNotSupported
-
-
-        Instantiation for device launch failed due to the nodes belonging to different contexts
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphKernelNodeField
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldInvalid
-
-
-        Invalid field
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldGridDim
-
-
-        Grid dimension update
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldParam
-
-
-        Kernel parameter update
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldEnabled
-
-
-        Node enable/disable
-
-.. autoclass:: cuda.bindings.runtime.cudaGetDriverEntryPointFlags
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGetDriverEntryPointFlags.cudaEnableDefault
-
-
-        Default search mode for driver symbols.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGetDriverEntryPointFlags.cudaEnableLegacyStream
-
-
-        Search for legacy versions of driver symbols.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGetDriverEntryPointFlags.cudaEnablePerThreadDefaultStream
-
-
-        Search for per-thread versions of driver symbols.
-
-.. autoclass:: cuda.bindings.runtime.cudaDriverEntryPointQueryResult
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointSuccess
-
-
-        Search for symbol found a match
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointSymbolNotFound
-
-
-        Search for symbol did not find a match
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointVersionNotSufficent
-
-
-        Search for symbol found a match, but its version was not sufficient
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphDebugDotFlags
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsVerbose
-
-
-        Output all debug data as if every debug flag is enabled
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsKernelNodeParams
-
-
-        Adds :py:obj:`~.cudaKernelNodeParams` to output
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsMemcpyNodeParams
-
-
-        Adds :py:obj:`~.cudaMemcpy3DParms` to output
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsMemsetNodeParams
-
-
-        Adds :py:obj:`~.cudaMemsetParams` to output
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsHostNodeParams
-
-
-        Adds :py:obj:`~.cudaHostNodeParams` to output
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsEventNodeParams
-
-
-        Adds cudaEvent_t handle from record and wait nodes to output
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsExtSemasSignalNodeParams
-
-
-        Adds :py:obj:`~.cudaExternalSemaphoreSignalNodeParams` values to output
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsExtSemasWaitNodeParams
-
-
-        Adds :py:obj:`~.cudaExternalSemaphoreWaitNodeParams` to output
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsKernelNodeAttributes
-
-
-        Adds cudaKernelNodeAttrID values to output
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsHandles
-
-
-        Adds node handles and every kernel function handle to output
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsConditionalNodeParams
-
-
-        Adds :py:obj:`~.cudaConditionalNodeParams` to output
-
-.. autoclass:: cuda.bindings.runtime.cudaGraphInstantiateFlags
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagAutoFreeOnLaunch
-
-
-        Automatically free memory allocated in a graph before relaunching.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagUpload
-
-
-        Automatically upload the graph after instantiation. Only supported by :py:obj:`~.cudaGraphInstantiateWithParams`. The upload will be performed using the stream provided in `instantiateParams`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagDeviceLaunch
-
-
-        Instantiate the graph to be launchable from the device. This flag can only be used on platforms which support unified addressing. This flag cannot be used in conjunction with cudaGraphInstantiateFlagAutoFreeOnLaunch.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagUseNodePriority
-
-
-        Run the graph using the per-node priority attributes rather than the priority of the stream it is launched into.
-
-.. autoclass:: cuda.bindings.runtime.cudaLaunchMemSyncDomain
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchMemSyncDomain.cudaLaunchMemSyncDomainDefault
-
-
-        Launch kernels in the default domain
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchMemSyncDomain.cudaLaunchMemSyncDomainRemote
-
-
-        Launch kernels in the remote domain
-
-.. autoclass:: cuda.bindings.runtime.cudaLaunchAttributeID
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeIgnore
-
-
-        Ignored entry, for convenient composition
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeAccessPolicyWindow
-
-
-        Valid for streams, graph nodes, launches. See :py:obj:`~.cudaLaunchAttributeValue.accessPolicyWindow`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeCooperative
-
-
-        Valid for graph nodes, launches. See :py:obj:`~.cudaLaunchAttributeValue.cooperative`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeSynchronizationPolicy
-
-
-        Valid for streams. See :py:obj:`~.cudaLaunchAttributeValue.syncPolicy`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeClusterDimension
-
-
-        Valid for graph nodes, launches. See :py:obj:`~.cudaLaunchAttributeValue.clusterDim`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeClusterSchedulingPolicyPreference
-
-
-        Valid for graph nodes, launches. See :py:obj:`~.cudaLaunchAttributeValue.clusterSchedulingPolicyPreference`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticStreamSerialization
-
-
-        Valid for launches. Setting :py:obj:`~.cudaLaunchAttributeValue.programmaticStreamSerializationAllowed` to non-0 signals that the kernel will use programmatic means to resolve its stream dependency, so that the CUDA runtime should opportunistically allow the grid's execution to overlap with the previous kernel in the stream, if that kernel requests the overlap. The dependent launches can choose to wait on the dependency using the programmatic sync (cudaGridDependencySynchronize() or equivalent PTX instructions).
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticEvent
-
-
-        Valid for launches. Set :py:obj:`~.cudaLaunchAttributeValue.programmaticEvent` to record the event. Event recorded through this launch attribute is guaranteed to only trigger after all blocks in the associated kernel trigger the event. A block can trigger the event programmatically in a future CUDA release. A trigger can also be inserted at the beginning of each block's execution if triggerAtBlockStart is set to non-0. The dependent launches can choose to wait on the dependency using the programmatic sync (cudaGridDependencySynchronize() or equivalent PTX instructions). Note that dependents (including the CPU thread calling :py:obj:`~.cudaEventSynchronize()`) are not guaranteed to observe the release precisely when it is released. For example, :py:obj:`~.cudaEventSynchronize()` may only observe the event trigger long after the associated kernel has completed. This recording type is primarily meant for establishing programmatic dependency between device tasks. Note also this type of dependency allows, but does not guarantee, concurrent execution of tasks.
-
-         The event supplied must not be an interprocess or interop event. The event must disable timing (i.e. must be created with the :py:obj:`~.cudaEventDisableTiming` flag set).
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributePriority
-
-
-        Valid for streams, graph nodes, launches. See :py:obj:`~.cudaLaunchAttributeValue.priority`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap
-
-
-        Valid for streams, graph nodes, launches. See :py:obj:`~.cudaLaunchAttributeValue.memSyncDomainMap`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomain
-
-
-        Valid for streams, graph nodes, launches. See :py:obj:`~.cudaLaunchAttributeValue.memSyncDomain`.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeLaunchCompletionEvent
-
-
-        Valid for launches. Set :py:obj:`~.cudaLaunchAttributeValue.launchCompletionEvent` to record the event. 
-
-         Nominally, the event is triggered once all blocks of the kernel have begun execution. Currently this is a best effort. If a kernel B has a launch completion dependency on a kernel A, B may wait until A is complete. Alternatively, blocks of B may begin before all blocks of A have begun, for example if B can claim execution resources unavailable to A (e.g. they run on different GPUs) or if B is a higher priority than A. Exercise caution if such an ordering inversion could lead to deadlock. 
-
-         A launch completion event is nominally similar to a programmatic event with `triggerAtBlockStart` set except that it is not visible to `cudaGridDependencySynchronize()` and can be used with compute capability less than 9.0. 
-
-         The event supplied must not be an interprocess or interop event. The event must disable timing (i.e. must be created with the :py:obj:`~.cudaEventDisableTiming` flag set).
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode
-
-
-        Valid for graph nodes, launches. This attribute is graphs-only, and passing it to a launch in a non-capturing stream will result in an error. 
-
-         :py:obj:`~.cudaLaunchAttributeValue`::deviceUpdatableKernelNode::deviceUpdatable can only be set to 0 or 1. Setting the field to 1 indicates that the corresponding kernel node should be device-updatable. On success, a handle will be returned via :py:obj:`~.cudaLaunchAttributeValue`::deviceUpdatableKernelNode::devNode which can be passed to the various device-side update functions to update the node's kernel parameters from within another kernel. For more information on the types of device updates that can be made, as well as the relevant limitations thereof, see :py:obj:`~.cudaGraphKernelNodeUpdatesApply`. 
-
-         Nodes which are device-updatable have additional restrictions compared to regular kernel nodes. Firstly, device-updatable nodes cannot be removed from their graph via :py:obj:`~.cudaGraphDestroyNode`. Additionally, once opted-in to this functionality, a node cannot opt out, and any attempt to set the deviceUpdatable attribute to 0 will result in an error. Device-updatable kernel nodes also cannot have their attributes copied to/from another kernel node via :py:obj:`~.cudaGraphKernelNodeCopyAttributes`. Graphs containing one or more device-updatable nodes also do not allow multiple instantiation, and neither the graph nor its instantiated version can be passed to :py:obj:`~.cudaGraphExecUpdate`. 
-
-         If a graph contains device-updatable nodes and updates those nodes from the device from within the graph, the graph must be uploaded with :py:obj:`~.cuGraphUpload` before it is launched. For such a graph, if host-side executable graph updates are made to the device-updatable nodes, the graph must be uploaded before it is launched again.
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout
-
-
-        Valid for launches. On devices where the L1 cache and shared memory use the same hardware resources, setting :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a percentage between 0 and 100 sets the shared memory carveout preference, in percent of the total shared memory, for that kernel launch. This attribute takes precedence over :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is only a hint, and the driver can choose a different configuration if required for the launch.
-
-.. autoclass:: cuda.bindings.runtime.cudaDeviceNumaConfig
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceNumaConfig.cudaDeviceNumaConfigNone
-
-
-        The GPU is not a NUMA node
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaDeviceNumaConfig.cudaDeviceNumaConfigNumaNode
-
-
-        The GPU is a NUMA node, cudaDevAttrNumaId contains its NUMA ID
-
-.. autoclass:: cuda.bindings.runtime.cudaAsyncNotificationType
-
-    .. autoattribute:: cuda.bindings.runtime.cudaAsyncNotificationType.cudaAsyncNotificationTypeOverBudget
-
-.. autoclass:: cuda.bindings.runtime.cudaSurfaceBoundaryMode
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSurfaceBoundaryMode.cudaBoundaryModeZero
-
-
-        Zero boundary mode
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSurfaceBoundaryMode.cudaBoundaryModeClamp
-
-
-        Clamp boundary mode
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSurfaceBoundaryMode.cudaBoundaryModeTrap
-
-
-        Trap boundary mode
-
-.. autoclass:: cuda.bindings.runtime.cudaSurfaceFormatMode
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSurfaceFormatMode.cudaFormatModeForced
-
-
-        Forced format mode
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaSurfaceFormatMode.cudaFormatModeAuto
-
-
-        Auto format mode
-
-.. autoclass:: cuda.bindings.runtime.cudaTextureAddressMode
-
-    .. autoattribute:: cuda.bindings.runtime.cudaTextureAddressMode.cudaAddressModeWrap
-
-
-        Wrapping address mode
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaTextureAddressMode.cudaAddressModeClamp
-
-
-        Clamp to edge address mode
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaTextureAddressMode.cudaAddressModeMirror
-
-
-        Mirror address mode
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaTextureAddressMode.cudaAddressModeBorder
-
-
-        Border address mode
-
-.. autoclass:: cuda.bindings.runtime.cudaTextureFilterMode
-
-    .. autoattribute:: cuda.bindings.runtime.cudaTextureFilterMode.cudaFilterModePoint
-
-
-        Point filter mode
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaTextureFilterMode.cudaFilterModeLinear
-
-
-        Linear filter mode
-
-.. autoclass:: cuda.bindings.runtime.cudaTextureReadMode
-
-    .. autoattribute:: cuda.bindings.runtime.cudaTextureReadMode.cudaReadModeElementType
-
-
-        Read texture as specified element type
-
-
-    .. autoattribute:: cuda.bindings.runtime.cudaTextureReadMode.cudaReadModeNormalizedFloat
-
-
-        Read texture as normalized float
-
-.. autoclass:: cuda.bindings.runtime.cudaEglPlaneDesc
-.. autoclass:: cuda.bindings.runtime.cudaEglFrame
-.. autoclass:: cuda.bindings.runtime.cudaEglStreamConnection
-.. autoclass:: cuda.bindings.runtime.cudaArray_t
-.. autoclass:: cuda.bindings.runtime.cudaArray_const_t
-.. autoclass:: cuda.bindings.runtime.cudaMipmappedArray_t
-.. autoclass:: cuda.bindings.runtime.cudaMipmappedArray_const_t
-.. autoclass:: cuda.bindings.runtime.cudaHostFn_t
-.. autoclass:: cuda.bindings.runtime.CUuuid
-.. autoclass:: cuda.bindings.runtime.cudaUUID_t
-.. autoclass:: cuda.bindings.runtime.cudaIpcEventHandle_t
-.. autoclass:: cuda.bindings.runtime.cudaIpcMemHandle_t
-.. autoclass:: cuda.bindings.runtime.cudaMemFabricHandle_t
-.. autoclass:: cuda.bindings.runtime.cudaStream_t
-.. autoclass:: cuda.bindings.runtime.cudaEvent_t
-.. autoclass:: cuda.bindings.runtime.cudaGraphicsResource_t
-.. autoclass:: cuda.bindings.runtime.cudaExternalMemory_t
-.. autoclass:: cuda.bindings.runtime.cudaExternalSemaphore_t
-.. autoclass:: cuda.bindings.runtime.cudaGraph_t
-.. autoclass:: cuda.bindings.runtime.cudaGraphNode_t
-.. autoclass:: cuda.bindings.runtime.cudaUserObject_t
-.. autoclass:: cuda.bindings.runtime.cudaGraphConditionalHandle
-.. autoclass:: cuda.bindings.runtime.cudaFunction_t
-.. autoclass:: cuda.bindings.runtime.cudaKernel_t
-.. autoclass:: cuda.bindings.runtime.cudaMemPool_t
-.. autoclass:: cuda.bindings.runtime.cudaGraphEdgeData
-.. autoclass:: cuda.bindings.runtime.cudaGraphExec_t
-.. autoclass:: cuda.bindings.runtime.cudaGraphInstantiateParams
-.. autoclass:: cuda.bindings.runtime.cudaGraphExecUpdateResultInfo
-.. autoclass:: cuda.bindings.runtime.cudaGraphDeviceNode_t
-.. autoclass:: cuda.bindings.runtime.cudaLaunchMemSyncDomainMap
-.. autoclass:: cuda.bindings.runtime.cudaLaunchAttributeValue
-.. autoclass:: cuda.bindings.runtime.cudaLaunchAttribute
-.. autoclass:: cuda.bindings.runtime.cudaAsyncCallbackHandle_t
-.. autoclass:: cuda.bindings.runtime.cudaAsyncNotificationInfo_t
-.. autoclass:: cuda.bindings.runtime.cudaAsyncCallback
-.. autoclass:: cuda.bindings.runtime.cudaSurfaceObject_t
-.. autoclass:: cuda.bindings.runtime.cudaTextureObject_t
-.. autoattribute:: cuda.bindings.runtime.CUDA_EGL_MAX_PLANES
-
-    Maximum number of planes per frame
-
-.. autoattribute:: cuda.bindings.runtime.cudaHostAllocDefault
-
-    Default page-locked allocation flag
-
-.. autoattribute:: cuda.bindings.runtime.cudaHostAllocPortable
-
-    Pinned memory accessible by all CUDA contexts
-
-.. autoattribute:: cuda.bindings.runtime.cudaHostAllocMapped
-
-    Map allocation into device space
-
-.. autoattribute:: cuda.bindings.runtime.cudaHostAllocWriteCombined
-
-    Write-combined memory
-
-.. autoattribute:: cuda.bindings.runtime.cudaHostRegisterDefault
-
-    Default host memory registration flag
-
-.. autoattribute:: cuda.bindings.runtime.cudaHostRegisterPortable
-
-    Pinned memory accessible by all CUDA contexts
-
-.. autoattribute:: cuda.bindings.runtime.cudaHostRegisterMapped
-
-    Map registered memory into device space
-
-.. autoattribute:: cuda.bindings.runtime.cudaHostRegisterIoMemory
-
-    Memory-mapped I/O space
-
-.. autoattribute:: cuda.bindings.runtime.cudaHostRegisterReadOnly
-
-    Memory-mapped read-only
-
-.. autoattribute:: cuda.bindings.runtime.cudaPeerAccessDefault
-
-    Default peer addressing enable flag
-
-.. autoattribute:: cuda.bindings.runtime.cudaStreamDefault
-
-    Default stream flag
-
-.. autoattribute:: cuda.bindings.runtime.cudaStreamNonBlocking
-
-    Stream does not synchronize with stream 0 (the NULL stream)
-
-.. autoattribute:: cuda.bindings.runtime.cudaStreamLegacy
-
-    Legacy stream handle
-
-
-
-    Stream handle that can be passed as a cudaStream_t to use an implicit stream with legacy synchronization behavior.
-
-
-
-    See the CUDA Runtime API documentation on stream synchronization behavior for details.
-
-.. autoattribute:: cuda.bindings.runtime.cudaStreamPerThread
-
-    Per-thread stream handle
-
-
-
-    Stream handle that can be passed as a cudaStream_t to use an implicit stream with per-thread synchronization behavior.
-
-
-
-    See the CUDA Runtime API documentation on stream synchronization behavior for details.
-
-.. autoattribute:: cuda.bindings.runtime.cudaEventDefault
-
-    Default event flag
-
-.. autoattribute:: cuda.bindings.runtime.cudaEventBlockingSync
-
-    Event uses blocking synchronization
-
-.. autoattribute:: cuda.bindings.runtime.cudaEventDisableTiming
-
-    Event will not record timing data
-
-.. autoattribute:: cuda.bindings.runtime.cudaEventInterprocess
-
-    Event is suitable for interprocess use. cudaEventDisableTiming must be set
-
-.. autoattribute:: cuda.bindings.runtime.cudaEventRecordDefault
-
-    Default event record flag
-
-.. autoattribute:: cuda.bindings.runtime.cudaEventRecordExternal
-
-    Event is captured in the graph as an external event node when performing stream capture
-
-.. autoattribute:: cuda.bindings.runtime.cudaEventWaitDefault
-
-    Default event wait flag
-
-.. autoattribute:: cuda.bindings.runtime.cudaEventWaitExternal
-
-    Event is captured in the graph as an external event node when performing stream capture
-
-.. autoattribute:: cuda.bindings.runtime.cudaDeviceScheduleAuto
-
-    Device flag - Automatic scheduling
-
-.. autoattribute:: cuda.bindings.runtime.cudaDeviceScheduleSpin
-
-    Device flag - Spin default scheduling
-
-.. autoattribute:: cuda.bindings.runtime.cudaDeviceScheduleYield
-
-    Device flag - Yield default scheduling
-
-.. autoattribute:: cuda.bindings.runtime.cudaDeviceScheduleBlockingSync
-
-    Device flag - Use blocking synchronization
-
-.. autoattribute:: cuda.bindings.runtime.cudaDeviceBlockingSync
-
-    Device flag - Use blocking synchronization [Deprecated]
-
-.. autoattribute:: cuda.bindings.runtime.cudaDeviceScheduleMask
-
-    Device schedule flags mask
-
-.. autoattribute:: cuda.bindings.runtime.cudaDeviceMapHost
-
-    Device flag - Support mapped pinned allocations
-
-.. autoattribute:: cuda.bindings.runtime.cudaDeviceLmemResizeToMax
-
-    Device flag - Keep local memory allocation after launch
-
-.. autoattribute:: cuda.bindings.runtime.cudaDeviceSyncMemops
-
-    Device flag - Ensure synchronous memory operations on this context will synchronize
-
-.. autoattribute:: cuda.bindings.runtime.cudaDeviceMask
-
-    Device flags mask
-
-.. autoattribute:: cuda.bindings.runtime.cudaArrayDefault
-
-    Default CUDA array allocation flag
-
-.. autoattribute:: cuda.bindings.runtime.cudaArrayLayered
-
-    Must be set in cudaMalloc3DArray to create a layered CUDA array
-
-.. autoattribute:: cuda.bindings.runtime.cudaArraySurfaceLoadStore
-
-    Must be set in cudaMallocArray or cudaMalloc3DArray in order to bind surfaces to the CUDA array
-
-.. autoattribute:: cuda.bindings.runtime.cudaArrayCubemap
-
-    Must be set in cudaMalloc3DArray to create a cubemap CUDA array
-
-.. autoattribute:: cuda.bindings.runtime.cudaArrayTextureGather
-
-    Must be set in cudaMallocArray or cudaMalloc3DArray in order to perform texture gather operations on the CUDA array
-
-.. autoattribute:: cuda.bindings.runtime.cudaArrayColorAttachment
-
-    Must be set in cudaExternalMemoryGetMappedMipmappedArray if the mipmapped array is used as a color target in a graphics API
-
-.. autoattribute:: cuda.bindings.runtime.cudaArraySparse
-
-    Must be set in cudaMallocArray, cudaMalloc3DArray or cudaMallocMipmappedArray in order to create a sparse CUDA array or CUDA mipmapped array
-
-.. autoattribute:: cuda.bindings.runtime.cudaArrayDeferredMapping
-
-    Must be set in cudaMallocArray, cudaMalloc3DArray or cudaMallocMipmappedArray in order to create a deferred mapping CUDA array or CUDA mipmapped array
-
-.. autoattribute:: cuda.bindings.runtime.cudaIpcMemLazyEnablePeerAccess
-
-    Automatically enable peer access between remote devices as needed
-
-.. autoattribute:: cuda.bindings.runtime.cudaMemAttachGlobal
-
-    Memory can be accessed by any stream on any device
-
-.. autoattribute:: cuda.bindings.runtime.cudaMemAttachHost
-
-    Memory cannot be accessed by any stream on any device
-
-.. autoattribute:: cuda.bindings.runtime.cudaMemAttachSingle
-
-    Memory can only be accessed by a single stream on the associated device
-
-.. autoattribute:: cuda.bindings.runtime.cudaOccupancyDefault
-
-    Default behavior
-
-.. autoattribute:: cuda.bindings.runtime.cudaOccupancyDisableCachingOverride
-
-    Assume global caching is enabled and cannot be automatically turned off
-
-.. autoattribute:: cuda.bindings.runtime.cudaCpuDeviceId
-
-    Device id that represents the CPU
-
-.. autoattribute:: cuda.bindings.runtime.cudaInvalidDeviceId
-
-    Device id that represents an invalid device
-
-.. autoattribute:: cuda.bindings.runtime.cudaInitDeviceFlagsAreValid
-
-    Tell the CUDA runtime that DeviceFlags is being set in cudaInitDevice call
-
-.. autoattribute:: cuda.bindings.runtime.cudaCooperativeLaunchMultiDeviceNoPreSync
-
-    If set, each kernel launched as part of :py:obj:`~.cudaLaunchCooperativeKernelMultiDevice` only waits for prior work in the stream corresponding to that GPU to complete before the kernel begins execution.
-
-.. autoattribute:: cuda.bindings.runtime.cudaCooperativeLaunchMultiDeviceNoPostSync
-
-    If set, any subsequent work pushed in a stream that participated in a call to :py:obj:`~.cudaLaunchCooperativeKernelMultiDevice` will only wait for the kernel launched on the GPU corresponding to that stream to complete before it begins execution.
-
-.. autoattribute:: cuda.bindings.runtime.cudaArraySparsePropertiesSingleMipTail
-
-    Indicates that the layered sparse CUDA array or CUDA mipmapped array has a single mip tail region for all layers
-
-.. autoattribute:: cuda.bindings.runtime.CUDART_CB
-.. autoattribute:: cuda.bindings.runtime.CU_UUID_HAS_BEEN_DEFINED
-
-    CUDA UUID types
-
-.. autoattribute:: cuda.bindings.runtime.CUDA_IPC_HANDLE_SIZE
-
-    CUDA IPC Handle Size
-
-.. autoattribute:: cuda.bindings.runtime.cudaExternalMemoryDedicated
-
-    Indicates that the external memory object is a dedicated resource
-
-.. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreSignalSkipNvSciBufMemSync
-
-    When the ``flags`` parameter of :py:obj:`~.cudaExternalSemaphoreSignalParams` contains this flag, it indicates that signaling an external semaphore object should skip performing appropriate memory synchronization operations over all the external memory objects that are imported as :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, which otherwise are performed by default to ensure data coherency with other importers of the same NvSciBuf memory objects.
-
-.. autoattribute:: cuda.bindings.runtime.cudaExternalSemaphoreWaitSkipNvSciBufMemSync
-
-    When the ``flags`` parameter of :py:obj:`~.cudaExternalSemaphoreWaitParams` contains this flag, it indicates that waiting on an external semaphore object should skip performing appropriate memory synchronization operations over all the external memory objects that are imported as :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, which otherwise are performed by default to ensure data coherency with other importers of the same NvSciBuf memory objects.
-
-.. autoattribute:: cuda.bindings.runtime.cudaNvSciSyncAttrSignal
-
-    When the ``flags`` parameter of :py:obj:`~.cudaDeviceGetNvSciSyncAttributes` is set to this, it indicates that the application needs signaler-specific NvSciSyncAttr to be filled by :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`.
-
-.. autoattribute:: cuda.bindings.runtime.cudaNvSciSyncAttrWait
-
-    When the ``flags`` parameter of :py:obj:`~.cudaDeviceGetNvSciSyncAttributes` is set to this, it indicates that the application needs waiter-specific NvSciSyncAttr to be filled by :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`.
-
-.. autoattribute:: cuda.bindings.runtime.cudaGraphKernelNodePortDefault
-
-    This port activates when the kernel has finished executing.
-
-.. autoattribute:: cuda.bindings.runtime.cudaGraphKernelNodePortProgrammatic
-
-    This port activates when all blocks of the kernel have performed cudaTriggerProgrammaticLaunchCompletion() or have terminated. It must be used with edge type :py:obj:`~.cudaGraphDependencyTypeProgrammatic`. See also :py:obj:`~.cudaLaunchAttributeProgrammaticEvent`.
-
-.. autoattribute:: cuda.bindings.runtime.cudaGraphKernelNodePortLaunchCompletion
-
-    This port activates when all blocks of the kernel have begun execution. See also :py:obj:`~.cudaLaunchAttributeLaunchCompletionEvent`.
-
-.. autoattribute:: cuda.bindings.runtime.cudaStreamAttrID
-.. autoattribute:: cuda.bindings.runtime.cudaStreamAttributeAccessPolicyWindow
-.. autoattribute:: cuda.bindings.runtime.cudaStreamAttributeSynchronizationPolicy
-.. autoattribute:: cuda.bindings.runtime.cudaStreamAttributeMemSyncDomainMap
-.. autoattribute:: cuda.bindings.runtime.cudaStreamAttributeMemSyncDomain
-.. autoattribute:: cuda.bindings.runtime.cudaStreamAttributePriority
-.. autoattribute:: cuda.bindings.runtime.cudaStreamAttrValue
-.. autoattribute:: cuda.bindings.runtime.cudaKernelNodeAttrID
-.. autoattribute:: cuda.bindings.runtime.cudaKernelNodeAttributeAccessPolicyWindow
-.. autoattribute:: cuda.bindings.runtime.cudaKernelNodeAttributeCooperative
-.. autoattribute:: cuda.bindings.runtime.cudaKernelNodeAttributePriority
-.. autoattribute:: cuda.bindings.runtime.cudaKernelNodeAttributeClusterDimension
-.. autoattribute:: cuda.bindings.runtime.cudaKernelNodeAttributeClusterSchedulingPolicyPreference
-.. autoattribute:: cuda.bindings.runtime.cudaKernelNodeAttributeMemSyncDomainMap
-.. autoattribute:: cuda.bindings.runtime.cudaKernelNodeAttributeMemSyncDomain
-.. autoattribute:: cuda.bindings.runtime.cudaKernelNodeAttributePreferredSharedMemoryCarveout
-.. autoattribute:: cuda.bindings.runtime.cudaKernelNodeAttributeDeviceUpdatableKernelNode
-.. autoattribute:: cuda.bindings.runtime.cudaKernelNodeAttrValue
-.. autoattribute:: cuda.bindings.runtime.cudaSurfaceType1D
-.. autoattribute:: cuda.bindings.runtime.cudaSurfaceType2D
-.. autoattribute:: cuda.bindings.runtime.cudaSurfaceType3D
-.. autoattribute:: cuda.bindings.runtime.cudaSurfaceTypeCubemap
-.. autoattribute:: cuda.bindings.runtime.cudaSurfaceType1DLayered
-.. autoattribute:: cuda.bindings.runtime.cudaSurfaceType2DLayered
-.. autoattribute:: cuda.bindings.runtime.cudaSurfaceTypeCubemapLayered
-.. autoattribute:: cuda.bindings.runtime.cudaTextureType1D
-.. autoattribute:: cuda.bindings.runtime.cudaTextureType2D
-.. autoattribute:: cuda.bindings.runtime.cudaTextureType3D
-.. autoattribute:: cuda.bindings.runtime.cudaTextureTypeCubemap
-.. autoattribute:: cuda.bindings.runtime.cudaTextureType1DLayered
-.. autoattribute:: cuda.bindings.runtime.cudaTextureType2DLayered
-.. autoattribute:: cuda.bindings.runtime.cudaTextureTypeCubemapLayered
diff --git a/docs_src/source/motivation.md b/docs_src/source/motivation.md
deleted file mode 100644
index 5b8879f2..00000000
--- a/docs_src/source/motivation.md
+++ /dev/null
@@ -1,41 +0,0 @@
-# Motivation
-## What is CUDA Python?
-
-NVIDIA’s CUDA Python provides [Cython](https://cython.org/) bindings and Python
-wrappers for the driver and runtime API for existing toolkits and libraries to
-simplify GPU-based accelerated processing. Python is one of the most popular
-programming languages for science, engineering, data analytics, and deep 
-learning applications.  The goal of CUDA Python is to unify
-the Python ecosystem with a single set of interfaces that provide full coverage
-of and access to the CUDA host APIs from Python.
-
-## Why CUDA Python?
-
-CUDA Python provides uniform APIs and bindings for inclusion into existing
-toolkits and libraries to simplify GPU-based parallel processing for HPC, data
-science, and AI.
-
-[Numba](https://numba.pydata.org/), a Python compiler from
-[Anaconda](https://www.anaconda.com/) that can compile Python code for execution
-on CUDA-capable GPUs, provides Python developers with an easy entry into
-GPU-accelerated computing and a path for using increasingly sophisticated CUDA
-code with a minimum of new syntax and jargon. Numba has its own CUDA driver API 
-bindings that can now be replaced with CUDA Python. With CUDA Python and Numba, 
-you get the best of both worlds: rapid iterative development with Python and the
-speed of a compiled language targeting both CPUs and NVIDIA GPUs.
-
-[CuPy](https://cupy.dev/) is a
-[NumPy](https://numpy.org/)/[SciPy](https://www.scipy.org/) compatible Array
-library, from [Preferred Networks](https://www.preferred.jp/en/), for
-GPU-accelerated computing with Python. CUDA Python simplifies the CuPy build 
-and allows for a faster and smaller memory footprint when importing the CuPy 
-Python module. In the future, when more CUDA Toolkit libraries are supported, 
-CuPy will have a lighter maintenance overhead and have fewer wheels to 
-release. Users benefit from a faster CUDA runtime!
-
-Our goal is to help unify the Python CUDA ecosystem with a single standard set
-of interfaces, providing full coverage of, and access to, the CUDA host APIs
-from Python. We want to provide a foundation for the ecosystem to build on top
-of in unison to allow composing different accelerated libraries together to
-solve the problems at hand. We also want to lower the barrier to entry for
-Python developers to utilize NVIDIA GPUs.
diff --git a/docs_src/source/overview.md b/docs_src/source/overview.md
deleted file mode 100644
index 155be761..00000000
--- a/docs_src/source/overview.md
+++ /dev/null
@@ -1,321 +0,0 @@
----
-jupytext:
-  text_representation:
-    format_name: myst
-kernelspec:
-  display_name: Python 3
-  name: python3
----
-
-# Overview
-
-<p style="font-size: 14px; color: grey; text-align: right;">by <a
-href="https://developer.nvidia.com/blog/author/mnicely/">Matthew Nicely</a></p>
-
-Python plays a key role within the science, engineering, data analytics, and
-deep learning application ecosystem. NVIDIA has long been committed to helping
-the Python ecosystem leverage the accelerated massively parallel performance of
-GPUs to deliver standardized libraries, tools, and applications. Today, we’re
-introducing another step towards simplification of the developer experience with
-improved Python code portability and compatibility.
-
-Our goal is to help unify the Python CUDA ecosystem with a single standard set
-of low-level interfaces, providing full coverage of and access to the CUDA host
-APIs from Python. We want to provide an ecosystem foundation to allow
-interoperability among different accelerated libraries. Most importantly, it
-should be easy for Python developers to use NVIDIA GPUs.
-
-## CUDA Python workflow
-
-Because Python is an interpreted language, you need a way to compile the device
-code into
-[PTX](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html) and
-then extract the function to be called at a later point in the application. It’s
-not important for understanding CUDA Python, but Parallel Thread Execution (PTX)
-is a low-level virtual machine and instruction set architecture (ISA). You
-construct your device code in the form of a string and compile it with
-[NVRTC](http://docs.nvidia.com/cuda/nvrtc/index.html), a runtime compilation
-library for CUDA C++. Using the NVIDIA [Driver
-API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html), manually create a
-CUDA context and all required resources on the GPU, then launch the compiled
-CUDA C++ code and retrieve the results from the GPU. Now that you have an
-overview, jump into a commonly used example for parallel programming:
-[SAXPY](https://developer.nvidia.com/blog/six-ways-saxpy/).
-
-The first thing to do is import the [Driver
-API](https://docs.nvidia.com/cuda/cuda-driver-api/index.html) and
-[NVRTC](https://docs.nvidia.com/cuda/nvrtc/index.html) modules from the CUDA
-Python package. In this example, you copy data from the host to device. You need
-[NumPy](https://numpy.org/doc/stable/contents.html) to store data on the host.
-
-```{code-cell} python
-from cuda.bindings import driver, nvrtc
-import numpy as np
-```
-
-Error checking is a fundamental best practice in code development and a code
-example is provided.
-In a future release, this may automatically raise exceptions using a Python
-object model.
-
-```{code-cell} python
-def _cudaGetErrorEnum(error):
-    if isinstance(error, driver.CUresult):
-        err, name = driver.cuGetErrorName(error)
-        return name if err == driver.CUresult.CUDA_SUCCESS else "<unknown>"
-    elif isinstance(error, nvrtc.nvrtcResult):
-        return nvrtc.nvrtcGetErrorString(error)[1]
-    else:
-        raise RuntimeError('Unknown error type: {}'.format(error))
-
-def checkCudaErrors(result):
-    if result[0].value:
-        raise RuntimeError("CUDA error code={}({})".format(result[0].value, _cudaGetErrorEnum(result[0])))
-    if len(result) == 1:
-        return None
-    elif len(result) == 2:
-        return result[1]
-    else:
-        return result[1:]
-```
-
-It’s common practice to write CUDA kernels near the top of a translation unit,
-so write it next. The entire kernel is wrapped in triple quotes to form a
-string. The string is compiled later using NVRTC. This is the only part of CUDA
-Python that requires some understanding of CUDA C++. For more information, see
-[An Even Easier Introduction to
-CUDA](https://developer.nvidia.com/blog/even-easier-introduction-cuda/).
-
-```{code-cell} python
-saxpy = """\
-extern "C" __global__
-void saxpy(float a, float *x, float *y, float *out, size_t n)
-{
- size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
- if (tid < n) {
-   out[tid] = a * x[tid] + y[tid];
- }
-}
-"""
-```
-Go ahead and compile the kernel into PTX. Remember that this is executed at runtime using NVRTC. There are three basic steps to NVRTC:
-
-- Create a program from the string.
-- Compile the program.
-- Extract PTX from the compiled program.
-
-In the following code example, the Driver API is initialized so that the NVIDIA driver
-and GPU are accessible. Next, the GPU is queried for its compute capability. Finally,
-the program is compiled to target our local compute capability architecture with FMAD disabled (`--fmad=false`).
-
-```{code-cell} python
-# Initialize CUDA Driver API
-checkCudaErrors(driver.cuInit(0))
-
-# Retrieve handle for device 0
-cuDevice = checkCudaErrors(driver.cuDeviceGet(0))
-
-# Derive target architecture for device 0
-major = checkCudaErrors(driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice))
-minor = checkCudaErrors(driver.cuDeviceGetAttribute(driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice))
-arch_arg = bytes(f'--gpu-architecture=compute_{major}{minor}', 'ascii')
-
-# Create program
-prog = checkCudaErrors(nvrtc.nvrtcCreateProgram(str.encode(saxpy), b"saxpy.cu", 0, [], []))
-
-# Compile program
-opts = [b"--fmad=false", arch_arg]
-checkCudaErrors(nvrtc.nvrtcCompileProgram(prog, 2, opts))
-
-# Get PTX from compilation
-ptxSize = checkCudaErrors(nvrtc.nvrtcGetPTXSize(prog))
-ptx = b" " * ptxSize
-checkCudaErrors(nvrtc.nvrtcGetPTX(prog, ptx))
-```
-
-Before you can use the PTX or do any work on the GPU, you must create a CUDA
-context. CUDA contexts are analogous to host processes for the device. In the
-following code example, a handle for compute device 0 is passed to
-`cuCtxCreate` to designate that GPU for context creation.
-
-```{code-cell} python
-# Create context
-context = checkCudaErrors(driver.cuCtxCreate(0, cuDevice))
-```
-
-With a CUDA context created on device 0, load the PTX generated earlier into a
-module. A module is analogous to dynamically loaded libraries for the device.
-After loading into the module, extract a specific kernel with
-`cuModuleGetFunction`. It is not uncommon for multiple kernels to reside in PTX.
-
-```{code-cell} python
-# Load PTX as module data and retrieve function
-ptx = np.char.array(ptx)
-# Note: Incompatible --gpu-architecture would be detected here
-module = checkCudaErrors(driver.cuModuleLoadData(ptx.ctypes.data))
-kernel = checkCudaErrors(driver.cuModuleGetFunction(module, b"saxpy"))
-```
-
-Next, get all your data prepared and transferred to the GPU. For increased
-application performance, you can input data on the device to eliminate data
-transfers. For completeness, this example shows how you would transfer data to
-and from the device.
-
-```{code-cell} python
-NUM_THREADS = 512  # Threads per block
-NUM_BLOCKS = 32768  # Blocks per grid
-
-a = np.array([2.0], dtype=np.float32)
-n = np.array(NUM_THREADS * NUM_BLOCKS, dtype=np.uint32)
-bufferSize = n * a.itemsize
-
-hX = np.random.rand(n).astype(dtype=np.float32)
-hY = np.random.rand(n).astype(dtype=np.float32)
-hOut = np.zeros(n).astype(dtype=np.float32)
-```
-
-With the input data `a`, `x`, and `y` created for the SAXPY transform, device
-resources must be allocated to store the data using `cuMemAlloc`. To allow for
-more overlap between compute and data movement, use the asynchronous function
-`cuMemcpyHtoDAsync`. It returns control to the CPU immediately following command
-execution.
-
-Python doesn’t have a natural concept of pointers, yet `cuMemcpyHtoDAsync` expects
-`void*`. Therefore, `XX.ctypes.data` retrieves the pointer value associated with
-XX.
-
-```{code-cell} python
-dXclass = checkCudaErrors(driver.cuMemAlloc(bufferSize))
-dYclass = checkCudaErrors(driver.cuMemAlloc(bufferSize))
-dOutclass = checkCudaErrors(driver.cuMemAlloc(bufferSize))
-
-stream = checkCudaErrors(driver.cuStreamCreate(0))
-
-checkCudaErrors(driver.cuMemcpyHtoDAsync(
-   dXclass, hX.ctypes.data, bufferSize, stream
-))
-checkCudaErrors(driver.cuMemcpyHtoDAsync(
-   dYclass, hY.ctypes.data, bufferSize, stream
-))
-```
-
-With data prep and resource allocation finished, the kernel is ready to be
-launched. To pass the location of the data on the device to the kernel execution
-configuration, you must retrieve the device pointer. In the following code
-example, `int(dXclass)` retrieves the pointer value of `dXclass`, which is
-`CUdeviceptr`, and assigns a memory size to store this value using `np.array`.
-
-Like `cuMemcpyHtoDAsync`, `cuLaunchKernel` expects `void**` in the argument list. In
-the following code example, it creates `void**` by grabbing the `void*` value of each
-individual argument and placing them into its own contiguous memory.
-
-```{code-cell} python
-# The following code example is not intuitive 
-# Subject to change in a future release
-dX = np.array([int(dXclass)], dtype=np.uint64)
-dY = np.array([int(dYclass)], dtype=np.uint64)
-dOut = np.array([int(dOutclass)], dtype=np.uint64)
-
-args = [a, dX, dY, dOut, n]
-args = np.array([arg.ctypes.data for arg in args], dtype=np.uint64)
-```
-
-Now the kernel can be launched:
-
-```{code-cell} python
-checkCudaErrors(driver.cuLaunchKernel(
-   kernel,
-   NUM_BLOCKS,  # grid x dim
-   1,  # grid y dim
-   1,  # grid z dim
-   NUM_THREADS,  # block x dim
-   1,  # block y dim
-   1,  # block z dim
-   0,  # dynamic shared memory
-   stream,  # stream
-   args.ctypes.data,  # kernel arguments
-   0,  # extra (ignore)
-))
-
-checkCudaErrors(driver.cuMemcpyDtoHAsync(
-   hOut.ctypes.data, dOutclass, bufferSize, stream
-))
-checkCudaErrors(driver.cuStreamSynchronize(stream))
-```
-
-The `cuLaunchKernel` function takes the compiled module kernel and execution
-configuration parameters. The device code is launched in the same stream as the
-data transfers. That ensures that the kernel’s compute is performed only after
-the data has finished transfer, as all API calls and kernel launches within a
-stream are serialized. After the call to transfer data back to the host is
-executed, `cuStreamSynchronize` is used to halt CPU execution until all operations
-in the designated stream are finished.
-
-```{code-cell} python
-# Assert values are same after running kernel
-hZ = a * hX + hY
-if not np.allclose(hOut, hZ):
-   raise ValueError("Error outside tolerance for host-device vectors")
-```
-
-Perform verification of the data to ensure correctness and finish the code with
-memory clean up.
-
-```{code-cell} python
-checkCudaErrors(driver.cuStreamDestroy(stream))
-checkCudaErrors(driver.cuMemFree(dXclass))
-checkCudaErrors(driver.cuMemFree(dYclass))
-checkCudaErrors(driver.cuMemFree(dOutclass))
-checkCudaErrors(driver.cuModuleUnload(module))
-checkCudaErrors(driver.cuCtxDestroy(context))
-```
-
-## Performance
-
-Performance is a primary driver in targeting GPUs in your application. So, how
-does the above code compare to its C++ version? Table 1 shows that the results
-are nearly identical. [NVIDIA NSight
-Systems](https://developer.nvidia.com/nsight-systems) was used to retrieve
-kernel performance and [CUDA
-Events](https://developer.nvidia.com/blog/how-implement-performance-metrics-cuda-cc/)
-was used for application performance.
-
-The following command was used to profile the applications:
-
-```{code-block} shell
-nsys profile -s none -t cuda --stats=true <executable>
-```
-
-```{list-table} Kernel and application performance comparison.
-:header-rows: 1
-
-* -
-  - C++
-  - Python 
-* - Kernel execution
-  - 352µs
-  - 352µs
-* - Application execution
-  - 1076ms
-  - 1080ms
-```
-
-CUDA Python is also compatible with [NVIDIA Nsight
-Compute](https://developer.nvidia.com/nsight-compute), which is an
-interactive kernel profiler for CUDA applications. It allows you to have
-detailed insights into kernel performance. This is useful when you’re trying to
-maximize performance ({numref}`Figure 1`).
-
-```{figure} _static/images/Nsigth-Compute-CLI-625x473.png
-:name: Figure 1
-
-Screenshot of Nsight Compute CLI output of CUDA Python example.
-```
-
-## Future of CUDA Python
-
-The current bindings are built to match the C APIs as closely as possible.
-
-The next goal is to build a higher-level "object oriented" API on top of
-current CUDA Python bindings and provide an overall more Pythonic experience.
-One such example would be to raise exceptions on errors.
diff --git a/docs_src/source/release.md b/docs_src/source/release.md
deleted file mode 100644
index c3ae5a30..00000000
--- a/docs_src/source/release.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Release Notes
-
-```{toctree}
----
-maxdepth: 3
----
-
-    12.6.1 <release/12.6.1-notes>
-    12.6.0 <release/12.6.0-notes>
-    12.5.0 <release/12.5.0-notes>
-    12.4.0 <release/12.4.0-notes>
-    12.3.0 <release/12.3.0-notes>
-    12.2.1 <release/12.2.1-notes>
-    12.2.0 <release/12.2.0-notes>
-    12.1.0 <release/12.1.0-notes>
-    12.0.0 <release/12.0.0-notes>
-    11.8.4 <release/11.8.4-notes>
-    11.8.3 <release/11.8.3-notes>
-    11.8.2 <release/11.8.2-notes>
-    11.8.1 <release/11.8.1-notes>
-    11.8.0 <release/11.8.0-notes>
-    11.7.1 <release/11.7.1-notes>
-    11.7.0 <release/11.7.0-notes>
-    11.6.1 <release/11.6.1-notes>
-    11.6.0 <release/11.6.0-notes>
-    11.5.0 <release/11.5.0-notes>
-    11.4.0 <release/11.4.0-notes>
-```
diff --git a/docs_src/source/release/11.4.0-notes.md b/docs_src/source/release/11.4.0-notes.md
deleted file mode 100644
index f7611688..00000000
--- a/docs_src/source/release/11.4.0-notes.md
+++ /dev/null
@@ -1,42 +0,0 @@
-# CUDA Python 11.4.0 Release notes
-
-Released on August 16, 2021
-
-## Highlights
-- Initial EA release for CUDA Python
-- Supports all platforms that CUDA is supported
-- Supports all CUDA 11.x releases
-- Low-level CUDA Cython bindings and Python wrappers
-
-## Limitations
-
-- Source code release only; Python packages coming in a future release.
-
-### CUDA Functions Not Supported in this Release
-
-- cudaGetTextureReference
-- cudaGetSurfaceReference
-- cudaBindTexture
-- cudaBindTexture2D
-- cudaBindTextureToArray
-- cudaBindTextureToMipmappedArray
-- cudaLaunchKernel
-- cudaLaunchCooperativeKernel
-- cudaLaunchCooperativeKernelMultiDevice
-- cudaMemcpyToSymbol
-- cudaMemcpyFromSymbol
-- cudaMemcpyToSymbolAsync
-- cudaMemcpyFromSymbolAsync
-- cudaGetSymbolAddress
-- cudaGetSymbolSize
-- cudaUnbindTexture
-- cudaGetTextureAlignmentOffset
-- cudaBindSurfaceToArray
-- cudaGetFuncBySymbol
-- cudaSetValidDevices
-- cudaGraphExecMemcpyNodeSetParamsFromSymbol
-- cudaGraphExecMemcpyNodeSetParamsToSymbol
-- cudaGraphAddMemcpyNodeToSymbol
-- cudaGraphAddMemcpyNodeFromSymbol
-- cudaGraphMemcpyNodeSetParamsToSymbol
-- cudaGraphMemcpyNodeSetParamsFromSymbol
diff --git a/docs_src/source/release/11.5.0-notes.md b/docs_src/source/release/11.5.0-notes.md
deleted file mode 100644
index a7f8fddc..00000000
--- a/docs_src/source/release/11.5.0-notes.md
+++ /dev/null
@@ -1,110 +0,0 @@
-# CUDA Python 11.5.0 Release notes
-
-Released on October 18, 2021
-
-## Highlights
-- PyPI support
-- Conda support
-- GA release for CUDA Python
-- Supports all platforms that CUDA is supported
-- Supports all CUDA 11.x releases
-- Low-level CUDA Cython bindings and Python wrappers
-
-## Limitations
-
-- Changing default stream not supported; coming in future release
-
-### CUDA Functions Not Supported in this Release
-
-- cudaGetTextureReference
-- cudaGetSurfaceReference
-- cudaBindTexture
-- cudaBindTexture2D
-- cudaBindTextureToArray
-- cudaBindTextureToMipmappedArray
-- cudaLaunchKernel
-- cudaLaunchCooperativeKernel
-- cudaLaunchCooperativeKernelMultiDevice
-- cudaMemcpyToSymbol
-- cudaMemcpyFromSymbol
-- cudaMemcpyToSymbolAsync
-- cudaMemcpyFromSymbolAsync
-- cudaGetSymbolAddress
-- cudaGetSymbolSize
-- cudaUnbindTexture
-- cudaGetTextureAlignmentOffset
-- cudaBindSurfaceToArray
-- cudaGetFuncBySymbol
-- cudaSetValidDevices
-- cudaGraphExecMemcpyNodeSetParamsFromSymbol
-- cudaGraphExecMemcpyNodeSetParamsToSymbol
-- cudaGraphAddMemcpyNodeToSymbol
-- cudaGraphAddMemcpyNodeFromSymbol
-- cudaGraphMemcpyNodeSetParamsToSymbol
-- cudaGraphMemcpyNodeSetParamsFromSymbol
-- cudaProfilerInitialize
-- cudaProfilerStart
-- cudaProfilerStop
-- cuProfilerInitialize
-- cuProfilerStart
-- cuProfilerStop
-- EGL
-    - cuGraphicsEGLRegisterImage
-    - cuEGLStreamConsumerConnect
-    - cuEGLStreamConsumerConnectWithFlags
-    - cuEGLStreamConsumerDisconnect
-    - cuEGLStreamConsumerAcquireFrame
-    - cuEGLStreamConsumerReleaseFrame
-    - cuEGLStreamProducerConnect
-    - cuEGLStreamProducerDisconnect
-    - cuEGLStreamProducerPresentFrame
-    - cuEGLStreamProducerReturnFrame
-    - cuGraphicsResourceGetMappedEglFrame
-    - cuEventCreateFromEGLSync
-    - cudaGraphicsEGLRegisterImage
-    - cudaEGLStreamConsumerConnect
-    - cudaEGLStreamConsumerConnectWithFlags
-    - cudaEGLStreamConsumerDisconnect
-    - cudaEGLStreamConsumerAcquireFrame
-    - cudaEGLStreamConsumerReleaseFrame
-    - cudaEGLStreamProducerConnect
-    - cudaEGLStreamProducerDisconnect
-    - cudaEGLStreamProducerPresentFrame
-    - cudaEGLStreamProducerReturnFrame
-    - cudaGraphicsResourceGetMappedEglFrame
-    - cudaEventCreateFromEGLSync
-- GL
-    - cuGraphicsGLRegisterBuffer
-    - cuGraphicsGLRegisterImage
-    - cuWGLGetDevice
-    - cuGLGetDevices
-    - cuGLCtxCreate
-    - cuGLInit
-    - cuGLRegisterBufferObject
-    - cuGLMapBufferObject
-    - cuGLUnmapBufferObject
-    - cuGLUnregisterBufferObject
-    - cuGLSetBufferObjectMapFlags
-    - cuGLMapBufferObjectAsync
-    - cuGLUnmapBufferObjectAsync
-    - cudaGLGetDevices
-    - cudaGraphicsGLRegisterImage
-    - cudaGraphicsGLRegisterBuffer
-    - cudaWGLGetDevice
-    - cudaGLSetGLDevice
-    - cudaGLRegisterBufferObject
-    - cudaGLMapBufferObject
-    - cudaGLUnmapBufferObject
-    - cudaGLUnregisterBufferObject
-    - cudaGLSetBufferObjectMapFlags
-    - cudaGLMapBufferObjectAsync
-    - cudaGLUnmapBufferObjectAsync
-- VDPAU
-    - cuVDPAUGetDevice
-    - cuVDPAUCtxCreate
-    - cuGraphicsVDPAURegisterVideoSurface
-    - cuGraphicsVDPAURegisterOutputSurface
-    - cudaVDPAUGetDevice
-    - cudaVDPAUSetVDPAUDevice
-    - cudaGraphicsVDPAURegisterVideoSurface
-    - cudaGraphicsVDPAURegisterOutputSurface
diff --git a/docs_src/source/release/11.6.0-notes.md b/docs_src/source/release/11.6.0-notes.md
deleted file mode 100644
index 60a9d920..00000000
--- a/docs_src/source/release/11.6.0-notes.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# CUDA Python 11.6.0 Release notes
-
-Released on January 12, 2022
-
-## Highlights
-- Support CUDA Toolkit 11.6
-- Support Profiler APIs
-- Support Graphic APIs (EGL, GL, VDPAU)
-- Support changing default stream
-- Relaxed primitive interoperability
-
-### Default stream
-
-Changing default stream to Per-Thread-Default-Stream (PTDS) is done through environment variable before execution:
-
-```{code-block} shell
-export CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM=1
-```
-
-When set to 1, the default stream is the per-thread default stream. When set to 0, the default stream is the legacy default stream. This defaults to 0, for the legacy default stream. See [Stream Synchronization Behavior](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) for an explanation of the legacy and per-thread default streams.
-
-### Primitive interoperability
-
-APIs accepting classes that wrap a primitive value are now interoperable with the underlying value.
-
-Example 1: Structure member handles interoperability.
-
-```{code-block} python
->>> waitParams = cuda.CUstreamMemOpWaitValueParams_st()
->>> waitParams.value64 = 1
->>> waitParams.value64
-<cuuint64_t 1>
->>> waitParams.value64 = cuda.cuuint64_t(2)
->>> waitParams.value64
-<cuuint64_t 2>
-```
-
-Example 2: Function signature handles interoperability.
-
-```{code-block} python
->>> cudart.cudaStreamQuery(cudart.cudaStreamNonBlocking)
-(<cudaError_t.cudaSuccess: 0>,)
->>> cudart.cudaStreamQuery(cudart.cudaStream_t(cudart.cudaStreamNonBlocking))
-(<cudaError_t.cudaSuccess: 0>,)
-```
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
-
-```{note} Deprecated APIs are removed from tracking
-```
diff --git a/docs_src/source/release/11.6.1-notes.md b/docs_src/source/release/11.6.1-notes.md
deleted file mode 100644
index bc2ba329..00000000
--- a/docs_src/source/release/11.6.1-notes.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# CUDA Python 11.6.1 Release notes
-
-Released on March 18, 2022
-
-## Highlights
-- Fix string decomposition for WSL library load
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/11.7.0-notes.md b/docs_src/source/release/11.7.0-notes.md
deleted file mode 100644
index 91ab215e..00000000
--- a/docs_src/source/release/11.7.0-notes.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# CUDA Python 11.7.0 Release notes
-
-Released on May 11, 2022
-
-## Highlights
-- Support CUDA Toolkit 11.7
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/11.7.1-notes.md b/docs_src/source/release/11.7.1-notes.md
deleted file mode 100644
index 8d07b19d..00000000
--- a/docs_src/source/release/11.7.1-notes.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# CUDA Python 11.7.1 Release notes
-
-Released on June 29, 2022
-
-## Highlights
-- Fix error propagation in CUDA Runtime bindings
-- Resolves [issue #22](https://github.com/NVIDIA/cuda-python/issues/22)
-
-## Limitations
-
-### Source builds
-
-CUDA Python no longer re-declares CUDA types; instead, it uses the types from CUDA C headers. As such, source builds now need access to the latest CTK headers. In particular:
-1. "$CUDA_HOME/include" has latest CTK headers
-2. CTK headers have all types defined
-
-(2) Certain CUDA types are not declared on mobile platforms and may face a "has not been declared" error during source builds. A temporary workaround is to use the headers found in [https://gitlab.com/nvidia/headers/cuda](https://gitlab.com/nvidia/headers/cuda). In particular CUDA Python needs the following headers and their dependencies:
-- cuda.h
-- cudaProfiler.h
-- driver_types.h
-- cuda_runtime.h
-- nvrtc.h
-
-This is a short-term limitation and will be relaxed in a future release.
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/11.8.0-notes.md b/docs_src/source/release/11.8.0-notes.md
deleted file mode 100644
index f860e5fb..00000000
--- a/docs_src/source/release/11.8.0-notes.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# CUDA Python 11.8.0 Release notes
-
-Released on October 3, 2022
-
-## Highlights
-- Support CUDA Toolkit 11.8
-- Source builds allow for missing types and APIs
-- Resolves source builds for mobile platforms
-- Resolves [issue #24](https://github.com/NVIDIA/cuda-python/issues/24)
-
-### Source Builds
-
-CUDA Python source builds now parse CUDA headers located in $CUDA_HOME directory, enabling/disabling types and APIs if defined. Therefore this removes the need for CTK headers to have all types defined. By allowing minor variations, previous [11.7.1 mobile platform workaround](https://nvidia.github.io/cuda-python/release/11.7.1-notes.html#source-builds) is no longer needed.
-
-It's still required that source builds use the latest CTK headers (i.e. “$CUDA_HOME/include” has latest CTK headers).
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/11.8.1-notes.md b/docs_src/source/release/11.8.1-notes.md
deleted file mode 100644
index 94565355..00000000
--- a/docs_src/source/release/11.8.1-notes.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# CUDA Python 11.8.1 Release notes
-
-Released on November 4, 2022
-
-## Highlights
-- Resolves [issue #27](https://github.com/NVIDIA/cuda-python/issues/27)
-- Update install instructions to use latest CTK
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/11.8.2-notes.md b/docs_src/source/release/11.8.2-notes.md
deleted file mode 100644
index 84d781b5..00000000
--- a/docs_src/source/release/11.8.2-notes.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# CUDA Python 11.8.2 Release notes
-
-Released on May 18, 2023
-
-## Highlights
-- Open libcuda.so.1 instead of libcuda.so
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/11.8.3-notes.md b/docs_src/source/release/11.8.3-notes.md
deleted file mode 100644
index 91bbc491..00000000
--- a/docs_src/source/release/11.8.3-notes.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# CUDA Python 11.8.3 Release notes
-
-Released on October 23, 2023
-
-## Highlights
-- Compatibility with Cython 3
-- New API cudart.getLocalRuntimeVersion()
-- Modernize build config
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/11.8.4-notes.md b/docs_src/source/release/11.8.4-notes.md
deleted file mode 100644
index 9cae2915..00000000
--- a/docs_src/source/release/11.8.4-notes.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# CUDA Python 11.8.4 Release notes
-
-Released on October 7, 2024
-
-## Highlights
-- Resolve [Issue #89](https://github.com/NVIDIA/cuda-python/issues/89): Fix getLocalRuntimeVersion searching for wrong libcudart version
-- Resolve [Issue #90](https://github.com/NVIDIA/cuda-python/issues/90): Use new layout in preparation for cuda-python becoming a metapackage
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/12.0.0-notes.md b/docs_src/source/release/12.0.0-notes.md
deleted file mode 100644
index df1bf1f9..00000000
--- a/docs_src/source/release/12.0.0-notes.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# CUDA Python 12.0.0 Release notes
-
-Released on December 8, 2022
-
-## Highlights
-- Rebase to CUDA Toolkit 12.0
-- Fix example from [MR28](https://github.com/NVIDIA/cuda-python/pull/28)
-- Apply [MR35](https://github.com/NVIDIA/cuda-python/pull/35)
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/12.1.0-notes.md b/docs_src/source/release/12.1.0-notes.md
deleted file mode 100644
index aec56999..00000000
--- a/docs_src/source/release/12.1.0-notes.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# CUDA Python 12.1.0 Release notes
-
-Released on February 28, 2023
-
-## Highlights
-- Rebase to CUDA Toolkit 12.1
-- Resolve [Issue #41](https://github.com/NVIDIA/cuda-python/issues/41): Add support for Python 3.11
-- Resolve [Issue #42](https://github.com/NVIDIA/cuda-python/issues/42): Dropping Python 3.7
-- Resolve [Issue #43](https://github.com/NVIDIA/cuda-python/issues/43): Trim Conda package dependencies
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/12.2.0-notes.md b/docs_src/source/release/12.2.0-notes.md
deleted file mode 100644
index d6bd6675..00000000
--- a/docs_src/source/release/12.2.0-notes.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# CUDA Python 12.2.0 Release notes
-
-Released on June 28, 2023
-
-## Highlights
-- Rebase to CUDA Toolkit 12.2
-- Resolve [Issue #44](https://github.com/NVIDIA/cuda-python/issues/44): nogil must be at the end of the function signature line
-- Resolve [Issue #45](https://github.com/NVIDIA/cuda-python/issues/45): Error with pyparsing when no CUDA is found
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/12.2.1-notes.md b/docs_src/source/release/12.2.1-notes.md
deleted file mode 100644
index 41704a56..00000000
--- a/docs_src/source/release/12.2.1-notes.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# CUDA Python 12.2.1 Release notes
-
-Released on January 8, 2024
-
-## Highlights
-- Compatibility with Cython 3
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
diff --git a/docs_src/source/release/12.3.0-notes.md b/docs_src/source/release/12.3.0-notes.md
deleted file mode 100644
index 016ee0de..00000000
--- a/docs_src/source/release/12.3.0-notes.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# CUDA Python 12.3.0 Release notes
-
-Released on October 19, 2023
-
-## Highlights
-- Rebase to CUDA Toolkit 12.3
-- Resolve [Issue #16](https://github.com/NVIDIA/cuda-python/issues/16): cuda.cudart.cudaRuntimeGetVersion() hard-codes the runtime version, rather than querying the runtime
-    - New API cudart.getLocalRuntimeVersion()
-- Resolve [Issue #48](https://github.com/NVIDIA/cuda-python/issues/48): Dropping Python 3.8
-- Resolve [Issue #51](https://github.com/NVIDIA/cuda-python/issues/51): Dropping package releases for ppc64 on PYPI and conda-nvidia channel
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
-- cudaFuncGetName
diff --git a/docs_src/source/release/12.4.0-notes.md b/docs_src/source/release/12.4.0-notes.md
deleted file mode 100644
index 6daedb20..00000000
--- a/docs_src/source/release/12.4.0-notes.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# CUDA Python 12.4.0 Release notes
-
-Released on March 5, 2024
-
-## Highlights
-- Rebase to CUDA Toolkit 12.4
-- Add PyPI/Conda support for Python 3.12
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
-- cudaFuncGetName
-- cudaFuncGetParamInfo
diff --git a/docs_src/source/release/12.5.0-notes.md b/docs_src/source/release/12.5.0-notes.md
deleted file mode 100644
index 701f0320..00000000
--- a/docs_src/source/release/12.5.0-notes.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# CUDA Python 12.5.0 Release notes
-
-Released on May 21, 2024
-
-## Highlights
-- Rebase to CUDA Toolkit 12.5
-- Resolve [Issue #58](https://github.com/NVIDIA/cuda-python/issues/58): Interop between CUdeviceptr and Runtime
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
-- cudaFuncGetName
-- cudaFuncGetParamInfo
diff --git a/docs_src/source/release/12.6.0-notes.md b/docs_src/source/release/12.6.0-notes.md
deleted file mode 100644
index 2531e89b..00000000
--- a/docs_src/source/release/12.6.0-notes.md
+++ /dev/null
@@ -1,36 +0,0 @@
-# CUDA Python 12.6.0 Release notes
-
-Released on August 1, 2024
-
-## Highlights
-- Rebase to CUDA Toolkit 12.6
-- Resolve [Issue #32](https://github.com/NVIDIA/cuda-python/issues/32): Add 'pywin32' as Windows requirement
-- Resolve [Issue #72](https://github.com/NVIDIA/cuda-python/issues/72): Allow both lists and tuples as parameter
-- Resolve [Issue #73](https://github.com/NVIDIA/cuda-python/issues/73): Fix 'cuLibraryLoadData' processing of parameters
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
-- cudaFuncGetName
-- cudaFuncGetParamInfo
diff --git a/docs_src/source/release/12.6.1-notes.md b/docs_src/source/release/12.6.1-notes.md
deleted file mode 100644
index bf196213..00000000
--- a/docs_src/source/release/12.6.1-notes.md
+++ /dev/null
@@ -1,53 +0,0 @@
-# CUDA Python 12.6.1 Release notes
-
-Released on October 7, 2024
-
-## Highlights
-- Resolve [Issue #90](https://github.com/NVIDIA/cuda-python/issues/90): Use new layout in preparation for cuda-python becoming a metapackage
-- Resolve [Issue #75](https://github.com/NVIDIA/cuda-python/issues/75): CUDA namespace cleanup
-
-## CUDA namespace cleanup with a new module layout
-
-[Issue #75](https://github.com/NVIDIA/cuda-python/issues/75) explains in detail what the new module layout is, what problem it fixes, and how it impacts users. However, for the sake of completeness, these release notes highlight the key points of this change.
-
-Before this change, `cuda-python` was tightly coupled to CUDA Toolkit releases and all new features would inherit this coupling regardless of their applicability. As we develop new features, this coupling was becoming overly restrictive and motivated a new solution: Convert `cuda-python` into a metapackage where we use `cuda` as a namespace with existing bindings code moved to a `cuda_bindings` subpackage.
-
-This patch release applies the new module layout for the bindings as follows:
-- `cuda.cuda` -> `cuda.bindings.driver`
-- `cuda.ccuda` -> `cuda.bindings.cydriver`
-- `cuda.cudart` -> `cuda.bindings.runtime`
-- `cuda.ccudart` -> `cuda.bindings.cyruntime`
-- `cuda.nvrtc` -> `cuda.bindings.nvrtc`
-- `cuda.cnvrtc` -> `cuda.bindings.cynvrtc`
-
-Deprecation warnings are turned on as a notice to switch to the new module layout.
-
-```{note} This is a non-breaking, backwards-compatible change. All old module paths will continue to work, as they "forward" user calls to the new layout.
-```
-
-## Limitations
-
-### CUDA Functions Not Supported in this Release
-
-- Symbol APIs
-    - cudaGraphExecMemcpyNodeSetParamsFromSymbol
-    - cudaGraphExecMemcpyNodeSetParamsToSymbol
-    - cudaGraphAddMemcpyNodeToSymbol
-    - cudaGraphAddMemcpyNodeFromSymbol
-    - cudaGraphMemcpyNodeSetParamsToSymbol
-    - cudaGraphMemcpyNodeSetParamsFromSymbol
-    - cudaMemcpyToSymbol
-    - cudaMemcpyFromSymbol
-    - cudaMemcpyToSymbolAsync
-    - cudaMemcpyFromSymbolAsync
-    - cudaGetSymbolAddress
-    - cudaGetSymbolSize
-    - cudaGetFuncBySymbol
-- Launch Options
-    - cudaLaunchKernel
-    - cudaLaunchCooperativeKernel
-    - cudaLaunchCooperativeKernelMultiDevice
-- cudaSetValidDevices
-- cudaVDPAUSetVDPAUDevice
-- cudaFuncGetName
-- cudaFuncGetParamInfo