Skip to content

Commit

Permalink
Merge branch 'main' into multi-gpu
Browse files Browse the repository at this point in the history
  • Loading branch information
leofang authored Jan 2, 2025
2 parents fde06d5 + 527f137 commit b041f32
Show file tree
Hide file tree
Showing 10 changed files with 230 additions and 28 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,17 @@
on: workflow_call
name: "CI: Build and test"

# One concurrency group per workflow and ref: each run on main gets its own
# group (keyed by the run id), while runs for the same event/branch pair share
# a group, so a newer run cancels the one still in progress.
concurrency:
  group: >-
    ${{ github.workflow }}-${{
    github.ref_name == 'main'
    && format('ci-main-build-test-{0}', github.run_id)
    || format('ci-pr-build-test-on-{0}-against-branch-{1}', github.event_name, github.ref_name)
    }}
  cancel-in-progress: true

# Triggered by pushes to main and to branches matching pull-request/<digits>.
on:
  push:
    branches:
      - "pull-request/[0-9]+"
      - "main"

jobs:
build:
Expand Down Expand Up @@ -192,7 +205,7 @@ jobs:
runner: H100
name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }})
# The build stage could fail but we want the CI to keep moving.
if: ${{ (github.repository_owner == 'nvidia') && always() }}
if: ${{ github.repository_owner == 'nvidia' && always() }}
permissions:
id-token: write # This is required for configure-aws-credentials
contents: read # This is required for actions/checkout
Expand All @@ -209,7 +222,7 @@ jobs:
needs:
- build
steps:
- name: Run nvidia-smi to make sure GPU is working
- name: Ensure GPU is working
shell: bash --noprofile --norc -xeuo pipefail {0}
run: nvidia-smi

Expand Down Expand Up @@ -319,3 +332,18 @@ jobs:
pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt"
pytest -rxXs tests/
popd
doc:
  # Calls the reusable docs workflow, handing it the CTK version published
  # as an output of the build job.
  # The build stage could fail but we want the CI to keep moving.
  if: ${{ github.repository_owner == 'nvidia' && always() }}
  needs:
    - build
  # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
  permissions:
    id-token: write
    contents: write
  uses: ./.github/workflows/build-docs.yml
  secrets: inherit
  with:
    build_ctk_ver: ${{ needs.build.outputs.BUILD_CTK_VER }}
129 changes: 129 additions & 0 deletions .github/workflows/build-docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Reusable workflow (workflow_call) that builds the documentation from the
# wheels produced earlier in the pipeline, uploads the rendered HTML as an
# artifact, and deploys it when running on main.
name: "CI: Build and update docs"

on:
  workflow_call:
    inputs:
      # CTK version used to fetch the mini CTK and to select the
      # cuda.bindings wheel artifact.
      build_ctk_ver:
        type: string
        required: true

jobs:
  build:
    name: Build docs
    # The build stage could fail but we want the CI to keep moving.
    if: ${{ github.repository_owner == 'nvidia' && always() }}
    # WAR: Building the doc currently requires a GPU (NVIDIA/cuda-python#326,327)
    runs-on: linux-amd64-gpu-t4-latest-1-testing
    #runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -el {0}
    steps:
      # WAR: Building the doc currently requires a GPU (NVIDIA/cuda-python#326,327)
      - name: Ensure GPU is working
        run: nvidia-smi

      - name: Checkout ${{ github.event.repository.name }}
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # TODO: cache conda env to speed up the workflow once conda-incubator/setup-miniconda#267
      # is resolved

      - name: Set up miniforge
        uses: conda-incubator/setup-miniconda@v3
        with:
          activate-environment: cuda-python-docs
          environment-file: ./cuda_python/docs/environment-docs.yml
          miniforge-version: latest
          conda-remove-defaults: "true"
          # Quoted so YAML keeps it a string; an unquoted 3.10 would load as 3.1.
          python-version: "3.12"

      - name: Check conda env
        run: |
          conda info
          conda list
          conda config --show-sources
          conda config --show

      # WAR: Building the doc currently requires CTK installed (NVIDIA/cuda-python#326,327)
      - name: Set up mini CTK
        uses: ./.github/actions/fetch_ctk
        continue-on-error: false
        with:
          host-platform: linux-64
          cuda-version: ${{ inputs.build_ctk_ver }}

      - name: Set environment variables
        run: |
          PYTHON_VERSION_FORMATTED="312" # see above
          REPO_DIR=$(pwd)
          # make outputs from the previous job as env vars
          echo "CUDA_CORE_ARTIFACT_NAME=cuda-core-python${PYTHON_VERSION_FORMATTED}-linux-64-${{ github.sha }}" >> "$GITHUB_ENV"
          echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> "$GITHUB_ENV"
          echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ inputs.build_ctk_ver }}-linux-64-${{ github.sha }}" >> "$GITHUB_ENV"
          echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> "$GITHUB_ENV"

      - name: Download cuda.bindings build artifacts
        uses: actions/download-artifact@v4
        with:
          name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
          path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}

      - name: Display structure of downloaded cuda.bindings artifacts
        run: |
          pwd
          ls -lahR "$CUDA_BINDINGS_ARTIFACTS_DIR"

      - name: Download cuda.core build artifacts
        uses: actions/download-artifact@v4
        with:
          name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
          path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

      - name: Display structure of downloaded cuda.core build artifacts
        run: |
          pwd
          ls -lahR "$CUDA_CORE_ARTIFACTS_DIR"

      - name: Install all packages
        run: |
          pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}"
          pip install *.whl
          popd
          pushd "${CUDA_CORE_ARTIFACTS_DIR}"
          pip install *.whl
          popd

      - name: Build all (latest) docs
        id: build
        run: |
          pushd cuda_python/docs/
          ./build_all_docs.sh latest-only
          ls -l build
          popd
          # Stage the rendered HTML where the upload and deploy steps expect it.
          mkdir -p artifacts/docs
          mv cuda_python/docs/build/html/* artifacts/docs/

      # Note: currently this is only for manual inspection. This step will become
      # required once we switch to use GHA for doc deployment (see the bottom).
      - name: Upload doc artifacts
        uses: actions/upload-pages-artifact@v3
        with:
          path: artifacts/
          retention-days: 3

      # The step below is executed only when building on main.
      - name: Deploy doc update
        if: ${{ github.ref_name == 'main' && success() }}
        uses: JamesIves/github-pages-deploy-action@v4
        with:
          folder: artifacts/docs/
          git-config-name: cuda-python-bot
          target-folder: docs/
          commit-message: "Deploy latest docs: ${{ github.sha }}"
          clean: false
17 changes: 0 additions & 17 deletions .github/workflows/ci-gh.yml

This file was deleted.

15 changes: 14 additions & 1 deletion cuda_bindings/docs/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

set -ex

if [[ "$#" == "0" ]]; then
LATEST_ONLY="0"
elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then
LATEST_ONLY="1"
else
echo "usage: ./build_docs.sh [latest-only]"
exit 1
fi

# SPHINX_CUDA_BINDINGS_VER is used to create a subdir under build/html
# (the Makefile file for sphinx-build also honors it if defined).
# If there's a post release (ex: .post1) we don't want it to show up in the
Expand All @@ -28,7 +37,11 @@ cp ./versions.json build/html
cp source/_templates/main.html build/html/index.html

# ensure that the latest docs is the one we built
cp -r build/html/${SPHINX_CUDA_BINDINGS_VER} build/html/latest
if [[ $LATEST_ONLY == "0" ]]; then
cp -r build/html/${SPHINX_CUDA_BINDINGS_VER} build/html/latest
else
mv build/html/${SPHINX_CUDA_BINDINGS_VER} build/html/latest
fi

# ensure that the Sphinx reference uses the latest docs
cp build/html/latest/objects.inv build/html
10 changes: 9 additions & 1 deletion cuda_core/cuda/core/experimental/_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import ctypes
import warnings
import weakref
from contextlib import contextmanager
from dataclasses import dataclass
Expand Down Expand Up @@ -42,6 +43,12 @@ def _decide_nvjitlink_or_driver():
_nvjitlink = None

if _nvjitlink is None:
warnings.warn(
"nvJitLink is not installed or too old (<12.3). Therefore it is not usable "
"and the culink APIs will be used instead.",
stacklevel=3,
category=RuntimeWarning,
)
_driver = cuda
return True
else:
Expand Down Expand Up @@ -80,7 +87,8 @@ class LinkerOptions:
"""Customizable :obj:`Linker` options.
Since the linker would choose to use nvJitLink or the driver APIs as the linking backend,
not all options are applicable.
not all options are applicable. When the system's installed nvJitLink is too old (<12.3),
or not installed, the driver APIs (cuLink) will be used instead.
Attributes
----------
Expand Down
15 changes: 14 additions & 1 deletion cuda_core/docs/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

set -ex

if [[ "$#" == "0" ]]; then
LATEST_ONLY="0"
elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then
LATEST_ONLY="1"
else
echo "usage: ./build_docs.sh [latest-only]"
exit 1
fi

# SPHINX_CUDA_CORE_VER is used to create a subdir under build/html
# (the Makefile file for sphinx-build also honors it if defined)
if [[ -z "${SPHINX_CUDA_CORE_VER}" ]]; then
Expand All @@ -24,7 +33,11 @@ cp ./versions.json build/html
cp source/_templates/main.html build/html/index.html

# ensure that the latest docs is the one we built
cp -r build/html/${SPHINX_CUDA_CORE_VER} build/html/latest
if [[ $LATEST_ONLY == "0" ]]; then
cp -r build/html/${SPHINX_CUDA_CORE_VER} build/html/latest
else
mv build/html/${SPHINX_CUDA_CORE_VER} build/html/latest
fi

# ensure that the Sphinx reference uses the latest docs
cp build/html/latest/objects.inv build/html
Expand Down
15 changes: 15 additions & 0 deletions cuda_core/docs/source/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,21 @@ and likewise use `[cu11]` for CUDA 11.
Note that using `cuda.core` with NVRTC or nvJitLink installed from PyPI via `pip install` is currently
not supported. This will be fixed in a future release.

## Installing from Conda (conda-forge)

Same as above, `cuda.core` can be installed in a CUDA 11 or 12 environment. For example with CUDA 12:
```console
$ conda install -c conda-forge cuda-core cuda-version=12
```
and likewise use `cuda-version=11` for CUDA 11.

Note that using `cuda.core` with nvJitLink from conda-forge currently requires nvJitLink to
be installed separately:
```console
$ conda install -c conda-forge libnvjitlink
```
(can be combined with the command above). This extra step will be removed in a future release.

## Installing from Source

```console
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/docs/source/release/0.1.1-notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Released on Dec 20, 2024
- Add a `cuda.core.experimental.system` module for querying system- or process- wide information.
- Add `LaunchConfig.cluster` to support thread block clusters on Hopper GPUs.

## Enchancements
## Enhancements

- The internal handle held by `ObjectCode` is now lazily initialized upon first touch.
- Support TCC devices with a default synchronous memory resource to avoid the use of memory pools.
Expand Down
6 changes: 3 additions & 3 deletions cuda_python/docs/build_all_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ set -ex

# build cuda-python docs
rm -rf build
./build_docs.sh
# Forward the caller's arguments unchanged; quoting "$@" prevents word splitting and globbing.
./build_docs.sh "$@"

# build cuda-bindings docs
CUDA_BINDINGS_PATH=build/html/cuda-bindings
mkdir -p $CUDA_BINDINGS_PATH
pushd .
cd ../../cuda_bindings/docs
rm -rf build
./build_docs.sh
# Forward the caller's arguments unchanged; quoting "$@" prevents word splitting and globbing.
./build_docs.sh "$@"
cp -r build/html/* "$(dirs -l +1)"/$CUDA_BINDINGS_PATH
popd

Expand All @@ -22,6 +22,6 @@ mkdir -p $CUDA_CORE_PATH
pushd .
cd ../../cuda_core/docs
rm -rf build
./build_docs.sh
# Forward the caller's arguments unchanged; quoting "$@" prevents word splitting and globbing.
./build_docs.sh "$@"
cp -r build/html/* "$(dirs -l +1)"/$CUDA_CORE_PATH
popd
15 changes: 14 additions & 1 deletion cuda_python/docs/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

set -ex

if [[ "$#" == "0" ]]; then
LATEST_ONLY="0"
elif [[ "$#" == "1" && "$1" == "latest-only" ]]; then
LATEST_ONLY="1"
else
echo "usage: ./build_docs.sh [latest-only]"
exit 1
fi

# SPHINX_CUDA_PYTHON_VER is used to create a subdir under build/html
# (the Makefile file for sphinx-build also honors it if defined).
# If there's a post release (ex: .post1) we don't want it to show up in the
Expand All @@ -28,7 +37,11 @@ cp ./versions.json build/html
cp source/_templates/main.html build/html/index.html

# ensure that the latest docs is the one we built
cp -r build/html/${SPHINX_CUDA_PYTHON_VER} build/html/latest
if [[ $LATEST_ONLY == "0" ]]; then
cp -r build/html/${SPHINX_CUDA_PYTHON_VER} build/html/latest
else
mv build/html/${SPHINX_CUDA_PYTHON_VER} build/html/latest
fi

# ensure that the Sphinx reference uses the latest docs
cp build/html/latest/objects.inv build/html
Expand Down

0 comments on commit b041f32

Please sign in to comment.