Added CI job with TSAN and free-threading #20
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow display name, as shown in the Actions UI.
name: CI - Free-threading and Thread Sanitizer (nightly)

# Runs are grouped per workflow and git ref; starting a new run in a group
# cancels the run that is still in progress for that group.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }}-${{ github.ref }}"
on:
  # Manual trigger.
  workflow_dispatch:
  # Scheduled trigger: every day at 12:00 UTC.
  schedule:
    - cron: "0 12 * * *"
  # Pull requests trigger this workflow only when they touch this workflow file.
  pull_request:
    # branches:
    #   - main
    paths:
      - "**/workflows/tsan.yaml"
jobs:
  # Builds CPython 3.13 with the GIL disabled and ThreadSanitizer (TSAN) enabled, builds the
  # jaxlib wheel against that interpreter, then runs //tests:cpu_tests under TSAN.
  tsan:
    runs-on: linux-x86-n2-64
    container:
      image: index.docker.io/library/ubuntu@sha256:b359f1067efa76f37863778f7b6d0e8d911e3ee8efa807ad01fbf5dc1ef9006b # ratchet:ubuntu:24.04
    strategy:
      fail-fast: false
    defaults:
      run:
        # NOTE: with a custom shell template GitHub Actions does not add `-eo pipefail`
        # (it only does so for the built-in `shell: bash`), so every multi-line script
        # below starts with `set -euo pipefail` to fail on the first broken command.
        shell: bash -l {0}
    steps:
      # git is part of the package list below because it must be installed before
      # actions/checkout runs: otherwise checkout downloads the code with the GitHub
      # REST API and any subsequent git commands will fail.
      - name: Install clang 18
        env:
          DEBIAN_FRONTEND: noninteractive
        run: |
          set -euo pipefail
          # apt-get rather than apt: apt warns that its CLI is not stable for scripts.
          apt-get update
          apt-get install -y clang-18 libstdc++-14-dev build-essential libssl-dev \
            zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl git \
            libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \
            libffi-dev liblzma-dev
      # Check out this repository into ./jax.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          path: jax
      # Check out the CPython 3.13 branch into ./cpython.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          repository: python/cpython
          path: cpython
          ref: "3.13"
      - name: Build CPython with TSAN enabled
        run: |
          set -euo pipefail
          cd cpython
          # -p: do not fail if the prefix directory is left over from an earlier run.
          mkdir -p "${GITHUB_WORKSPACE}/cpython-tsan"
          CC=clang-18 CXX=clang++-18 ./configure --prefix "${GITHUB_WORKSPACE}/cpython-tsan" --disable-gil --with-thread-sanitizer
          make -j"$(nproc)"
          make install
          # Check whether free-threading mode is enabled
          PYTHON_GIL=0 "${GITHUB_WORKSPACE}/cpython-tsan/bin/python3" -c "import sys; assert not sys._is_gil_enabled()"
          # Create archive to be used with bazel as hermetic python:
          cd "${GITHUB_WORKSPACE}"
          tar -czpf python-tsan.tgz cpython-tsan
      - name: Build and install JAX
        run: |
          set -euo pipefail
          cd jax
          # sha256sum prints "<digest>  <file>"; keep only the digest.
          PYTHON_SHA256="$(sha256sum "${GITHUB_WORKSPACE}/python-tsan.tgz" | cut -d' ' -f1)"
          export PYTHON_SHA256
          echo "Python sha256: ${PYTHON_SHA256}"
          "${GITHUB_WORKSPACE}/cpython-tsan/bin/python3" build/build.py build --wheels=jaxlib \
            --python_version=3.13-ft \
            --bazel_options=--repo_env=HERMETIC_PYTHON_URL="file://${GITHUB_WORKSPACE}/python-tsan.tgz" \
            --bazel_options=--repo_env=HERMETIC_PYTHON_SHA256="${PYTHON_SHA256}" \
            --bazel_options=--repo_env=HERMETIC_PYTHON_PREFIX="cpython-tsan/" \
            --bazel_options=--color=yes \
            --bazel_options=--copt=-fsanitize=thread \
            --bazel_options=--linkopt="-fsanitize=thread" \
            --bazel_options=--copt=-g \
            --clang_path=/usr/bin/clang-18
      - name: Run tests
        timeout-minutes: 120
        env:
          JAX_NUM_GENERATED_CASES: 1
          JAX_ENABLE_X64: true
          JAX_SKIP_SLOW_TESTS: true
          PY_COLORS: 1
        run: |
          set -euo pipefail
          cd jax
          echo "JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES"
          echo "JAX_ENABLE_X64=$JAX_ENABLE_X64"
          echo "JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS"
          # As we do not have yet free-threading support
          # there will be the following warning:
          # RuntimeWarning: The global interpreter lock (GIL) has been enabled to load module 'jaxlib.utils',
          # which has not declared that it can run safely without the GIL.
          # To avoid that we temporarily define PYTHON_GIL
          export PYTHON_GIL=0
          # Set symlink to the bazel executable.
          # Pick the first regular file matching bazel-* (presumably the bazel binary left
          # by build/build.py - TODO confirm); the -f test skips directories and symlinks
          # to directories that match the same glob.
          bazel_exec=""
          for candidate in bazel-*; do
            if [[ -f "${candidate}" ]]; then
              bazel_exec="${candidate}"
              break
            fi
          done
          if [[ -z "${bazel_exec}" ]]; then
            echo "error: no bazel-* executable found in ${PWD}" >&2
            exit 1
          fi
          ln -sf "${bazel_exec}" bazel
          # Create tsan suppressions file (quoted delimiter: contents are written literally)
          cat << 'EOF' > "${PWD}/.tsan_ignore"
          # false-positive caused because we haven't tsan-instrumented libgcc_s. Multiple threads
          # are racing on a call to __register_frame_info(), but that function appears to be correctly locked internally.
          race:llvm::RuntimeDyldELF::registerEHFrames
          # https://github.com/python/cpython/issues/128050
          race:partial_vectorcall_fallback
          # https://github.com/python/cpython/issues/128100
          race:ensure_nonmanaged_dict
          # https://github.com/openxla/xla/issues/20686
          race:dnnl_sgemm
          # https://github.com/numpy/numpy/issues/28041
          race:get_initial_from_ufunc
          # https://github.com/numpy/numpy/issues/28042
          race:PyArray_UpdateFlags
          # https://github.com/python/cpython/issues/128130
          race_top:run_eval_code_obj
          race:dump_traceback
          # https://github.com/numpy/numpy/issues/28045 not sure about this one
          race:arraymethod_dealloc
          # https://github.com/python/cpython/issues/128133
          race:bytes_hash
          # https://github.com/python/cpython/issues/128137
          race:immortalize_interned
          # https://github.com/python/cpython/issues/128144
          race_top:PyMember_GetOne
          # https://github.com/python/cpython/issues/128657
          race:py_digest_by_name
          EOF
          # --repo_env is only visible to repository rules; --test_env is what injects a
          # variable into the test runner environment, so the JAX_* settings and PYTHON_GIL
          # are passed with --test_env. --repo_env=PYTHON_GIL is kept as well so repository
          # rules see the same value as before.
          ./bazel test \
            --repo_env=HERMETIC_PYTHON_VERSION=3.13-ft \
            --//jax:build_jaxlib=false \
            --test_env=JAX_NUM_GENERATED_CASES="${JAX_NUM_GENERATED_CASES}" \
            --test_env=JAX_ENABLE_X64="${JAX_ENABLE_X64}" \
            --test_env=JAX_SKIP_SLOW_TESTS="${JAX_SKIP_SLOW_TESTS}" \
            --repo_env=PYTHON_GIL="${PYTHON_GIL}" \
            --test_env=PYTHON_GIL="${PYTHON_GIL}" \
            --test_env=TSAN_OPTIONS="halt_on_error=1,suppressions=${PWD}/.tsan_ignore" \
            --test_env=JAX_TEST_NUM_THREADS=8 \
            --nocache_test_results \
            --test_output=all \
            //tests:cpu_tests