# Skip to content

# Added CI job with TSAN and free-threading #19

# Added CI job with TSAN and free-threading

# Added CI job with TSAN and free-threading #19

# Workflow file for this run

# Nightly CI: builds a free-threaded (GIL-disabled) CPython with
# ThreadSanitizer, builds jaxlib against it, and runs the CPU tests under TSAN.
name: CI - Free-threading and Thread Sanitizer (nightly)

# One in-flight run per workflow + ref; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
# Triggers: a daily schedule, manual dispatch, and pull requests that touch
# this workflow file.
on:
  schedule:
    - cron: "0 12 * * *"  # Daily at 12:00 UTC
  workflow_dispatch:  # allows triggering the workflow run manually
  pull_request:  # Automatically trigger on pull requests affecting this file
    # branches:
    #   - main
    paths:
      - '**/workflows/tsan.yaml'
jobs:
  # Single job: build CPython (free-threaded + TSAN), build jaxlib with TSAN
  # instrumentation against that interpreter, then run //tests:cpu_tests.
  tsan:
    runs-on: linux-x86-n2-64
    container:
      image: index.docker.io/library/ubuntu@sha256:b359f1067efa76f37863778f7b6d0e8d911e3ee8efa807ad01fbf5dc1ef9006b  # ratchet:ubuntu:24.04
    strategy:
      fail-fast: false
    defaults:
      run:
        # NOTE(review): a custom shell string is executed as written, so unlike
        # `shell: bash` this does not add `-eo pipefail`; a command failing in
        # the middle of a `run` script will not stop the step. Confirm this is
        # intended.
        shell: bash -l {0}
    steps:
      # Install the build toolchain. git is part of this list because it must
      # be present before actions/checkout runs; otherwise checkout downloads
      # the code with the GitHub REST API and any subsequent git commands
      # will fail.
      - name: Install clang 18
        env:
          DEBIAN_FRONTEND: noninteractive
        run: |
          apt update
          apt install -y clang-18 libstdc++-14-dev build-essential libssl-dev \
            zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl git \
            libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \
            libffi-dev liblzma-dev

      # Check out this repository into ./jax.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          path: jax

      # Check out the CPython 3.13 branch into ./cpython.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2
        with:
          repository: python/cpython
          path: cpython
          ref: "3.13"

      - name: Build CPython with TSAN enabled
        run: |
          cd cpython
          mkdir ${GITHUB_WORKSPACE}/cpython-tsan
          CC=clang-18 CXX=clang++-18 ./configure --prefix ${GITHUB_WORKSPACE}/cpython-tsan --disable-gil --with-thread-sanitizer
          # Use every core the runner exposes instead of a hard-coded job count.
          make -j"$(nproc)"
          make install
          # Check whether free-threading mode is enabled
          PYTHON_GIL=0 ${GITHUB_WORKSPACE}/cpython-tsan/bin/python3 -c "import sys; assert not sys._is_gil_enabled()"
          # Create archive to be used with bazel as hermetic python:
          cd ${GITHUB_WORKSPACE} && tar -czpf python-tsan.tgz cpython-tsan

      - name: Build and install JAX
        run: |
          cd jax
          # sha256sum prints "<digest>  <path>"; keep only the digest field.
          PYTHON_SHA256="$(sha256sum "${GITHUB_WORKSPACE}/python-tsan.tgz" | cut -d ' ' -f 1)"
          export PYTHON_SHA256
          echo "Python sha256: ${PYTHON_SHA256}"
          # Build the jaxlib wheel with the TSAN interpreter archive supplied as
          # the hermetic Python, and with TSAN instrumentation plus debug info.
          ${GITHUB_WORKSPACE}/cpython-tsan/bin/python3 build/build.py build --wheels=jaxlib \
            --python_version=3.13-ft \
            --bazel_options=--repo_env=HERMETIC_PYTHON_URL="file://${GITHUB_WORKSPACE}/python-tsan.tgz" \
            --bazel_options=--repo_env=HERMETIC_PYTHON_SHA256=${PYTHON_SHA256} \
            --bazel_options=--repo_env=HERMETIC_PYTHON_PREFIX="cpython-tsan/" \
            --bazel_options=--color=yes \
            --bazel_options=--copt=-fsanitize=thread \
            --bazel_options=--linkopt="-fsanitize=thread" \
            --bazel_options=--copt=-g \
            --clang_path=/usr/bin/clang-18

      - name: Run tests
        timeout-minutes: 120
        env:
          # Quoted so every value is unambiguously a string in the environment.
          JAX_NUM_GENERATED_CASES: "1"
          JAX_ENABLE_X64: "true"
          JAX_SKIP_SLOW_TESTS: "true"
          PY_COLORS: "1"
        run: |
          cd jax
          echo "JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES"
          echo "JAX_ENABLE_X64=$JAX_ENABLE_X64"
          echo "JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS"
          # As we do not have free-threading support yet,
          # there will be the following warning:
          # RuntimeWarning: The global interpreter lock (GIL) has been enabled to load module 'jaxlib.utils',
          # which has not declared that it can run safely without the GIL.
          # To avoid that we temporarily define PYTHON_GIL
          export PYTHON_GIL=0
          # Set symlink to the bazel executable.
          # ${bazel_exec} expands to the first array element, i.e. the first
          # name printed by ls (presumably the versioned bazel binary left in
          # this directory by the build step; TODO confirm).
          bazel_exec=($(ls bazel-*))
          ln -s ${bazel_exec} bazel
          # Create tsan suppressions file.
          # The delimiter is quoted so the suppression text is written verbatim
          # and is never subject to shell expansion.
          cat << 'EOF' > $PWD/.tsan_ignore
          # false-positive caused because we haven't tsan-instrumented libgcc_s. Multiple threads
          # are racing on a call to __register_frame_info(), but that function appears to be correctly locked internally.
          race:llvm::RuntimeDyldELF::registerEHFrames
          # https://github.com/python/cpython/issues/128050
          race:partial_vectorcall_fallback
          # https://github.com/python/cpython/issues/128100
          race:ensure_nonmanaged_dict
          # https://github.com/openxla/xla/issues/20686
          race:dnnl_sgemm
          # https://github.com/numpy/numpy/issues/28041
          race:get_initial_from_ufunc
          # https://github.com/numpy/numpy/issues/28042
          race:PyArray_UpdateFlags
          # https://github.com/python/cpython/issues/128130
          race_top:run_eval_code_obj
          race:dump_traceback
          # https://github.com/numpy/numpy/issues/28045 not sure about this one
          race:arraymethod_dealloc
          # https://github.com/python/cpython/issues/128133
          race:bytes_hash
          # https://github.com/python/cpython/issues/128137
          race:immortalize_interned
          # https://github.com/python/cpython/issues/128144
          race_top:PyMember_GetOne
          # https://github.com/python/cpython/issues/128657
          race:py_digest_by_name
          EOF
          # The JAX_* settings and PYTHON_GIL have to reach the test processes,
          # so they are passed with --test_env; --repo_env values are only made
          # available to repository rules. HERMETIC_PYTHON_VERSION stays on
          # --repo_env, matching how the hermetic Python is configured in the
          # build step.
          ./bazel test \
            --repo_env=HERMETIC_PYTHON_VERSION=3.13-ft \
            --//jax:build_jaxlib=false \
            --test_env=JAX_NUM_GENERATED_CASES=$JAX_NUM_GENERATED_CASES \
            --test_env=JAX_ENABLE_X64=$JAX_ENABLE_X64 \
            --test_env=JAX_SKIP_SLOW_TESTS=$JAX_SKIP_SLOW_TESTS \
            --test_env=PYTHON_GIL=$PYTHON_GIL \
            --test_env=TSAN_OPTIONS=halt_on_error=1,suppressions=$PWD/.tsan_ignore \
            --test_env=JAX_TEST_NUM_THREADS=8 \
            --nocache_test_results \
            --test_output=all \
            //tests:cpu_tests