Remove deprecated flag xla_gpu_enable_triton_softmax_fusion.

References:
- https://github.com/NVIDIA/JAX-Toolbox/blob/9dd32f50257a405ae766aea2bcbcf51c217ed75c/rosetta/docs/GPU_performance.md?plain=1#L141
- https://github.com/openxla/xla/blob/092b8dd65d0d961265665a27432795d302762ae6/xla/debug_options_flags.cc#L1706-L1708
Commit cc4fb78 (1 parent: 9dd32f5) · NVIDIA · Jan 10, 2025

Showing 5 changed files with 1 addition and 5 deletions.
diff --git a/rosetta/rosetta/projects/diffusion/common/set_gpu_xla_flags.sh b/rosetta/rosetta/projects/diffusion/common/set_gpu_xla_flags.sh
@@ -1,2 +1,2 @@
 # These XLA flags are meant to be used with the JAX version in the imagen container
-export XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=false --xla_gpu_enable_async_all_gather=false --xla_gpu_enable_async_reduce_scatter=false --xla_gpu_enable_triton_gemm=false --xla_gpu_cuda_graph_level=0 --xla_gpu_enable_triton_softmax_fusion=false --xla_gpu_enable_async_all_reduce=false ${XLA_FLAGS}"
+export XLA_FLAGS="--xla_gpu_enable_latency_hiding_scheduler=false --xla_gpu_enable_async_all_gather=false --xla_gpu_enable_async_reduce_scatter=false --xla_gpu_enable_triton_gemm=false --xla_gpu_cuda_graph_level=0 --xla_gpu_enable_async_all_reduce=false ${XLA_FLAGS}"
diff --git a/rosetta/rosetta/projects/maxtext/xla_flags/llama2-7b-1N8G.env b/rosetta/rosetta/projects/maxtext/xla_flags/llama2-7b-1N8G.env
@@ -14,7 +14,6 @@ export XLA_FLAGS="\
     --xla_gpu_enable_pipelined_reduce_scatter=true \
     --xla_gpu_enable_pipelined_all_reduce=true \
     --xla_gpu_enable_while_loop_double_buffering=true \
-    --xla_gpu_enable_triton_softmax_fusion=false \
     --xla_gpu_enable_all_gather_combine_by_dim=false \
     --xla_gpu_enable_reduce_scatter_combine_by_dim=false \
     --xla_disable_hlo_passes=rematerialization \

diff --git a/rosetta/rosetta/projects/pax/xla_flags/common.env b/rosetta/rosetta/projects/pax/xla_flags/common.env
@@ -4,7 +4,6 @@ export XLA_FLAGS="\
     --xla_gpu_enable_latency_hiding_scheduler=true \
     --xla_allow_excess_precision \
     --xla_gpu_enable_highest_priority_async_stream=true \
-    --xla_gpu_enable_triton_softmax_fusion=false \
     --xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
     --xla_gpu_graph_level=0 \
     "

diff --git a/rosetta/rosetta/projects/pax/xla_flags/gpt-126m.env b/rosetta/rosetta/projects/pax/xla_flags/gpt-126m.env
@@ -4,7 +4,6 @@ export XLA_FLAGS="\
     --xla_gpu_enable_latency_hiding_scheduler=true \
     --xla_allow_excess_precision \
     --xla_gpu_enable_highest_priority_async_stream=true \
-    --xla_gpu_enable_triton_softmax_fusion=false \
     --xla_gpu_all_reduce_combine_threshold_bytes=${THRESHOLD_BYTES} \
     --xla_gpu_graph_level=0 \
     --xla_gpu_enable_cudnn_fmha=false \

diff --git a/rosetta/rosetta/projects/pax/xla_flags/grok-proxy.env b/rosetta/rosetta/projects/pax/xla_flags/grok-proxy.env
@@ -6,7 +6,6 @@ export XLA_FLAGS="\
     --xla_gpu_enable_latency_hiding_scheduler=true \
     --xla_allow_excess_precision \
     --xla_gpu_enable_highest_priority_async_stream=true \
-    --xla_gpu_enable_triton_softmax_fusion=false \
     --xla_gpu_all_reduce_combine_threshold_bytes=${ALL_REDUCE_THRESHOLD_BYTES} \
     --xla_gpu_graph_level=0 \
     --xla_gpu_all_gather_combine_threshold_bytes=${ALL_GATHER_THRESHOLD_BYTES} \