From 7561726b3314f713fffefe52210582f60a1de44c Mon Sep 17 00:00:00 2001 From: rudderbucky <38639307+rudderbucky@users.noreply.github.com> Date: Mon, 11 Nov 2024 08:09:52 -0800 Subject: [PATCH 1/5] Update writer.rs --- naga/src/back/msl/writer.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 83d937d486..19e56a8a6e 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -803,14 +803,21 @@ impl Writer { } self.loop_reachable_macro_name = self.namer.call("LOOP_IS_REACHABLE"); - let loop_reachable_volatile_name = self.namer.call("unpredictable_jump_over_loop"); - writeln!( - self.out, - "#define {} if (volatile bool {} = true; {})", - self.loop_reachable_macro_name, - loop_reachable_volatile_name, - loop_reachable_volatile_name, - )?; + #[cfg(target_arch = "wasm32")] + { + let loop_reachable_volatile_name = self.namer.call("unpredictable_jump_over_loop"); + writeln!( + self.out, + "#define {} if (volatile bool {} = true; {})", + self.loop_reachable_macro_name, + loop_reachable_volatile_name, + loop_reachable_volatile_name, + )?; + } + #[cfg(not(target_arch = "wasm32"))] + { + writeln!(self.out, "#define {}", self.loop_reachable_macro_name,)?; + } Ok(()) } From 2bea408af76bcc6e4a0dbb38180978763cc0687a Mon Sep 17 00:00:00 2001 From: rudderbucky <38639307+rudderbucky@users.noreply.github.com> Date: Mon, 11 Nov 2024 18:55:53 -0800 Subject: [PATCH 2/5] Revert "Update writer.rs" This reverts commit 7561726b3314f713fffefe52210582f60a1de44c. --- naga/src/back/msl/writer.rs | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 19e56a8a6e..83d937d486 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -803,21 +803,14 @@ impl Writer { } self.loop_reachable_macro_name = self.namer.call("LOOP_IS_REACHABLE"); - #[cfg(target_arch = "wasm32")] - { - let loop_reachable_volatile_name = self.namer.call("unpredictable_jump_over_loop"); - writeln!( - self.out, - "#define {} if (volatile bool {} = true; {})", - self.loop_reachable_macro_name, - loop_reachable_volatile_name, - loop_reachable_volatile_name, - )?; - } - #[cfg(not(target_arch = "wasm32"))] - { - writeln!(self.out, "#define {}", self.loop_reachable_macro_name,)?; - } + let loop_reachable_volatile_name = self.namer.call("unpredictable_jump_over_loop"); + writeln!( + self.out, + "#define {} if (volatile bool {} = true; {})", + self.loop_reachable_macro_name, + loop_reachable_volatile_name, + loop_reachable_volatile_name, + )?; Ok(()) } From ac60902244f3287a47bba4bf5da1a8233153c40a Mon Sep 17 00:00:00 2001 From: rudderbucky <38639307+rudderbucky@users.noreply.github.com> Date: Tue, 12 Nov 2024 18:39:14 -0800 Subject: [PATCH 3/5] Only emit loop macro for checked index --- naga/src/back/msl/writer.rs | 5 ++++- naga/src/proc/index.rs | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 83d937d486..19b0263b30 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -3028,7 +3028,10 @@ impl Writer { ref continuing, break_if, } => { - self.emit_loop_reachable_macro()?; + // We only emit the macro if the index policy is not checked. + if context.expression.policies.index != index::BoundsCheckPolicy::Unchecked { + self.emit_loop_reachable_macro()?; + } if !continuing.is_empty() || break_if.is_some() { let gate_name = self.namer.call("loop_init"); writeln!(self.out, "{level}bool {gate_name} = true;")?; diff --git a/naga/src/proc/index.rs b/naga/src/proc/index.rs index d0a7f73e2a..828f1f2984 100644 --- a/naga/src/proc/index.rs +++ b/naga/src/proc/index.rs @@ -67,6 +67,7 @@ pub enum BoundsCheckPolicy { pub struct BoundsCheckPolicies { /// How should the generated code handle array, vector, or matrix indices /// that are out of range? + /// On Metal, this policy also dictates how loops are checked for UB. #[cfg_attr(feature = "deserialize", serde(default))] pub index: BoundsCheckPolicy, From ae33c5b0e9b1d7d81d8e2f57aaf89ece4dfc0f5a Mon Sep 17 00:00:00 2001 From: rudderbucky Date: Wed, 13 Nov 2024 17:21:18 -0800 Subject: [PATCH 4/5] Update naga/src/proc/index.rs Co-authored-by: Erich Gubler --- naga/src/proc/index.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/naga/src/proc/index.rs b/naga/src/proc/index.rs index 828f1f2984..f6a78db94c 100644 --- a/naga/src/proc/index.rs +++ b/naga/src/proc/index.rs @@ -67,6 +67,7 @@ pub enum BoundsCheckPolicy { pub struct BoundsCheckPolicies { /// How should the generated code handle array, vector, or matrix indices /// that are out of range? + /// /// On Metal, this policy also dictates how loops are checked for UB. #[cfg_attr(feature = "deserialize", serde(default))] pub index: BoundsCheckPolicy, From 7bad686bef4806c68dba9bda514fbd68be5ec681 Mon Sep 17 00:00:00 2001 From: rudderbucky <38639307+rudderbucky@users.noreply.github.com> Date: Wed, 13 Nov 2024 18:15:22 -0800 Subject: [PATCH 5/5] Surface loop checking at PipelineCompilationOptions --- deno_webgpu/pipeline.rs | 3 +++ naga/src/back/msl/mod.rs | 3 +++ naga/src/back/msl/writer.rs | 8 ++++++-- naga/src/proc/index.rs | 2 -- wgpu-core/src/device/global.rs | 3 +++ wgpu-core/src/device/resource.rs | 3 +++ wgpu-core/src/indirect_validation.rs | 1 + wgpu-core/src/pipeline.rs | 6 ++++++ wgpu-hal/examples/halmark/main.rs | 2 ++ wgpu-hal/examples/ray-traced-triangle/main.rs | 1 + wgpu-hal/src/dynamic/mod.rs | 1 + wgpu-hal/src/lib.rs | 4 ++++ wgpu-hal/src/metal/device.rs | 1 + wgpu/src/api/common_pipeline.rs | 4 ++++ wgpu/src/backend/wgpu_core.rs | 6 ++++++ 15 files changed, 44 insertions(+), 4 deletions(-) diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs index 0ab3c40262..34ca29c0ea 100644 --- a/deno_webgpu/pipeline.rs +++ b/deno_webgpu/pipeline.rs @@ -112,6 +112,7 @@ pub fn op_webgpu_create_compute_pipeline( entry_point: compute.entry_point.map(Cow::from), constants: Cow::Owned(compute.constants.unwrap_or_default()), zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }, cache: None, }; @@ -344,6 +345,7 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(fragment.constants.unwrap_or_default()), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }, targets: Cow::Owned(fragment.targets), }) @@ -369,6 +371,7 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(args.vertex.constants.unwrap_or_default()), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }, buffers: Cow::Owned(vertex_buffers), }, diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs index fbeaa4cc8d..a070b19943 100644 --- a/naga/src/back/msl/mod.rs +++ b/naga/src/back/msl/mod.rs @@ -211,6 +211,8 @@ pub struct Options { pub bounds_check_policies: index::BoundsCheckPolicies, /// Should workgroup variables be zero initialized (by polyfilling)? pub zero_initialize_workgroup_memory: bool, + /// Specifies whether shader loops are forcibly prevented from being optimized out. + pub enable_loop_ub_checking: bool, } impl Default for Options { @@ -223,6 +225,7 @@ impl Default for Options { fake_missing_bindings: true, bounds_check_policies: index::BoundsCheckPolicies::default(), zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, } } } diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 19b0263b30..65d20a89a1 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -600,6 +600,9 @@ struct ExpressionContext<'a> { /// accesses. These may need to be cached in temporary variables. See /// `index::find_checked_indexes` for details. guarded_indices: HandleSet, + /// Specifies whether shader loops are forcibly prevented from being optimized out, which may lead + /// to UB on Metal. Loop checking may have significant overhead. + pub enable_loop_ub_checking: bool, } impl<'a> ExpressionContext<'a> { @@ -3028,8 +3031,7 @@ impl Writer { ref continuing, break_if, } => { - // We only emit the macro if the index policy is not checked. - if context.expression.policies.index != index::BoundsCheckPolicy::Unchecked { + if context.expression.enable_loop_ub_checking { self.emit_loop_reachable_macro()?; } if !continuing.is_empty() || break_if.is_some() { @@ -4868,6 +4870,7 @@ template module, mod_info, pipeline_options, + enable_loop_ub_checking: options.enable_loop_ub_checking, }, result_struct: None, }; @@ -5768,6 +5771,7 @@ template module, mod_info, pipeline_options, + enable_loop_ub_checking: options.enable_loop_ub_checking, }, result_struct: Some(&stage_out_name), }; diff --git a/naga/src/proc/index.rs b/naga/src/proc/index.rs index f6a78db94c..d0a7f73e2a 100644 --- a/naga/src/proc/index.rs +++ b/naga/src/proc/index.rs @@ -67,8 +67,6 @@ pub enum BoundsCheckPolicy { pub struct BoundsCheckPolicies { /// How should the generated code handle array, vector, or matrix indices /// that are out of range? - /// - /// On Metal, this policy also dictates how loops are checked for UB. #[cfg_attr(feature = "deserialize", serde(default))] pub index: BoundsCheckPolicy, diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index b6ad2354c3..d87bd1bc8d 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1266,6 +1266,7 @@ impl Global { .vertex .stage .zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc.vertex.stage.enable_loop_ub_checking, }; ResolvedVertexState { stage, @@ -1294,6 +1295,7 @@ impl Global { .vertex .stage .zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc.vertex.stage.enable_loop_ub_checking, }; Some(ResolvedFragmentState { stage, @@ -1492,6 +1494,7 @@ impl Global { entry_point: desc.stage.entry_point.clone(), constants: desc.stage.constants.clone(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc.stage.enable_loop_ub_checking, }; let desc = ResolvedComputePipelineDescriptor { diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index afbf73bc03..ac1ddaf4d3 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2829,6 +2829,7 @@ impl Device { entry_point: final_entry_point_name.as_ref(), constants: desc.stage.constants.as_ref(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc.stage.enable_loop_ub_checking, }, cache: cache.as_ref().map(|it| it.raw()), }; @@ -3250,6 +3251,7 @@ impl Device { entry_point: &vertex_entry_point_name, constants: stage_desc.constants.as_ref(), zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory, + enable_loop_ub_checking: stage_desc.enable_loop_ub_checking, } }; @@ -3306,6 +3308,7 @@ impl Device { zero_initialize_workgroup_memory: fragment_state .stage .zero_initialize_workgroup_memory, + enable_loop_ub_checking: fragment_state.stage.enable_loop_ub_checking, }) } None => None, diff --git a/wgpu-core/src/indirect_validation.rs b/wgpu-core/src/indirect_validation.rs index 35a95f8bbf..5976ea7f80 100644 --- a/wgpu-core/src/indirect_validation.rs +++ b/wgpu-core/src/indirect_validation.rs @@ -204,6 +204,7 @@ impl IndirectValidation { entry_point: "main", constants: &Default::default(), zero_initialize_workgroup_memory: false, + enable_loop_ub_checking: true, }, cache: None, }; diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index 01ceabf669..5d2f3a8434 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -145,6 +145,9 @@ pub struct ProgrammableStageDescriptor<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, + /// Specifies whether shader loops are forcibly prevented from being optimized out, which may lead + /// to UB on Metal. Loop checking may have significant overhead. + pub enable_loop_ub_checking: bool, } /// Describes a programmable pipeline stage. @@ -172,6 +175,9 @@ pub struct ResolvedProgrammableStageDescriptor<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, + /// Specifies whether shader loops are forcibly prevented from being optimized out, which may lead + /// to UB on Metal. Loop checking may have significant overhead. + pub enable_loop_ub_checking: bool, } /// Number of implicit bind groups derived at pipeline creation. diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 8ab7f1cb47..010c74aa8e 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -259,6 +259,7 @@ impl Example { entry_point: "vs_main", constants: &constants, zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }, vertex_buffers: &[], fragment_stage: Some(hal::ProgrammableStage { @@ -266,6 +267,7 @@ impl Example { entry_point: "fs_main", constants: &constants, zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }), primitive: wgt::PrimitiveState { topology: wgt::PrimitiveTopology::TriangleStrip, diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 4eedfe7817..de4e5a9b41 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -400,6 +400,7 @@ impl Example { entry_point: "main", constants: &Default::default(), zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, }, cache: None, }) diff --git a/wgpu-hal/src/dynamic/mod.rs b/wgpu-hal/src/dynamic/mod.rs index 5509d7cce6..336ca3de5a 100644 --- a/wgpu-hal/src/dynamic/mod.rs +++ b/wgpu-hal/src/dynamic/mod.rs @@ -146,6 +146,7 @@ impl<'a> ProgrammableStage<'a, dyn DynShaderModule> { entry_point: self.entry_point, constants: self.constants, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, + enable_loop_ub_checking: self.enable_loop_ub_checking, } } } diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 0cddb69976..1daba70194 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -2138,6 +2138,9 @@ pub struct ProgrammableStage<'a, M: DynShaderModule + ?Sized> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, + /// Specifies whether shader loops are forcibly prevented from being optimized out, which may lead + /// to UB on Metal. Loop checking may have significant overhead. + pub enable_loop_ub_checking: bool, } impl Clone for ProgrammableStage<'_, M> { @@ -2147,6 +2150,7 @@ impl Clone for ProgrammableStage<'_, M> { entry_point: self.entry_point, constants: self.constants, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, + enable_loop_ub_checking: self.enable_loop_ub_checking, } } } diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 4cc8ef0eb0..fc67044455 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -150,6 +150,7 @@ impl super::Device { binding_array: naga::proc::BoundsCheckPolicy::Unchecked, }, zero_initialize_workgroup_memory: stage.zero_initialize_workgroup_memory, + enable_loop_ub_checking: stage.enable_loop_ub_checking, }; let pipeline_options = naga::back::msl::PipelineOptions { diff --git a/wgpu/src/api/common_pipeline.rs b/wgpu/src/api/common_pipeline.rs index 697507bca2..3187ecb5a4 100644 --- a/wgpu/src/api/common_pipeline.rs +++ b/wgpu/src/api/common_pipeline.rs @@ -20,6 +20,9 @@ pub struct PipelineCompilationOptions<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, + /// Specifies whether shader loops are forcibly prevented from being optimized out, which may lead + /// to UB on Metal. Loop checking may have significant overhead. + pub enable_loop_ub_checking: bool, } impl<'a> Default for PipelineCompilationOptions<'a> { @@ -33,6 +36,7 @@ impl<'a> Default for PipelineCompilationOptions<'a> { Self { constants, zero_initialize_workgroup_memory: true, + enable_loop_ub_checking: true, } } } diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index befec4bd78..174614f2bf 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1092,6 +1092,10 @@ impl crate::Context for ContextWgpuCore { .vertex .compilation_options .zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc + .vertex + .compilation_options + .enable_loop_ub_checking, }, buffers: Borrowed(&vertex_buffers), }, @@ -1106,6 +1110,7 @@ impl crate::Context for ContextWgpuCore { zero_initialize_workgroup_memory: frag .compilation_options .zero_initialize_workgroup_memory, + enable_loop_ub_checking: frag.compilation_options.enable_loop_ub_checking, }, targets: Borrowed(frag.targets), }), @@ -1150,6 +1155,7 @@ impl crate::Context for ContextWgpuCore { zero_initialize_workgroup_memory: desc .compilation_options .zero_initialize_workgroup_memory, + enable_loop_ub_checking: desc.compilation_options.enable_loop_ub_checking, }, cache: desc.cache.map(downcast_pipeline_cache).copied(), };