From e1b63f3fbb1017d69979d64914beaed0f91a6825 Mon Sep 17 00:00:00 2001
From: Yang Gu
Date: Wed, 3 Jun 2026 10:14:58 +0800
Subject: [PATCH] Skip quadBroadcast/quadSwap split tests when subgroup size < 8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the implementation selects a subgroup size < 8 for the test's
workgroup, the split predicate `id < subgroupSize / 2` bisects the only
quad in the subgroup, leaving no fully active quad — which is undefined
behavior for quad operations. This is observed on WARP (which selects
its native D3D12 wave size: 4 on arm64 NEON, often 4 on x86 for small
workgroups) and may occur on any implementation that picks a small
native subgroup size at runtime.

Two coordinated guards:

* In the shader, the quad call is wrapped in
  `if subgroupSize >= 8u { ... }` so it never executes when the split
  predicate would be unsafe.
* In the JS checker, the actual subgroupSize is read out of
  metadata.subgroup_size[0] and the test is skipped with t.skip when it
  is < 8, so the missing output doesn't get flagged as a failure.

Querying GPUAdapterInfo.subgroupMinSize would not be sufficient: the
size the implementation actually selects depends on the shader (its
workgroup size, register pressure, etc.), not just the adapter's minimum
supported size. Reading subgroupSize from inside the test shader itself
is the only reliable signal.
--- .../call/builtin/quadBroadcast.spec.ts | 26 ++++++++++++++++--- .../expression/call/builtin/quadSwap.spec.ts | 26 ++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts index 4d556618f624..8556a3d3ec42 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts @@ -326,6 +326,13 @@ predication filters are skipped. const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; const testcase = kPredicateCases[t.params.predicate]; + // Quad operations require a fully active quad. If the implementation + // selects a subgroup size < 8 for this workgroup, the split predicate + // (`id < subgroupSize / 2`) bisects the only quad, leaving no fully + // active quad (undefined behavior). The shader reads subgroupSize and + // skips the quad call when it would be unsafe; the checker observes + // the actual selected size in metadata and skips the test entirely so + // it doesn't get flagged as a regression. const wgsl = ` enable subgroups; @@ -363,9 +370,13 @@ fn main( metadata.id[lid] = id; metadata.subgroup_size[lid] = subgroupSize; - if ${testcase.cond} { - let b = quadBroadcast(lid, ${t.params.id}); - output.results[lid] = b; + // Only run the quad op when a (subgroupSize / 2) split predicate is + // guaranteed to keep every quad fully active. See checker for skip. + if subgroupSize >= 8u { + if ${testcase.cond} { + let b = quadBroadcast(lid, ${t.params.id}); + output.results[lid] = b; + } } }`; @@ -377,6 +388,15 @@ fn main( uintsPerOutput, new Uint32Array([0]), // unused (metadata: Uint32Array, output: Uint32Array) => { + const bound = Math.floor(output.length / 2); + // metadata layout: [id, ..., subgroup_size, ...]. 
The first entry + // of the second half is the subgroupSize of invocation 0. + if (metadata[bound] < 8) { + t.skip( + `Implementation selected subgroup size ${metadata[bound]}; a split ` + + `predicate would leave no fully active quad (undefined behavior).` + ); + } return checkBroadcastCompute(metadata, output, t.params.id, testcase.filter); } ); diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts index 08f1b75cfb84..93983d196270 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts @@ -345,6 +345,13 @@ predication filters are skipped. const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; const testcase = kPredicateCases[t.params.predicate]; + // Quad operations require a fully active quad. If the implementation + // selects a subgroup size < 8 for this workgroup, the split predicate + // (`id < subgroupSize / 2`) bisects the only quad, leaving no fully + // active quad (undefined behavior). The shader reads subgroupSize and + // skips the quad call when it would be unsafe; the checker observes + // the actual selected size in metadata and skips the test entirely so + // it doesn't get flagged as a regression. const wgsl = ` enable subgroups; @@ -382,9 +389,13 @@ fn main( metadata.id[lid] = id; metadata.subgroup_size[lid] = subgroupSize; - if ${testcase.cond} { - let b = ${t.params.op}(lid); - output.results[lid] = b; + // Only run the quad op when a (subgroupSize / 2) split predicate is + // guaranteed to keep every quad fully active. See checker for skip. 
+ if subgroupSize >= 8u { + if ${testcase.cond} { + let b = ${t.params.op}(lid); + output.results[lid] = b; + } } }`; @@ -396,6 +407,15 @@ fn main( uintsPerOutput, new Uint32Array([0]), // unused (metadata: Uint32Array, output: Uint32Array) => { + const bound = Math.floor(output.length / 2); + // metadata layout: [id, ..., subgroup_size, ...]. The first entry + // of the second half is the subgroupSize of invocation 0. + if (metadata[bound] < 8) { + t.skip( + `Implementation selected subgroup size ${metadata[bound]}; a split ` + + `predicate would leave no fully active quad (undefined behavior).` + ); + } return checkSwapCompute(metadata, output, t.params.op, testcase.filter); } );