Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions test/WaveOps/WaveActiveBitXor.convergence.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#--- source.hlsl
StructuredBuffer<uint> In : register(t0);

RWStructuredBuffer<uint> Out1 : register(u1); // branch A
RWStructuredBuffer<uint> Out2 : register(u2); // branch B
RWStructuredBuffer<uint> Out3 : register(u3); // reconverged
RWStructuredBuffer<uint> Out4 : register(u4); // loop
RWStructuredBuffer<uint> Out5 : register(u5); // divergent loop

// Convergence test for WaveActiveBitXor: a single group of four threads calls
// the intrinsic under branch-divergent, reconverged, uniform-loop and
// divergent-loop control flow, writing each scenario to its own buffer.
// NOTE(review): the expected values in pipeline.yaml assume all four threads
// run in the same wave -- confirm for every target this test is enabled on.
[numthreads(4,1,1)]
void main(uint3 TID : SV_GroupThreadID) {
uint V = In[TID.x];

// divergent branch
// Threads 0-1 and threads 2-3 take different sides, so each call is expected
// to combine only the two values on its own side. With the In data from
// pipeline.yaml that is 0x11 ^ 0x12 = 0x3 and 0x14 ^ 0x18 = 0xc.
if (TID.x < 2)
Out1[TID.x] = WaveActiveBitXor(V);
else
Out2[TID.x] = WaveActiveBitXor(V);

// reconverged wave op
// All four threads participate again: 0x11 ^ 0x12 ^ 0x14 ^ 0x18 = 0xf.
Out3[TID.x] = WaveActiveBitXor(V);

// loop case
// Uniform trip count. Iteration 0 leaves 0xf in every thread; iteration 1 then
// XORs four identical values, which cancels to 0.
uint R = V;
for (uint i = 0; i < 2; i++)
R = WaveActiveBitXor(R);

Out4[TID.x] = R;

// divergent loop: each thread iterates TID.x times
// thread 0: 0 iters, thread 1: 1 iter, thread 2: 2 iters, thread 3: 3 iters
// Expected trace: thread 0 keeps 0x11; iteration 0 has threads 1-3 active
// (0x12 ^ 0x14 ^ 0x18 = 0x1e); iteration 1 has threads 2-3 active
// (0x1e ^ 0x1e = 0); iteration 2 has only thread 3 active (stays 0).
uint R2 = V;
for (uint j = 0; j < TID.x; j++)
R2 = WaveActiveBitXor(R2);

Out5[TID.x] = R2;
}

#--- pipeline.yaml

---
Shaders:
- Stage: Compute
Entry: main
DispatchSize: [1, 1, 1]

Buffers:

- Name: In
Format: UInt32
Stride: 4
Data: [ 0x11, 0x12, 0x14, 0x18 ]
- Name: Out1
Format: UInt32
Stride: 4
FillSize: 16
- Name: Out2
Format: UInt32
Stride: 4
FillSize: 16
- Name: Out3
Format: UInt32
Stride: 4
FillSize: 16
- Name: Out4
Format: UInt32
Stride: 4
FillSize: 16
- Name: Out5
Format: UInt32
Stride: 4
FillSize: 16
# Branch A (threads 0-1): 0x11 ^ 0x12 = 0x3. Elements 2-3 are not written by
# the shader and are expected to stay 0.
- Name: ExpectedOut1
Format: UInt32
Stride: 4
Data: [ 0x3, 0x3, 0x0, 0x0 ]
# Branch B (threads 2-3): 0x14 ^ 0x18 = 0xc. Elements 0-1 are not written by
# the shader and are expected to stay 0.
- Name: ExpectedOut2
Format: UInt32
Stride: 4
Data: [ 0x0, 0x0, 0xc, 0xc ]
# Reconverged: 0x11 ^ 0x12 ^ 0x14 ^ 0x18 = 0xf in every thread.
- Name: ExpectedOut3
Format: UInt32
Stride: 4
Data: [ 0xf, 0xf, 0xf, 0xf ]
# Uniform loop, two iterations: 0xf after the first, then four identical
# values XOR to 0 on the second.
- Name: ExpectedOut4
Format: UInt32
Stride: 4
Data: [ 0x0, 0x0, 0x0, 0x0 ]
# Divergent loop: thread 0 never iterates and keeps its input (0x11); thread 1
# stops after 0x12 ^ 0x14 ^ 0x18 = 0x1e; threads 2-3 then XOR two equal values
# and end at 0.
- Name: ExpectedOut5
Format: UInt32
Stride: 4
Data: [ 0x11, 0x1e, 0x0, 0x0 ]

Results:
- Result: ExpectedOut1
Rule: BufferExact
Actual: Out1
Expected: ExpectedOut1
- Result: ExpectedOut2
Rule: BufferExact
Actual: Out2
Expected: ExpectedOut2
- Result: ExpectedOut3
Rule: BufferExact
Actual: Out3
Expected: ExpectedOut3
- Result: ExpectedOut4
Rule: BufferExact
Actual: Out4
Expected: ExpectedOut4
- Result: ExpectedOut5
Rule: BufferExact
Actual: Out5
Expected: ExpectedOut5


DescriptorSets:
- Resources:
- Name: In
Kind: StructuredBuffer
DirectXBinding:
Register: 0
Space: 0
VulkanBinding:
Binding: 0
- Name: Out1
Kind: RWStructuredBuffer
DirectXBinding:
Register: 1
Space: 0
VulkanBinding:
Binding: 1
- Name: Out2
Kind: RWStructuredBuffer
DirectXBinding:
Register: 2
Space: 0
VulkanBinding:
Binding: 2
- Name: Out3
Kind: RWStructuredBuffer
DirectXBinding:
Register: 3
Space: 0
VulkanBinding:
Binding: 3
- Name: Out4
Kind: RWStructuredBuffer
DirectXBinding:
Register: 4
Space: 0
VulkanBinding:
Binding: 4
- Name: Out5
Kind: RWStructuredBuffer
DirectXBinding:
Register: 5
Space: 0
VulkanBinding:
Binding: 5
...
#--- end

# Bug: https://github.com/llvm/llvm-project/issues/188323
# XFAIL: Vulkan && Clang

# Bug: https://github.com/llvm/offload-test-suite/issues/1022
# XFAIL: WARP

# RUN: split-file %s %t
# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl
# RUN: %offloader %t/pipeline.yaml %t.o
201 changes: 201 additions & 0 deletions test/WaveOps/WaveActiveBitXor.int.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
#--- source.hlsl
StructuredBuffer<uint4> In : register(t0);

RWStructuredBuffer<uint> Out1 : register(u1);
RWStructuredBuffer<uint2> Out2 : register(u2);
RWStructuredBuffer<uint3> Out3 : register(u3);
RWStructuredBuffer<uint4> Out4 : register(u4);
RWStructuredBuffer<uint4> Out5 : register(u5);


// Integer test for WaveActiveBitXor: one group of four threads reduces scalar,
// 2-, 3- and 4-component uint inputs, then reduces a constant vector under
// several different sets of active lanes.
// NOTE(review): the expected values in pipeline.yaml assume all four threads
// run in the same wave -- confirm for every target this test is enabled on.
[numthreads(4,1,1)]
void main(uint3 TID : SV_GroupThreadID) {
uint4 V = In[TID.x];

// Scalar reduction over all four lanes; results land in Out1[0..3].
Out1[TID.x] = WaveActiveBitXor(V.x);

// 3 thread case
// Lane 1 is excluded, so Out1[5] is never written; lanes 0, 2 and 3 write
// Out1[4], Out1[6] and Out1[7].
if (TID.x != 1)
Out1[TID.x + 4] = WaveActiveBitXor(V.x);

// Vector reductions over all four lanes for each vector width.
Out2[TID.x] = WaveActiveBitXor(V.xy);

uint3 R3 = WaveActiveBitXor(V.xyz);
Out3[TID.x].xyz = R3;

Out4[TID.x] = WaveActiveBitXor(V);

// constant folding
// Four lanes contribute the same constant, so the expected result is zero.
Out5[TID.x] = WaveActiveBitXor(uint4(1,2,3,4));
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even number of identical values XOR'd together will always be zero, and expecting all zero doesn't really test much.

There are also no tests that include an odd number of active lanes, and ensure that a bit set in each lane is set after that.

You could do something more interesting, like:

Suggested change
Out5[TID.x] = WaveActiveBitXor(uint4(1,2,3,4));
Out5[TID.x] = WaveActiveBitXor(uint4(1,2,3,4));
if (TID.x != 1)
Out5[TID.x + 4] = WaveActiveBitXor(uint4(1,2,3,4));
if (TID.x % 2)
Out5[TID.x + 4 * 2] = WaveActiveBitXor(uint4(1,2,3,4));
if (TID.x == 1)
Out5[4 * 3] = WaveActiveBitXor(uint4(1,2,3,4));

Which should result in:

    Data: [
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 4 threads (0,1,2,3)
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
        0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0, // 3 threads (0,2,3)
        0x1, 0x2, 0x3, 0x4, 0x1, 0x2, 0x3, 0x4,
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, // 2 threads (1,3)
        0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
        0x1, 0x2, 0x3, 0x4 // 1 thread (1)
    ]

It would also be more interesting if you did something like the 3-thread case with the non-constant inputs (it could be just scalar at that point).


// Same constant under three more active-lane sets, each written to its own
// group of four Out5 elements: three lanes (0, 2, 3) into Out5[4..7], two
// lanes (1, 3) into Out5[8..11], and lane 1 alone into Out5[13]. An odd lane
// count is expected to leave (1,2,3,4); an even count is expected to cancel
// to zero.
if (TID.x != 1)
Out5[TID.x + 4] = WaveActiveBitXor(uint4(1,2,3,4));
if (TID.x % 2)
Out5[TID.x + 4 * 2] = WaveActiveBitXor(uint4(1,2,3,4));
if (TID.x == 1)
Out5[TID.x + 4 * 3] = WaveActiveBitXor(uint4(1,2,3,4));

}

#--- pipeline.yaml

---
Shaders:
- Stage: Compute
Entry: main
DispatchSize: [1,1,1]

Buffers:
- Name: In
Format: UInt32
Stride: 16
Data: [
0x11, 0x2, 0x4, 0x8,
0x10, 0x20, 0x40, 0x80,
0x100, 0x200, 0x400, 0x800,
0x1000, 0x2000, 0x4000, 0x8000
]

- Name: Out1
Format: UInt32
Stride: 4
FillSize: 32
- Name: Out2
Format: UInt32
Stride: 8
FillSize: 32
- Name: Out3
Format: UInt32
Stride: 12
FillSize: 48
- Name: Out4
Format: UInt32
Stride: 16
FillSize: 64
- Name: Out5
Format: UInt32
Stride: 16
FillSize: 256

# Elements 0-3: XOR of In.x over all four lanes,
# 0x11 ^ 0x10 ^ 0x100 ^ 0x1000 = 0x1101.
# Elements 4-7: three-lane case with lane 1 excluded,
# 0x11 ^ 0x100 ^ 0x1000 = 0x1111. Element 5 is not written by the shader and
# is expected to stay 0.
- Name: ExpectedOut1
Format: UInt32
Stride: 4
Data: [ 0x1101, 0x1101, 0x1101, 0x1101, 0x1111, 0x0, 0x1111, 0x1111 ]
# Per-component XOR of In.xy over all four lanes; one row per thread.
- Name: ExpectedOut2
Format: UInt32
Stride: 8
Data: [
0x1101, 0x2222,
0x1101, 0x2222,
0x1101, 0x2222,
0x1101, 0x2222
]
# Per-component XOR of In.xyz over all four lanes; one row per thread.
- Name: ExpectedOut3
Format: UInt32
Stride: 12
Data: [
0x1101, 0x2222, 0x4444,
0x1101, 0x2222, 0x4444,
0x1101, 0x2222, 0x4444,
0x1101, 0x2222, 0x4444
]
# Per-component XOR of the full uint4 input over all four lanes; one row per
# thread.
- Name: ExpectedOut4
Format: UInt32
Stride: 16
Data: [
0x1101, 0x2222, 0x4444, 0x8888,
0x1101, 0x2222, 0x4444, 0x8888,
0x1101, 0x2222, 0x4444, 0x8888,
0x1101, 0x2222, 0x4444, 0x8888
]
# Constant uint4(1,2,3,4) reduced under different active-lane sets. Each
# 16-byte element is one uint4, and each data row below holds two elements:
#   elements 0-3   four lanes active, even count, so zero
#   elements 4-7   lanes 0, 2, 3 active, odd count, so (1,2,3,4);
#                  element 5 is not written
#   elements 8-11  lanes 1, 3 active, even count, so zero;
#                  elements 8 and 10 are not written
#   element 13     lane 1 alone, so (1,2,3,4)
#   elements 12, 14, 15 are not written and are expected to stay 0
- Name: ExpectedOut5
Format: UInt32
Stride: 16
Data: [ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0,
0x1, 0x2, 0x3, 0x4, 0x1, 0x2, 0x3, 0x4,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x1, 0x2, 0x3, 0x4,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
]

Results:
- Result: ExpectedOut1
Rule: BufferExact
Actual: Out1
Expected: ExpectedOut1
- Result: ExpectedOut2
Rule: BufferExact
Actual: Out2
Expected: ExpectedOut2
- Result: ExpectedOut3
Rule: BufferExact
Actual: Out3
Expected: ExpectedOut3
- Result: ExpectedOut4
Rule: BufferExact
Actual: Out4
Expected: ExpectedOut4
- Result: ExpectedOut5
Rule: BufferExact
Actual: Out5
Expected: ExpectedOut5

DescriptorSets:
- Resources:
- Name: In
Kind: StructuredBuffer
DirectXBinding:
Register: 0
Space: 0
VulkanBinding:
Binding: 0
- Name: Out1
Kind: RWStructuredBuffer
DirectXBinding:
Register: 1
Space: 0
VulkanBinding:
Binding: 1
- Name: Out2
Kind: RWStructuredBuffer
DirectXBinding:
Register: 2
Space: 0
VulkanBinding:
Binding: 2
- Name: Out3
Kind: RWStructuredBuffer
DirectXBinding:
Register: 3
Space: 0
VulkanBinding:
Binding: 3
- Name: Out4
Kind: RWStructuredBuffer
DirectXBinding:
Register: 4
Space: 0
VulkanBinding:
Binding: 4
- Name: Out5
Kind: RWStructuredBuffer
DirectXBinding:
Register: 5
Space: 0
VulkanBinding:
Binding: 5
...
#--- end

# Bug https://github.com/llvm/offload-test-suite/issues/1058
# XFAIL: QC && DirectX

# Bug https://github.com/llvm/offload-test-suite/issues/1058
# XFAIL: QC && Clang && Vulkan

# RUN: split-file %s %t
# RUN: %dxc_target -T cs_6_5 -fvk-use-dx-layout -Fo %t.o %t/source.hlsl
# RUN: %offloader %t/pipeline.yaml %t.o
Loading
Loading