From 7c4d3222adfd71d42ffaf29ce42e2704a2c96234 Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Sun, 22 Mar 2026 08:12:41 +0100 Subject: [PATCH 1/2] Narrow blanket SPIR-V legalization work in optimizer recipes --- include/spirv-tools/optimizer.hpp | 18 ++-- source/opt/mem_pass.cpp | 40 +++++--- source/opt/mem_pass.h | 7 +- source/opt/optimizer.cpp | 147 +++++++++++++++--------------- source/opt/ssa_rewrite_pass.h | 3 +- 5 files changed, 120 insertions(+), 95 deletions(-) diff --git a/include/spirv-tools/optimizer.hpp b/include/spirv-tools/optimizer.hpp index fd4527bf50..b2d63bef07 100644 --- a/include/spirv-tools/optimizer.hpp +++ b/include/spirv-tools/optimizer.hpp @@ -32,6 +32,13 @@ class Pass; struct DescriptorSetAndBinding; } // namespace opt +enum class SSARewriteMode { + None, + All, + OpaqueOnly, + SpecialTypes, +}; + // C++ interface for SPIR-V optimization functionalities. It wraps the context // (including target environment and the corresponding SPIR-V grammar) and // provides methods for registering optimization passes and optimizing. @@ -125,6 +132,9 @@ class SPIRV_TOOLS_EXPORT Optimizer { // interface are considered live and are not eliminated. Optimizer& RegisterLegalizationPasses(); Optimizer& RegisterLegalizationPasses(bool preserve_interface); + Optimizer& RegisterLegalizationPasses(bool preserve_interface, + bool include_loop_unroll, + SSARewriteMode ssa_rewrite_mode); // Register passes specified in the list of |flags|. Each flag must be a // string of a form accepted by Optimizer::FlagHasValidForm(). @@ -645,11 +655,6 @@ Optimizer::PassToken CreateLoopPeelingPass(); // Works best after LICM and local multi store elimination pass. Optimizer::PassToken CreateLoopUnswitchPass(); -// Creates a pass to legalize multidimensional arrays for Vulkan. -// This pass will replace multidimensional arrays of resources with a single -// dimensional array. Combine-access-chains should be run before this pass. -Optimizer::PassToken CreateLegalizeMultidimArrayPass(); - // Create global value numbering pass. // This pass will look for instructions where the same value is computed on all // paths leading to the instruction. Those instructions are deleted. @@ -709,7 +714,8 @@ Optimizer::PassToken CreateLoopUnrollPass(bool fully_unroll, int factor = 0); // operations on SSA IDs. This allows SSA optimizers to act on these variables. // Only variables that are local to the function and of supported types are // processed (see IsSSATargetVar for details). -Optimizer::PassToken CreateSSARewritePass(); +Optimizer::PassToken CreateSSARewritePass( + SSARewriteMode mode = SSARewriteMode::All); // Create pass to convert relaxed precision instructions to half precision. // This pass converts as many relaxed float32 arithmetic operations to half as diff --git a/source/opt/mem_pass.cpp b/source/opt/mem_pass.cpp index 4d061ff0c2..aeb5df693d 100644 --- a/source/opt/mem_pass.cpp +++ b/source/opt/mem_pass.cpp @@ -53,7 +53,27 @@ bool MemPass::IsBaseTargetType(const Instruction* typeInst) const { } bool MemPass::IsTargetType(const Instruction* typeInst) const { - if (IsBaseTargetType(typeInst)) return true; + switch (ssa_rewrite_mode_) { + case SSARewriteMode::None: + return false; + case SSARewriteMode::OpaqueOnly: + if (typeInst->IsOpaqueType()) return true; + break; + case SSARewriteMode::SpecialTypes: + if (typeInst->IsOpaqueType()) return true; + switch (typeInst->opcode()) { + case spv::Op::OpTypePointer: + case spv::Op::OpTypeCooperativeMatrixNV: + case spv::Op::OpTypeCooperativeMatrixKHR: + return true; + default: + break; + } + break; + case SSARewriteMode::All: + if (IsBaseTargetType(typeInst)) return true; + break; + } if (typeInst->opcode() == spv::Op::OpTypeArray) { if (!IsTargetType( get_def_use_mgr()->GetDef(typeInst->GetSingleWordOperand(1)))) { @@ -72,8 +92,7 @@ bool MemPass::IsTargetType(const Instruction* typeInst) const { bool MemPass::IsNonPtrAccessChain(const spv::Op opcode) const { return opcode == spv::Op::OpAccessChain || - opcode == spv::Op::OpInBoundsAccessChain || - opcode == spv::Op::OpUntypedAccessChainKHR; + opcode == spv::Op::OpInBoundsAccessChain; } bool MemPass::IsPtr(uint32_t ptrId) { @@ -89,14 +108,11 @@ bool MemPass::IsPtr(uint32_t ptrId) { ptrInst = get_def_use_mgr()->GetDef(varId); } const spv::Op op = ptrInst->opcode(); - if (op == spv::Op::OpVariable || op == spv::Op::OpUntypedVariableKHR || - IsNonPtrAccessChain(op)) - return true; + if (op == spv::Op::OpVariable || IsNonPtrAccessChain(op)) return true; const uint32_t varTypeId = ptrInst->type_id(); if (varTypeId == 0) return false; const Instruction* varTypeInst = get_def_use_mgr()->GetDef(varTypeId); - return varTypeInst->opcode() == spv::Op::OpTypePointer || - varTypeInst->opcode() == spv::Op::OpTypeUntypedPointerKHR; + return varTypeInst->opcode() == spv::Op::OpTypePointer; } Instruction* MemPass::GetPtr(uint32_t ptrId, uint32_t* varId) { @@ -106,13 +122,11 @@ Instruction* MemPass::GetPtr(uint32_t ptrId, uint32_t* varId) { switch (ptrInst->opcode()) { case spv::Op::OpVariable: - case spv::Op::OpUntypedVariableKHR: case spv::Op::OpFunctionParameter: varInst = ptrInst; break; case spv::Op::OpAccessChain: case spv::Op::OpInBoundsAccessChain: - case spv::Op::OpUntypedAccessChainKHR: case spv::Op::OpPtrAccessChain: case spv::Op::OpInBoundsPtrAccessChain: case spv::Op::OpImageTexelPointer: @@ -125,8 +139,7 @@ Instruction* MemPass::GetPtr(uint32_t ptrId, uint32_t* varId) { break; } - if (varInst->opcode() == spv::Op::OpVariable || - varInst->opcode() == spv::Op::OpUntypedVariableKHR) { + if (varInst->opcode() == spv::Op::OpVariable) { *varId = varInst->result_id(); } else { *varId = 0; @@ -241,7 +254,8 @@ void MemPass::DCEInst(Instruction* inst, } } -MemPass::MemPass() {} +MemPass::MemPass(SSARewriteMode ssa_rewrite_mode) + : ssa_rewrite_mode_(ssa_rewrite_mode) {} bool MemPass::HasOnlySupportedRefs(uint32_t varId) { return get_def_use_mgr()->WhileEachUser(varId, [this](Instruction* user) { diff --git a/source/opt/mem_pass.h b/source/opt/mem_pass.h index 496286b5f8..b121076691 100644 --- a/source/opt/mem_pass.h +++ b/source/opt/mem_pass.h @@ -25,6 +25,7 @@ #include #include +#include "spirv-tools/optimizer.hpp" #include "source/opt/basic_block.h" #include "source/opt/def_use_manager.h" #include "source/opt/dominator_analysis.h" @@ -68,7 +69,7 @@ class MemPass : public Pass { void CollectTargetVars(Function* func); protected: - MemPass(); + explicit MemPass(SSARewriteMode ssa_rewrite_mode = SSARewriteMode::All); // Returns true if |typeInst| is a scalar type // or a vector or matrix @@ -133,7 +134,9 @@ class MemPass : public Pass { // Cache of verified non-target vars std::unordered_set seen_non_target_vars_; - private: +private: + SSARewriteMode ssa_rewrite_mode_ = SSARewriteMode::All; + // Return true if all uses of |varId| are only through supported reference // operations ie. loads and store. Also cache in supported_ref_vars_. // TODO(dnovillo): This function is replicated in other passes and it's diff --git a/source/opt/optimizer.cpp b/source/opt/optimizer.cpp index 6986501dba..606fbef9ee 100644 --- a/source/opt/optimizer.cpp +++ b/source/opt/optimizer.cpp @@ -120,8 +120,10 @@ Optimizer& Optimizer::RegisterPass(PassToken&& p) { // The legalization problem is essentially a very general copy propagation // problem. The optimization we use are all used to either do copy propagation // or enable more copy propagation. -Optimizer& Optimizer::RegisterLegalizationPasses(bool preserve_interface) { - return +Optimizer& Optimizer::RegisterLegalizationPasses(bool preserve_interface, + bool include_loop_unroll, + SSARewriteMode ssa_rewrite_mode) { + auto& optimizer = // Wrap OpKill instructions so all other code can be inlined. RegisterPass(CreateWrapOpKillPass()) // Remove unreachable block so that merge return works. @@ -130,87 +132,93 @@ Optimizer& Optimizer::RegisterLegalizationPasses(bool preserve_interface) { .RegisterPass(CreateMergeReturnPass()) // Make sure uses and definitions are in the same function. .RegisterPass(CreateInlineExhaustivePass()) - // Make private variable function scope - .RegisterPass(CreateEliminateDeadFunctionsPass()) - .RegisterPass(CreatePrivateToLocalPass()) - // Fix up the storage classes that DXC may have purposely generated - // incorrectly. All functions are inlined, and a lot of dead code has - // been removed. - .RegisterPass(CreateFixStorageClassPass()) - // Propagate the value stored to the loads in very simple cases. - .RegisterPass(CreateLocalSingleBlockLoadStoreElimPass()) - .RegisterPass(CreateLocalSingleStoreElimPass()) - .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) - // Split up aggregates so they are easier to deal with. - .RegisterPass(CreateScalarReplacementPass(0)) - // Remove loads and stores so everything is in intermediate values. - // Takes care of copy propagation of non-members. - .RegisterPass(CreateLocalSingleBlockLoadStoreElimPass()) - .RegisterPass(CreateLocalSingleStoreElimPass()) - .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) - .RegisterPass(CreateLocalMultiStoreElimPass()) - .RegisterPass(CreateCombineAccessChainsPass()) - .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) - .RegisterPass(CreateLegalizeMultidimArrayPass()) - // Propagate constants to get as many constant conditions on branches - // as possible. - .RegisterPass(CreateCCPPass()) - .RegisterPass(CreateLoopUnrollPass(true)) - .RegisterPass(CreateDeadBranchElimPass()) - // Copy propagate members. Cleans up code sequences generated by - // scalar replacement. Also important for removing OpPhi nodes. - .RegisterPass(CreateSimplificationPass()) - .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) - .RegisterPass(CreateCopyPropagateArraysPass()) - // May need loop unrolling here see - // https://github.com/Microsoft/DirectXShaderCompiler/pull/930 - // Get rid of unused code that contain traces of illegal code - // or unused references to unbound external objects - .RegisterPass(CreateVectorDCEPass()) - .RegisterPass(CreateDeadInsertElimPass()) - .RegisterPass(CreateReduceLoadSizePass()) - .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) - .RegisterPass(CreateRemoveUnusedInterfaceVariablesPass()) - .RegisterPass(CreateInterpolateFixupPass()) - .RegisterPass(CreateInvocationInterlockPlacementPass()) - .RegisterPass(CreateOpExtInstWithForwardReferenceFixupPass()); + .RegisterPass(CreateEliminateDeadFunctionsPass()); + optimizer.RegisterPass(CreatePrivateToLocalPass()); + // Fix up the storage classes that DXC may have purposely generated + // incorrectly. All functions are inlined, and a lot of dead code has + // been removed. + optimizer.RegisterPass(CreateFixStorageClassPass()); + // Propagate the value stored to the loads in very simple cases. + optimizer.RegisterPass(CreateLocalSingleBlockLoadStoreElimPass()) + .RegisterPass(CreateLocalSingleStoreElimPass()) + .RegisterPass(CreateAggressiveDCEPass(preserve_interface)); + optimizer + // Split up aggregates so they are easier to deal with. + .RegisterPass(CreateScalarReplacementPass(0)); + // Remove loads and stores so everything is in intermediate values. + // Takes care of copy propagation of non-members. + optimizer.RegisterPass(CreateLocalSingleBlockLoadStoreElimPass()) + .RegisterPass(CreateLocalSingleStoreElimPass()) + .RegisterPass(CreateAggressiveDCEPass(preserve_interface)); + if (ssa_rewrite_mode != SSARewriteMode::None) { + optimizer.RegisterPass(CreateSSARewritePass(ssa_rewrite_mode)); + } + optimizer + // Propagate constants to get as many constant conditions on branches + // as possible. + .RegisterPass(CreateCCPPass()); + if (include_loop_unroll) { + optimizer.RegisterPass(CreateLoopUnrollPass(true)); + } + optimizer.RegisterPass(CreateDeadBranchElimPass()) + // Copy propagate members. Cleans up code sequences generated by scalar + // replacement. Also important for removing OpPhi nodes. + .RegisterPass(CreateSimplificationPass()); + return optimizer + // May need loop unrolling here see + // https://github.com/Microsoft/DirectXShaderCompiler/pull/930 + // Get rid of unused code that contain traces of illegal code + // or unused references to unbound external objects + .RegisterPass(CreateVectorDCEPass()) + .RegisterPass(CreateDeadInsertElimPass()) + .RegisterPass(CreateReduceLoadSizePass()) + .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) + .RegisterPass(CreateRemoveUnusedInterfaceVariablesPass()) + .RegisterPass(CreateInterpolateFixupPass()) + .RegisterPass(CreateInvocationInterlockPlacementPass()) + .RegisterPass(CreateOpExtInstWithForwardReferenceFixupPass()); } Optimizer& Optimizer::RegisterLegalizationPasses() { - return RegisterLegalizationPasses(false); + return RegisterLegalizationPasses(false, true, SSARewriteMode::All); +} + +Optimizer& Optimizer::RegisterLegalizationPasses(bool preserve_interface) { + return RegisterLegalizationPasses(preserve_interface, true, + SSARewriteMode::All); } Optimizer& Optimizer::RegisterPerformancePasses(bool preserve_interface) { - return RegisterPass(CreateWrapOpKillPass()) + auto& optimizer = RegisterPass(CreateWrapOpKillPass()) .RegisterPass(CreateDeadBranchElimPass()) .RegisterPass(CreateMergeReturnPass()) .RegisterPass(CreateInlineExhaustivePass()) .RegisterPass(CreateEliminateDeadFunctionsPass()) - .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) .RegisterPass(CreatePrivateToLocalPass()) .RegisterPass(CreateLocalSingleBlockLoadStoreElimPass()) .RegisterPass(CreateLocalSingleStoreElimPass()) .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) .RegisterPass(CreateScalarReplacementPass(0)) - .RegisterPass(CreateLocalAccessChainConvertPass()) - .RegisterPass(CreateLocalSingleBlockLoadStoreElimPass()) + .RegisterPass(CreateLocalAccessChainConvertPass()); + optimizer.RegisterPass(CreateLocalSingleBlockLoadStoreElimPass()) .RegisterPass(CreateLocalSingleStoreElimPass()) - .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) - .RegisterPass(CreateLocalMultiStoreElimPass()) - .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) - .RegisterPass(CreateCCPPass()) - .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) - .RegisterPass(CreateLoopUnrollPass(true)) - .RegisterPass(CreateDeadBranchElimPass()) - .RegisterPass(CreateRedundancyEliminationPass()) - .RegisterPass(CreateCombineAccessChainsPass()) + .RegisterPass(CreateAggressiveDCEPass(preserve_interface)); + optimizer.RegisterPass(CreateCCPPass()) + .RegisterPass(CreateAggressiveDCEPass(preserve_interface)); + // Preserve LoopControl::Unroll in the IR instead of always materializing + // it here. The optimizer-side full unroll is very costly on large modules + // with many tiny [unroll]-annotated loops, while the hint remains available + // to downstream consumers in the final SPIR-V. + optimizer.RegisterPass(CreateDeadBranchElimPass()); + optimizer.RegisterPass(CreateLocalRedundancyEliminationPass()); + optimizer.RegisterPass(CreateCombineAccessChainsPass()) .RegisterPass(CreateSimplificationPass()) .RegisterPass(CreateScalarReplacementPass(0)) .RegisterPass(CreateLocalAccessChainConvertPass()) .RegisterPass(CreateLocalSingleBlockLoadStoreElimPass()) .RegisterPass(CreateLocalSingleStoreElimPass()) .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) - .RegisterPass(CreateSSARewritePass()) + .RegisterPass(CreateSSARewritePass(SSARewriteMode::SpecialTypes)) .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) .RegisterPass(CreateVectorDCEPass()) .RegisterPass(CreateDeadInsertElimPass()) @@ -220,9 +228,9 @@ Optimizer& Optimizer::RegisterPerformancePasses(bool preserve_interface) { .RegisterPass(CreateCopyPropagateArraysPass()) .RegisterPass(CreateReduceLoadSizePass()) .RegisterPass(CreateAggressiveDCEPass(preserve_interface)) - .RegisterPass(CreateBlockMergePass()) - .RegisterPass(CreateRedundancyEliminationPass()) - .RegisterPass(CreateDeadBranchElimPass()) + .RegisterPass(CreateBlockMergePass()); + optimizer.RegisterPass(CreateLocalRedundancyEliminationPass()); + return optimizer.RegisterPass(CreateDeadBranchElimPass()) .RegisterPass(CreateBlockMergePass()) .RegisterPass(CreateSimplificationPass()); } @@ -401,8 +409,6 @@ bool Optimizer::RegisterPassFromFlag(const std::string& flag, RegisterPass(CreateFoldSpecConstantOpAndCompositePass()); } else if (pass_name == "loop-unswitch") { RegisterPass(CreateLoopUnswitchPass()); - } else if (pass_name == "legalize-multidim-array") { - RegisterPass(CreateLegalizeMultidimArrayPass()); } else if (pass_name == "scalar-replacement") { if (pass_args.size() == 0) { RegisterPass(CreateScalarReplacementPass(0)); @@ -965,11 +971,6 @@ Optimizer::PassToken CreateLoopUnswitchPass() { MakeUnique()); } -Optimizer::PassToken CreateLegalizeMultidimArrayPass() { - return MakeUnique( - MakeUnique()); -} - Optimizer::PassToken CreateRedundancyEliminationPass() { return MakeUnique( MakeUnique()); @@ -1019,9 +1020,9 @@ Optimizer::PassToken CreateLoopUnrollPass(bool fully_unroll, int factor) { MakeUnique(fully_unroll, factor)); } -Optimizer::PassToken CreateSSARewritePass() { +Optimizer::PassToken CreateSSARewritePass(SSARewriteMode mode) { return MakeUnique( - MakeUnique()); + MakeUnique(mode)); } Optimizer::PassToken CreateCopyPropagateArraysPass() { diff --git a/source/opt/ssa_rewrite_pass.h b/source/opt/ssa_rewrite_pass.h index 076d9e1651..8a5b04364d 100644 --- a/source/opt/ssa_rewrite_pass.h +++ b/source/opt/ssa_rewrite_pass.h @@ -294,7 +294,8 @@ class SSARewriter { class SSARewritePass : public MemPass { public: - SSARewritePass() = default; + explicit SSARewritePass(SSARewriteMode mode = SSARewriteMode::All) + : MemPass(mode) {} const char* name() const override { return "ssa-rewrite"; } Status Process() override; From 2a730e127a32ac8b0713f5e1490d7b9be9d1cc9a Mon Sep 17 00:00:00 2001 From: Arkadiusz Lachowicz Date: Mon, 23 Mar 2026 10:39:54 +0100 Subject: [PATCH 2/2] Handle image texel pointers in local single-store elim --- source/opt/local_single_store_elim_pass.cpp | 23 +++++++ test/opt/local_single_store_elim_test.cpp | 76 +++++++++++++++++++++ 2 files changed, 99 insertions(+) diff --git a/source/opt/local_single_store_elim_pass.cpp b/source/opt/local_single_store_elim_pass.cpp index df35401ebc..7aa8ea43b7 100644 --- a/source/opt/local_single_store_elim_pass.cpp +++ b/source/opt/local_single_store_elim_pass.cpp @@ -305,6 +305,18 @@ bool LocalSingleStoreElimPass::RewriteLoads( else stored_id = store_inst->GetSingleWordInOperand(kVariableInitIdInIdx); + const auto get_image_pointer_id = [this](uint32_t value_id) { + Instruction* value_inst = context()->get_def_use_mgr()->GetDef(value_id); + while (value_inst && value_inst->opcode() == spv::Op::OpCopyObject) { + value_id = value_inst->GetSingleWordInOperand(0); + value_inst = context()->get_def_use_mgr()->GetDef(value_id); + } + if (!value_inst || value_inst->opcode() != spv::Op::OpLoad) { + return uint32_t{0}; + } + return value_inst->GetSingleWordInOperand(0); + }; + *all_rewritten = true; bool modified = false; for (Instruction* use : uses) { @@ -319,6 +331,17 @@ bool LocalSingleStoreElimPass::RewriteLoads( context()->KillNamesAndDecorates(use->result_id()); context()->ReplaceAllUsesWith(use->result_id(), stored_id); context()->KillInst(use); + } else if (use->opcode() == spv::Op::OpImageTexelPointer && + dominator_analysis->Dominates(store_inst, use)) { + const uint32_t image_ptr_id = get_image_pointer_id(stored_id); + if (image_ptr_id == 0) { + *all_rewritten = false; + continue; + } + modified = true; + context()->ForgetUses(use); + use->SetInOperand(0, {image_ptr_id}); + context()->AnalyzeUses(use); } else { *all_rewritten = false; } diff --git a/test/opt/local_single_store_elim_test.cpp b/test/opt/local_single_store_elim_test.cpp index 8fd5c9d2f1..8f4fd7f888 100644 --- a/test/opt/local_single_store_elim_test.cpp +++ b/test/opt/local_single_store_elim_test.cpp @@ -908,6 +908,82 @@ OpFunctionEnd SinglePassRunAndCheck(before, after, true, true); } +TEST_F(LocalSingleStoreElimTest, RewriteImageTexelPointerImageOperand) { + const std::string before = R"(OpCapability Shader +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %main "main" %g_rwTexture3d +OpExecutionMode %main LocalSize 256 1 1 +OpSource HLSL 660 +OpName %type_3d_image "type.3d.image" +OpName %g_rwTexture3d "g_rwTexture3d" +OpName %main "main" +OpDecorate %g_rwTexture3d DescriptorSet 0 +OpDecorate %g_rwTexture3d Binding 0 +%uint = OpTypeInt 32 0 +%uint_0 = OpConstant %uint 0 +%uint_1 = OpConstant %uint 1 +%uint_2 = OpConstant %uint 2 +%uint_3 = OpConstant %uint 3 +%v3uint = OpTypeVector %uint 3 +%10 = OpConstantComposite %v3uint %uint_1 %uint_2 %uint_3 +%type_3d_image = OpTypeImage %uint 3D 2 0 0 2 R32ui +%_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image +%void = OpTypeVoid +%13 = OpTypeFunction %void +%_ptr_Function_type_3d_image = OpTypePointer Function %type_3d_image +%_ptr_Image_uint = OpTypePointer Image %uint +%g_rwTexture3d = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant +%main = OpFunction %void None %13 +%16 = OpLabel +%17 = OpVariable %_ptr_Function_type_3d_image Function +%18 = OpLoad %type_3d_image %g_rwTexture3d +OpStore %17 %18 +%19 = OpImageTexelPointer %_ptr_Image_uint %17 %10 %uint_0 +%20 = OpAtomicIAdd %uint %19 %uint_1 %uint_0 %uint_1 +OpReturn +OpFunctionEnd +)"; + + const std::string after = R"(OpCapability Shader +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %main "main" %g_rwTexture3d +OpExecutionMode %main LocalSize 256 1 1 +OpSource HLSL 660 +OpName %type_3d_image "type.3d.image" +OpName %g_rwTexture3d "g_rwTexture3d" +OpName %main "main" +OpDecorate %g_rwTexture3d DescriptorSet 0 +OpDecorate %g_rwTexture3d Binding 0 +%uint = OpTypeInt 32 0 +%uint_0 = OpConstant %uint 0 +%uint_1 = OpConstant %uint 1 +%uint_2 = OpConstant %uint 2 +%uint_3 = OpConstant %uint 3 +%v3uint = OpTypeVector %uint 3 +%10 = OpConstantComposite %v3uint %uint_1 %uint_2 %uint_3 +%type_3d_image = OpTypeImage %uint 3D 2 0 0 2 R32ui +%_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image +%void = OpTypeVoid +%13 = OpTypeFunction %void +%_ptr_Function_type_3d_image = OpTypePointer Function %type_3d_image +%_ptr_Image_uint = OpTypePointer Image %uint +%g_rwTexture3d = OpVariable %_ptr_UniformConstant_type_3d_image UniformConstant +%main = OpFunction %void None %13 +%16 = OpLabel +%17 = OpVariable %_ptr_Function_type_3d_image Function +%18 = OpLoad %type_3d_image %g_rwTexture3d +OpStore %17 %18 +%19 = OpImageTexelPointer %_ptr_Image_uint %g_rwTexture3d %10 %uint_0 +%20 = OpAtomicIAdd %uint %19 %uint_1 %uint_0 %uint_1 +OpReturn +OpFunctionEnd +)"; + + SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS); + SetTargetEnv(SPV_ENV_UNIVERSAL_1_4); + SinglePassRunAndCheck(before, after, true, true); +} + // Test that that an unused OpAccessChain between a store and a use does does // not hinders the replacement of the use. We need to check this because // local-access-chain-convert does always remove the OpAccessChain instructions