From 09b83dfa11b833646aa50059c1d89d8cb8a6aa70 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 4 Feb 2026 17:02:36 -0800 Subject: [PATCH 1/8] first attempt --- .../lib/DxilConvPasses/CMakeLists.txt | 12 +++ .../lib/DxilConvPasses/ScopeNestedCFG.cpp | 83 ++++++++++++++++++- .../no_cloning_convergent_ops.ll | 50 +++++++++++ 3 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops.ll diff --git a/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt b/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt index ec76aedc6f..65ec1cf760 100644 --- a/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt +++ b/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt @@ -14,3 +14,15 @@ PUBLIC $ $ ) + + +# Link against LLVMDXIL so that ScopeNestedCFG can use DxilUtil functions +target_link_libraries(DxilConvPasses + PRIVATE LLVMDXIL +) + +target_include_directories(DxilConvPasses + PRIVATE + ${DXC_SOURCE_DIR}/lib/DXIL # adjust if DXIL is not in DXC_SOURCE_DIR/lib/DXIL + ${DXC_SOURCE_DIR}/lib/IR # adjust if DXIL is not in DXC_SOURCE_DIR/lib/DXIL +) diff --git a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp index 3744568e53..4cf0b73286 100644 --- a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp +++ b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp @@ -10,6 +10,16 @@ /////////////////////////////////////////////////////////////////////////////// #include "DxilConvPasses/ScopeNestedCFG.h" + +#include "dxc/DXIL/DxilUtil.h" +#include "dxc/DXIL/DxilInstructions.h" +#include "dxc/DXIL/DxilModule.h" +#include "dxc/DXIL/DxilOperations.h" +#include "dxc/DXIL/DxilTypeSystem.h" +#include "dxc/HLSL/DxilConvergentName.h" + +#include "llvm/IR/Attributes.h" + #include "dxc/Support/Global.h" #include "llvm/Analysis/ReducibilityAnalysis.h" @@ -161,6 +171,8 @@ class ScopeNestedCFG : public FunctionPass { bool IsAcyclicRegionTerminator(const BasicBlock *pBB); BasicBlock *GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB); + bool IsSwitchCaseBlock(BasicBlock *BB); + bool IsSwitchFallthrough(BasicBlock *Pred, BasicBlock *BB); bool IsMergePoint(BasicBlock *pBB); BasicBlock *SplitEdge(BasicBlock *pBB, unsigned SuccIdx, const Twine &Name, @@ -646,7 +658,57 @@ BasicBlock *ScopeNestedCFG::GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB) { return pEffectiveSuccessor; } +bool ScopeNestedCFG::IsSwitchCaseBlock(BasicBlock *BB) { + for (BasicBlock *Pred : predecessors(BB)) { + if (auto *SI = dyn_cast(Pred->getTerminator())) { + for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + if (SI->getSuccessor(i) == BB) + return true; + } + } + } + return false; +} + +bool ScopeNestedCFG::IsSwitchFallthrough(BasicBlock *Pred, BasicBlock *BB) { + // 1. Predecessor must NOT be the switch dispatch block + if (isa(Pred->getTerminator())) + return false; + + // 2. Predecessor must end in unconditional branch + auto *Br = dyn_cast(Pred->getTerminator()); + if (!Br || !Br->isUnconditional()) + return false; + + // 3. BB must be reached by that unconditional branch + if (Br->getSuccessor(0) != BB) + return false; + + // 4. Predecessor must be a switch case block + if (!IsSwitchCaseBlock(Pred)) + return false; + + // 5. Current block must be another switch case block + if (!IsSwitchCaseBlock(BB)) + return false; + + return true; +} + +bool HasConvergentCall(BasicBlock *BB) { + for (Instruction &I : *BB) { + if (auto *CI = dyn_cast(&I)) { + Function *CF = CI->getCalledFunction(); + if (CF && CF->hasFnAttribute(Attribute::AttrKind::Convergent)) + return true; + } + } + return false; +} + bool ScopeNestedCFG::IsMergePoint(BasicBlock *pBB) { + + unordered_set UniquePredecessors; for (auto itPred = pred_begin(pBB), endPred = pred_end(pBB); itPred != endPred; ++itPred) { @@ -657,7 +719,7 @@ bool ScopeNestedCFG::IsMergePoint(BasicBlock *pBB) { UniquePredecessors.insert(pPredBB); } - return UniquePredecessors.size() >= 2; + return UniquePredecessors.size() >= 2; } bool ScopeNestedCFG::IsAcyclicRegionTerminator(const BasicBlock *pNode) { @@ -1080,6 +1142,11 @@ void ScopeNestedCFG::DetermineScopeEndPoints( pEndBB = BTO.GetBlock(MPI.MP); } + if (HasConvergentCall(pBB)) { + // Force scope end point to be the block itself + pEndBB = pBB; + } + auto itOldEndPointBB = ScopeEndPoints.find(pBB); if (itOldEndPointBB != ScopeEndPoints.end() && itOldEndPointBB->second != pEndBB) { @@ -1255,6 +1322,13 @@ void ScopeNestedCFG::DetermineReachableMergePoints( DXASSERT_NOMSG(m_LE2LBMap.find(pBB) == m_LE2LBMap.cend()); MPI.CandidateSet.insert(iBB); } + + if (HasConvergentCall(pBB)) { + // Force the merge point to be the block itself + MPI.MP = BTO.GetBlockId(pBB); + MPI.CandidateSet.clear(); + MPI.CandidateSet.insert(MPI.MP); + } } // TODO: during final testing consider to remove. @@ -1619,6 +1693,13 @@ void ScopeNestedCFG::TransformAcyclicRegion(BasicBlock *pEntry) { // BasicBlock *pSuccBB = pScopeBeginTI->getSuccessor(Scope.SuccIdx); + if (HasConvergentCall(Scope.pScopeBeginBB)) { + // For convergent blocks, force the successor to end the scope here + Scope.pScopeEndBB = Scope.pScopeBeginBB; + Scope.pClonedScopeEndBB = nullptr; // or clone if needed + continue; + } + // 7. Already processed successor. if (bIfScope || bSwitchScope) { if (pSuccBB == Scope.pPrevSuccBB) { diff --git a/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops.ll b/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops.ll new file mode 100644 index 0000000000..e4f31d5814 --- /dev/null +++ b/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops.ll @@ -0,0 +1,50 @@ +; RUN: %opt-exe %s -scopenested -S | FileCheck %s + +; verify that this pass won't identify fallthrough blocks +; as merge points and in turn won't duplicate blocks +; which contain convergent operations + +; previously, blocks with wave operations would be cloned, +; violating the principle that wave operations should +; only be called by different threads when control flow +; is distinct between those threads + +declare float @dx.wave.op(float) convergent + +; CHECK-LABEL: define void @CSMain + +define void @CSMain(i32 %tid, float %v) convergent { +entry: + switch i32 %tid, label %exit [ + i32 0, label %case0 + i32 1, label %case1 + ] + +; CHECK: case0: +case0: ; switch case 0 + +; CHECK: call float @dx.wave.op(float %v) +; CHECK: br label %case1 + %w0 = call float @dx.wave.op(float %v) + br label %case1 ; FALLTHROUGH + + +; CHECK: case1: +case1: ; switch case 1 + %a = phi float [ %w0, %case0 ], + [ 0.0, %entry ] + +; CHECK: call float @dx.wave.op(float %v) +; CHECK: br label %exit + %w1 = call float @dx.wave.op(float %v) + %sum = fadd float %a, %w1 + br label %exit + +; no cloning, so there should be no more waveops after this point +; CHECK-NOT: call float @dx.wave.op + +exit: + %r = phi float [ %sum, %case1 ], + [ 0.0, %entry ] + ret void +} From 04942c00de67e57ee625a77927047d43312a305d Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 4 Feb 2026 17:22:13 -0800 Subject: [PATCH 2/8] cleanup --- .../lib/DxilConvPasses/CMakeLists.txt | 5 +- .../lib/DxilConvPasses/ScopeNestedCFG.cpp | 57 +------------------ 2 files changed, 5 insertions(+), 57 deletions(-) diff --git a/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt b/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt index 65ec1cf760..966791150b 100644 --- a/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt +++ b/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt @@ -15,7 +15,6 @@ PUBLIC $ ) - # Link against LLVMDXIL so that ScopeNestedCFG can use DxilUtil functions target_link_libraries(DxilConvPasses PRIVATE LLVMDXIL @@ -23,6 +22,6 @@ target_link_libraries(DxilConvPasses target_include_directories(DxilConvPasses PRIVATE - ${DXC_SOURCE_DIR}/lib/DXIL # adjust if DXIL is not in DXC_SOURCE_DIR/lib/DXIL - ${DXC_SOURCE_DIR}/lib/IR # adjust if DXIL is not in DXC_SOURCE_DIR/lib/DXIL + ${DXC_SOURCE_DIR}/lib/DXIL + ${DXC_SOURCE_DIR}/lib/IR ) diff --git a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp index 4cf0b73286..8fb5996547 100644 --- a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp +++ b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp @@ -10,16 +10,6 @@ /////////////////////////////////////////////////////////////////////////////// #include "DxilConvPasses/ScopeNestedCFG.h" - -#include "dxc/DXIL/DxilUtil.h" -#include "dxc/DXIL/DxilInstructions.h" -#include "dxc/DXIL/DxilModule.h" -#include "dxc/DXIL/DxilOperations.h" -#include "dxc/DXIL/DxilTypeSystem.h" -#include "dxc/HLSL/DxilConvergentName.h" - -#include "llvm/IR/Attributes.h" - #include "dxc/Support/Global.h" #include "llvm/Analysis/ReducibilityAnalysis.h" @@ -171,8 +161,6 @@ class ScopeNestedCFG : public FunctionPass { bool IsAcyclicRegionTerminator(const BasicBlock *pBB); BasicBlock *GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB); - bool IsSwitchCaseBlock(BasicBlock *BB); - bool IsSwitchFallthrough(BasicBlock *Pred, BasicBlock *BB); bool IsMergePoint(BasicBlock *pBB); BasicBlock *SplitEdge(BasicBlock *pBB, unsigned SuccIdx, const Twine &Name, @@ -658,43 +646,6 @@ BasicBlock *ScopeNestedCFG::GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB) { return pEffectiveSuccessor; } -bool ScopeNestedCFG::IsSwitchCaseBlock(BasicBlock *BB) { - for (BasicBlock *Pred : predecessors(BB)) { - if (auto *SI = dyn_cast(Pred->getTerminator())) { - for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { - if (SI->getSuccessor(i) == BB) - return true; - } - } - } - return false; -} - -bool ScopeNestedCFG::IsSwitchFallthrough(BasicBlock *Pred, BasicBlock *BB) { - // 1. Predecessor must NOT be the switch dispatch block - if (isa(Pred->getTerminator())) - return false; - - // 2. Predecessor must end in unconditional branch - auto *Br = dyn_cast(Pred->getTerminator()); - if (!Br || !Br->isUnconditional()) - return false; - - // 3. BB must be reached by that unconditional branch - if (Br->getSuccessor(0) != BB) - return false; - - // 4. Predecessor must be a switch case block - if (!IsSwitchCaseBlock(Pred)) - return false; - - // 5. Current block must be another switch case block - if (!IsSwitchCaseBlock(BB)) - return false; - - return true; -} - bool HasConvergentCall(BasicBlock *BB) { for (Instruction &I : *BB) { if (auto *CI = dyn_cast(&I)) { @@ -706,9 +657,7 @@ bool HasConvergentCall(BasicBlock *BB) { return false; } -bool ScopeNestedCFG::IsMergePoint(BasicBlock *pBB) { - - +bool ScopeNestedCFG::IsMergePoint(BasicBlock *pBB) { unordered_set UniquePredecessors; for (auto itPred = pred_begin(pBB), endPred = pred_end(pBB); itPred != endPred; ++itPred) { @@ -719,7 +668,7 @@ bool ScopeNestedCFG::IsMergePoint(BasicBlock *pBB) { UniquePredecessors.insert(pPredBB); } - return UniquePredecessors.size() >= 2; + return UniquePredecessors.size() >= 2; } bool ScopeNestedCFG::IsAcyclicRegionTerminator(const BasicBlock *pNode) { @@ -1696,7 +1645,7 @@ void ScopeNestedCFG::TransformAcyclicRegion(BasicBlock *pEntry) { if (HasConvergentCall(Scope.pScopeBeginBB)) { // For convergent blocks, force the successor to end the scope here Scope.pScopeEndBB = Scope.pScopeBeginBB; - Scope.pClonedScopeEndBB = nullptr; // or clone if needed + Scope.pClonedScopeEndBB = nullptr; continue; } From 2dc57f5a709ac57411e20918e28bef3f9f0abaf8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 5 Feb 2026 01:25:24 +0000 Subject: [PATCH 3/8] chore: autopublish 2026-02-05T01:25:24Z --- projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp index 8fb5996547..7a36e4e81e 100644 --- a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp +++ b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp @@ -657,7 +657,7 @@ bool HasConvergentCall(BasicBlock *BB) { return false; } -bool ScopeNestedCFG::IsMergePoint(BasicBlock *pBB) { +bool ScopeNestedCFG::IsMergePoint(BasicBlock *pBB) { unordered_set UniquePredecessors; for (auto itPred = pred_begin(pBB), endPred = pred_end(pBB); itPred != endPred; ++itPred) { @@ -1271,7 +1271,7 @@ void ScopeNestedCFG::DetermineReachableMergePoints( DXASSERT_NOMSG(m_LE2LBMap.find(pBB) == m_LE2LBMap.cend()); MPI.CandidateSet.insert(iBB); } - + if (HasConvergentCall(pBB)) { // Force the merge point to be the block itself MPI.MP = BTO.GetBlockId(pBB); From d764c8826b59ad22033b8973db1b66f71159d36e Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 5 Feb 2026 10:42:40 -0800 Subject: [PATCH 4/8] remove cmakelists --- projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt b/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt index 966791150b..ec76aedc6f 100644 --- a/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt +++ b/projects/dxilconv/lib/DxilConvPasses/CMakeLists.txt @@ -14,14 +14,3 @@ PUBLIC $ $ ) - -# Link against LLVMDXIL so that ScopeNestedCFG can use DxilUtil functions -target_link_libraries(DxilConvPasses - PRIVATE LLVMDXIL -) - -target_include_directories(DxilConvPasses - PRIVATE - ${DXC_SOURCE_DIR}/lib/DXIL - ${DXC_SOURCE_DIR}/lib/IR -) From da3a13e87b555a320d53664171bf741f64764f5d Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 5 Feb 2026 14:28:24 -0800 Subject: [PATCH 5/8] don't depend on convergent fxn attr --- include/dxc/DXIL/DxilOperations.h | 1 + lib/DXIL/DxilOperations.cpp | 16 +++++++++++ .../lib/DxilConvPasses/ScopeNestedCFG.cpp | 27 +++++++++++++++---- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index fabf07ee14..9391e8791f 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -148,6 +148,7 @@ class OP { static bool IsDxilOpFuncCallInst(const llvm::Instruction *I); static bool IsDxilOpFuncCallInst(const llvm::Instruction *I, OpCode opcode); static bool IsDxilOpWave(OpCode C); + static bool IsConvergentOp(OpCode C); static bool IsDxilOpGradient(OpCode C); static bool IsDxilOpFeedback(OpCode C); static bool IsDxilOpBarrier(OpCode C); diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index eb5b2a2ceb..4acb72f444 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -9,6 +9,10 @@ // // /////////////////////////////////////////////////////////////////////////////// +// This file contains functions with generated code, +// and must be clang-format compliant, unless +// clang-format is turned off. + #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilInstructions.h" @@ -3365,6 +3369,18 @@ bool OP::IsDxilOpWave(OpCode C) { // OPCODE-WAVE:END } +bool OP::IsConvergentOp(OpCode C) { + // This accounts for wave and quad ops + unsigned op = (unsigned)C; + if (OP::IsDxilOpWave(C)) + return true; + // Account for derivative ops as well: + // DerivCoarseX = 83, DerivCoarseY = 84, DerivFineX = 85, DerivFineY = 86 + if (83 <= op && op <= 86) + return true; + return false; +} + bool OP::IsDxilOpGradient(OpCode C) { unsigned op = (unsigned)C; // clang-format off diff --git a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp index 7a36e4e81e..4f1b4be4b3 100644 --- a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp +++ b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp @@ -10,6 +10,7 @@ /////////////////////////////////////////////////////////////////////////////// #include "DxilConvPasses/ScopeNestedCFG.h" +#include "dxc/DXIL/DxilOperations.h" #include "dxc/Support/Global.h" #include "llvm/Analysis/ReducibilityAnalysis.h" @@ -645,15 +646,31 @@ BasicBlock *ScopeNestedCFG::GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB) { return pEffectiveSuccessor; } - +// Returns true if this basic block contains an instruction that +// is *control-flow convergence sensitive*. +// +// In DXIL, this includes: +// - Wave operations (WaveActive*, WaveReadLane*, etc.) +// - Quad / derivative operations (ddx, ddy, fwidth) +// +// Such instructions must NEVER be cloned or executed along +// structurally duplicated control-flow paths. bool HasConvergentCall(BasicBlock *BB) { for (Instruction &I : *BB) { - if (auto *CI = dyn_cast(&I)) { - Function *CF = CI->getCalledFunction(); - if (CF && CF->hasFnAttribute(Attribute::AttrKind::Convergent)) - return true; + auto *CI = dyn_cast(&I); + if (!CI) + continue; + + if (!hlsl::OP::IsDxilOpFuncCallInst(&I)) + continue; + + hlsl::OP::OpCode OpCode = hlsl::OP::GetDxilOpFuncCallInst(&I); + + if (hlsl::OP::IsConvergentOp(OpCode)) { + return true; } } + return false; } From 5a9e0e559ddc8c06ae06f682ec4ffeb9e7de0f50 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 5 Feb 2026 16:23:58 -0800 Subject: [PATCH 6/8] add switchfallthrough branchkind, and handling in scopenest iterator --- .../DxilConvPasses/ScopeNestIterator.h | 20 +++++++ .../include/DxilConvPasses/ScopeNestedCFG.h | 1 + .../lib/DxilConvPasses/ScopeNestedCFG.cpp | 52 ++++++++++++++++++- .../no_cloning_convergent_ops.ll | 19 ++++--- 4 files changed, 83 insertions(+), 9 deletions(-) diff --git a/projects/dxilconv/include/DxilConvPasses/ScopeNestIterator.h b/projects/dxilconv/include/DxilConvPasses/ScopeNestIterator.h index f86d8c69b7..8b801dc97a 100644 --- a/projects/dxilconv/include/DxilConvPasses/ScopeNestIterator.h +++ b/projects/dxilconv/include/DxilConvPasses/ScopeNestIterator.h @@ -205,6 +205,15 @@ class ScopeNestIterator { } } + bool IsIntermediate() { + switch (Kind) { + case BranchKind::SwitchFallthrough: + return true; + default: + return false; + } + } + // Translate a branch annoatation to the corresponding event type. ScopeNestEvent::Type TranslateToNestType() { switch (Kind) { @@ -226,6 +235,8 @@ class ScopeNestIterator { return ScopeNestEvent::Type::Switch_Begin; case BranchKind::SwitchBreak: return ScopeNestEvent::Type::Switch_Break; + case BranchKind::SwitchFallthrough: + return ScopeNestEvent::Type::Body; case BranchKind::LoopBegin: return ScopeNestEvent::Type::Loop_Begin; @@ -868,6 +879,12 @@ class ScopeNestIterator { MoveFromTopOfStack(); break; + // Fallthrough just continues in the current scope + case BranchKind::SwitchFallthrough: + SetCurrent(ScopeNestEvent::Type::Body, m_current.Block); + MoveToFirstSuccessor(); // continue exploring successors + break; + // Already exited an old scope. case BranchKind::IfEnd: case BranchKind::SwitchEnd: @@ -914,6 +931,9 @@ class ScopeNestIterator { if (BranchAnnotation annotation = BranchAnnotation::Read(B)) { if (annotation.IsEndScope()) { EnterEndOfScope(B, annotation.Get()); + } else if (annotation.IsIntermediate()) { + // Just treat it as body but keep the annotation + SetCurrent(ScopeNestEvent::Type::Body, B); } else { DXASSERT_NOMSG(annotation.IsBeginScope()); StartNewScope(B, annotation.Get()); diff --git a/projects/dxilconv/include/DxilConvPasses/ScopeNestedCFG.h b/projects/dxilconv/include/DxilConvPasses/ScopeNestedCFG.h index 9f7fb0b2f6..f8402ef410 100644 --- a/projects/dxilconv/include/DxilConvPasses/ScopeNestedCFG.h +++ b/projects/dxilconv/include/DxilConvPasses/ScopeNestedCFG.h @@ -39,6 +39,7 @@ enum class BranchKind { SwitchEnd, SwitchNoEnd, SwitchBreak, + SwitchFallthrough, LoopBegin, LoopExit, diff --git a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp index 4f1b4be4b3..2a7405706d 100644 --- a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp +++ b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp @@ -162,6 +162,8 @@ class ScopeNestedCFG : public FunctionPass { bool IsAcyclicRegionTerminator(const BasicBlock *pBB); BasicBlock *GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB); + bool IsSwitchCaseBlock(BasicBlock *BB); + bool IsSwitchFallthrough(BasicBlock *Pred, BasicBlock *BB); bool IsMergePoint(BasicBlock *pBB); BasicBlock *SplitEdge(BasicBlock *pBB, unsigned SuccIdx, const Twine &Name, @@ -646,6 +648,44 @@ BasicBlock *ScopeNestedCFG::GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB) { return pEffectiveSuccessor; } + +bool ScopeNestedCFG::IsSwitchCaseBlock(BasicBlock *BB) { + for (BasicBlock *Pred : predecessors(BB)) { + if (auto *SI = dyn_cast(Pred->getTerminator())) { + for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + if (SI->getSuccessor(i) == BB) + return true; + } + } + } + return false; +} + +bool ScopeNestedCFG::IsSwitchFallthrough(BasicBlock *Pred, BasicBlock *BB) { + // 1. Predecessor must NOT be the switch dispatch block + if (isa(Pred->getTerminator())) + return false; + + // 2. Predecessor must end in unconditional branch + auto *Br = dyn_cast(Pred->getTerminator()); + if (!Br || !Br->isUnconditional()) + return false; + + // 3. BB must be reached by that unconditional branch + if (Br->getSuccessor(0) != BB) + return false; + + // 4. Predecessor must be a switch case block + if (!IsSwitchCaseBlock(Pred)) + return false; + + // 5. Current block must be another switch case block + if (!IsSwitchCaseBlock(BB)) + return false; + + return true; +} + // Returns true if this basic block contains an instruction that // is *control-flow convergence sensitive*. // @@ -1659,8 +1699,13 @@ void ScopeNestedCFG::TransformAcyclicRegion(BasicBlock *pEntry) { // BasicBlock *pSuccBB = pScopeBeginTI->getSuccessor(Scope.SuccIdx); + // Annotate all switch fallthrough branches + if (IsSwitchFallthrough(Scope.pScopeBeginBB, pSuccBB)) { + AnnotateBranch(Scope.pClonedScopeBeginBB, BranchKind::SwitchFallthrough); + } + + // Only for convergent blocks, end the scope here if (HasConvergentCall(Scope.pScopeBeginBB)) { - // For convergent blocks, force the successor to end the scope here Scope.pScopeEndBB = Scope.pScopeBeginBB; Scope.pClonedScopeEndBB = nullptr; continue; @@ -1721,6 +1766,11 @@ void ScopeNestedCFG::TransformAcyclicRegion(BasicBlock *pEntry) { BasicBlock *pClonedSucc = CloneNode(pSuccBB, BlockClones, RegionValueRemap); + if (bSwitchScope && IsSwitchFallthrough(Scope.pScopeBeginBB, pSuccBB)) { + AnnotateBranch(Scope.pClonedScopeBeginBB, + BranchKind::SwitchFallthrough); + } + if (bIfScope || bSwitchScope) { ScopeStackItem *pParentScope = GetScope(); pParentScope->pPrevSuccBB = pSuccBB; diff --git a/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops.ll b/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops.ll index e4f31d5814..ae0c75dc61 100644 --- a/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops.ll +++ b/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops.ll @@ -9,7 +9,7 @@ ; only be called by different threads when control flow ; is distinct between those threads -declare float @dx.wave.op(float) convergent +declare float @dx.op.waveActiveOp.f32(i32, float, i8, i8) ; CHECK-LABEL: define void @CSMain @@ -23,9 +23,9 @@ entry: ; CHECK: case0: case0: ; switch case 0 -; CHECK: call float @dx.wave.op(float %v) -; CHECK: br label %case1 - %w0 = call float @dx.wave.op(float %v) +; CHECK: call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) +; CHECK: br label %case1, !dx.BranchKind ![[BK:.*]] + %w0 = call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) br label %case1 ; FALLTHROUGH @@ -34,17 +34,20 @@ case1: ; switch case 1 %a = phi float [ %w0, %case0 ], [ 0.0, %entry ] -; CHECK: call float @dx.wave.op(float %v) -; CHECK: br label %exit - %w1 = call float @dx.wave.op(float %v) +; CHECK: call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) +; CHECK: br label %exit, !dx.BranchKind ![[BK]] + %w1 = call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) %sum = fadd float %a, %w1 br label %exit ; no cloning, so there should be no more waveops after this point -; CHECK-NOT: call float @dx.wave.op +; CHECK-NOT: call float @dx.waveActiveOp.f32(i32 119 exit: %r = phi float [ %sum, %case1 ], [ 0.0, %entry ] ret void } + +; The BranchKind::SwitchFallthrough Kind ID is 8 +; CHECK: ![[BK]] = !{i32 8} From 1d7338a6153ec542f9b2e6cbc861fcf57a68c3d9 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 5 Feb 2026 17:05:07 -0800 Subject: [PATCH 7/8] remove unneeded block of code --- projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp index 2a7405706d..60f308ed24 100644 --- a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp +++ b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp @@ -1766,11 +1766,6 @@ void ScopeNestedCFG::TransformAcyclicRegion(BasicBlock *pEntry) { BasicBlock *pClonedSucc = CloneNode(pSuccBB, BlockClones, RegionValueRemap); - if (bSwitchScope && IsSwitchFallthrough(Scope.pScopeBeginBB, pSuccBB)) { - AnnotateBranch(Scope.pClonedScopeBeginBB, - BranchKind::SwitchFallthrough); - } - if (bIfScope || bSwitchScope) { ScopeStackItem *pParentScope = GetScope(); pParentScope->pPrevSuccBB = pSuccBB; From 8f3aa1681f22af1917cc2b538c7f23b00914342a Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 5 Feb 2026 21:41:19 -0800 Subject: [PATCH 8/8] address Chris: add scopenestinfo test --- .../no_cloning_convergent_ops_nestinfo.ll | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops_nestinfo.ll diff --git a/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops_nestinfo.ll b/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops_nestinfo.ll new file mode 100644 index 0000000000..db2afd4e0a --- /dev/null +++ b/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops_nestinfo.ll @@ -0,0 +1,54 @@ +; RUN: %opt-exe %s -scopenested -scopenestinfo -analyze -S | FileCheck %s + +; verify that scope nest info looks correct for fallthrough cases +; we expect to see switch fallthrough treated as "Body", so +; it should not effect the scope indentation level. + + +; CHECK: @TopLevel_Begin +; CHECK: entry +; CHECK: @Switch_Begin +; CHECK: @Switch_Case +; CHECK: exit +; CHECK: @Switch_Break +; CHECK: @Switch_Case +; CHECK: case0 +; CHECK: case1 +; CHECK: exit +; CHECK: @Switch_Break +; CHECK: @Switch_Case +; CHECK: case1 +; CHECK: exit +; CHECK: @Switch_Break +; CHECK: @Switch_End +; CHECK: @TopLevel_End + +declare float @dx.op.waveActiveOp.f32(i32, float, i8, i8) + + +define void @CSMain(i32 %tid, float %v) convergent { +entry: + switch i32 %tid, label %exit [ + i32 0, label %case0 + i32 1, label %case1 + ] + +case0: ; switch case 0 + + %w0 = call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) + br label %case1 ; FALLTHROUGH + + +case1: ; switch case 1 + %a = phi float [ %w0, %case0 ], + [ 0.0, %entry ] + + %w1 = call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) + %sum = fadd float %a, %w1 + br label %exit + +exit: + %r = phi float [ %sum, %case1 ], + [ 0.0, %entry ] + ret void +}