diff --git a/include/dxc/DXIL/DxilOperations.h b/include/dxc/DXIL/DxilOperations.h index fabf07ee14..9391e8791f 100644 --- a/include/dxc/DXIL/DxilOperations.h +++ b/include/dxc/DXIL/DxilOperations.h @@ -148,6 +148,7 @@ class OP { static bool IsDxilOpFuncCallInst(const llvm::Instruction *I); static bool IsDxilOpFuncCallInst(const llvm::Instruction *I, OpCode opcode); static bool IsDxilOpWave(OpCode C); + static bool IsConvergentOp(OpCode C); static bool IsDxilOpGradient(OpCode C); static bool IsDxilOpFeedback(OpCode C); static bool IsDxilOpBarrier(OpCode C); diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index eb5b2a2ceb..4acb72f444 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -9,6 +9,10 @@ // // /////////////////////////////////////////////////////////////////////////////// +// This file contains functions with generated code, +// and must be clang-format compliant, unless +// clang-format is turned off. + #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilInstructions.h" @@ -3365,6 +3369,18 @@ bool OP::IsDxilOpWave(OpCode C) { // OPCODE-WAVE:END } +bool OP::IsConvergentOp(OpCode C) { + // This accounts for wave and quad ops + unsigned op = (unsigned)C; + if (OP::IsDxilOpWave(C)) + return true; + // Account for derivative ops as well: + // DerivCoarseX = 83, DerivCoarseY = 84, DerivFineX = 85, DerivFineY = 86 + if (83 <= op && op <= 86) + return true; + return false; +} + bool OP::IsDxilOpGradient(OpCode C) { unsigned op = (unsigned)C; // clang-format off diff --git a/projects/dxilconv/include/DxilConvPasses/ScopeNestIterator.h b/projects/dxilconv/include/DxilConvPasses/ScopeNestIterator.h index f86d8c69b7..8b801dc97a 100644 --- a/projects/dxilconv/include/DxilConvPasses/ScopeNestIterator.h +++ b/projects/dxilconv/include/DxilConvPasses/ScopeNestIterator.h @@ -205,6 +205,15 @@ class ScopeNestIterator { } } + bool IsIntermediate() { + switch (Kind) { + case BranchKind::SwitchFallthrough: + return true; + default: + return false; + } + } + // Translate a branch annoatation to the corresponding event type. ScopeNestEvent::Type TranslateToNestType() { switch (Kind) { @@ -226,6 +235,8 @@ class ScopeNestIterator { return ScopeNestEvent::Type::Switch_Begin; case BranchKind::SwitchBreak: return ScopeNestEvent::Type::Switch_Break; + case BranchKind::SwitchFallthrough: + return ScopeNestEvent::Type::Body; case BranchKind::LoopBegin: return ScopeNestEvent::Type::Loop_Begin; @@ -868,6 +879,12 @@ class ScopeNestIterator { MoveFromTopOfStack(); break; + // Fallthrough just continues in the current scope + case BranchKind::SwitchFallthrough: + SetCurrent(ScopeNestEvent::Type::Body, m_current.Block); + MoveToFirstSuccessor(); // continue exploring successors + break; + // Already exited an old scope. case BranchKind::IfEnd: case BranchKind::SwitchEnd: @@ -914,6 +931,9 @@ class ScopeNestIterator { if (BranchAnnotation annotation = BranchAnnotation::Read(B)) { if (annotation.IsEndScope()) { EnterEndOfScope(B, annotation.Get()); + } else if (annotation.IsIntermediate()) { + // Just treat it as body but keep the annotation + SetCurrent(ScopeNestEvent::Type::Body, B); } else { DXASSERT_NOMSG(annotation.IsBeginScope()); StartNewScope(B, annotation.Get()); diff --git a/projects/dxilconv/include/DxilConvPasses/ScopeNestedCFG.h b/projects/dxilconv/include/DxilConvPasses/ScopeNestedCFG.h index 9f7fb0b2f6..f8402ef410 100644 --- a/projects/dxilconv/include/DxilConvPasses/ScopeNestedCFG.h +++ b/projects/dxilconv/include/DxilConvPasses/ScopeNestedCFG.h @@ -39,6 +39,7 @@ enum class BranchKind { SwitchEnd, SwitchNoEnd, SwitchBreak, + SwitchFallthrough, LoopBegin, LoopExit, diff --git a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp index 3744568e53..60f308ed24 100644 --- a/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp +++ b/projects/dxilconv/lib/DxilConvPasses/ScopeNestedCFG.cpp @@ -10,6 +10,7 @@ /////////////////////////////////////////////////////////////////////////////// #include "DxilConvPasses/ScopeNestedCFG.h" +#include "dxc/DXIL/DxilOperations.h" #include "dxc/Support/Global.h" #include "llvm/Analysis/ReducibilityAnalysis.h" @@ -161,6 +162,8 @@ class ScopeNestedCFG : public FunctionPass { bool IsAcyclicRegionTerminator(const BasicBlock *pBB); BasicBlock *GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB); + bool IsSwitchCaseBlock(BasicBlock *BB); + bool IsSwitchFallthrough(BasicBlock *Pred, BasicBlock *BB); bool IsMergePoint(BasicBlock *pBB); BasicBlock *SplitEdge(BasicBlock *pBB, unsigned SuccIdx, const Twine &Name, @@ -646,6 +649,71 @@ BasicBlock *ScopeNestedCFG::GetEffectiveNodeToFollowSuccessor(BasicBlock *pBB) { return pEffectiveSuccessor; } +bool ScopeNestedCFG::IsSwitchCaseBlock(BasicBlock *BB) { + for (BasicBlock *Pred : predecessors(BB)) { + if (auto *SI = dyn_cast(Pred->getTerminator())) { + for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + if (SI->getSuccessor(i) == BB) + return true; + } + } + } + return false; +} + +bool ScopeNestedCFG::IsSwitchFallthrough(BasicBlock *Pred, BasicBlock *BB) { + // 1. Predecessor must NOT be the switch dispatch block + if (isa(Pred->getTerminator())) + return false; + + // 2. Predecessor must end in unconditional branch + auto *Br = dyn_cast(Pred->getTerminator()); + if (!Br || !Br->isUnconditional()) + return false; + + // 3. BB must be reached by that unconditional branch + if (Br->getSuccessor(0) != BB) + return false; + + // 4. Predecessor must be a switch case block + if (!IsSwitchCaseBlock(Pred)) + return false; + + // 5. Current block must be another switch case block + if (!IsSwitchCaseBlock(BB)) + return false; + + return true; +} + +// Returns true if this basic block contains an instruction that +// is *control-flow convergence sensitive*. +// +// In DXIL, this includes: +// - Wave operations (WaveActive*, WaveReadLane*, etc.) +// - Quad / derivative operations (ddx, ddy, fwidth) +// +// Such instructions must NEVER be cloned or executed along +// structurally duplicated control-flow paths. +bool HasConvergentCall(BasicBlock *BB) { + for (Instruction &I : *BB) { + auto *CI = dyn_cast(&I); + if (!CI) + continue; + + if (!hlsl::OP::IsDxilOpFuncCallInst(&I)) + continue; + + hlsl::OP::OpCode OpCode = hlsl::OP::GetDxilOpFuncCallInst(&I); + + if (hlsl::OP::IsConvergentOp(OpCode)) { + return true; + } + } + + return false; +} + bool ScopeNestedCFG::IsMergePoint(BasicBlock *pBB) { unordered_set UniquePredecessors; for (auto itPred = pred_begin(pBB), endPred = pred_end(pBB); @@ -1080,6 +1148,11 @@ void ScopeNestedCFG::DetermineScopeEndPoints( pEndBB = BTO.GetBlock(MPI.MP); } + if (HasConvergentCall(pBB)) { + // Force scope end point to be the block itself + pEndBB = pBB; + } + auto itOldEndPointBB = ScopeEndPoints.find(pBB); if (itOldEndPointBB != ScopeEndPoints.end() && itOldEndPointBB->second != pEndBB) { @@ -1255,6 +1328,13 @@ void ScopeNestedCFG::DetermineReachableMergePoints( DXASSERT_NOMSG(m_LE2LBMap.find(pBB) == m_LE2LBMap.cend()); MPI.CandidateSet.insert(iBB); } + + if (HasConvergentCall(pBB)) { + // Force the merge point to be the block itself + MPI.MP = BTO.GetBlockId(pBB); + MPI.CandidateSet.clear(); + MPI.CandidateSet.insert(MPI.MP); + } } // TODO: during final testing consider to remove. @@ -1619,6 +1699,18 @@ void ScopeNestedCFG::TransformAcyclicRegion(BasicBlock *pEntry) { // BasicBlock *pSuccBB = pScopeBeginTI->getSuccessor(Scope.SuccIdx); + // Annotate all switch fallthrough branches + if (IsSwitchFallthrough(Scope.pScopeBeginBB, pSuccBB)) { + AnnotateBranch(Scope.pClonedScopeBeginBB, BranchKind::SwitchFallthrough); + } + + // Only for convergent blocks, end the scope here + if (HasConvergentCall(Scope.pScopeBeginBB)) { + Scope.pScopeEndBB = Scope.pScopeBeginBB; + Scope.pClonedScopeEndBB = nullptr; + continue; + } + // 7. Already processed successor. if (bIfScope || bSwitchScope) { if (pSuccBB == Scope.pPrevSuccBB) { diff --git a/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops.ll b/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops.ll new file mode 100644 index 0000000000..ae0c75dc61 --- /dev/null +++ b/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops.ll @@ -0,0 +1,53 @@ +; RUN: %opt-exe %s -scopenested -S | FileCheck %s + +; verify that this pass won't identify fallthrough blocks +; as merge points and in turn won't duplicate blocks +; which contain convergent operations + +; previously, blocks with wave operations would be cloned, +; violating the principle that wave operations should +; only be called by different threads when control flow +; is distinct between those threads + +declare float @dx.op.waveActiveOp.f32(i32, float, i8, i8) + +; CHECK-LABEL: define void @CSMain + +define void @CSMain(i32 %tid, float %v) convergent { +entry: + switch i32 %tid, label %exit [ + i32 0, label %case0 + i32 1, label %case1 + ] + +; CHECK: case0: +case0: ; switch case 0 + +; CHECK: call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) +; CHECK: br label %case1, !dx.BranchKind ![[BK:.*]] + %w0 = call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) + br label %case1 ; FALLTHROUGH + + +; CHECK: case1: +case1: ; switch case 1 + %a = phi float [ %w0, %case0 ], + [ 0.0, %entry ] + +; CHECK: call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) +; CHECK: br label %exit, !dx.BranchKind ![[BK]] + %w1 = call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) + %sum = fadd float %a, %w1 + br label %exit + +; no cloning, so there should be no more waveops after this point +; CHECK-NOT: call float @dx.waveActiveOp.f32(i32 119 + +exit: + %r = phi float [ %sum, %case1 ], + [ 0.0, %entry ] + ret void +} + +; The BranchKind::SwitchFallthrough Kind ID is 8 +; CHECK: ![[BK]] = !{i32 8} diff --git a/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops_nestinfo.ll b/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops_nestinfo.ll new file mode 100644 index 0000000000..db2afd4e0a --- /dev/null +++ b/projects/dxilconv/test/scope_nest_iterator/no_cloning_convergent_ops_nestinfo.ll @@ -0,0 +1,54 @@ +; RUN: %opt-exe %s -scopenested -scopenestinfo -analyze -S | FileCheck %s + +; verify that scope nest info looks correct for fallthrough cases +; we expect to see switch fallthrough treated as "Body", so +; it should not effect the scope indentation level. + + +; CHECK: @TopLevel_Begin +; CHECK: entry +; CHECK: @Switch_Begin +; CHECK: @Switch_Case +; CHECK: exit +; CHECK: @Switch_Break +; CHECK: @Switch_Case +; CHECK: case0 +; CHECK: case1 +; CHECK: exit +; CHECK: @Switch_Break +; CHECK: @Switch_Case +; CHECK: case1 +; CHECK: exit +; CHECK: @Switch_Break +; CHECK: @Switch_End +; CHECK: @TopLevel_End + +declare float @dx.op.waveActiveOp.f32(i32, float, i8, i8) + + +define void @CSMain(i32 %tid, float %v) convergent { +entry: + switch i32 %tid, label %exit [ + i32 0, label %case0 + i32 1, label %case1 + ] + +case0: ; switch case 0 + + %w0 = call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) + br label %case1 ; FALLTHROUGH + + +case1: ; switch case 1 + %a = phi float [ %w0, %case0 ], + [ 0.0, %entry ] + + %w1 = call float @dx.op.waveActiveOp.f32(i32 119, float %v, i8 0, i8 0) + %sum = fadd float %a, %w1 + br label %exit + +exit: + %r = phi float [ %sum, %case1 ], + [ 0.0, %entry ] + ret void +}