diff --git a/include/dxc/DxilPIXPasses/DxilPIXPasses.h b/include/dxc/DxilPIXPasses/DxilPIXPasses.h index 5cc7c4aa50..8f34efca23 100644 --- a/include/dxc/DxilPIXPasses/DxilPIXPasses.h +++ b/include/dxc/DxilPIXPasses/DxilPIXPasses.h @@ -28,6 +28,7 @@ ModulePass *createDxilShaderAccessTrackingPass(); ModulePass *createDxilPIXAddTidToAmplificationShaderPayloadPass(); ModulePass *createDxilPIXDXRInvocationsLogPass(); ModulePass *createDxilNonUniformResourceIndexInstrumentationPass(); +ModulePass *createDxilDebugBreakInstrumentationPass(); void initializeDxilAddPixelHitInstrumentationPass(llvm::PassRegistry &); void initializeDxilDbgValueToDbgDeclarePass(llvm::PassRegistry &); @@ -44,5 +45,6 @@ void initializeDxilPIXAddTidToAmplificationShaderPayloadPass( void initializeDxilPIXDXRInvocationsLogPass(llvm::PassRegistry &); void initializeDxilNonUniformResourceIndexInstrumentationPass( llvm::PassRegistry &); +void initializeDxilDebugBreakInstrumentationPass(llvm::PassRegistry &); } // namespace llvm diff --git a/lib/DxilPIXPasses/CMakeLists.txt b/lib/DxilPIXPasses/CMakeLists.txt index 67e77f17cd..9565e6800f 100644 --- a/lib/DxilPIXPasses/CMakeLists.txt +++ b/lib/DxilPIXPasses/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_library(LLVMDxilPIXPasses DxilPIXAddTidToAmplificationShaderPayload.cpp DxilPIXDXRInvocationsLog.cpp DxilNonUniformResourceIndexInstrumentation.cpp + DxilDebugBreakInstrumentation.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR diff --git a/lib/DxilPIXPasses/DxilDebugBreakInstrumentation.cpp b/lib/DxilPIXPasses/DxilDebugBreakInstrumentation.cpp new file mode 100644 index 0000000000..896d0689cb --- /dev/null +++ b/lib/DxilPIXPasses/DxilDebugBreakInstrumentation.cpp @@ -0,0 +1,152 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// DxilDebugBreakInstrumentation.cpp // +// Copyright (C) Microsoft Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// Provides a pass to instrument DebugBreak() calls for PIX. Each // +// DebugBreak call is replaced with a UAV bit-write so PIX can detect // +// which DebugBreak locations were hit without halting the GPU. // +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "PixPassHelpers.h" +#include "dxc/DXIL/DxilOperations.h" +#include "dxc/DxilPIXPasses/DxilPIXPasses.h" +#include "dxc/DxilPIXPasses/DxilPIXVirtualRegisters.h" +#include "dxc/Support/Global.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; +using namespace hlsl; + +class DxilDebugBreakInstrumentation : public ModulePass { + +public: + static char ID; // Pass identification, replacement for typeid + explicit DxilDebugBreakInstrumentation() : ModulePass(ID) {} + StringRef getPassName() const override { + return "DXIL DebugBreak Instrumentation"; + } + bool runOnModule(Module &M) override; +}; + +bool DxilDebugBreakInstrumentation::runOnModule(Module &M) { + DxilModule &DM = M.GetOrCreateDxilModule(); + LLVMContext &Ctx = M.getContext(); + OP *HlslOP = DM.GetOP(); + + hlsl::DxilResource *PixUAVResource = nullptr; + + UndefValue *UndefArg = UndefValue::get(Type::getInt32Ty(Ctx)); + + // Atomic operation to use for writing to the result UAV resource + Function *AtomicOpFunc = + HlslOP->GetOpFunc(OP::OpCode::AtomicBinOp, Type::getInt32Ty(Ctx)); + Constant *AtomicBinOpcode = + HlslOP->GetU32Const((uint32_t)OP::OpCode::AtomicBinOp); + Constant *AtomicOr = HlslOP->GetU32Const((uint32_t)DXIL::AtomicBinOpCode::Or); + + std::map FunctionToUAVHandle; + + // Collect all DebugBreak calls first, then modify. + // This avoids invalidating iterators during modification. + std::vector DebugBreakCalls; + + Function *DebugBreakFunc = + HlslOP->GetOpFunc(OP::OpCode::DebugBreak, Type::getVoidTy(Ctx)); + for (auto UI = DebugBreakFunc->use_begin(); + UI != DebugBreakFunc->use_end();) { + auto &Use = *UI++; + DebugBreakCalls.push_back(cast(Use.getUser())); + } + + for (CallInst *CI : DebugBreakCalls) { + if (!PixUAVResource) { + PixUAVResource = + PIXPassHelpers::CreateGlobalUAVResource(DM, 0, "PixUAVResource"); + } + + Function *F = CI->getParent()->getParent(); + + CallInst *PixUAVHandle = nullptr; + const auto FunctionToUAVHandleIter = FunctionToUAVHandle.lower_bound(F); + + if ((FunctionToUAVHandleIter != FunctionToUAVHandle.end()) && + (FunctionToUAVHandleIter->first == F)) { + PixUAVHandle = FunctionToUAVHandleIter->second; + } else { + IRBuilder<> Builder(F->getEntryBlock().getFirstInsertionPt()); + + PixUAVHandle = PIXPassHelpers::CreateHandleForResource( + DM, Builder, PixUAVResource, "PixUAVHandle"); + + FunctionToUAVHandle.insert(FunctionToUAVHandleIter, {F, PixUAVHandle}); + } + + IRBuilder<> Builder(CI); + + uint32_t InstructionNumber = 0; + if (!pix_dxil::PixDxilInstNum::FromInst(CI, &InstructionNumber)) { + DXASSERT_NOMSG(false); + } + + // The output UAV is treated as a bit array where each bit corresponds + // to an instruction number. + const uint32_t InstructionNumByteOffset = + (InstructionNumber / 32u) * sizeof(uint32_t); + const uint32_t InstructionNumBitPosition = (InstructionNumber % 32u); + const uint32_t InstructionNumBitMask = 1u << InstructionNumBitPosition; + + Constant *UAVByteOffsetArg = HlslOP->GetU32Const(InstructionNumByteOffset); + Constant *BitMaskArg = HlslOP->GetU32Const(InstructionNumBitMask); + + // Write a 1 bit at the position corresponding to this DebugBreak's + // instruction number, indicating it was hit. + Builder.CreateCall( + AtomicOpFunc, + { + AtomicBinOpcode, // i32, ; opcode + PixUAVHandle, // %dx.types.Handle, ; resource handle + AtomicOr, // i32, ; binary operation code + UAVByteOffsetArg, // i32, ; coordinate c0: byte offset + UndefArg, // i32, ; coordinate c1 (unused) + UndefArg, // i32, ; coordinate c2 (unused) + BitMaskArg // i32); value + }, + "DebugBreakBitSet"); + + // Remove the original DebugBreak call to prevent GPU halt + CI->eraseFromParent(); + } + + // Remove the now-unused dx.op.debugBreak function declaration so the + // DebugBreak operation is fully eliminated from the module. + if (DebugBreakFunc->use_empty()) + DebugBreakFunc->eraseFromParent(); + + const bool modified = (PixUAVResource != nullptr); + + if (modified) { + DM.ReEmitDxilResources(); + + if (OSOverride != nullptr) { + formatted_raw_ostream FOS(*OSOverride); + FOS << "\nFoundDebugBreak\n"; + } + } + + return modified; +} + +char DxilDebugBreakInstrumentation::ID = 0; + +ModulePass *llvm::createDxilDebugBreakInstrumentationPass() { + return new DxilDebugBreakInstrumentation(); +} + +INITIALIZE_PASS(DxilDebugBreakInstrumentation, + "hlsl-dxil-debugbreak-instrumentation", + "HLSL DXIL DebugBreak instrumentation for PIX", false, false) diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugBreakInstrumentation.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugBreakInstrumentation.hlsl new file mode 100644 index 0000000000..833d296ab9 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/pix/DebugBreakInstrumentation.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -Emain -Tcs_6_10 %s | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debugbreak-instrumentation | %FileCheck %s + +// Verify the PIX UAV handle is created for DebugBreak instrumentation: +// CHECK: %PixUAVHandle = call %dx.types.Handle @dx.op.createHandleFromBinding( + +// Verify an AtomicBinOp (opcode 78) was emitted to record the DebugBreak hit: +// CHECK: %DebugBreakBitSet = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle + +// Verify the original DebugBreak call was removed: +// CHECK-NOT: @dx.op.debugBreak + +[numthreads(1, 1, 1)] +void main() { + DebugBreak(); +} diff --git a/tools/clang/test/HLSLFileCheck/pix/DebugBreakInstrumentationMultiple.hlsl b/tools/clang/test/HLSLFileCheck/pix/DebugBreakInstrumentationMultiple.hlsl new file mode 100644 index 0000000000..8e834e42fe --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/pix/DebugBreakInstrumentationMultiple.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -Emain -Tcs_6_10 %s | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debugbreak-instrumentation | %FileCheck %s + +// Verify the PIX UAV handle is created: +// CHECK: %PixUAVHandle = call %dx.types.Handle @dx.op.createHandleFromBinding( + +// Verify two AtomicBinOp calls were emitted (one per DebugBreak): +// CHECK: DebugBreakBitSet{{.*}} = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle +// CHECK: DebugBreakBitSet{{.*}} = call i32 @dx.op.atomicBinOp.i32(i32 78, %dx.types.Handle + +// Verify no DebugBreak calls remain: +// CHECK-NOT: @dx.op.debugBreak + +RWByteAddressBuffer buf : register(u0); + +[numthreads(1, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + if (tid.x == 0) + DebugBreak(); + + buf.Store(0, tid.x); + + if (tid.x == 1) + DebugBreak(); +} diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index 991c4f38fc..93aad0281f 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -152,6 +152,10 @@ class PixTest : public ::testing::Test { TEST_METHOD(DebugInstrumentation_VectorAllocaWrite_Structs) + TEST_METHOD(DebugBreakInstrumentation_Basic) + TEST_METHOD(DebugBreakInstrumentation_NoDebugBreak) + TEST_METHOD(DebugBreakInstrumentation_Multiple) + TEST_METHOD(NonUniformResourceIndex_Resource) TEST_METHOD(NonUniformResourceIndex_DescriptorHeap) TEST_METHOD(NonUniformResourceIndex_Raytracing) @@ -238,6 +242,27 @@ class PixTest : public ::testing::Test { std::move(pOptimizedModule), {}, Tokenize(outputText.c_str(), "\n")}; } + PassOutput RunDebugBreakPass(IDxcBlob *dxil) { + CComPtr pOptimizer; + VERIFY_SUCCEEDED( + m_dllSupport.CreateInstance(CLSID_DxcOptimizer, &pOptimizer)); + std::vector Options; + Options.push_back(L"-opt-mod-passes"); + Options.push_back(L"-dxil-annotate-with-virtual-regs"); + Options.push_back(L"-hlsl-dxil-debugbreak-instrumentation"); + Options.push_back(L"-hlsl-dxilemit"); + + CComPtr pOptimizedModule; + CComPtr pText; + VERIFY_SUCCEEDED(pOptimizer->RunOptimizer( + dxil, Options.data(), Options.size(), &pOptimizedModule, &pText)); + + std::string outputText = BlobToUtf8(pText); + + return { + std::move(pOptimizedModule), {}, Tokenize(outputText.c_str(), "\n")}; + } + CComPtr FindModule(hlsl::DxilFourCC fourCC, IDxcBlob *pSource) { const UINT32 BC_C0DE = ((INT32)(INT8)'B' | (INT32)(INT8)'C' << 8 | (INT32)0xDEC0 << 16); // BC0xc0de in big endian @@ -3362,3 +3387,79 @@ void RaygenInternalName() for (auto const &b : RayPayloadElementCoverage) VERIFY_IS_TRUE(b); } + +TEST_F(PixTest, DebugBreakInstrumentation_Basic) { + + const char *source = R"x( +[numthreads(1, 1, 1)] +void main() { + DebugBreak(); +})x"; + + auto compiled = Compile(m_dllSupport, source, L"cs_6_10", {}); + auto output = RunDebugBreakPass(compiled); + bool foundDebugBreak = false; + for (auto const &line : output.lines) { + if (line.find("FoundDebugBreak") != std::string::npos) + foundDebugBreak = true; + } + VERIFY_IS_TRUE(foundDebugBreak); +} + +TEST_F(PixTest, DebugBreakInstrumentation_NoDebugBreak) { + + const char *source = R"x( +RWByteAddressBuffer buf : register(u0); +[numthreads(1, 1, 1)] +void main() { + buf.Store(0, 1); +})x"; + + auto compiled = Compile(m_dllSupport, source, L"cs_6_0", {}); + auto output = RunDebugBreakPass(compiled); + bool foundDebugBreak = false; + for (auto const &line : output.lines) { + if (line.find("FoundDebugBreak") != std::string::npos) + foundDebugBreak = true; + } + VERIFY_IS_FALSE(foundDebugBreak); +} + +TEST_F(PixTest, DebugBreakInstrumentation_Multiple) { + + const char *source = R"x( +RWByteAddressBuffer buf : register(u0); +[numthreads(1, 1, 1)] +void main(uint3 tid : SV_DispatchThreadID) { + if (tid.x == 0) + DebugBreak(); + buf.Store(0, tid.x); + if (tid.x == 1) + DebugBreak(); +})x"; + + auto compiled = Compile(m_dllSupport, source, L"cs_6_10", {}); + auto output = RunDebugBreakPass(compiled); + bool foundDebugBreak = false; + for (auto const &line : output.lines) { + if (line.find("FoundDebugBreak") != std::string::npos) + foundDebugBreak = true; + } + VERIFY_IS_TRUE(foundDebugBreak); + + // Verify the disassembly contains the expected AtomicBinOp calls + // and no remaining DebugBreak calls + auto disassembly = Disassemble(output.blob); + VERIFY_IS_TRUE(disassembly.find("dx.op.debugBreak") == std::string::npos); + + // Count the number of DebugBreakBitSet calls to verify both + // DebugBreak() calls were instrumented + int debugBreakBitSetCount = 0; + std::string::size_type pos = 0; + while ((pos = disassembly.find("DebugBreakBitSet", pos)) != + std::string::npos) { + debugBreakBitSetCount++; + pos += strlen("DebugBreakBitSet"); + } + VERIFY_ARE_EQUAL(debugBreakBitSetCount, 2); +} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 11deef2267..683b1228ba 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -7134,6 +7134,12 @@ def add_pass(name, type_name, doc, opts): "HLSL DXIL NonUniformResourceIndex instrumentation for PIX", [], ) + add_pass( + "hlsl-dxil-debugbreak-instrumentation", + "DxilDebugBreakInstrumentation", + "HLSL DXIL DebugBreak instrumentation for PIX", + [], + ) category_lib = "dxil_gen"