diff --git a/28_FFTBloom/main.cpp b/28_FFTBloom/main.cpp index 85d746b75..e8ec015e3 100644 --- a/28_FFTBloom/main.cpp +++ b/28_FFTBloom/main.cpp @@ -227,6 +227,13 @@ class FFTBloomApp final : public SimpleWindowedApplication, public BuiltinResour FFTBloomApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = true; + return retval; + } + bool onAppInitialized(smart_refctd_ptr&& system) override { // Remember to call the base class initialization! @@ -730,12 +737,27 @@ class FFTBloomApp final : public SimpleWindowedApplication, public BuiltinResour // Normalization doesn't require full subgroups params[i].cached.requireFullSubgroups = bool(2-i); params[i].shader.requiredSubgroupSize = static_cast(hlsl::findMSB(deviceLimits.maxSubgroupSize)); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params[i].flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params[i].flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } } - + smart_refctd_ptr pipelines[3]; if(!m_device->createComputePipelines(nullptr, { params, 3 }, pipelines)) return logFail("Failed to create Compute Pipelines!\n"); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + const char* kernelNames[] = {"Kernel First Axis FFT", "Kernel Second Axis FFT", "Kernel Spectrum Normalize"}; + for (auto i = 0u; i < 3; i++) + { + auto report = system::to_string(pipelines[i]->getExecutableInfo()); + m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, kernelNames[i], report.c_str()); + } + } + // Push Constants - only need to specify BDAs here PushConstantData 
pushConstants; pushConstants.colMajorBufferAddress = m_colMajorBufferAddress[0]; @@ -933,12 +955,27 @@ class FFTBloomApp final : public SimpleWindowedApplication, public BuiltinResour params[i].shader.entryPoint = "main"; params[i].shader.requiredSubgroupSize = static_cast(hlsl::findMSB(deviceLimits.maxSubgroupSize)); params[i].cached.requireFullSubgroups = true; + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params[i].flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params[i].flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } } smart_refctd_ptr pipelines[3]; if (!m_device->createComputePipelines(nullptr, { params, 3 }, pipelines)) return logFail("Failed to create Compute Pipelines!\n"); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + const char* imageNames[] = {"Image First Axis FFT", "FFT Convolve IFFT", "Image First Axis IFFT"}; + for (auto i = 0u; i < 3; i++) + { + auto report = system::to_string(pipelines[i]->getExecutableInfo()); + m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, imageNames[i], report.c_str()); + } + } + m_firstAxisFFTPipeline = pipelines[0]; m_lastAxisFFT_convolution_lastAxisIFFTPipeline = pipelines[1]; m_firstAxisIFFTPipeline = pipelines[2]; diff --git a/29_Arithmetic2Bench/main.cpp b/29_Arithmetic2Bench/main.cpp index 5809c4a9a..889401d3d 100644 --- a/29_Arithmetic2Bench/main.cpp +++ b/29_Arithmetic2Bench/main.cpp @@ -180,6 +180,13 @@ class ArithmeticBenchApp final : public examples::SimpleWindowedApplication, pub ArithmeticBenchApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + 
retval.pipelineExecutableInfo = true; + return retval; + } + inline core::vector getSurfaces() const override { if (!m_surface) @@ -508,9 +515,20 @@ class ArithmeticBenchApp final : public examples::SimpleWindowedApplication, pub .entries = nullptr, }; params.cached.requireFullSubgroups = true; + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } core::smart_refctd_ptr pipeline; if (!m_device->createComputePipelines(nullptr,{¶ms,1},&pipeline)) return nullptr; + + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = system::to_string(pipeline->getExecutableInfo()); + m_logger->log("Arithmetic Bench Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } return pipeline; } diff --git a/30_ComputeShaderPathTracer/main.cpp b/30_ComputeShaderPathTracer/main.cpp index 82ab9fb91..1ba8c53ef 100644 --- a/30_ComputeShaderPathTracer/main.cpp +++ b/30_ComputeShaderPathTracer/main.cpp @@ -71,6 +71,13 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B inline bool isComputeOnly() const override { return false; } + virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = true; + return retval; + } + inline core::vector getSurfaces() const override { if (!m_surface) @@ -361,9 +368,21 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B params.shader.entries = nullptr; params.cached.requireFullSubgroups = true; params.shader.requiredSubgroupSize = static_cast(5); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= 
IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTPipelines.data() + index)) { return logFail("Failed to create compute pipeline!\n"); } + + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = system::to_string(m_PTPipelines[index]->getExecutableInfo()); + m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, PTShaderPaths[index].c_str(), report.c_str()); + } } } @@ -500,7 +519,7 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); std::exit(-1); } - }; + } { asset::ICPUImage::SCreationParams info; info.format = asset::E_FORMAT::EF_R32G32_UINT; @@ -1282,4 +1301,4 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; }; -NBL_MAIN_FUNC(ComputeShaderPathtracer) +NBL_MAIN_FUNC(ComputeShaderPathtracer) \ No newline at end of file diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index ea8def7ba..7919f68c5 100644 --- a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -37,6 +37,13 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso CompatibilityTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = true; + return retval; + } + bool onAppInitialized(smart_refctd_ptr&& system) override { // since emulated_float64_t rounds to zero @@ -317,8 +324,19 @@ class CompatibilityTest final : public MonoDeviceApplication, public 
BuiltinReso params.layout = m_pplnLayout.get(); params.shader.entryPoint = "main"; params.shader.shader = shader.get(); + if (base.m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } if (!base.m_device->createComputePipelines(nullptr, { &params,1 }, &m_pipeline)) base.logFail("Failed to create pipelines (compile & link shaders)!\n"); + + if (base.m_device->getEnabledFeatures().pipelineExecutableInfo && m_pipeline) // m_pipeline may be null if creation failed above + { + auto report = system::to_string(m_pipeline->getExecutableInfo()); + base.m_logger->log("EF64Submitter Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } } // Allocate the memory @@ -975,8 +993,19 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso params.layout = m_pplnLayout.get(); params.shader.entryPoint = "main"; params.shader.shader = shader.get(); + if (base.m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } if (!base.m_device->createComputePipelines(nullptr, { &params,1 }, &m_pipeline)) base.logFail("Failed to create pipelines (compile & link shaders)!\n"); + + if (base.m_device->getEnabledFeatures().pipelineExecutableInfo && m_pipeline) // m_pipeline may be null if creation failed above + { + auto report = system::to_string(m_pipeline->getExecutableInfo()); + base.m_logger->log("EF64Benchmark Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } } // Allocate the memory diff --git a/70_FLIPFluids/main.cpp b/70_FLIPFluids/main.cpp index e7334bff8..c702d512d 100644 --- a/70_FLIPFluids/main.cpp +++ b/70_FLIPFluids/main.cpp @@ -181,6 +181,13 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso inline FLIPFluidsApp(const path& 
_localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + inline SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = true; + return retval; + } + inline core::vector getSurfaces() const override { if (!m_surface) @@ -374,8 +381,18 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso params.layout = pipelineLayout.get(); params.shader.entryPoint = entryPoint; params.shader.shader = shader.get(); - + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } m_device->createComputePipelines(nullptr, { &params,1 }, &pipeline); + + if (m_device->getEnabledFeatures().pipelineExecutableInfo && pipeline) + { + auto report = system::to_string(pipeline->getExecutableInfo()); + m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, ShaderKey.value, report.c_str()); + } }; { @@ -627,16 +644,38 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso params.layout = pipelineLayout.get(); params.shader.entryPoint = "iterateDiffusion"; params.shader.shader = diffusion.get(); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } + if (!m_device->createComputePipelines(nullptr, { &params,1 }, &m_iterateDiffusionPipeline)) + m_logger->log("Failed to create iterateDiffusion pipeline!\n", ILogger::ELL_ERROR); - m_device->createComputePipelines(nullptr, { &params,1 }, &m_iterateDiffusionPipeline); + if 
(m_device->getEnabledFeatures().pipelineExecutableInfo && m_iterateDiffusionPipeline) // pipeline may be null if creation failed above + { + auto report = system::to_string(m_iterateDiffusionPipeline->getExecutableInfo()); + m_logger->log("iterateDiffusion Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } } { IGPUComputePipeline::SCreationParams params = {}; params.layout = pipelineLayout.get(); params.shader.entryPoint = "applyDiffusion"; params.shader.shader = diffusion.get(); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } + if (!m_device->createComputePipelines(nullptr, { &params,1 }, &m_diffusionPipeline)) + m_logger->log("Failed to create applyDiffusion pipeline!\n", ILogger::ELL_ERROR); - m_device->createComputePipelines(nullptr, { &params,1 }, &m_diffusionPipeline); + if (m_device->getEnabledFeatures().pipelineExecutableInfo && m_diffusionPipeline) // pipeline may be null if creation failed above + { + auto report = system::to_string(m_diffusionPipeline->getExecutableInfo()); + m_logger->log("applyDiffusion Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } } { diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index f6b64c5ca..d56a953b8 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -51,6 +51,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui { auto retval = device_base_t::getPreferredDeviceFeatures(); retval.accelerationStructureHostCommands = true; + retval.pipelineExecutableInfo = true; return retval; } @@ -102,40 +103,40 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui // Load Custom Shader auto loadPrecompiledShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; // virtual root + auto key = 
nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return nullptr; + + // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader + auto shader = IAsset::castDown(assets[0]); + if (!shader) { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = "app_resources"; // virtual root - auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); - auto assetBundle = m_assetMgr->getAsset(key.data(), lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto shader = IAsset::castDown(assets[0]); - if (!shader) - { - m_logger->log("Failed to load a precompiled shader.", ILogger::ELL_ERROR); - return nullptr; - } + m_logger->log("Failed to load a precompiled shader.", ILogger::ELL_ERROR); + return nullptr; + } - return shader; - }; + return shader; + }; // load shaders - const auto raygenShader = loadPrecompiledShader.operator()<"raytrace_rgen">(); // "app_resources/raytrace.rgen.hlsl" - const auto closestHitShader = loadPrecompiledShader.operator()<"raytrace_rchit">(); // "app_resources/raytrace.rchit.hlsl" - const auto proceduralClosestHitShader = loadPrecompiledShader.operator()<"raytrace_procedural_rchit">(); // "app_resources/raytrace_procedural.rchit.hlsl" - const auto intersectionHitShader = loadPrecompiledShader.operator()<"raytrace_rint">(); // "app_resources/raytrace.rint.hlsl" - const auto anyHitShaderColorPayload = loadPrecompiledShader.operator()<"raytrace_rahit">(); // "app_resources/raytrace.rahit.hlsl" - const auto anyHitShaderShadowPayload = loadPrecompiledShader.operator()<"raytrace_shadow_rahit">(); // "app_resources/raytrace_shadow.rahit.hlsl" - const auto missShader = loadPrecompiledShader.operator()<"raytrace_rmiss">(); // 
"app_resources/raytrace.rmiss.hlsl" - const auto missShadowShader = loadPrecompiledShader.operator()<"raytrace_shadow_rmiss">(); // "app_resources/raytrace_shadow.rmiss.hlsl" - const auto directionalLightCallShader = loadPrecompiledShader.operator()<"light_directional_rcall">(); // "app_resources/light_directional.rcall.hlsl" - const auto pointLightCallShader = loadPrecompiledShader.operator()<"light_point_rcall">(); // "app_resources/light_point.rcall.hlsl" - const auto spotLightCallShader = loadPrecompiledShader.operator()<"light_spot_rcall">(); // "app_resources/light_spot.rcall.hlsl" - const auto fragmentShader = loadPrecompiledShader.operator()<"present_frag">(); // "app_resources/present.frag.hlsl" + const auto raygenShader = loadPrecompiledShader.operator() < "raytrace_rgen" > (); // "app_resources/raytrace.rgen.hlsl" + const auto closestHitShader = loadPrecompiledShader.operator() < "raytrace_rchit" > (); // "app_resources/raytrace.rchit.hlsl" + const auto proceduralClosestHitShader = loadPrecompiledShader.operator() < "raytrace_procedural_rchit" > (); // "app_resources/raytrace_procedural.rchit.hlsl" + const auto intersectionHitShader = loadPrecompiledShader.operator() < "raytrace_rint" > (); // "app_resources/raytrace.rint.hlsl" + const auto anyHitShaderColorPayload = loadPrecompiledShader.operator() < "raytrace_rahit" > (); // "app_resources/raytrace.rahit.hlsl" + const auto anyHitShaderShadowPayload = loadPrecompiledShader.operator() < "raytrace_shadow_rahit" > (); // "app_resources/raytrace_shadow.rahit.hlsl" + const auto missShader = loadPrecompiledShader.operator() < "raytrace_rmiss" > (); // "app_resources/raytrace.rmiss.hlsl" + const auto missShadowShader = loadPrecompiledShader.operator() < "raytrace_shadow_rmiss" > (); // "app_resources/raytrace_shadow.rmiss.hlsl" + const auto directionalLightCallShader = loadPrecompiledShader.operator() < "light_directional_rcall" > (); // "app_resources/light_directional.rcall.hlsl" + const auto 
pointLightCallShader = loadPrecompiledShader.operator() < "light_point_rcall" > (); // "app_resources/light_point.rcall.hlsl" + const auto spotLightCallShader = loadPrecompiledShader.operator() < "light_spot_rcall" > (); // "app_resources/light_spot.rcall.hlsl" + const auto fragmentShader = loadPrecompiledShader.operator() < "present_frag" > (); // "app_resources/present.frag.hlsl" m_semaphore = m_device->createSemaphore(m_realFrameIx); if (!m_semaphore) @@ -232,18 +233,18 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui { const auto bindings = std::array{ ICPUDescriptorSetLayout::SBinding{ - .binding = 0, - .type = asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, - .count = 1, + .binding = 0, + .type = asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, + .count = 1, }, { - .binding = 1, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, - .count = 1, + .binding = 1, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, + .count = 1, } }; auto cpuDescriptorSetLayout = core::make_smart_refctd_ptr(bindings); @@ -256,11 +257,19 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto cpuPipelineLayout = core::make_smart_refctd_ptr(std::span({ pcRange }), std::move(cpuDescriptorSetLayout), nullptr, nullptr, nullptr); const auto pipeline = ICPURayTracingPipeline::create(cpuPipelineLayout.get()); - pipeline->getCachedCreationParams() = { - 
.flags = IGPURayTracingPipeline::SCreationParams::FLAGS::NO_NULL_INTERSECTION_SHADERS, - .maxRecursionDepth = 1, - .dynamicStackSize = true, - }; + { + core::bitflag flags = IGPURayTracingPipeline::SCreationParams::FLAGS::NO_NULL_INTERSECTION_SHADERS; + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + flags |= IGPURayTracingPipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + flags |= IGPURayTracingPipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } + pipeline->getCachedCreationParams() = { + .flags = flags, + .maxRecursionDepth = 1, + .dynamicStackSize = true, + }; + } pipeline->getSpecInfos(ESS_RAYGEN)[0] = { .shader = raygenShader, @@ -287,7 +296,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto intersectionSpecs = pipeline->getSpecInfos(ESS_INTERSECTION); closestHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = { .shader = closestHitShader, .entryPoint = "main" }; - anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = {.shader = anyHitShaderColorPayload, .entryPoint = "main"}; + anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = { .shader = anyHitShaderColorPayload, .entryPoint = "main" }; anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_OCCLUSION)] = { .shader = anyHitShaderShadowPayload, .entryPoint = "main" }; @@ -295,7 +304,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = anyHitShaderColorPayload, .entryPoint = "main" }; intersectionSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = intersectionHitShader, .entryPoint = "main" }; - anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = {.shader = anyHitShaderShadowPayload, .entryPoint = "main" }; + anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = { .shader = anyHitShaderShadowPayload, .entryPoint = "main" }; 
intersectionSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = { .shader = intersectionHitShader, .entryPoint = "main" }; pipeline->getSpecInfoVector(ESS_CALLABLE)->resize(ELT_COUNT); @@ -304,9 +313,9 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui callableGroups[ELT_POINT] = { .shader = pointLightCallShader, .entryPoint = "main" }; callableGroups[ELT_SPOT] = { .shader = spotLightCallShader, .entryPoint = "main" }; - smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); - CAssetConverter::SInputs inputs = {}; - inputs.logger = m_logger.get(); + smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); + CAssetConverter::SInputs inputs = {}; + inputs.logger = m_logger.get(); const std::array cpuPipelines = { pipeline.get() }; std::get>(inputs.assets) = cpuPipelines; @@ -314,7 +323,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui CAssetConverter::SConvertParams params = {}; params.utilities = m_utils.get(); - auto reservation = converter->reserve(inputs); + auto reservation = converter->reserve(inputs); auto future = reservation.convert(params); if (future.copy() != IQueue::RESULT::SUCCESS) { @@ -325,10 +334,16 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui // assign gpu objects to output auto&& pipelines = reservation.getGPUObjects(); m_rayTracingPipeline = pipelines[0].value; + + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = system::to_string(m_rayTracingPipeline->getExecutableInfo()); + m_logger->log("Ray Tracing Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } const auto* gpuDsLayout = m_rayTracingPipeline->getLayout()->getDescriptorSetLayouts()[0]; const std::array dsLayoutPtrs = { gpuDsLayout }; - m_rayTracingDsPool = 
m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, std::span(dsLayoutPtrs.begin(), dsLayoutPtrs.end())); + m_rayTracingDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, std::span(dsLayoutPtrs.begin(), dsLayoutPtrs.end())); m_rayTracingDs = m_rayTracingDsPool->createDescriptorSet(core::smart_refctd_ptr(gpuDsLayout)); calculateRayTracingStackSize(m_rayTracingPipeline); @@ -661,9 +676,9 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui cmdbuf->pushConstants(m_rayTracingPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, 0, sizeof(SPushConstants), &pc); cmdbuf->bindDescriptorSets(EPBP_RAY_TRACING, m_rayTracingPipeline->getLayout(), 0, 1, &m_rayTracingDs.get()); if (m_useIndirectCommand) - cmdbuf->traceRaysIndirect({.offset=0,.buffer=m_indirectBuffer}); + cmdbuf->traceRaysIndirect({ .offset = 0,.buffer = m_indirectBuffer }); else - cmdbuf->traceRays(m_shaderBindingTable,WIN_W,WIN_H,1); + cmdbuf->traceRays(m_shaderBindingTable, WIN_W, WIN_H, 1); } // pipeline barrier @@ -1058,7 +1073,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui }; const auto planeRotation = hlsl::math::quaternion::create(hlsl::float32_t3(1.f, 0.f, 0.f), core::radians(-90.0f)); - hlsl::float32_t3x4 planeTransform = hlsl::math::linalg::promote_affine<3,4,3,3>(hlsl::_static_cast(planeRotation)); + hlsl::float32_t3x4 planeTransform = hlsl::math::linalg::promote_affine<3, 4, 3, 3>(hlsl::_static_cast(planeRotation)); // triangles geometries auto geometryCreator = make_smart_refctd_ptr(); @@ -1146,7 +1161,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto blasCount = std::size(cpuObjects) + 1; const auto proceduralBlasIdx = std::size(cpuObjects); - std::array, std::size(cpuObjects)+1u> cpuBlasList; + std::array, std::size(cpuObjects) + 1u> cpuBlasList; for (uint32_t i = 0; i < blasCount; i++) { 
auto& blas = cpuBlasList[i]; @@ -1159,7 +1174,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui auto& aabb = aabbs->front(); auto& primCount = primitiveCounts->front(); - + primCount = NumberOfProceduralGeometries; aabb.data = { .offset = 0, .buffer = cpuProcBuffer }; aabb.stride = sizeof(IGPUBottomLevelAccelerationStructure::AABB_t); @@ -1264,7 +1279,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui for (uint32_t i = 0; i < cpuObjects.size(); i++) { tmpGeometries[i] = cpuObjects[i].data.get(); - tmpGeometryPatches[i].indexBufferUsages= IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; + tmpGeometryPatches[i].indexBufferUsages = IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; } std::get>(inputs.assets) = tmpTlas; @@ -1275,7 +1290,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui auto reservation = converter->reserve(inputs); { - auto prepass = [&](const auto & references) -> bool + auto prepass = [&](const auto& references) -> bool { auto objects = reservation.getGPUObjects(); uint32_t counter = {}; @@ -1372,8 +1387,8 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui return false; } // 2 submits, BLAS build, TLAS build, DO NOT ADD COMPACTIONS IN THIS EXAMPLE! 
- if (compute.getFutureScratchSemaphore().value>3) - m_logger->log("Overflow submitted on Compute Queue despite using ReBAR (no transfer submits or usage of staging buffer) and providing a AS Build Scratch Buffer of correctly queried max size!",system::ILogger::ELL_ERROR); + if (compute.getFutureScratchSemaphore().value > 3) + m_logger->log("Overflow submitted on Compute Queue despite using ReBAR (no transfer submits or usage of staging buffer) and providing a AS Build Scratch Buffer of correctly queried max size!", system::ILogger::ELL_ERROR); // assign gpu objects to output auto&& tlases = reservation.getGPUObjects(); @@ -1395,9 +1410,9 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto& normalView = gpuPolygon->getNormalView(); const uint64_t normalBufferAddress = normalView ? normalView.src.buffer->getDeviceAddress() + normalView.src.offset : 0; - auto normalType = NT_R32G32B32_SFLOAT; - if (normalView && normalView.composed.format == EF_R8G8B8A8_SNORM) - normalType = NT_R8G8B8A8_SNORM; + auto normalType = NT_R32G32B32_SFLOAT; + if (normalView && normalView.composed.format == EF_R8G8B8A8_SNORM) + normalType = NT_R8G8B8A8_SNORM; const auto& indexBufferBinding = gpuTriangles.indexData; auto& geomInfo = geomInfos[i]; diff --git a/72_CooperativeBinarySearch/main.cpp b/72_CooperativeBinarySearch/main.cpp index 81724c1b8..aef50f68c 100644 --- a/72_CooperativeBinarySearch/main.cpp +++ b/72_CooperativeBinarySearch/main.cpp @@ -40,6 +40,13 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp CooperativeBinarySearch(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = 
true; + return retval; + } + bool onAppInitialized(smart_refctd_ptr&& system) override { // Remember to call the base class initialization! @@ -94,8 +101,19 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp params.layout = layout.get(); params.shader.shader = shader.get(); params.shader.entryPoint = "main"; + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) return logFail("Failed to create compute pipeline!\n"); + + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = system::to_string(m_pipeline->getExecutableInfo()); + m_logger->log("Cooperative Binary Search Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } } const size_t sizes[2] = {sizeof(TestCaseIndices),sizeof(uint32_t)*totalValues};