From cd8aab95bd93405bdd2a9c24e9aae2670e3f4b19 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Fri, 20 Feb 2026 04:00:11 +0300 Subject: [PATCH 1/3] add pipeline executable info reports to performance critical examples --- 28_FFTBloom/main.cpp | 39 +- 29_Arithmetic2Bench/main.cpp | 18 + 30_ComputeShaderPathTracer/main.cpp | 1940 ++++++++++++++------------- 64_EmulatedFloatTest/main.cpp | 29 + 67_RayQueryGeometry/main.cpp | 12 + 70_FLIPFluids/main.cpp | 45 +- 71_RayTracingPipeline/main.cpp | 143 +- 72_CooperativeBinarySearch/main.cpp | 18 + 8 files changed, 1216 insertions(+), 1028 deletions(-) diff --git a/28_FFTBloom/main.cpp b/28_FFTBloom/main.cpp index 85d746b75..758bf9e50 100644 --- a/28_FFTBloom/main.cpp +++ b/28_FFTBloom/main.cpp @@ -227,6 +227,13 @@ class FFTBloomApp final : public SimpleWindowedApplication, public BuiltinResour FFTBloomApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = true; + return retval; + } + bool onAppInitialized(smart_refctd_ptr&& system) override { // Remember to call the base class initialization! 
@@ -730,12 +737,27 @@ class FFTBloomApp final : public SimpleWindowedApplication, public BuiltinResour // Normalization doesn't require full subgroups params[i].cached.requireFullSubgroups = bool(2-i); params[i].shader.requiredSubgroupSize = static_cast(hlsl::findMSB(deviceLimits.maxSubgroupSize)); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params[i].flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params[i].flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } } - + smart_refctd_ptr pipelines[3]; if(!m_device->createComputePipelines(nullptr, { params, 3 }, pipelines)) return logFail("Failed to create Compute Pipelines!\n"); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + const char* kernelNames[] = {"Kernel First Axis FFT", "Kernel Second Axis FFT", "Kernel Spectrum Normalize"}; + for (auto i = 0u; i < 3; i++) + { + auto report = m_device->getPipelineExecutableReport(pipelines[i].get(), true); + m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, kernelNames[i], report.c_str()); + } + } + // Push Constants - only need to specify BDAs here PushConstantData pushConstants; pushConstants.colMajorBufferAddress = m_colMajorBufferAddress[0]; @@ -933,12 +955,27 @@ class FFTBloomApp final : public SimpleWindowedApplication, public BuiltinResour params[i].shader.entryPoint = "main"; params[i].shader.requiredSubgroupSize = static_cast(hlsl::findMSB(deviceLimits.maxSubgroupSize)); params[i].cached.requireFullSubgroups = true; + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params[i].flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params[i].flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } } smart_refctd_ptr pipelines[3]; if (!m_device->createComputePipelines(nullptr, { params, 3 }, pipelines)) return logFail("Failed to create Compute Pipelines!\n"); + if 
(m_device->getEnabledFeatures().pipelineExecutableInfo) + { + const char* imageNames[] = {"Image First Axis FFT", "FFT Convolve IFFT", "Image First Axis IFFT"}; + for (auto i = 0u; i < 3; i++) + { + auto report = m_device->getPipelineExecutableReport(pipelines[i].get(), true); + m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, imageNames[i], report.c_str()); + } + } + m_firstAxisFFTPipeline = pipelines[0]; m_lastAxisFFT_convolution_lastAxisIFFTPipeline = pipelines[1]; m_firstAxisIFFTPipeline = pipelines[2]; diff --git a/29_Arithmetic2Bench/main.cpp b/29_Arithmetic2Bench/main.cpp index 5809c4a9a..b37a01b7c 100644 --- a/29_Arithmetic2Bench/main.cpp +++ b/29_Arithmetic2Bench/main.cpp @@ -180,6 +180,13 @@ class ArithmeticBenchApp final : public examples::SimpleWindowedApplication, pub ArithmeticBenchApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = true; + return retval; + } + inline core::vector getSurfaces() const override { if (!m_surface) @@ -508,9 +515,20 @@ class ArithmeticBenchApp final : public examples::SimpleWindowedApplication, pub .entries = nullptr, }; params.cached.requireFullSubgroups = true; + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } core::smart_refctd_ptr pipeline; if (!m_device->createComputePipelines(nullptr,{¶ms,1},&pipeline)) return nullptr; + + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = 
m_device->getPipelineExecutableReport(pipeline.get(), true); + m_logger->log("Arithmetic Bench Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } return pipeline; } diff --git a/30_ComputeShaderPathTracer/main.cpp b/30_ComputeShaderPathTracer/main.cpp index 82ab9fb91..ba0035df1 100644 --- a/30_ComputeShaderPathTracer/main.cpp +++ b/30_ComputeShaderPathTracer/main.cpp @@ -32,283 +32,290 @@ struct PTPushConstant { // TODO: Do buffer creation using assConv class ComputeShaderPathtracer final : public SimpleWindowedApplication, public BuiltinResourcesApplication { - using device_base_t = SimpleWindowedApplication; - using asset_base_t = BuiltinResourcesApplication; - using clock_t = std::chrono::steady_clock; - - enum E_LIGHT_GEOMETRY : uint8_t + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; + using clock_t = std::chrono::steady_clock; + + enum E_LIGHT_GEOMETRY : uint8_t + { + ELG_SPHERE, + ELG_TRIANGLE, + ELG_RECTANGLE, + ELG_COUNT + }; + + constexpr static inline uint32_t2 WindowDimensions = { 1280, 720 }; + constexpr static inline uint32_t MaxFramesInFlight = 5; + constexpr static inline clock_t::duration DisplayImageDuration = std::chrono::milliseconds(900); + constexpr static inline uint32_t DefaultWorkGroupSize = 16u; + constexpr static inline uint32_t MaxDescriptorCount = 256u; + constexpr static inline uint32_t MaxDepthLog2 = 4u; // 5 + constexpr static inline uint32_t MaxSamplesLog2 = 10u; // 18 + constexpr static inline uint32_t MaxBufferDimensions = 3u << MaxDepthLog2; + constexpr static inline uint32_t MaxBufferSamples = 1u << MaxSamplesLog2; + constexpr static inline uint8_t MaxUITextureCount = 1u; + static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; + static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; + static inline std::array PTShaderPaths = { "app_resources/litBySphere.comp", "app_resources/litByTriangle.comp", 
"app_resources/litByRectangle.comp" }; + static inline std::string PresentShaderPath = "app_resources/present.frag.hlsl"; + + const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { + "ELG_SPHERE", + "ELG_TRIANGLE", + "ELG_RECTANGLE" + }; + +public: + inline ComputeShaderPathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline bool isComputeOnly() const override { return false; } + + virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = true; + return retval; + } + + inline core::vector getSurfaces() const override + { + if (!m_surface) { - ELG_SPHERE, - ELG_TRIANGLE, - ELG_RECTANGLE, - ELG_COUNT - }; + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = WindowDimensions.x; + params.height = WindowDimensions.y; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "ComputeShaderPathtracer"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } - constexpr static inline uint32_t2 WindowDimensions = { 1280, 720 }; - constexpr static inline uint32_t MaxFramesInFlight = 5; - constexpr static inline clock_t::duration DisplayImageDuration = std::chrono::milliseconds(900); - constexpr static inline uint32_t DefaultWorkGroupSize = 16u; - constexpr static inline uint32_t MaxDescriptorCount = 256u; - constexpr static inline uint32_t MaxDepthLog2 = 4u; // 5 - constexpr static inline uint32_t MaxSamplesLog2 = 10u; // 18 - constexpr static inline uint32_t 
MaxBufferDimensions = 3u << MaxDepthLog2; - constexpr static inline uint32_t MaxBufferSamples = 1u << MaxSamplesLog2; - constexpr static inline uint8_t MaxUITextureCount = 1u; - static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; - static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; - static inline std::array PTShaderPaths = { "app_resources/litBySphere.comp", "app_resources/litByTriangle.comp", "app_resources/litByRectangle.comp" }; - static inline std::string PresentShaderPath = "app_resources/present.frag.hlsl"; - - const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { - "ELG_SPHERE", - "ELG_TRIANGLE", - "ELG_RECTANGLE" - }; + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); + } - public: - inline ComputeShaderPathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; - inline bool isComputeOnly() const override { return false; } + return {}; + } - inline core::vector getSurfaces() const override + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Init systems { - if (!m_surface) - { - { - auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); - IWindow::SCreationParams params = {}; - params.callback = core::make_smart_refctd_ptr(); - params.width = WindowDimensions.x; - params.height = WindowDimensions.y; - params.x = 32; - params.y = 32; - params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; - params.windowCaption = "ComputeShaderPathtracer"; - params.callback = windowCallback; - const_cast&>(m_window) = 
m_winMgr->createWindow(std::move(params)); - } + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); - auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); - const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); - } + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; - if (m_surface) - return { {m_surface->getSurface()/*,EQF_NONE*/} }; + m_semaphore = m_device->createSemaphore(m_realFrameIx); - return {}; + if (!m_semaphore) + return logFail("Failed to create semaphore!"); } - inline bool onAppInitialized(smart_refctd_ptr&& system) override + // Create renderpass and init surface + nbl::video::IGPURenderpass* renderpass; { - // Init systems - { - m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); - - // Remember to call the base class initialization! 
- if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - if (!asset_base_t::onAppInitialized(std::move(system))) - return false; - - m_semaphore = m_device->createSemaphore(m_realFrameIx); + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); - if (!m_semaphore) - return logFail("Failed to create semaphore!"); - } - - // Create renderpass and init surface - nbl::video::IGPURenderpass* renderpass; + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { - ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; - if (!swapchainParams.deduceFormat(m_physicalDevice)) - return logFail("Could not choose a Surface Format for the Swapchain!"); - - const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { - .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = - { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }, + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { - .srcSubpass = 0, - .dstSubpass = 
IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = - { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; - - auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); - renderpass = scResources->getRenderpass(); - - if (!renderpass) - return logFail("Failed to create Renderpass!"); - - auto gQueue = getGraphicsQueue(); - if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) - return logFail("Could not create Window & Surface or initialize the Surface!"); - } + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; - // image upload utils - { - m_scratchSemaphore = m_device->createSemaphore(0); - if (!m_scratchSemaphore) - return logFail("Could not create Scratch Semaphore"); - m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); - // we don't want to overcomplicate the example with multi-queue - m_intendedSubmit.queue = getGraphicsQueue(); - // wait for nothing before upload - m_intendedSubmit.waitSemaphores = {}; - m_intendedSubmit.waitSemaphores = {}; - // fill later - m_intendedSubmit.scratchCommandBuffers = {}; - m_intendedSubmit.scratchSemaphore = { - .semaphore = m_scratchSemaphore.get(), - .value = 0, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS - }; - } + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + renderpass = scResources->getRenderpass(); - // Create command pool and buffers - { - auto gQueue = getGraphicsQueue(); - m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if 
(!m_cmdPool) - return logFail("Couldn't create Command Pool!"); + if (!renderpass) + return logFail("Failed to create Renderpass!"); - if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), MaxFramesInFlight })) - return logFail("Couldn't create Command Buffer!"); - } + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + } - ISampler::SParams samplerParams = { - .AnisotropicFilter = 0 + // image upload utils + { + m_scratchSemaphore = m_device->createSemaphore(0); + if (!m_scratchSemaphore) + return logFail("Could not create Scratch Semaphore"); + m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); + // we don't want to overcomplicate the example with multi-queue + m_intendedSubmit.queue = getGraphicsQueue(); + // wait for nothing before upload + m_intendedSubmit.waitSemaphores = {}; + m_intendedSubmit.waitSemaphores = {}; + // fill later + m_intendedSubmit.scratchCommandBuffers = {}; + m_intendedSubmit.scratchSemaphore = { + .semaphore = m_scratchSemaphore.get(), + .value = 0, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS }; - auto defaultSampler = m_device->createSampler(samplerParams); + } - // Create descriptors and pipeline for the pathtracer - { - auto convertDSLayoutCPU2GPU = [&](smart_refctd_ptr cpuLayout) { - auto converter = CAssetConverter::create({ .device = m_device.get() }); - CAssetConverter::SInputs inputs = {}; - inputs.readCache = converter.get(); - inputs.logger = m_logger.get(); - CAssetConverter::SConvertParams params = {}; - params.utilities = m_utils.get(); - - std::get>(inputs.assets) = { &cpuLayout.get(),1 }; - // don't need to assert that we don't need to provide patches since layouts are not patchable - //assert(true); - auto reservation = converter->reserve(inputs); - // the `.value` is just a funny way to make the 
`smart_refctd_ptr` copyable - auto gpuLayout = reservation.getGPUObjects().front().value; - if (!gpuLayout) { - m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); - std::exit(-1); - } + // Create command pool and buffers + { + auto gQueue = getGraphicsQueue(); + m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); - return gpuLayout; - }; - auto convertDSCPU2GPU = [&](smart_refctd_ptr cpuDS) { - auto converter = CAssetConverter::create({ .device = m_device.get() }); - CAssetConverter::SInputs inputs = {}; - inputs.readCache = converter.get(); - inputs.logger = m_logger.get(); - CAssetConverter::SConvertParams params = {}; - params.utilities = m_utils.get(); - - std::get>(inputs.assets) = { &cpuDS.get(), 1 }; - // don't need to assert that we don't need to provide patches since layouts are not patchable - //assert(true); - auto reservation = converter->reserve(inputs); - // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable - auto gpuDS = reservation.getGPUObjects().front().value; - if (!gpuDS) { - m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); - std::exit(-1); - } + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), MaxFramesInFlight })) + return logFail("Couldn't create Command Buffer!"); + } - return gpuDS; - }; + ISampler::SParams samplerParams = { + .AnisotropicFilter = 0 + }; + auto defaultSampler = m_device->createSampler(samplerParams); - std::array descriptorSet0Bindings = {}; - std::array descriptorSet3Bindings = {}; - std::array presentDescriptorSetBindings; - - descriptorSet0Bindings[0] = { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = 
IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr - }; - descriptorSet3Bindings[0] = { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr - }; - descriptorSet3Bindings[1] = { - .binding = 1u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr - }; - descriptorSet3Bindings[2] = { - .binding = 2u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr + // Create descriptors and pipeline for the pathtracer + { + auto convertDSLayoutCPU2GPU = [&](smart_refctd_ptr cpuLayout) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuLayout.get(),1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuLayout = reservation.getGPUObjects().front().value; + if (!gpuLayout) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuLayout; }; - presentDescriptorSetBindings[0] = { - .binding = 0u, - .type = 
nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - .immutableSamplers = &defaultSampler + auto convertDSCPU2GPU = [&](smart_refctd_ptr cpuDS) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuDS.get(), 1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuDS = reservation.getGPUObjects().front().value; + if (!gpuDS) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuDS; }; - auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); - auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); + std::array descriptorSet0Bindings = {}; + std::array descriptorSet3Bindings = {}; + std::array presentDescriptorSetBindings; + + descriptorSet0Bindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[1] = { + 
.binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[2] = { + .binding = 2u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + presentDescriptorSetBindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + .immutableSamplers = &defaultSampler + }; - auto gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); - auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); - auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); + auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); + auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); - auto cpuDescriptorSet0 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout0)); - auto cpuDescriptorSet2 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout2)); + auto gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); + auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); + auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); - m_descriptorSet0 = convertDSCPU2GPU(cpuDescriptorSet0); - m_descriptorSet2 = convertDSCPU2GPU(cpuDescriptorSet2); + auto cpuDescriptorSet0 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout0)); + auto cpuDescriptorSet2 = 
make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout2)); - smart_refctd_ptr presentDSPool; - { - const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; - const uint32_t setCounts[] = { 1u }; - presentDSPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - } - m_presentDescriptorSet = presentDSPool->createDescriptorSet(gpuPresentDescriptorSetLayout); + m_descriptorSet0 = convertDSCPU2GPU(cpuDescriptorSet0); + m_descriptorSet2 = convertDSCPU2GPU(cpuDescriptorSet2); + + smart_refctd_ptr presentDSPool; + { + const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; + const uint32_t setCounts[] = { 1u }; + presentDSPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + } + m_presentDescriptorSet = presentDSPool->createDescriptorSet(gpuPresentDescriptorSetLayout); - // Create Shaders - auto loadAndCompileShader = [&](std::string pathToShader) + // Create Shaders + auto loadAndCompileShader = [&](std::string pathToShader) { IAssetLoader::SAssetLoadParams lp = {}; lp.workingDirectory = localInputCWD; @@ -334,243 +341,256 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B return shader; }; - // Create compute pipelines - { - for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) { - auto ptShader = loadAndCompileShader(PTShaderPaths[index]); - const nbl::asset::SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .offset = 0, - .size = sizeof(PTPushConstant) - }; - auto ptPipelineLayout = m_device->createPipelineLayout( - { &pcRange, 1 }, - core::smart_refctd_ptr(gpuDescriptorSetLayout0), - nullptr, - core::smart_refctd_ptr(gpuDescriptorSetLayout2), - nullptr - ); - if (!ptPipelineLayout) { - return logFail("Failed to create Pathtracing pipeline layout"); - } - - 
IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.cached.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTPipelines.data() + index)) { - return logFail("Failed to create compute pipeline!\n"); - } - } - } - - // Create graphics pipeline - { - auto scRes = static_cast(m_surface->getSwapchainResources()); - ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); - if (!fsTriProtoPPln) - return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); - - // Load Fragment Shader - auto fragmentShader = loadAndCompileShader(PresentShaderPath); - if (!fragmentShader) - return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); - - const IGPUPipelineBase::SShaderSpecInfo fragSpec = { - .shader = fragmentShader.get(), - .entryPoint = "main", + // Create compute pipelines + { + for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) { + auto ptShader = loadAndCompileShader(PTShaderPaths[index]); + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(PTPushConstant) }; - - auto presentLayout = m_device->createPipelineLayout( - {}, - core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), - nullptr, + auto ptPipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0), nullptr, + core::smart_refctd_ptr(gpuDescriptorSetLayout2), nullptr ); - m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); - if (!m_presentPipeline) - return logFail("Could not create Graphics Pipeline!"); + if (!ptPipelineLayout) { + return 
logFail("Failed to create Pathtracing pipeline layout"); + } + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = ptPipelineLayout.get(); + params.shader.shader = ptShader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTPipelines.data() + index)) { + return logFail("Failed to create compute pipeline!\n"); + } + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = m_device->getPipelineExecutableReport(m_PTPipelines[index].get(), true); + m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, PTShaderPaths[index].c_str(), report.c_str()); + } } } - // load CPUImages and convert to GPUImages - smart_refctd_ptr envMap, scrambleMap; + // Create graphics pipeline { - auto convertImgCPU2GPU = [&](std::span cpuImgs) { - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[0].get(); - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - std::array commandBufferInfo = { cmdbuf }; - core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); - imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); - - auto converter = CAssetConverter::create({ .device = m_device.get() }); - // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. 
- struct SInputs final : CAssetConverter::SInputs - { - // we also need to override this to have concurrent sharing - inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override - { - if (familyIndices.size() > 1) - return familyIndices; - return {}; - } + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Fragment Shader + auto fragmentShader = loadAndCompileShader(PresentShaderPath); + if (!fragmentShader) + return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), + .entryPoint = "main", + }; - inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override - { - return image->getCreationParameters().mipLevels; - } - inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override - { - return 0b0u; - } + auto presentLayout = m_device->createPipelineLayout( + {}, + core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + + } + } + + // load CPUImages and convert to GPUImages + smart_refctd_ptr envMap, scrambleMap; + { + auto convertImgCPU2GPU = [&](std::span cpuImgs) { + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + std::array commandBufferInfo = { cmdbuf }; 
+ core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); + imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); - std::vector familyIndices; - } inputs = {}; - inputs.readCache = converter.get(); - inputs.logger = m_logger.get(); + auto converter = CAssetConverter::create({ .device = m_device.get() }); + // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. + struct SInputs final : CAssetConverter::SInputs + { + // we also need to override this to have concurrent sharing + inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override { - const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), queue->getFamilyIndex() }; - inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; - } - // scratch command buffers for asset converter transfer commands - SIntendedSubmitInfo transfer = { - .queue = queue, - .waitSemaphores = {}, - .prevCommandBuffers = {}, - .scratchCommandBuffers = commandBufferInfo, - .scratchSemaphore = { - .semaphore = imgFillSemaphore.get(), - .value = 0, - // because of layout transitions - .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS - } - }; - // as per the `SIntendedSubmitInfo` one commandbuffer must be begun - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the - // compute queue becomes the owner of the buffers and images post-transfer, but in this example we use concurrent sharing - CAssetConverter::SConvertParams params = {}; - params.transfer = &transfer; - params.utilities = m_utils.get(); - - std::get>(inputs.assets) = cpuImgs; - // assert that we don't need to provide patches - assert(cpuImgs[0]->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); - auto reservation = 
converter->reserve(inputs); - // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable - auto gpuImgs = reservation.getGPUObjects(); - for (auto& gpuImg : gpuImgs) { - if (!gpuImg) { - m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); - std::exit(-1); - } + if (familyIndices.size() > 1) + return familyIndices; + return {}; } - // and launch the conversions - m_api->startCapture(); - auto result = reservation.convert(params); - m_api->endCapture(); - if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { - m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); - std::exit(-1); + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; } - envMap = gpuImgs[0].value; - scrambleMap = gpuImgs[1].value; - }; - - smart_refctd_ptr envMapCPU, scrambleMapCPU; + std::vector familyIndices; + } inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); { - IAssetLoader::SAssetLoadParams lp; - lp.workingDirectory = this->sharedInputCWD; - SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); - if (bundle.getContents().empty()) { - m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); - std::exit(-1); + const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), queue->getFamilyIndex() }; + inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; + } + // scratch command buffers for asset converter transfer commands + SIntendedSubmitInfo transfer = { + .queue = queue, + .waitSemaphores = {}, + .prevCommandBuffers = {}, + .scratchCommandBuffers = commandBufferInfo, + .scratchSemaphore = { + 
.semaphore = imgFillSemaphore.get(), + .value = 0, + // because of layout transitions + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS } - - envMapCPU = IAsset::castDown(bundle.getContents()[0]); - if (!envMapCPU) { - m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + }; + // as per the `SIntendedSubmitInfo` one commandbuffer must be begun + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the + // compute queue becomes the owner of the buffers and images post-transfer, but in this example we use concurrent sharing + CAssetConverter::SConvertParams params = {}; + params.transfer = &transfer; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = cpuImgs; + // assert that we don't need to provide patches + assert(cpuImgs[0]->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuImgs = reservation.getGPUObjects(); + for (auto& gpuImg : gpuImgs) { + if (!gpuImg) { + m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); std::exit(-1); } + } + + // and launch the conversions + m_api->startCapture(); + auto result = reservation.convert(params); + m_api->endCapture(); + if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { + m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); + std::exit(-1); + } + + envMap = gpuImgs[0].value; + scrambleMap = gpuImgs[1].value; }; - { - asset::ICPUImage::SCreationParams info; - info.format = asset::E_FORMAT::EF_R32G32_UINT; - info.type = asset::ICPUImage::ET_2D; - auto extent = envMapCPU->getCreationParameters().extent; - info.extent.width = extent.width; - info.extent.height = extent.height; - info.extent.depth = 1u; - info.mipLevels = 1u; - 
info.arrayLayers = 1u; - info.samples = asset::ICPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; - info.flags = static_cast(0u); - info.usage = asset::IImage::EUF_TRANSFER_SRC_BIT | asset::IImage::EUF_SAMPLED_BIT; - - scrambleMapCPU = ICPUImage::create(std::move(info)); - const uint32_t texelFormatByteSize = getTexelOrBlockBytesize(scrambleMapCPU->getCreationParameters().format); - const uint32_t texelBufferSize = scrambleMapCPU->getImageDataSizeInBytes(); - auto texelBuffer = ICPUBuffer::create({ texelBufferSize }); - - core::RandomSampler rng(0xbadc0ffeu); - auto out = reinterpret_cast(texelBuffer->getPointer()); - for (auto index = 0u; index < texelBufferSize / 4; index++) { - out[index] = rng.nextSample(); - } - auto regions = core::make_refctd_dynamic_array>(1u); - ICPUImage::SBufferCopy& region = regions->front(); - region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - region.imageSubresource.mipLevel = 0u; - region.imageSubresource.baseArrayLayer = 0u; - region.imageSubresource.layerCount = 1u; - region.bufferOffset = 0u; - region.bufferRowLength = IImageAssetHandlerBase::calcPitchInBlocks(extent.width, texelFormatByteSize); - region.bufferImageHeight = 0u; - region.imageOffset = { 0u, 0u, 0u }; - region.imageExtent = scrambleMapCPU->getCreationParameters().extent; - - scrambleMapCPU->setBufferAndRegions(std::move(texelBuffer), regions); - - // programmatically user-created IPreHashed need to have their hash computed (loaders do it while loading) - scrambleMapCPU->setContentHash(scrambleMapCPU->computeContentHash()); + smart_refctd_ptr envMapCPU, scrambleMapCPU; + { + IAssetLoader::SAssetLoadParams lp; + lp.workingDirectory = this->sharedInputCWD; + SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); + if (bundle.getContents().empty()) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); } - std::array cpuImgs = { envMapCPU.get(), scrambleMapCPU.get()}; - convertImgCPU2GPU(cpuImgs); + 
envMapCPU = IAsset::castDown(bundle.getContents()[0]); + if (!envMapCPU) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + }; + { + asset::ICPUImage::SCreationParams info; + info.format = asset::E_FORMAT::EF_R32G32_UINT; + info.type = asset::ICPUImage::ET_2D; + auto extent = envMapCPU->getCreationParameters().extent; + info.extent.width = extent.width; + info.extent.height = extent.height; + info.extent.depth = 1u; + info.mipLevels = 1u; + info.arrayLayers = 1u; + info.samples = asset::ICPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + info.flags = static_cast(0u); + info.usage = asset::IImage::EUF_TRANSFER_SRC_BIT | asset::IImage::EUF_SAMPLED_BIT; + + scrambleMapCPU = ICPUImage::create(std::move(info)); + const uint32_t texelFormatByteSize = getTexelOrBlockBytesize(scrambleMapCPU->getCreationParameters().format); + const uint32_t texelBufferSize = scrambleMapCPU->getImageDataSizeInBytes(); + auto texelBuffer = ICPUBuffer::create({ texelBufferSize }); + + core::RandomSampler rng(0xbadc0ffeu); + auto out = reinterpret_cast(texelBuffer->getPointer()); + for (auto index = 0u; index < texelBufferSize / 4; index++) { + out[index] = rng.nextSample(); + } + + auto regions = core::make_refctd_dynamic_array>(1u); + ICPUImage::SBufferCopy& region = regions->front(); + region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = 1u; + region.bufferOffset = 0u; + region.bufferRowLength = IImageAssetHandlerBase::calcPitchInBlocks(extent.width, texelFormatByteSize); + region.bufferImageHeight = 0u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent = scrambleMapCPU->getCreationParameters().extent; + + scrambleMapCPU->setBufferAndRegions(std::move(texelBuffer), regions); + + // programmatically user-created IPreHashed need to have their hash computed (loaders do it while loading) + 
scrambleMapCPU->setContentHash(scrambleMapCPU->computeContentHash()); } - // create views for textures - { - auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { - IGPUImage::SCreationParams imgInfo; - imgInfo.format = colorFormat; - imgInfo.type = IGPUImage::ET_2D; - imgInfo.extent.width = width; - imgInfo.extent.height = height; - imgInfo.extent.depth = 1u; - imgInfo.mipLevels = 1u; - imgInfo.arrayLayers = 1u; - imgInfo.samples = IGPUImage::ESCF_1_BIT; - imgInfo.flags = static_cast(0u); - imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; - - auto image = m_device->createImage(std::move(imgInfo)); - auto imageMemReqs = image->getMemoryReqs(); - imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - m_device->allocate(imageMemReqs, image.get()); - - return image; + std::array cpuImgs = { envMapCPU.get(), scrambleMapCPU.get() }; + convertImgCPU2GPU(cpuImgs); + } + + // create views for textures + { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = colorFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = width; + imgInfo.extent.height = height; + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = IGPUImage::ESCF_1_BIT; + imgInfo.flags = static_cast(0u); + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + return image; }; - auto createHDRIImageView = 
[this](smart_refctd_ptr img) -> smart_refctd_ptr + auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr { auto format = img->getCreationParameters().format; IGPUImageView::SCreationParams imgViewInfo; @@ -587,28 +607,28 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B return m_device->createImageView(std::move(imgViewInfo)); }; - auto params = envMap->getCreationParameters(); - auto extent = params.extent; - envMap->setObjectDebugName("Env Map"); - m_envMapView = createHDRIImageView(envMap); - m_envMapView->setObjectDebugName("Env Map View"); - scrambleMap->setObjectDebugName("Scramble Map"); - m_scrambleView = createHDRIImageView(scrambleMap); - m_scrambleView->setObjectDebugName("Scramble Map View"); - auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); - outImg->setObjectDebugName("Output Image"); - m_outImgView = createHDRIImageView(outImg); - m_outImgView->setObjectDebugName("Output Image View"); - } + auto params = envMap->getCreationParameters(); + auto extent = params.extent; + envMap->setObjectDebugName("Env Map"); + m_envMapView = createHDRIImageView(envMap); + m_envMapView->setObjectDebugName("Env Map View"); + scrambleMap->setObjectDebugName("Scramble Map"); + m_scrambleView = createHDRIImageView(scrambleMap); + m_scrambleView->setObjectDebugName("Scramble Map View"); + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); + outImg->setObjectDebugName("Output Image"); + m_outImgView = createHDRIImageView(outImg); + m_outImgView->setObjectDebugName("Output Image View"); + } - // create sequence buffer view - { - // TODO: do this better use asset manager to get the ICPUBuffer from `.bin` - auto createBufferFromCacheFile = [this]( - system::path filename, - size_t bufferSize, - void *data, - smart_refctd_ptr& buffer + // create sequence buffer view + { + // TODO: do this better use 
asset manager to get the ICPUBuffer from `.bin` + auto createBufferFromCacheFile = [this]( + system::path filename, + size_t bufferSize, + void* data, + smart_refctd_ptr& buffer ) -> std::pair, bool> { ISystem::future_t> owenSamplerFileFuture; @@ -641,7 +661,7 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B return { owenSamplerFile, true }; }; - auto writeBufferIntoCacheFile = [this](smart_refctd_ptr file, size_t bufferSize, void* data) + auto writeBufferIntoCacheFile = [this](smart_refctd_ptr file, size_t bufferSize, void* data) { ISystem::future_t owenSamplerFileWriteFuture; size_t owenSamplerFileBytesWritten; @@ -651,196 +671,196 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); }; - constexpr size_t bufferSize = MaxBufferDimensions * MaxBufferSamples; - std::array data = {}; - smart_refctd_ptr sampleSeq; + constexpr size_t bufferSize = MaxBufferDimensions * MaxBufferSamples; + std::array data = {}; + smart_refctd_ptr sampleSeq; - auto cacheBufferResult = createBufferFromCacheFile(sharedOutputCWD/OwenSamplerFilePath, bufferSize, data.data(), sampleSeq); - if (!cacheBufferResult.second) - { - core::OwenSampler sampler(MaxBufferDimensions, 0xdeadbeefu); + auto cacheBufferResult = createBufferFromCacheFile(sharedOutputCWD / OwenSamplerFilePath, bufferSize, data.data(), sampleSeq); + if (!cacheBufferResult.second) + { + core::OwenSampler sampler(MaxBufferDimensions, 0xdeadbeefu); - ICPUBuffer::SCreationParams params = {}; - params.size = MaxBufferDimensions*MaxBufferSamples*sizeof(uint32_t); - sampleSeq = ICPUBuffer::create(std::move(params)); + ICPUBuffer::SCreationParams params = {}; + params.size = MaxBufferDimensions * MaxBufferSamples * sizeof(uint32_t); + sampleSeq = ICPUBuffer::create(std::move(params)); - auto out = reinterpret_cast(sampleSeq->getPointer()); - for (auto dim = 0u; dim < MaxBufferDimensions; dim++) 
- for (uint32_t i = 0; i < MaxBufferSamples; i++) - { - out[i * MaxBufferDimensions + dim] = sampler.sample(dim, i); - } - if (cacheBufferResult.first) - writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); - } + auto out = reinterpret_cast(sampleSeq->getPointer()); + for (auto dim = 0u; dim < MaxBufferDimensions; dim++) + for (uint32_t i = 0; i < MaxBufferSamples; i++) + { + out[i * MaxBufferDimensions + dim] = sampler.sample(dim, i); + } + if (cacheBufferResult.first) + writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); + } - IGPUBuffer::SCreationParams params = {}; - params.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_UNIFORM_TEXEL_BUFFER_BIT; - params.size = sampleSeq->getSize(); + IGPUBuffer::SCreationParams params = {}; + params.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_UNIFORM_TEXEL_BUFFER_BIT; + params.size = sampleSeq->getSize(); - // we don't want to overcomplicate the example with multi-queue - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[0].get(); - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; - m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo, 1 }; + // we don't want to overcomplicate the example with multi-queue + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; + m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo, 1 }; - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - m_api->startCapture(); - auto bufferFuture = m_utils->createFilledDeviceLocalBufferOnDedMem( - m_intendedSubmit, - std::move(params), - sampleSeq->getPointer() - ); - m_api->endCapture(); - bufferFuture.wait(); - auto buffer = bufferFuture.get(); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + m_api->startCapture(); + auto bufferFuture = 
m_utils->createFilledDeviceLocalBufferOnDedMem( + m_intendedSubmit, + std::move(params), + sampleSeq->getPointer() + ); + m_api->endCapture(); + bufferFuture.wait(); + auto buffer = bufferFuture.get(); - m_sequenceBufferView = m_device->createBufferView({ 0u, buffer->get()->getSize(), *buffer }, asset::E_FORMAT::EF_R32G32B32_UINT); - m_sequenceBufferView->setObjectDebugName("Sequence Buffer"); - } + m_sequenceBufferView = m_device->createBufferView({ 0u, buffer->get()->getSize(), *buffer }, asset::E_FORMAT::EF_R32G32B32_UINT); + m_sequenceBufferView->setObjectDebugName("Sequence Buffer"); + } - // Update Descriptors - { - ISampler::SParams samplerParams0 = { - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::ETBC_FLOAT_OPAQUE_BLACK, - ISampler::ETF_LINEAR, - ISampler::ETF_LINEAR, - ISampler::ESMM_LINEAR, - 0u, - false, - ECO_ALWAYS - }; - auto sampler0 = m_device->createSampler(samplerParams0); - ISampler::SParams samplerParams1 = { - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::ETBC_INT_OPAQUE_BLACK, - ISampler::ETF_NEAREST, - ISampler::ETF_NEAREST, - ISampler::ESMM_NEAREST, - 0u, - false, - ECO_ALWAYS - }; - auto sampler1 = m_device->createSampler(samplerParams1); - - std::array writeDSInfos = {}; - writeDSInfos[0].desc = m_outImgView; - writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; - writeDSInfos[1].desc = m_envMapView; - // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - writeDSInfos[1].info.combinedImageSampler.sampler = sampler0; - writeDSInfos[1].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - 
writeDSInfos[2].desc = m_sequenceBufferView; - writeDSInfos[3].desc = m_scrambleView; - // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; - writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; - writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[4].desc = m_outImgView; - writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - std::array writeDescriptorSets = {}; - writeDescriptorSets[0] = { - .dstSet = m_descriptorSet0.get(), - .binding = 0, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[0] - }; - writeDescriptorSets[1] = { - .dstSet = m_descriptorSet2.get(), - .binding = 0, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[1] - }; - writeDescriptorSets[2] = { - .dstSet = m_descriptorSet2.get(), - .binding = 1, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[2] - }; - writeDescriptorSets[3] = { - .dstSet = m_descriptorSet2.get(), - .binding = 2, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[3] - }; - writeDescriptorSets[4] = { - .dstSet = m_presentDescriptorSet.get(), - .binding = 0, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[4] - }; + // Update Descriptors + { + ISampler::SParams samplerParams0 = { + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_FLOAT_OPAQUE_BLACK, + ISampler::ETF_LINEAR, + ISampler::ETF_LINEAR, + ISampler::ESMM_LINEAR, + 0u, + false, + ECO_ALWAYS + }; + auto sampler0 = m_device->createSampler(samplerParams0); + ISampler::SParams samplerParams1 = { + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + 
ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_INT_OPAQUE_BLACK, + ISampler::ETF_NEAREST, + ISampler::ETF_NEAREST, + ISampler::ESMM_NEAREST, + 0u, + false, + ECO_ALWAYS + }; + auto sampler1 = m_device->createSampler(samplerParams1); + + std::array writeDSInfos = {}; + writeDSInfos[0].desc = m_outImgView; + writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[1].desc = m_envMapView; + // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; + writeDSInfos[1].info.combinedImageSampler.sampler = sampler0; + writeDSInfos[1].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[2].desc = m_sequenceBufferView; + writeDSInfos[3].desc = m_scrambleView; + // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; + writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; + writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[4].desc = m_outImgView; + writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + std::array writeDescriptorSets = {}; + writeDescriptorSets[0] = { + .dstSet = m_descriptorSet0.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[0] + }; + writeDescriptorSets[1] = { + .dstSet = m_descriptorSet2.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[1] + }; + writeDescriptorSets[2] = { + .dstSet = m_descriptorSet2.get(), + .binding = 1, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[2] + }; + writeDescriptorSets[3] = { + 
.dstSet = m_descriptorSet2.get(), + .binding = 2, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[3] + }; + writeDescriptorSets[4] = { + .dstSet = m_presentDescriptorSet.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[4] + }; - m_device->updateDescriptorSets(writeDescriptorSets, {}); - } + m_device->updateDescriptorSets(writeDescriptorSets, {}); + } - // Create ui descriptors + // Create ui descriptors + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; { - using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; - { - IGPUSampler::SParams params; - params.AnisotropicFilter = 1u; - params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; - params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; - params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; - - m_ui.samplers.gui = m_device->createSampler(params); - m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); - } + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + + m_ui.samplers.gui = m_device->createSampler(params); + m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); + } - std::array, 69u> immutableSamplers; - for (auto& it : immutableSamplers) - it = smart_refctd_ptr(m_ui.samplers.scene); + std::array, 69u> immutableSamplers; + for (auto& it : immutableSamplers) + it = smart_refctd_ptr(m_ui.samplers.scene); - immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); + immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); - nbl::ext::imgui::UI::SCreationParameters params; + nbl::ext::imgui::UI::SCreationParameters params; - params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; - 
params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; - params.assetManager = m_assetMgr; - params.pipelineCache = nullptr; - params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); - params.renderpass = smart_refctd_ptr(renderpass); - params.streamingBuffer = nullptr; - params.subpassIx = 0u; - params.transfer = getTransferUpQueue(); - params.utilities = m_utils; - { - m_ui.manager = ext::imgui::UI::create(std::move(params)); + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; + params.assetManager = m_assetMgr; + params.pipelineCache = nullptr; + params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); + params.renderpass = smart_refctd_ptr(renderpass); + params.streamingBuffer = nullptr; + params.subpassIx = 0u; + params.transfer = getTransferUpQueue(); + params.utilities = m_utils; + { + m_ui.manager = ext::imgui::UI::create(std::move(params)); - // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources - const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); - const auto& params = m_ui.manager->getCreationParameters(); + // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + const auto& params = m_ui.manager->getCreationParameters(); - IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; - 
descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; - descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; - descriptorPoolInfo.maxSets = 1u; - descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; + IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; + descriptorPoolInfo.maxSets = 1u; + descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; - m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); - assert(m_guiDescriptorSetPool); + m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); + assert(m_guiDescriptorSetPool); - m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); - assert(m_ui.descriptorSet); - } + m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); + assert(m_ui.descriptorSet); } - m_ui.manager->registerListener( - [this]() -> void { - ImGuiIO& io = ImGui::GetIO(); + } + m_ui.manager->registerListener( + [this]() -> void { + ImGuiIO& io = ImGui::GetIO(); - m_camera.setProjectionMatrix([&]() + m_camera.setProjectionMatrix([&]() { static hlsl::float32_t4x4 projection; @@ -849,308 +869,308 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B return projection; }()); - ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + 
ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); - // create a window and insert the inspector - ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); - ImGui::Begin("Controls"); + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Controls"); - ImGui::SameLine(); + ImGui::SameLine(); - ImGui::Text("Camera"); + ImGui::Text("Camera"); - ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); - ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); - ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); - ImGui::ListBox("Shader", &PTPipline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); - ImGui::SliderInt("SPP", &spp, 1, MaxBufferSamples); - ImGui::SliderInt("Depth", &depth, 1, MaxBufferDimensions / 6); + ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); + ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); + ImGui::ListBox("Shader", &PTPipline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); + ImGui::SliderInt("SPP", &spp, 1, MaxBufferSamples); + ImGui::SliderInt("Depth", &depth, 1, MaxBufferDimensions / 6); - ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); - ImGui::End(); - } - ); - - // Set Camera - { - core::vectorSIMDf cameraPosition(0, 5, -10); - hlsl::float32_t4x4 proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix( - core::radians(60.0f), - float(WindowDimensions.x / WindowDimensions.y), - 0.01f, - 500.0f - ); - m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); + ImGui::End(); } + 
); - m_winMgr->setWindowSize(m_window.get(), WindowDimensions.x, WindowDimensions.y); - m_surface->recreateSwapchain(); - m_winMgr->show(m_window.get()); - m_oracle.reportBeginFrameRecord(); - m_camera.mapKeysToWASD(); - - return true; + // Set Camera + { + core::vectorSIMDf cameraPosition(0, 5, -10); + hlsl::float32_t4x4 proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix( + core::radians(60.0f), + float(WindowDimensions.x / WindowDimensions.y), + 0.01f, + 500.0f + ); + m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); } - bool updateGUIDescriptorSet() - { - // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout - static std::array descriptorInfo; - static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; + m_winMgr->setWindowSize(m_window.get(), WindowDimensions.x, WindowDimensions.y); + m_surface->recreateSwapchain(); + m_winMgr->show(m_window.get()); + m_oracle.reportBeginFrameRecord(); + m_camera.mapKeysToWASD(); - descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); + return true; + } - for (uint32_t i = 0; i < descriptorInfo.size(); ++i) - { - writes[i].dstSet = m_ui.descriptorSet.get(); - writes[i].binding = 0u; - writes[i].arrayElement = i; - writes[i].count = 1u; - } - writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; + bool updateGUIDescriptorSet() + { + // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout + static std::array descriptorInfo; + static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; - return m_device->updateDescriptorSets(writes, {}); - } + 
descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); - inline void workLoopBody() override + for (uint32_t i = 0; i < descriptorInfo.size(); ++i) + { + writes[i].dstSet = m_ui.descriptorSet.get(); + writes[i].binding = 0u; + writes[i].arrayElement = i; + writes[i].count = 1u; + } + writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; + + return m_device->updateDescriptorSets(writes, {}); + } + + inline void workLoopBody() override + { + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] + if (m_realFrameIx >= framesInFlight) { - // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. - const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); - // We block for semaphores for 2 reasons here: - // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] - // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. 
[MaxAcquiresInFlight] - if (m_realFrameIx >= framesInFlight) + const ISemaphore::SWaitInfo cbDonePending[] = { - const ISemaphore::SWaitInfo cbDonePending[] = { - { - .semaphore = m_semaphore.get(), - .value = m_realFrameIx + 1 - framesInFlight - } - }; - if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) - return; - } - const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight + } + }; + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - m_api->startCapture(); + m_api->startCapture(); - // CPU events - update(); + // CPU events + update(); - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[resourceIx].get(); + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[resourceIx].get(); - if (!keepRunning()) - return; + if (!keepRunning()) + return; - // render whole scene to offline frame buffer & submit + // render whole scene to offline frame buffer & submit + { + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + // disregard surface/swapchain transformation for now + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + PTPushConstant pc; + pc.invMVP = hlsl::inverse(viewProjectionMatrix); + pc.sampleCount = spp; + pc.depth = depth; + + // safe to proceed + // upload buffer data + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) { - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - // disregard surface/swapchain transformation for now - const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - PTPushConstant pc; - pc.invMVP = hlsl::inverse(viewProjectionMatrix); - pc.sampleCount = spp; - pc.depth = depth; 
- - // safe to proceed - // upload buffer data - cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - - // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::GENERAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } - - // cube envmap handle - { - auto pipeline = m_PTPipelines[PTPipline].get(); - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PTPushConstant), &pc); - cmdbuf->dispatch(1 + (WindowDimensions.x - 1) / DefaultWorkGroupSize, 1 + (WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u); - } - - // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = 
ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } - // TODO: tone mapping and stuff + // cube envmap handle + { + auto pipeline = m_PTPipelines[PTPipline].get(); + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PTPushConstant), &pc); + cmdbuf->dispatch(1 + (WindowDimensions.x - 1) / DefaultWorkGroupSize, 1 + (WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u); } - asset::SViewport viewport; + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) { - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 
0u; - viewport.width = WindowDimensions.x; - viewport.height = WindowDimensions.y; + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); } - cmdbuf->setViewport(0u, 1u, &viewport); + // TODO: tone mapping and stuff + } + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = WindowDimensions.x; + viewport.height = WindowDimensions.y; + } + cmdbuf->setViewport(0u, 1u, &viewport); - VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; - cmdbuf->setScissor(defaultScisors); - const VkRect2D currentRenderArea = - { - .offset = {0,0}, - .extent = {m_window->getWidth(),m_window->getHeight()} - }; - auto scRes = static_cast(m_surface->getSwapchainResources()); + VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; + cmdbuf->setScissor(defaultScisors); + + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + auto scRes = static_cast(m_surface->getSwapchainResources()); - // Upload m_outImg to swapchain + UI + // Upload m_outImg to swapchain + UI + { + const 
IGPUCommandBuffer::SRenderpassBeginInfo info = { - const IGPUCommandBuffer::SRenderpassBeginInfo info = - { - .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), - .colorClearValues = &clearColor, - .depthStencilClearValues = nullptr, - .renderArea = currentRenderArea - }; - nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearColor, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; - cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); - ext::FullScreenTriangle::recordDrawCall(cmdbuf); + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); - const auto uiParams = m_ui.manager->getCreationParameters(); - auto* uiPipeline = m_ui.manager->getPipeline(); - cmdbuf->bindGraphicsPipeline(uiPipeline); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); - m_ui.manager->render(cmdbuf, waitInfo); + const auto uiParams = m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cmdbuf->bindGraphicsPipeline(uiPipeline); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); + m_ui.manager->render(cmdbuf, waitInfo); - 
cmdbuf->endRenderPass(); - } + cmdbuf->endRenderPass(); + } - cmdbuf->end(); + cmdbuf->end(); + { + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = { - const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = { - { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT - } - }; + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } + }; + { { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = { - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = - { - {.cmdbuf = cmdbuf } - }; + {.cmdbuf = cmdbuf } + }; - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { { - { - .semaphore = m_currentImageAcquire.semaphore, - .value = m_currentImageAcquire.acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - const IQueue::SSubmitInfo infos[] = + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { { - { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = rendered - } - }; + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; - updateGUIDescriptorSet(); + updateGUIDescriptorSet(); - if (queue->submit(infos) != IQueue::RESULT::SUCCESS) - m_realFrameIx--; - } + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + m_realFrameIx--; } - - m_window->setCaption("[Nabla Engine] Computer Path Tracer"); - m_surface->present(m_currentImageAcquire.imageIndex, rendered); } - m_api->endCapture(); + + m_window->setCaption("[Nabla Engine] Computer Path Tracer"); + m_surface->present(m_currentImageAcquire.imageIndex, rendered); } + m_api->endCapture(); + } - inline bool keepRunning() override - { - if 
(m_surface->irrecoverable()) - return false; + inline bool keepRunning() override + { + if (m_surface->irrecoverable()) + return false; - return true; - } + return true; + } - inline bool onAppTerminated() override - { - return device_base_t::onAppTerminated(); - } + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } - inline void update() - { - m_camera.setMoveSpeed(moveSpeed); - m_camera.setRotateSpeed(rotateSpeed); + inline void update() + { + m_camera.setMoveSpeed(moveSpeed); + m_camera.setRotateSpeed(rotateSpeed); - static std::chrono::microseconds previousEventTimestamp{}; + static std::chrono::microseconds previousEventTimestamp{}; - m_inputSystem->getDefaultMouse(&mouse); - m_inputSystem->getDefaultKeyboard(&keyboard); + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); - auto updatePresentationTimestamp = [&]() + auto updatePresentationTimestamp = [&]() { m_currentImageAcquire = m_surface->acquireNextImage(); @@ -1161,17 +1181,17 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B return timestamp; }; - const auto nextPresentationTimestamp = updatePresentationTimestamp(); + const auto nextPresentationTimestamp = updatePresentationTimestamp(); - struct - { - std::vector mouse{}; - std::vector keyboard{}; - } capturedEvents; + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } capturedEvents; - m_camera.beginInputProcessing(nextPresentationTimestamp); - { - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + m_camera.beginInputProcessing(nextPresentationTimestamp); + { + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { m_camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl @@ -1188,7 +1208,7 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B } }, m_logger.get()); - keyboard.consumeEvents([&](const 
IKeyboardEventChannel::range_t& events) -> void + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { m_camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl @@ -1201,85 +1221,85 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B capturedEvents.keyboard.emplace_back(e); } }, m_logger.get()); - } - m_camera.endInputProcessing(nextPresentationTimestamp); + } + m_camera.endInputProcessing(nextPresentationTimestamp); - const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); - const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); - const auto cursorPosition = m_window->getCursorControl()->getPosition(); - const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); + const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); - const ext::imgui::UI::SUpdateParameters params = - { - .mousePosition = mousePosition, - .displaySize = { m_window->getWidth(), m_window->getHeight() }, - .mouseEvents = mouseEvents, - .keyboardEvents = keyboardEvents - }; + const ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), m_window->getHeight() }, + .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; - m_ui.manager->update(params); - } + m_ui.manager->update(params); + } - private: - smart_refctd_ptr 
m_window; - smart_refctd_ptr> m_surface; +private: + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; - // gpu resources - smart_refctd_ptr m_cmdPool; - std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTPipelines; - smart_refctd_ptr m_presentPipeline; - uint64_t m_realFrameIx = 0; - std::array, MaxFramesInFlight> m_cmdBufs; - ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; - smart_refctd_ptr m_descriptorSet0, m_descriptorSet2, m_presentDescriptorSet; + // gpu resources + smart_refctd_ptr m_cmdPool; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTPipelines; + smart_refctd_ptr m_presentPipeline; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + smart_refctd_ptr m_descriptorSet0, m_descriptorSet2, m_presentDescriptorSet; - core::smart_refctd_ptr m_guiDescriptorSetPool; + core::smart_refctd_ptr m_guiDescriptorSetPool; - // system resources - core::smart_refctd_ptr m_inputSystem; - InputSystem::ChannelReader mouse; - InputSystem::ChannelReader keyboard; + // system resources + core::smart_refctd_ptr m_inputSystem; + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; - // pathtracer resources - smart_refctd_ptr m_envMapView, m_scrambleView; - smart_refctd_ptr m_sequenceBufferView; - smart_refctd_ptr m_outImgView; + // pathtracer resources + smart_refctd_ptr m_envMapView, m_scrambleView; + smart_refctd_ptr m_sequenceBufferView; + smart_refctd_ptr m_outImgView; - // sync - smart_refctd_ptr m_semaphore; + // sync + smart_refctd_ptr m_semaphore; - // image upload resources - smart_refctd_ptr m_scratchSemaphore; - SIntendedSubmitInfo m_intendedSubmit; + // image upload resources + smart_refctd_ptr m_scratchSemaphore; + SIntendedSubmitInfo m_intendedSubmit; - struct C_UI - { - nbl::core::smart_refctd_ptr manager; + struct C_UI + { + nbl::core::smart_refctd_ptr manager; - struct - { - core::smart_refctd_ptr gui, scene; - } samplers; + struct + 
{ + core::smart_refctd_ptr gui, scene; + } samplers; - core::smart_refctd_ptr descriptorSet; - } m_ui; + core::smart_refctd_ptr descriptorSet; + } m_ui; - Camera m_camera; + Camera m_camera; - video::CDumbPresentationOracle m_oracle; + video::CDumbPresentationOracle m_oracle; - uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed + uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed - float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; - float viewWidth = 10.f; - float camYAngle = 165.f / 180.f * 3.14159f; - float camXAngle = 32.f / 180.f * 3.14159f; - int PTPipline = E_LIGHT_GEOMETRY::ELG_SPHERE; - int spp = 32; - int depth = 3; + float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + int PTPipline = E_LIGHT_GEOMETRY::ELG_SPHERE; + int spp = 32; + int depth = 3; - bool m_firstFrame = true; - IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; + bool m_firstFrame = true; + IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; }; NBL_MAIN_FUNC(ComputeShaderPathtracer) diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index ea8def7ba..405ac150b 100644 --- a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -37,6 +37,13 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso CompatibilityTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, 
_sharedInputCWD, _sharedOutputCWD) {} + virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = true; + return retval; + } + bool onAppInitialized(smart_refctd_ptr&& system) override { // since emulated_float64_t rounds to zero @@ -317,8 +324,19 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso params.layout = m_pplnLayout.get(); params.shader.entryPoint = "main"; params.shader.shader = shader.get(); + if (base.m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } if (!base.m_device->createComputePipelines(nullptr, { &params,1 }, &m_pipeline)) base.logFail("Failed to create pipelines (compile & link shaders)!\n"); + + if (base.m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = base.m_device->getPipelineExecutableReport(m_pipeline.get(), true); + base.m_logger->log("EF64Submitter Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } } // Allocate the memory @@ -975,8 +993,19 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso params.layout = m_pplnLayout.get(); params.shader.entryPoint = "main"; params.shader.shader = shader.get(); + if (base.m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } if (!base.m_device->createComputePipelines(nullptr, { &params,1 }, &m_pipeline)) base.logFail("Failed to create pipelines (compile & link shaders)!\n"); + + if (base.m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report =
base.m_device->getPipelineExecutableReport(m_pipeline.get(), true); + base.m_logger->log("EF64Benchmark Pipeline Executable Report:\n%s", ILogger::ELL_INFO, report.c_str()); + } } // Allocate the memory diff --git a/67_RayQueryGeometry/main.cpp b/67_RayQueryGeometry/main.cpp index 464583352..f1fbd73cd 100644 --- a/67_RayQueryGeometry/main.cpp +++ b/67_RayQueryGeometry/main.cpp @@ -32,6 +32,7 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built { auto retval = device_base_t::getPreferredDeviceFeatures(); retval.accelerationStructureHostCommands = true; + retval.pipelineExecutableInfo = true; return retval; } @@ -175,8 +176,19 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built params.layout = pipelineLayout.get(); params.shader.shader = shader.get(); params.shader.entryPoint = "main"; + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } if (!m_device->createComputePipelines(nullptr, { &params, 1 }, &renderPipeline)) return logFail("Failed to create compute pipeline"); + + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = m_device->getPipelineExecutableReport(renderPipeline.get(), true); + m_logger->log("Ray Query Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } } // write descriptors diff --git a/70_FLIPFluids/main.cpp b/70_FLIPFluids/main.cpp index e7334bff8..32f7a64a9 100644 --- a/70_FLIPFluids/main.cpp +++ b/70_FLIPFluids/main.cpp @@ -181,6 +181,13 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso inline FLIPFluidsApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD,
_sharedOutputCWD) {} + inline SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = true; + return retval; + } + inline core::vector getSurfaces() const override { if (!m_surface) @@ -374,8 +381,18 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso params.layout = pipelineLayout.get(); params.shader.entryPoint = entryPoint; params.shader.shader = shader.get(); - + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } m_device->createComputePipelines(nullptr, { &params,1 }, &pipeline); + + if (m_device->getEnabledFeatures().pipelineExecutableInfo && pipeline) + { + auto report = m_device->getPipelineExecutableReport(pipeline.get(), true); + m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, ShaderKey.value, report.c_str()); + } }; { @@ -627,16 +644,38 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso params.layout = pipelineLayout.get(); params.shader.entryPoint = "iterateDiffusion"; params.shader.shader = diffusion.get(); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } + if (!m_device->createComputePipelines(nullptr, { &params,1 }, &m_iterateDiffusionPipeline)) + m_logger->log("Failed to create iterateDiffusion pipeline!\n"); - m_device->createComputePipelines(nullptr, { &params,1 }, &m_iterateDiffusionPipeline); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = m_device->getPipelineExecutableReport(m_iterateDiffusionPipeline.get(), true); +
m_logger->log("iterateDiffusion Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } } { IGPUComputePipeline::SCreationParams params = {}; params.layout = pipelineLayout.get(); params.shader.entryPoint = "applyDiffusion"; params.shader.shader = diffusion.get(); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } + if (!m_device->createComputePipelines(nullptr, { &params,1 }, &m_diffusionPipeline)) + m_logger->log("Failed to create applyDiffusion pipeline!\n"); - m_device->createComputePipelines(nullptr, { &params,1 }, &m_diffusionPipeline); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = m_device->getPipelineExecutableReport(m_diffusionPipeline.get(), true); + m_logger->log("applyDiffusion Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } } { diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index f6b64c5ca..c38b83470 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -51,6 +51,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui { auto retval = device_base_t::getPreferredDeviceFeatures(); retval.accelerationStructureHostCommands = true; + retval.pipelineExecutableInfo = true; return retval; } @@ -102,40 +103,40 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui // Load Custom Shader auto loadPrecompiledShader = [&]() -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; // virtual root + auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if
(assets.empty()) + return nullptr; + + // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader + auto shader = IAsset::castDown(assets[0]); + if (!shader) { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = "app_resources"; // virtual root - auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); - auto assetBundle = m_assetMgr->getAsset(key.data(), lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto shader = IAsset::castDown(assets[0]); - if (!shader) - { - m_logger->log("Failed to load a precompiled shader.", ILogger::ELL_ERROR); - return nullptr; - } + m_logger->log("Failed to load a precompiled shader.", ILogger::ELL_ERROR); + return nullptr; + } - return shader; - }; + return shader; + }; // load shaders - const auto raygenShader = loadPrecompiledShader.operator()<"raytrace_rgen">(); // "app_resources/raytrace.rgen.hlsl" - const auto closestHitShader = loadPrecompiledShader.operator()<"raytrace_rchit">(); // "app_resources/raytrace.rchit.hlsl" - const auto proceduralClosestHitShader = loadPrecompiledShader.operator()<"raytrace_procedural_rchit">(); // "app_resources/raytrace_procedural.rchit.hlsl" - const auto intersectionHitShader = loadPrecompiledShader.operator()<"raytrace_rint">(); // "app_resources/raytrace.rint.hlsl" - const auto anyHitShaderColorPayload = loadPrecompiledShader.operator()<"raytrace_rahit">(); // "app_resources/raytrace.rahit.hlsl" - const auto anyHitShaderShadowPayload = loadPrecompiledShader.operator()<"raytrace_shadow_rahit">(); // "app_resources/raytrace_shadow.rahit.hlsl" - const auto missShader = loadPrecompiledShader.operator()<"raytrace_rmiss">(); // "app_resources/raytrace.rmiss.hlsl" - const auto missShadowShader = loadPrecompiledShader.operator()<"raytrace_shadow_rmiss">(); // "app_resources/raytrace_shadow.rmiss.hlsl" - 
const auto directionalLightCallShader = loadPrecompiledShader.operator()<"light_directional_rcall">(); // "app_resources/light_directional.rcall.hlsl" - const auto pointLightCallShader = loadPrecompiledShader.operator()<"light_point_rcall">(); // "app_resources/light_point.rcall.hlsl" - const auto spotLightCallShader = loadPrecompiledShader.operator()<"light_spot_rcall">(); // "app_resources/light_spot.rcall.hlsl" - const auto fragmentShader = loadPrecompiledShader.operator()<"present_frag">(); // "app_resources/present.frag.hlsl" + const auto raygenShader = loadPrecompiledShader.operator() < "raytrace_rgen" > (); // "app_resources/raytrace.rgen.hlsl" + const auto closestHitShader = loadPrecompiledShader.operator() < "raytrace_rchit" > (); // "app_resources/raytrace.rchit.hlsl" + const auto proceduralClosestHitShader = loadPrecompiledShader.operator() < "raytrace_procedural_rchit" > (); // "app_resources/raytrace_procedural.rchit.hlsl" + const auto intersectionHitShader = loadPrecompiledShader.operator() < "raytrace_rint" > (); // "app_resources/raytrace.rint.hlsl" + const auto anyHitShaderColorPayload = loadPrecompiledShader.operator() < "raytrace_rahit" > (); // "app_resources/raytrace.rahit.hlsl" + const auto anyHitShaderShadowPayload = loadPrecompiledShader.operator() < "raytrace_shadow_rahit" > (); // "app_resources/raytrace_shadow.rahit.hlsl" + const auto missShader = loadPrecompiledShader.operator() < "raytrace_rmiss" > (); // "app_resources/raytrace.rmiss.hlsl" + const auto missShadowShader = loadPrecompiledShader.operator() < "raytrace_shadow_rmiss" > (); // "app_resources/raytrace_shadow.rmiss.hlsl" + const auto directionalLightCallShader = loadPrecompiledShader.operator() < "light_directional_rcall" > (); // "app_resources/light_directional.rcall.hlsl" + const auto pointLightCallShader = loadPrecompiledShader.operator() < "light_point_rcall" > (); // "app_resources/light_point.rcall.hlsl" + const auto spotLightCallShader = 
loadPrecompiledShader.operator() < "light_spot_rcall" > (); // "app_resources/light_spot.rcall.hlsl" + const auto fragmentShader = loadPrecompiledShader.operator() < "present_frag" > (); // "app_resources/present.frag.hlsl" m_semaphore = m_device->createSemaphore(m_realFrameIx); if (!m_semaphore) @@ -232,18 +233,18 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui { const auto bindings = std::array{ ICPUDescriptorSetLayout::SBinding{ - .binding = 0, - .type = asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, - .count = 1, + .binding = 0, + .type = asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, + .count = 1, }, { - .binding = 1, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, - .count = 1, + .binding = 1, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, + .count = 1, } }; auto cpuDescriptorSetLayout = core::make_smart_refctd_ptr(bindings); @@ -256,11 +257,19 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto cpuPipelineLayout = core::make_smart_refctd_ptr(std::span({ pcRange }), std::move(cpuDescriptorSetLayout), nullptr, nullptr, nullptr); const auto pipeline = ICPURayTracingPipeline::create(cpuPipelineLayout.get()); - pipeline->getCachedCreationParams() = { - .flags = IGPURayTracingPipeline::SCreationParams::FLAGS::NO_NULL_INTERSECTION_SHADERS, - .maxRecursionDepth = 1, - .dynamicStackSize = true, - }; + { + 
core::bitflag flags = IGPURayTracingPipeline::SCreationParams::FLAGS::NO_NULL_INTERSECTION_SHADERS; + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + flags |= IGPURayTracingPipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + flags |= IGPURayTracingPipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } + pipeline->getCachedCreationParams() = { + .flags = flags, + .maxRecursionDepth = 1, + .dynamicStackSize = true, + }; + } pipeline->getSpecInfos(ESS_RAYGEN)[0] = { .shader = raygenShader, @@ -287,7 +296,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto intersectionSpecs = pipeline->getSpecInfos(ESS_INTERSECTION); closestHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = { .shader = closestHitShader, .entryPoint = "main" }; - anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = {.shader = anyHitShaderColorPayload, .entryPoint = "main"}; + anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = { .shader = anyHitShaderColorPayload, .entryPoint = "main" }; anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_OCCLUSION)] = { .shader = anyHitShaderShadowPayload, .entryPoint = "main" }; @@ -295,7 +304,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = anyHitShaderColorPayload, .entryPoint = "main" }; intersectionSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = intersectionHitShader, .entryPoint = "main" }; - anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = {.shader = anyHitShaderShadowPayload, .entryPoint = "main" }; + anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = { .shader = anyHitShaderShadowPayload, .entryPoint = "main" }; intersectionSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = { .shader = intersectionHitShader, .entryPoint = "main" }; pipeline->getSpecInfoVector(ESS_CALLABLE)->resize(ELT_COUNT); @@ -304,9 
+313,9 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui callableGroups[ELT_POINT] = { .shader = pointLightCallShader, .entryPoint = "main" }; callableGroups[ELT_SPOT] = { .shader = spotLightCallShader, .entryPoint = "main" }; - smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); - CAssetConverter::SInputs inputs = {}; - inputs.logger = m_logger.get(); + smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); + CAssetConverter::SInputs inputs = {}; + inputs.logger = m_logger.get(); const std::array cpuPipelines = { pipeline.get() }; std::get>(inputs.assets) = cpuPipelines; @@ -314,7 +323,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui CAssetConverter::SConvertParams params = {}; params.utilities = m_utils.get(); - auto reservation = converter->reserve(inputs); + auto reservation = converter->reserve(inputs); auto future = reservation.convert(params); if (future.copy() != IQueue::RESULT::SUCCESS) { @@ -325,10 +334,16 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui // assign gpu objects to output auto&& pipelines = reservation.getGPUObjects(); m_rayTracingPipeline = pipelines[0].value; + + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = m_device->getPipelineExecutableReport(m_rayTracingPipeline.get(), true); + m_logger->log("Ray Tracing Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } const auto* gpuDsLayout = m_rayTracingPipeline->getLayout()->getDescriptorSetLayouts()[0]; const std::array dsLayoutPtrs = { gpuDsLayout }; - m_rayTracingDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, std::span(dsLayoutPtrs.begin(), dsLayoutPtrs.end())); + m_rayTracingDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, 
std::span(dsLayoutPtrs.begin(), dsLayoutPtrs.end())); m_rayTracingDs = m_rayTracingDsPool->createDescriptorSet(core::smart_refctd_ptr(gpuDsLayout)); calculateRayTracingStackSize(m_rayTracingPipeline); @@ -661,9 +676,9 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui cmdbuf->pushConstants(m_rayTracingPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, 0, sizeof(SPushConstants), &pc); cmdbuf->bindDescriptorSets(EPBP_RAY_TRACING, m_rayTracingPipeline->getLayout(), 0, 1, &m_rayTracingDs.get()); if (m_useIndirectCommand) - cmdbuf->traceRaysIndirect({.offset=0,.buffer=m_indirectBuffer}); + cmdbuf->traceRaysIndirect({ .offset = 0,.buffer = m_indirectBuffer }); else - cmdbuf->traceRays(m_shaderBindingTable,WIN_W,WIN_H,1); + cmdbuf->traceRays(m_shaderBindingTable, WIN_W, WIN_H, 1); } // pipeline barrier @@ -1058,7 +1073,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui }; const auto planeRotation = hlsl::math::quaternion::create(hlsl::float32_t3(1.f, 0.f, 0.f), core::radians(-90.0f)); - hlsl::float32_t3x4 planeTransform = hlsl::math::linalg::promote_affine<3,4,3,3>(hlsl::_static_cast(planeRotation)); + hlsl::float32_t3x4 planeTransform = hlsl::math::linalg::promote_affine<3, 4, 3, 3>(hlsl::_static_cast(planeRotation)); // triangles geometries auto geometryCreator = make_smart_refctd_ptr(); @@ -1146,7 +1161,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto blasCount = std::size(cpuObjects) + 1; const auto proceduralBlasIdx = std::size(cpuObjects); - std::array, std::size(cpuObjects)+1u> cpuBlasList; + std::array, std::size(cpuObjects) + 1u> cpuBlasList; for (uint32_t i = 0; i < blasCount; i++) { auto& blas = cpuBlasList[i]; @@ -1159,7 +1174,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui auto& aabb = aabbs->front(); auto& primCount = primitiveCounts->front(); - + primCount = NumberOfProceduralGeometries; 
aabb.data = { .offset = 0, .buffer = cpuProcBuffer }; aabb.stride = sizeof(IGPUBottomLevelAccelerationStructure::AABB_t); @@ -1264,7 +1279,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui for (uint32_t i = 0; i < cpuObjects.size(); i++) { tmpGeometries[i] = cpuObjects[i].data.get(); - tmpGeometryPatches[i].indexBufferUsages= IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; + tmpGeometryPatches[i].indexBufferUsages = IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; } std::get>(inputs.assets) = tmpTlas; @@ -1275,7 +1290,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui auto reservation = converter->reserve(inputs); { - auto prepass = [&](const auto & references) -> bool + auto prepass = [&](const auto& references) -> bool { auto objects = reservation.getGPUObjects(); uint32_t counter = {}; @@ -1372,8 +1387,8 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui return false; } // 2 submits, BLAS build, TLAS build, DO NOT ADD COMPACTIONS IN THIS EXAMPLE! - if (compute.getFutureScratchSemaphore().value>3) - m_logger->log("Overflow submitted on Compute Queue despite using ReBAR (no transfer submits or usage of staging buffer) and providing a AS Build Scratch Buffer of correctly queried max size!",system::ILogger::ELL_ERROR); + if (compute.getFutureScratchSemaphore().value > 3) + m_logger->log("Overflow submitted on Compute Queue despite using ReBAR (no transfer submits or usage of staging buffer) and providing a AS Build Scratch Buffer of correctly queried max size!", system::ILogger::ELL_ERROR); // assign gpu objects to output auto&& tlases = reservation.getGPUObjects(); @@ -1395,9 +1410,9 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui const auto& normalView = gpuPolygon->getNormalView(); const uint64_t normalBufferAddress = normalView ? 
normalView.src.buffer->getDeviceAddress() + normalView.src.offset : 0; - auto normalType = NT_R32G32B32_SFLOAT; - if (normalView && normalView.composed.format == EF_R8G8B8A8_SNORM) - normalType = NT_R8G8B8A8_SNORM; + auto normalType = NT_R32G32B32_SFLOAT; + if (normalView && normalView.composed.format == EF_R8G8B8A8_SNORM) + normalType = NT_R8G8B8A8_SNORM; const auto& indexBufferBinding = gpuTriangles.indexData; auto& geomInfo = geomInfos[i]; diff --git a/72_CooperativeBinarySearch/main.cpp b/72_CooperativeBinarySearch/main.cpp index 81724c1b8..0a00c52f2 100644 --- a/72_CooperativeBinarySearch/main.cpp +++ b/72_CooperativeBinarySearch/main.cpp @@ -40,6 +40,13 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp CooperativeBinarySearch(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = true; + return retval; + } + bool onAppInitialized(smart_refctd_ptr&& system) override { // Remember to call the base class initialization! 
@@ -94,8 +101,19 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp params.layout = layout.get(); params.shader.shader = shader.get(); params.shader.entryPoint = "main"; + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) return logFail("Failed to create compute pipeline!\n"); + + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = m_device->getPipelineExecutableReport(m_pipeline.get(), true); + m_logger->log("Cooperative Binary Search Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + } } const size_t sizes[2] = {sizeof(TestCaseIndices),sizeof(uint32_t)*totalValues}; From 97a8695d021e817433f38abcf98fb0294fd7abd4 Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Sun, 22 Feb 2026 22:58:13 +0300 Subject: [PATCH 2/3] removed from 67, used system::to_string, reverted example 30's formatting --- 28_FFTBloom/main.cpp | 4 +- 29_Arithmetic2Bench/main.cpp | 2 +- 30_ComputeShaderPathTracer/main.cpp | 1959 +++++++++++++-------------- 64_EmulatedFloatTest/main.cpp | 6 +- 67_RayQueryGeometry/main.cpp | 12 - 70_FLIPFluids/main.cpp | 6 +- 71_RayTracingPipeline/main.cpp | 2 +- 72_CooperativeBinarySearch/main.cpp | 2 +- 8 files changed, 990 insertions(+), 1003 deletions(-) diff --git a/28_FFTBloom/main.cpp b/28_FFTBloom/main.cpp index 758bf9e50..f608755b1 100644 --- a/28_FFTBloom/main.cpp +++ b/28_FFTBloom/main.cpp @@ -753,7 +753,7 @@ class FFTBloomApp final : public SimpleWindowedApplication, public BuiltinResour const char* kernelNames[] = {"Kernel First Axis FFT", "Kernel Second Axis FFT", "Kernel Spectrum Normalize"}; for (auto i = 0u; i < 3; i++) { - auto report = m_device->getPipelineExecutableReport(pipelines[i].get(), 
true); + auto report = system::to_string(m_device->getPipelineExecutableProperties(pipelines[i].get(), true)); m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, kernelNames[i], report.c_str()); } } @@ -971,7 +971,7 @@ class FFTBloomApp final : public SimpleWindowedApplication, public BuiltinResour const char* imageNames[] = {"Image First Axis FFT", "FFT Convolve IFFT", "Image First Axis IFFT"}; for (auto i = 0u; i < 3; i++) { - auto report = m_device->getPipelineExecutableReport(pipelines[i].get(), true); + auto report = system::to_string(m_device->getPipelineExecutableProperties(pipelines[i].get(), true)); m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, imageNames[i], report.c_str()); } } diff --git a/29_Arithmetic2Bench/main.cpp b/29_Arithmetic2Bench/main.cpp index b37a01b7c..ec2cf3a1b 100644 --- a/29_Arithmetic2Bench/main.cpp +++ b/29_Arithmetic2Bench/main.cpp @@ -526,7 +526,7 @@ class ArithmeticBenchApp final : public examples::SimpleWindowedApplication, pub if (m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = m_device->getPipelineExecutableReport(pipeline.get(), true); + auto report = system::to_string(m_device->getPipelineExecutableProperties(pipeline.get(), true)); m_logger->log("Arithmetic Bench Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } return pipeline; diff --git a/30_ComputeShaderPathTracer/main.cpp b/30_ComputeShaderPathTracer/main.cpp index ba0035df1..a2cc4f655 100644 --- a/30_ComputeShaderPathTracer/main.cpp +++ b/30_ComputeShaderPathTracer/main.cpp @@ -32,290 +32,290 @@ struct PTPushConstant { // TODO: Do buffer creation using assConv class ComputeShaderPathtracer final : public SimpleWindowedApplication, public BuiltinResourcesApplication { - using device_base_t = SimpleWindowedApplication; - using asset_base_t = BuiltinResourcesApplication; - using clock_t = std::chrono::steady_clock; - - enum E_LIGHT_GEOMETRY : uint8_t - { - 
ELG_SPHERE, - ELG_TRIANGLE, - ELG_RECTANGLE, - ELG_COUNT - }; - - constexpr static inline uint32_t2 WindowDimensions = { 1280, 720 }; - constexpr static inline uint32_t MaxFramesInFlight = 5; - constexpr static inline clock_t::duration DisplayImageDuration = std::chrono::milliseconds(900); - constexpr static inline uint32_t DefaultWorkGroupSize = 16u; - constexpr static inline uint32_t MaxDescriptorCount = 256u; - constexpr static inline uint32_t MaxDepthLog2 = 4u; // 5 - constexpr static inline uint32_t MaxSamplesLog2 = 10u; // 18 - constexpr static inline uint32_t MaxBufferDimensions = 3u << MaxDepthLog2; - constexpr static inline uint32_t MaxBufferSamples = 1u << MaxSamplesLog2; - constexpr static inline uint8_t MaxUITextureCount = 1u; - static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; - static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; - static inline std::array PTShaderPaths = { "app_resources/litBySphere.comp", "app_resources/litByTriangle.comp", "app_resources/litByRectangle.comp" }; - static inline std::string PresentShaderPath = "app_resources/present.frag.hlsl"; - - const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { - "ELG_SPHERE", - "ELG_TRIANGLE", - "ELG_RECTANGLE" - }; - -public: - inline ComputeShaderPathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} - - inline bool isComputeOnly() const override { return false; } - - virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override - { - auto retval = device_base_t::getPreferredDeviceFeatures(); - retval.pipelineExecutableInfo = true; - return retval; - } - - inline core::vector getSurfaces() const override - { - if (!m_surface) + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; + using clock_t = 
std::chrono::steady_clock; + + enum E_LIGHT_GEOMETRY : uint8_t { - { - auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); - IWindow::SCreationParams params = {}; - params.callback = core::make_smart_refctd_ptr(); - params.width = WindowDimensions.x; - params.height = WindowDimensions.y; - params.x = 32; - params.y = 32; - params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; - params.windowCaption = "ComputeShaderPathtracer"; - params.callback = windowCallback; - const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); - } + ELG_SPHERE, + ELG_TRIANGLE, + ELG_RECTANGLE, + ELG_COUNT + }; - auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); - const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); - } + constexpr static inline uint32_t2 WindowDimensions = { 1280, 720 }; + constexpr static inline uint32_t MaxFramesInFlight = 5; + constexpr static inline clock_t::duration DisplayImageDuration = std::chrono::milliseconds(900); + constexpr static inline uint32_t DefaultWorkGroupSize = 16u; + constexpr static inline uint32_t MaxDescriptorCount = 256u; + constexpr static inline uint32_t MaxDepthLog2 = 4u; // 5 + constexpr static inline uint32_t MaxSamplesLog2 = 10u; // 18 + constexpr static inline uint32_t MaxBufferDimensions = 3u << MaxDepthLog2; + constexpr static inline uint32_t MaxBufferSamples = 1u << MaxSamplesLog2; + constexpr static inline uint8_t MaxUITextureCount = 1u; + static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; + static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; + static inline std::array PTShaderPaths = { "app_resources/litBySphere.comp", "app_resources/litByTriangle.comp", "app_resources/litByRectangle.comp" }; + static inline std::string PresentShaderPath = "app_resources/present.frag.hlsl"; + + const char* 
shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { + "ELG_SPHERE", + "ELG_TRIANGLE", + "ELG_RECTANGLE" + }; - if (m_surface) - return { {m_surface->getSurface()/*,EQF_NONE*/} }; + public: + inline ComputeShaderPathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} - return {}; - } + inline bool isComputeOnly() const override { return false; } - inline bool onAppInitialized(smart_refctd_ptr&& system) override - { - // Init systems + virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override { - m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.pipelineExecutableInfo = true; + return retval; + } - // Remember to call the base class initialization! - if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - if (!asset_base_t::onAppInitialized(std::move(system))) - return false; + inline core::vector getSurfaces() const override + { + if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = WindowDimensions.x; + params.height = WindowDimensions.y; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "ComputeShaderPathtracer"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); + } - m_semaphore = 
m_device->createSemaphore(m_realFrameIx); + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; - if (!m_semaphore) - return logFail("Failed to create semaphore!"); + return {}; } - // Create renderpass and init surface - nbl::video::IGPURenderpass* renderpass; + inline bool onAppInitialized(smart_refctd_ptr&& system) override { - ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; - if (!swapchainParams.deduceFormat(m_physicalDevice)) - return logFail("Could not choose a Surface Format for the Swapchain!"); + // Init systems + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + + if (!m_semaphore) + return logFail("Failed to create semaphore!"); + } - const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + // Create renderpass and init surface + nbl::video::IGPURenderpass* renderpass; { + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { - .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }, - { - .srcSubpass = 0, - .dstSubpass = 
IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; - auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); - renderpass = scResources->getRenderpass(); + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + renderpass = scResources->getRenderpass(); - if (!renderpass) - return logFail("Failed to create Renderpass!"); + if (!renderpass) + return logFail("Failed to create Renderpass!"); - auto gQueue = getGraphicsQueue(); - if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) - return logFail("Could not create Window & Surface or initialize the Surface!"); - } + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + } - // image upload utils - { - m_scratchSemaphore = m_device->createSemaphore(0); - if 
(!m_scratchSemaphore) - return logFail("Could not create Scratch Semaphore"); - m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); - // we don't want to overcomplicate the example with multi-queue - m_intendedSubmit.queue = getGraphicsQueue(); - // wait for nothing before upload - m_intendedSubmit.waitSemaphores = {}; - m_intendedSubmit.waitSemaphores = {}; - // fill later - m_intendedSubmit.scratchCommandBuffers = {}; - m_intendedSubmit.scratchSemaphore = { - .semaphore = m_scratchSemaphore.get(), - .value = 0, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS - }; - } + // image upload utils + { + m_scratchSemaphore = m_device->createSemaphore(0); + if (!m_scratchSemaphore) + return logFail("Could not create Scratch Semaphore"); + m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); + // we don't want to overcomplicate the example with multi-queue + m_intendedSubmit.queue = getGraphicsQueue(); + // wait for nothing before upload + m_intendedSubmit.waitSemaphores = {}; + m_intendedSubmit.waitSemaphores = {}; + // fill later + m_intendedSubmit.scratchCommandBuffers = {}; + m_intendedSubmit.scratchSemaphore = { + .semaphore = m_scratchSemaphore.get(), + .value = 0, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + } - // Create command pool and buffers - { - auto gQueue = getGraphicsQueue(); - m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!m_cmdPool) - return logFail("Couldn't create Command Pool!"); + // Create command pool and buffers + { + auto gQueue = getGraphicsQueue(); + m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); - if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), MaxFramesInFlight })) - return logFail("Couldn't create Command Buffer!"); - } + if 
(!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), MaxFramesInFlight })) + return logFail("Couldn't create Command Buffer!"); + } - ISampler::SParams samplerParams = { - .AnisotropicFilter = 0 - }; - auto defaultSampler = m_device->createSampler(samplerParams); + ISampler::SParams samplerParams = { + .AnisotropicFilter = 0 + }; + auto defaultSampler = m_device->createSampler(samplerParams); - // Create descriptors and pipeline for the pathtracer - { - auto convertDSLayoutCPU2GPU = [&](smart_refctd_ptr cpuLayout) { - auto converter = CAssetConverter::create({ .device = m_device.get() }); - CAssetConverter::SInputs inputs = {}; - inputs.readCache = converter.get(); - inputs.logger = m_logger.get(); - CAssetConverter::SConvertParams params = {}; - params.utilities = m_utils.get(); - - std::get>(inputs.assets) = { &cpuLayout.get(),1 }; - // don't need to assert that we don't need to provide patches since layouts are not patchable - //assert(true); - auto reservation = converter->reserve(inputs); - // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable - auto gpuLayout = reservation.getGPUObjects().front().value; - if (!gpuLayout) { - m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); - std::exit(-1); - } + // Create descriptors and pipeline for the pathtracer + { + auto convertDSLayoutCPU2GPU = [&](smart_refctd_ptr cpuLayout) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuLayout.get(),1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the 
`smart_refctd_ptr` copyable + auto gpuLayout = reservation.getGPUObjects().front().value; + if (!gpuLayout) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); + std::exit(-1); + } - return gpuLayout; - }; - auto convertDSCPU2GPU = [&](smart_refctd_ptr cpuDS) { - auto converter = CAssetConverter::create({ .device = m_device.get() }); - CAssetConverter::SInputs inputs = {}; - inputs.readCache = converter.get(); - inputs.logger = m_logger.get(); - CAssetConverter::SConvertParams params = {}; - params.utilities = m_utils.get(); - - std::get>(inputs.assets) = { &cpuDS.get(), 1 }; - // don't need to assert that we don't need to provide patches since layouts are not patchable - //assert(true); - auto reservation = converter->reserve(inputs); - // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable - auto gpuDS = reservation.getGPUObjects().front().value; - if (!gpuDS) { - m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); - std::exit(-1); - } + return gpuLayout; + }; + auto convertDSCPU2GPU = [&](smart_refctd_ptr cpuDS) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuDS.get(), 1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuDS = reservation.getGPUObjects().front().value; + if (!gpuDS) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); + std::exit(-1); + } - return gpuDS; - }; + return gpuDS; + }; - std::array descriptorSet0Bindings = {}; - std::array 
descriptorSet3Bindings = {}; - std::array presentDescriptorSetBindings; - - descriptorSet0Bindings[0] = { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr - }; - descriptorSet3Bindings[0] = { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr - }; - descriptorSet3Bindings[1] = { - .binding = 1u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr - }; - descriptorSet3Bindings[2] = { - .binding = 2u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1u, - .immutableSamplers = nullptr - }; - presentDescriptorSetBindings[0] = { - .binding = 0u, - .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - .immutableSamplers = &defaultSampler - }; + std::array descriptorSet0Bindings = {}; + std::array descriptorSet3Bindings = {}; + std::array presentDescriptorSetBindings; + + descriptorSet0Bindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = 
nullptr + }; + descriptorSet3Bindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[1] = { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[2] = { + .binding = 2u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + presentDescriptorSetBindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + .immutableSamplers = &defaultSampler + }; - auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); - auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); + auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); + auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); - auto gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); - auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); - auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); + auto gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); + auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); + auto 
gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); - auto cpuDescriptorSet0 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout0)); - auto cpuDescriptorSet2 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout2)); + auto cpuDescriptorSet0 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout0)); + auto cpuDescriptorSet2 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout2)); - m_descriptorSet0 = convertDSCPU2GPU(cpuDescriptorSet0); - m_descriptorSet2 = convertDSCPU2GPU(cpuDescriptorSet2); + m_descriptorSet0 = convertDSCPU2GPU(cpuDescriptorSet0); + m_descriptorSet2 = convertDSCPU2GPU(cpuDescriptorSet2); - smart_refctd_ptr presentDSPool; - { - const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; - const uint32_t setCounts[] = { 1u }; - presentDSPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - } - m_presentDescriptorSet = presentDSPool->createDescriptorSet(gpuPresentDescriptorSetLayout); + smart_refctd_ptr presentDSPool; + { + const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; + const uint32_t setCounts[] = { 1u }; + presentDSPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + } + m_presentDescriptorSet = presentDSPool->createDescriptorSet(gpuPresentDescriptorSetLayout); - // Create Shaders - auto loadAndCompileShader = [&](std::string pathToShader) + // Create Shaders + auto loadAndCompileShader = [&](std::string pathToShader) { IAssetLoader::SAssetLoadParams lp = {}; lp.workingDirectory = localInputCWD; @@ -341,256 +341,255 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B return shader; }; - // Create compute pipelines - { - for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) { - auto ptShader = 
loadAndCompileShader(PTShaderPaths[index]); - const nbl::asset::SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .offset = 0, - .size = sizeof(PTPushConstant) - }; - auto ptPipelineLayout = m_device->createPipelineLayout( - { &pcRange, 1 }, - core::smart_refctd_ptr(gpuDescriptorSetLayout0), - nullptr, - core::smart_refctd_ptr(gpuDescriptorSetLayout2), - nullptr - ); - if (!ptPipelineLayout) { - return logFail("Failed to create Pathtracing pipeline layout"); - } - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.cached.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + // Create compute pipelines + { + for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) { + auto ptShader = loadAndCompileShader(PTShaderPaths[index]); + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(PTPushConstant) + }; + auto ptPipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0), + nullptr, + core::smart_refctd_ptr(gpuDescriptorSetLayout2), + nullptr + ); + if (!ptPipelineLayout) { + return logFail("Failed to create Pathtracing pipeline layout"); + } - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; - params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; - } + IGPUComputePipeline::SCreationParams params = {}; + params.layout = ptPipelineLayout.get(); + params.shader.shader = ptShader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if 
(m_device->getEnabledFeatures().pipelineExecutableInfo) + { + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; + params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; + } - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTPipelines.data() + index)) { - return logFail("Failed to create compute pipeline!\n"); - } + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTPipelines.data() + index)) { + return logFail("Failed to create compute pipeline!\n"); + } - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - auto report = m_device->getPipelineExecutableReport(m_PTPipelines[index].get(), true); - m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, PTShaderPaths[index].c_str(), report.c_str()); + if (m_device->getEnabledFeatures().pipelineExecutableInfo) + { + auto report = system::to_string(m_device->getPipelineExecutableProperties(m_PTPipelines[index].get(), true)); + m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, PTShaderPaths[index].c_str(), report.c_str()); + } } } - } - // Create graphics pipeline - { - auto scRes = static_cast(m_surface->getSwapchainResources()); - ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); - if (!fsTriProtoPPln) - return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); - - // Load Fragment Shader - auto fragmentShader = loadAndCompileShader(PresentShaderPath); - if (!fragmentShader) - return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); - - const IGPUPipelineBase::SShaderSpecInfo fragSpec = { - .shader = fragmentShader.get(), - .entryPoint = "main", - }; + // Create graphics pipeline + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), 
m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Fragment Shader + auto fragmentShader = loadAndCompileShader(PresentShaderPath); + if (!fragmentShader) + return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), + .entryPoint = "main", + }; - auto presentLayout = m_device->createPipelineLayout( - {}, - core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), - nullptr, - nullptr, - nullptr - ); - m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); - if (!m_presentPipeline) - return logFail("Could not create Graphics Pipeline!"); + auto presentLayout = m_device->createPipelineLayout( + {}, + core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + } } - } - - // load CPUImages and convert to GPUImages - smart_refctd_ptr envMap, scrambleMap; - { - auto convertImgCPU2GPU = [&](std::span cpuImgs) { - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[0].get(); - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - std::array commandBufferInfo = { cmdbuf }; - core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); - imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); - auto converter = CAssetConverter::create({ .device = m_device.get() }); - // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. 
- struct SInputs final : CAssetConverter::SInputs - { - // we also need to override this to have concurrent sharing - inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override + // load CPUImages and convert to GPUImages + smart_refctd_ptr envMap, scrambleMap; + { + auto convertImgCPU2GPU = [&](std::span cpuImgs) { + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + std::array commandBufferInfo = { cmdbuf }; + core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); + imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); + + auto converter = CAssetConverter::create({ .device = m_device.get() }); + // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. + struct SInputs final : CAssetConverter::SInputs { - if (familyIndices.size() > 1) - return familyIndices; - return {}; - } + // we also need to override this to have concurrent sharing + inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override + { + if (familyIndices.size() > 1) + return familyIndices; + return {}; + } - inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; + } + + std::vector familyIndices; + } inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); { - return 
image->getCreationParameters().mipLevels; + const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), queue->getFamilyIndex() }; + inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; } - inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override - { - return 0b0u; + // scratch command buffers for asset converter transfer commands + SIntendedSubmitInfo transfer = { + .queue = queue, + .waitSemaphores = {}, + .prevCommandBuffers = {}, + .scratchCommandBuffers = commandBufferInfo, + .scratchSemaphore = { + .semaphore = imgFillSemaphore.get(), + .value = 0, + // because of layout transitions + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + } + }; + // as per the `SIntendedSubmitInfo` one commandbuffer must be begun + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the + // compute queue becomes the owner of the buffers and images post-transfer, but in this example we use concurrent sharing + CAssetConverter::SConvertParams params = {}; + params.transfer = &transfer; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = cpuImgs; + // assert that we don't need to provide patches + assert(cpuImgs[0]->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuImgs = reservation.getGPUObjects(); + for (auto& gpuImg : gpuImgs) { + if (!gpuImg) { + m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); + std::exit(-1); + } } - std::vector familyIndices; - } inputs = {}; - inputs.readCache = converter.get(); - inputs.logger = m_logger.get(); - { - const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), 
queue->getFamilyIndex() }; - inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; - } - // scratch command buffers for asset converter transfer commands - SIntendedSubmitInfo transfer = { - .queue = queue, - .waitSemaphores = {}, - .prevCommandBuffers = {}, - .scratchCommandBuffers = commandBufferInfo, - .scratchSemaphore = { - .semaphore = imgFillSemaphore.get(), - .value = 0, - // because of layout transitions - .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS - } - }; - // as per the `SIntendedSubmitInfo` one commandbuffer must be begun - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the - // compute queue becomes the owner of the buffers and images post-transfer, but in this example we use concurrent sharing - CAssetConverter::SConvertParams params = {}; - params.transfer = &transfer; - params.utilities = m_utils.get(); - - std::get>(inputs.assets) = cpuImgs; - // assert that we don't need to provide patches - assert(cpuImgs[0]->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); - auto reservation = converter->reserve(inputs); - // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable - auto gpuImgs = reservation.getGPUObjects(); - for (auto& gpuImg : gpuImgs) { - if (!gpuImg) { - m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); + // and launch the conversions + m_api->startCapture(); + auto result = reservation.convert(params); + m_api->endCapture(); + if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { + m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); std::exit(-1); } - } - // and launch the conversions - m_api->startCapture(); - auto result = reservation.convert(params); - m_api->endCapture(); - if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { 
- m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); - std::exit(-1); - } - - envMap = gpuImgs[0].value; - scrambleMap = gpuImgs[1].value; + envMap = gpuImgs[0].value; + scrambleMap = gpuImgs[1].value; }; - smart_refctd_ptr envMapCPU, scrambleMapCPU; - { - IAssetLoader::SAssetLoadParams lp; - lp.workingDirectory = this->sharedInputCWD; - SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); - if (bundle.getContents().empty()) { - m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); - std::exit(-1); - } + smart_refctd_ptr envMapCPU, scrambleMapCPU; + { + IAssetLoader::SAssetLoadParams lp; + lp.workingDirectory = this->sharedInputCWD; + SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); + if (bundle.getContents().empty()) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } - envMapCPU = IAsset::castDown(bundle.getContents()[0]); - if (!envMapCPU) { - m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); - std::exit(-1); + envMapCPU = IAsset::castDown(bundle.getContents()[0]); + if (!envMapCPU) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } } - }; - { - asset::ICPUImage::SCreationParams info; - info.format = asset::E_FORMAT::EF_R32G32_UINT; - info.type = asset::ICPUImage::ET_2D; - auto extent = envMapCPU->getCreationParameters().extent; - info.extent.width = extent.width; - info.extent.height = extent.height; - info.extent.depth = 1u; - info.mipLevels = 1u; - info.arrayLayers = 1u; - info.samples = asset::ICPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; - info.flags = static_cast(0u); - info.usage = asset::IImage::EUF_TRANSFER_SRC_BIT | asset::IImage::EUF_SAMPLED_BIT; - - scrambleMapCPU = ICPUImage::create(std::move(info)); - const uint32_t texelFormatByteSize = getTexelOrBlockBytesize(scrambleMapCPU->getCreationParameters().format); - const uint32_t texelBufferSize = scrambleMapCPU->getImageDataSizeInBytes(); - auto 
texelBuffer = ICPUBuffer::create({ texelBufferSize }); - - core::RandomSampler rng(0xbadc0ffeu); - auto out = reinterpret_cast(texelBuffer->getPointer()); - for (auto index = 0u; index < texelBufferSize / 4; index++) { - out[index] = rng.nextSample(); + { + asset::ICPUImage::SCreationParams info; + info.format = asset::E_FORMAT::EF_R32G32_UINT; + info.type = asset::ICPUImage::ET_2D; + auto extent = envMapCPU->getCreationParameters().extent; + info.extent.width = extent.width; + info.extent.height = extent.height; + info.extent.depth = 1u; + info.mipLevels = 1u; + info.arrayLayers = 1u; + info.samples = asset::ICPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + info.flags = static_cast(0u); + info.usage = asset::IImage::EUF_TRANSFER_SRC_BIT | asset::IImage::EUF_SAMPLED_BIT; + + scrambleMapCPU = ICPUImage::create(std::move(info)); + const uint32_t texelFormatByteSize = getTexelOrBlockBytesize(scrambleMapCPU->getCreationParameters().format); + const uint32_t texelBufferSize = scrambleMapCPU->getImageDataSizeInBytes(); + auto texelBuffer = ICPUBuffer::create({ texelBufferSize }); + + core::RandomSampler rng(0xbadc0ffeu); + auto out = reinterpret_cast(texelBuffer->getPointer()); + for (auto index = 0u; index < texelBufferSize / 4; index++) { + out[index] = rng.nextSample(); + } + + auto regions = core::make_refctd_dynamic_array>(1u); + ICPUImage::SBufferCopy& region = regions->front(); + region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = 1u; + region.bufferOffset = 0u; + region.bufferRowLength = IImageAssetHandlerBase::calcPitchInBlocks(extent.width, texelFormatByteSize); + region.bufferImageHeight = 0u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent = scrambleMapCPU->getCreationParameters().extent; + + scrambleMapCPU->setBufferAndRegions(std::move(texelBuffer), regions); + + // programmatically user-created 
IPreHashed need to have their hash computed (loaders do it while loading) + scrambleMapCPU->setContentHash(scrambleMapCPU->computeContentHash()); } - auto regions = core::make_refctd_dynamic_array>(1u); - ICPUImage::SBufferCopy& region = regions->front(); - region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - region.imageSubresource.mipLevel = 0u; - region.imageSubresource.baseArrayLayer = 0u; - region.imageSubresource.layerCount = 1u; - region.bufferOffset = 0u; - region.bufferRowLength = IImageAssetHandlerBase::calcPitchInBlocks(extent.width, texelFormatByteSize); - region.bufferImageHeight = 0u; - region.imageOffset = { 0u, 0u, 0u }; - region.imageExtent = scrambleMapCPU->getCreationParameters().extent; - - scrambleMapCPU->setBufferAndRegions(std::move(texelBuffer), regions); - - // programmatically user-created IPreHashed need to have their hash computed (loaders do it while loading) - scrambleMapCPU->setContentHash(scrambleMapCPU->computeContentHash()); + std::array cpuImgs = { envMapCPU.get(), scrambleMapCPU.get()}; + convertImgCPU2GPU(cpuImgs); } - std::array cpuImgs = { envMapCPU.get(), scrambleMapCPU.get() }; - convertImgCPU2GPU(cpuImgs); - } - - // create views for textures - { - auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { - IGPUImage::SCreationParams imgInfo; - imgInfo.format = colorFormat; - imgInfo.type = IGPUImage::ET_2D; - imgInfo.extent.width = width; - imgInfo.extent.height = height; - imgInfo.extent.depth = 1u; - imgInfo.mipLevels = 1u; - imgInfo.arrayLayers = 1u; - imgInfo.samples = IGPUImage::ESCF_1_BIT; - imgInfo.flags = static_cast(0u); - imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; - - auto image = m_device->createImage(std::move(imgInfo)); - auto imageMemReqs = image->getMemoryReqs(); - imageMemReqs.memoryTypeBits &= 
m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - m_device->allocate(imageMemReqs, image.get()); - - return image; + // create views for textures + { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = colorFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = width; + imgInfo.extent.height = height; + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = IGPUImage::ESCF_1_BIT; + imgInfo.flags = static_cast(0u); + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + return image; }; - auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr + auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr { auto format = img->getCreationParameters().format; IGPUImageView::SCreationParams imgViewInfo; @@ -607,28 +606,28 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B return m_device->createImageView(std::move(imgViewInfo)); }; - auto params = envMap->getCreationParameters(); - auto extent = params.extent; - envMap->setObjectDebugName("Env Map"); - m_envMapView = createHDRIImageView(envMap); - m_envMapView->setObjectDebugName("Env Map View"); - scrambleMap->setObjectDebugName("Scramble Map"); - m_scrambleView = createHDRIImageView(scrambleMap); - m_scrambleView->setObjectDebugName("Scramble Map View"); - auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); - outImg->setObjectDebugName("Output Image"); - m_outImgView = 
createHDRIImageView(outImg); - m_outImgView->setObjectDebugName("Output Image View"); - } + auto params = envMap->getCreationParameters(); + auto extent = params.extent; + envMap->setObjectDebugName("Env Map"); + m_envMapView = createHDRIImageView(envMap); + m_envMapView->setObjectDebugName("Env Map View"); + scrambleMap->setObjectDebugName("Scramble Map"); + m_scrambleView = createHDRIImageView(scrambleMap); + m_scrambleView->setObjectDebugName("Scramble Map View"); + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); + outImg->setObjectDebugName("Output Image"); + m_outImgView = createHDRIImageView(outImg); + m_outImgView->setObjectDebugName("Output Image View"); + } - // create sequence buffer view - { - // TODO: do this better use asset manager to get the ICPUBuffer from `.bin` - auto createBufferFromCacheFile = [this]( - system::path filename, - size_t bufferSize, - void* data, - smart_refctd_ptr& buffer + // create sequence buffer view + { + // TODO: do this better use asset manager to get the ICPUBuffer from `.bin` + auto createBufferFromCacheFile = [this]( + system::path filename, + size_t bufferSize, + void *data, + smart_refctd_ptr& buffer ) -> std::pair, bool> { ISystem::future_t> owenSamplerFileFuture; @@ -661,7 +660,7 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B return { owenSamplerFile, true }; }; - auto writeBufferIntoCacheFile = [this](smart_refctd_ptr file, size_t bufferSize, void* data) + auto writeBufferIntoCacheFile = [this](smart_refctd_ptr file, size_t bufferSize, void* data) { ISystem::future_t owenSamplerFileWriteFuture; size_t owenSamplerFileBytesWritten; @@ -671,196 +670,196 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); }; - constexpr size_t bufferSize = MaxBufferDimensions * MaxBufferSamples; - std::array data = {}; - 
smart_refctd_ptr sampleSeq; + constexpr size_t bufferSize = MaxBufferDimensions * MaxBufferSamples; + std::array data = {}; + smart_refctd_ptr sampleSeq; - auto cacheBufferResult = createBufferFromCacheFile(sharedOutputCWD / OwenSamplerFilePath, bufferSize, data.data(), sampleSeq); - if (!cacheBufferResult.second) - { - core::OwenSampler sampler(MaxBufferDimensions, 0xdeadbeefu); + auto cacheBufferResult = createBufferFromCacheFile(sharedOutputCWD/OwenSamplerFilePath, bufferSize, data.data(), sampleSeq); + if (!cacheBufferResult.second) + { + core::OwenSampler sampler(MaxBufferDimensions, 0xdeadbeefu); - ICPUBuffer::SCreationParams params = {}; - params.size = MaxBufferDimensions * MaxBufferSamples * sizeof(uint32_t); - sampleSeq = ICPUBuffer::create(std::move(params)); + ICPUBuffer::SCreationParams params = {}; + params.size = MaxBufferDimensions*MaxBufferSamples*sizeof(uint32_t); + sampleSeq = ICPUBuffer::create(std::move(params)); - auto out = reinterpret_cast(sampleSeq->getPointer()); - for (auto dim = 0u; dim < MaxBufferDimensions; dim++) - for (uint32_t i = 0; i < MaxBufferSamples; i++) - { - out[i * MaxBufferDimensions + dim] = sampler.sample(dim, i); - } - if (cacheBufferResult.first) - writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); - } + auto out = reinterpret_cast(sampleSeq->getPointer()); + for (auto dim = 0u; dim < MaxBufferDimensions; dim++) + for (uint32_t i = 0; i < MaxBufferSamples; i++) + { + out[i * MaxBufferDimensions + dim] = sampler.sample(dim, i); + } + if (cacheBufferResult.first) + writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); + } - IGPUBuffer::SCreationParams params = {}; - params.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_UNIFORM_TEXEL_BUFFER_BIT; - params.size = sampleSeq->getSize(); + IGPUBuffer::SCreationParams params = {}; + params.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_UNIFORM_TEXEL_BUFFER_BIT; + params.size = sampleSeq->getSize(); - // we 
don't want to overcomplicate the example with multi-queue - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[0].get(); - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; - m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo, 1 }; + // we don't want to overcomplicate the example with multi-queue + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; + m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo, 1 }; - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - m_api->startCapture(); - auto bufferFuture = m_utils->createFilledDeviceLocalBufferOnDedMem( - m_intendedSubmit, - std::move(params), - sampleSeq->getPointer() - ); - m_api->endCapture(); - bufferFuture.wait(); - auto buffer = bufferFuture.get(); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + m_api->startCapture(); + auto bufferFuture = m_utils->createFilledDeviceLocalBufferOnDedMem( + m_intendedSubmit, + std::move(params), + sampleSeq->getPointer() + ); + m_api->endCapture(); + bufferFuture.wait(); + auto buffer = bufferFuture.get(); - m_sequenceBufferView = m_device->createBufferView({ 0u, buffer->get()->getSize(), *buffer }, asset::E_FORMAT::EF_R32G32B32_UINT); - m_sequenceBufferView->setObjectDebugName("Sequence Buffer"); - } + m_sequenceBufferView = m_device->createBufferView({ 0u, buffer->get()->getSize(), *buffer }, asset::E_FORMAT::EF_R32G32B32_UINT); + m_sequenceBufferView->setObjectDebugName("Sequence Buffer"); + } - // Update Descriptors - { - ISampler::SParams samplerParams0 = { - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::ETBC_FLOAT_OPAQUE_BLACK, - ISampler::ETF_LINEAR, - ISampler::ETF_LINEAR, - ISampler::ESMM_LINEAR, - 0u, - false, - 
ECO_ALWAYS - }; - auto sampler0 = m_device->createSampler(samplerParams0); - ISampler::SParams samplerParams1 = { - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, - ISampler::ETBC_INT_OPAQUE_BLACK, - ISampler::ETF_NEAREST, - ISampler::ETF_NEAREST, - ISampler::ESMM_NEAREST, - 0u, - false, - ECO_ALWAYS - }; - auto sampler1 = m_device->createSampler(samplerParams1); - - std::array writeDSInfos = {}; - writeDSInfos[0].desc = m_outImgView; - writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; - writeDSInfos[1].desc = m_envMapView; - // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - writeDSInfos[1].info.combinedImageSampler.sampler = sampler0; - writeDSInfos[1].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[2].desc = m_sequenceBufferView; - writeDSInfos[3].desc = m_scrambleView; - // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; - writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; - writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[4].desc = m_outImgView; - writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - std::array writeDescriptorSets = {}; - writeDescriptorSets[0] = { - .dstSet = m_descriptorSet0.get(), - .binding = 0, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[0] - }; - writeDescriptorSets[1] = { - .dstSet = m_descriptorSet2.get(), - .binding = 0, - .arrayElement = 0u, - .count = 1u, - 
.info = &writeDSInfos[1] - }; - writeDescriptorSets[2] = { - .dstSet = m_descriptorSet2.get(), - .binding = 1, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[2] - }; - writeDescriptorSets[3] = { - .dstSet = m_descriptorSet2.get(), - .binding = 2, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[3] - }; - writeDescriptorSets[4] = { - .dstSet = m_presentDescriptorSet.get(), - .binding = 0, - .arrayElement = 0u, - .count = 1u, - .info = &writeDSInfos[4] - }; + // Update Descriptors + { + ISampler::SParams samplerParams0 = { + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_FLOAT_OPAQUE_BLACK, + ISampler::ETF_LINEAR, + ISampler::ETF_LINEAR, + ISampler::ESMM_LINEAR, + 0u, + false, + ECO_ALWAYS + }; + auto sampler0 = m_device->createSampler(samplerParams0); + ISampler::SParams samplerParams1 = { + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_INT_OPAQUE_BLACK, + ISampler::ETF_NEAREST, + ISampler::ETF_NEAREST, + ISampler::ESMM_NEAREST, + 0u, + false, + ECO_ALWAYS + }; + auto sampler1 = m_device->createSampler(samplerParams1); + + std::array writeDSInfos = {}; + writeDSInfos[0].desc = m_outImgView; + writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[1].desc = m_envMapView; + // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; + writeDSInfos[1].info.combinedImageSampler.sampler = sampler0; + writeDSInfos[1].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[2].desc = m_sequenceBufferView; + writeDSInfos[3].desc = m_scrambleView; + // ISampler::SParams 
samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; + writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; + writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[4].desc = m_outImgView; + writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + std::array writeDescriptorSets = {}; + writeDescriptorSets[0] = { + .dstSet = m_descriptorSet0.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[0] + }; + writeDescriptorSets[1] = { + .dstSet = m_descriptorSet2.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[1] + }; + writeDescriptorSets[2] = { + .dstSet = m_descriptorSet2.get(), + .binding = 1, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[2] + }; + writeDescriptorSets[3] = { + .dstSet = m_descriptorSet2.get(), + .binding = 2, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[3] + }; + writeDescriptorSets[4] = { + .dstSet = m_presentDescriptorSet.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[4] + }; - m_device->updateDescriptorSets(writeDescriptorSets, {}); - } + m_device->updateDescriptorSets(writeDescriptorSets, {}); + } - // Create ui descriptors - { - using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + // Create ui descriptors { - IGPUSampler::SParams params; - params.AnisotropicFilter = 1u; - params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; - params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; - params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; - - m_ui.samplers.gui = m_device->createSampler(params); - m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); - } + using binding_flags_t = 
IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + + m_ui.samplers.gui = m_device->createSampler(params); + m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); + } - std::array, 69u> immutableSamplers; - for (auto& it : immutableSamplers) - it = smart_refctd_ptr(m_ui.samplers.scene); + std::array, 69u> immutableSamplers; + for (auto& it : immutableSamplers) + it = smart_refctd_ptr(m_ui.samplers.scene); - immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); + immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); - nbl::ext::imgui::UI::SCreationParameters params; + nbl::ext::imgui::UI::SCreationParameters params; - params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; - params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; - params.assetManager = m_assetMgr; - params.pipelineCache = nullptr; - params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); - params.renderpass = smart_refctd_ptr(renderpass); - params.streamingBuffer = nullptr; - params.subpassIx = 0u; - params.transfer = getTransferUpQueue(); - params.utilities = m_utils; - { - m_ui.manager = ext::imgui::UI::create(std::move(params)); + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; + params.assetManager = m_assetMgr; + params.pipelineCache = nullptr; + params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, 
MaxUITextureCount); + params.renderpass = smart_refctd_ptr(renderpass); + params.streamingBuffer = nullptr; + params.subpassIx = 0u; + params.transfer = getTransferUpQueue(); + params.utilities = m_utils; + { + m_ui.manager = ext::imgui::UI::create(std::move(params)); - // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources - const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); - const auto& params = m_ui.manager->getCreationParameters(); + // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + const auto& params = m_ui.manager->getCreationParameters(); - IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; - descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; - descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; - descriptorPoolInfo.maxSets = 1u; - descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; + IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; + descriptorPoolInfo.maxSets = 1u; + descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; - m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); - assert(m_guiDescriptorSetPool); + m_guiDescriptorSetPool = 
m_device->createDescriptorPool(std::move(descriptorPoolInfo)); + assert(m_guiDescriptorSetPool); - m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); - assert(m_ui.descriptorSet); + m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); + assert(m_ui.descriptorSet); + } } - } - m_ui.manager->registerListener( - [this]() -> void { - ImGuiIO& io = ImGui::GetIO(); + m_ui.manager->registerListener( + [this]() -> void { + ImGuiIO& io = ImGui::GetIO(); - m_camera.setProjectionMatrix([&]() + m_camera.setProjectionMatrix([&]() { static hlsl::float32_t4x4 projection; @@ -869,308 +868,308 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B return projection; }()); - ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); - // create a window and insert the inspector - ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); - ImGui::Begin("Controls"); + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Controls"); - ImGui::SameLine(); + ImGui::SameLine(); - ImGui::Text("Camera"); + ImGui::Text("Camera"); - ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); - ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); - ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); - ImGui::ListBox("Shader", &PTPipline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); - ImGui::SliderInt("SPP", &spp, 1, MaxBufferSamples); - ImGui::SliderInt("Depth", &depth, 1, 
MaxBufferDimensions / 6); + ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); + ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); + ImGui::ListBox("Shader", &PTPipline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); + ImGui::SliderInt("SPP", &spp, 1, MaxBufferSamples); + ImGui::SliderInt("Depth", &depth, 1, MaxBufferDimensions / 6); - ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); - - ImGui::End(); - } - ); + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); - // Set Camera - { - core::vectorSIMDf cameraPosition(0, 5, -10); - hlsl::float32_t4x4 proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix( - core::radians(60.0f), - float(WindowDimensions.x / WindowDimensions.y), - 0.01f, - 500.0f + ImGui::End(); + } ); - m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); - } - - m_winMgr->setWindowSize(m_window.get(), WindowDimensions.x, WindowDimensions.y); - m_surface->recreateSwapchain(); - m_winMgr->show(m_window.get()); - m_oracle.reportBeginFrameRecord(); - m_camera.mapKeysToWASD(); - - return true; - } - bool updateGUIDescriptorSet() - { - // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout - static std::array descriptorInfo; - static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; + // Set Camera + { + core::vectorSIMDf cameraPosition(0, 5, -10); + hlsl::float32_t4x4 proj = hlsl::math::thin_lens::rhPerspectiveFovMatrix( + core::radians(60.0f), + float(WindowDimensions.x / WindowDimensions.y), + 0.01f, + 500.0f + ); + m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); + } - descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = 
smart_refctd_ptr(m_ui.manager->getFontAtlasView()); + m_winMgr->setWindowSize(m_window.get(), WindowDimensions.x, WindowDimensions.y); + m_surface->recreateSwapchain(); + m_winMgr->show(m_window.get()); + m_oracle.reportBeginFrameRecord(); + m_camera.mapKeysToWASD(); - for (uint32_t i = 0; i < descriptorInfo.size(); ++i) - { - writes[i].dstSet = m_ui.descriptorSet.get(); - writes[i].binding = 0u; - writes[i].arrayElement = i; - writes[i].count = 1u; - } - writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; - - return m_device->updateDescriptorSets(writes, {}); - } - - inline void workLoopBody() override - { - // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. - const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); - // We block for semaphores for 2 reasons here: - // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] - // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. 
[MaxAcquiresInFlight] - if (m_realFrameIx >= framesInFlight) - { - const ISemaphore::SWaitInfo cbDonePending[] = - { - { - .semaphore = m_semaphore.get(), - .value = m_realFrameIx + 1 - framesInFlight - } - }; - if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) - return; + return true; } - const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - m_api->startCapture(); + bool updateGUIDescriptorSet() + { + // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout + static std::array descriptorInfo; + static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; - // CPU events - update(); + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); - auto queue = getGraphicsQueue(); - auto cmdbuf = m_cmdBufs[resourceIx].get(); + for (uint32_t i = 0; i < descriptorInfo.size(); ++i) + { + writes[i].dstSet = m_ui.descriptorSet.get(); + writes[i].binding = 0u; + writes[i].arrayElement = i; + writes[i].count = 1u; + } + writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; - if (!keepRunning()) - return; + return m_device->updateDescriptorSets(writes, {}); + } - // render whole scene to offline frame buffer & submit + inline void workLoopBody() override { - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - // disregard surface/swapchain transformation for now - const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - PTPushConstant pc; - pc.invMVP = hlsl::inverse(viewProjectionMatrix); - pc.sampleCount = spp; - pc.depth = depth; - - // safe to proceed - // upload buffer data - cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - 
- // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] + if (m_realFrameIx >= framesInFlight) { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + const ISemaphore::SWaitInfo cbDonePending[] = + { { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::GENERAL + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight } }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; } + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - // cube envmap handle - { - auto pipeline = m_PTPipelines[PTPipline].get(); - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); 
- cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PTPushConstant), &pc); - cmdbuf->dispatch(1 + (WindowDimensions.x - 1) / DefaultWorkGroupSize, 1 + (WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u); - } + m_api->startCapture(); + + // CPU events + update(); - // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[resourceIx].get(); + + if (!keepRunning()) + return; + + // render whole scene to offline frame buffer & submit { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + // disregard surface/swapchain transformation for now + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + PTPushConstant pc; + pc.invMVP = hlsl::inverse(viewProjectionMatrix); + pc.sampleCount = spp; + pc.depth = depth; + + // safe to proceed + // upload buffer data + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - // TODO: tone mapping and stuff - } + // TRANSITION m_outImgView to GENERAL (because of 
descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } - asset::SViewport viewport; - { - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = WindowDimensions.x; - viewport.height = WindowDimensions.y; - } - cmdbuf->setViewport(0u, 1u, &viewport); + // cube envmap handle + { + auto pipeline = m_PTPipelines[PTPipline].get(); + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PTPushConstant), &pc); + cmdbuf->dispatch(1 + (WindowDimensions.x - 1) / DefaultWorkGroupSize, 1 + (WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u); + } + + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + 
.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + // TODO: tone mapping and stuff + } - VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; - cmdbuf->setScissor(defaultScisors); + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = WindowDimensions.x; + viewport.height = WindowDimensions.y; + } + cmdbuf->setViewport(0u, 1u, &viewport); - const VkRect2D currentRenderArea = - { - .offset = {0,0}, - .extent = {m_window->getWidth(),m_window->getHeight()} - }; - auto scRes = static_cast(m_surface->getSwapchainResources()); - // Upload m_outImg to swapchain + UI - { - const IGPUCommandBuffer::SRenderpassBeginInfo info = + VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; + cmdbuf->setScissor(defaultScisors); + + const VkRect2D currentRenderArea = { - .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), - .colorClearValues = &clearColor, - .depthStencilClearValues = nullptr, - .renderArea = currentRenderArea + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} }; - nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + auto scRes = static_cast(m_surface->getSwapchainResources()); - cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + // Upload m_outImg to 
swapchain + UI + { + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearColor, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; - cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); - ext::FullScreenTriangle::recordDrawCall(cmdbuf); + cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - const auto uiParams = m_ui.manager->getCreationParameters(); - auto* uiPipeline = m_ui.manager->getPipeline(); - cmdbuf->bindGraphicsPipeline(uiPipeline); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); - m_ui.manager->render(cmdbuf, waitInfo); + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); - cmdbuf->endRenderPass(); - } + const auto uiParams = m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cmdbuf->bindGraphicsPipeline(uiPipeline); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); + m_ui.manager->render(cmdbuf, waitInfo); - cmdbuf->end(); - { - const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + cmdbuf->endRenderPass(); + } + + cmdbuf->end(); { + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT - } - }; - { + { + .semaphore = m_semaphore.get(), + .value = 
++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } + }; { - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = { - {.cmdbuf = cmdbuf } - }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = - { + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { - .semaphore = m_currentImageAcquire.semaphore, - .value = m_currentImageAcquire.acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - const IQueue::SSubmitInfo infos[] = - { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = rendered - } - }; + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; - updateGUIDescriptorSet(); + updateGUIDescriptorSet(); - if (queue->submit(infos) != IQueue::RESULT::SUCCESS) - m_realFrameIx--; + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + m_realFrameIx--; + } } - } - m_window->setCaption("[Nabla Engine] Computer Path Tracer"); - m_surface->present(m_currentImageAcquire.imageIndex, rendered); + m_window->setCaption("[Nabla Engine] Computer Path Tracer"); + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + } + m_api->endCapture(); } - m_api->endCapture(); - } - inline bool keepRunning() override - { - if (m_surface->irrecoverable()) - return false; + inline bool keepRunning() override + { + if (m_surface->irrecoverable()) + return false; - return true; - } + return true; + } - inline bool onAppTerminated() override - { - return device_base_t::onAppTerminated(); - } + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } - inline void update() - { - m_camera.setMoveSpeed(moveSpeed); - 
m_camera.setRotateSpeed(rotateSpeed); + inline void update() + { + m_camera.setMoveSpeed(moveSpeed); + m_camera.setRotateSpeed(rotateSpeed); - static std::chrono::microseconds previousEventTimestamp{}; + static std::chrono::microseconds previousEventTimestamp{}; - m_inputSystem->getDefaultMouse(&mouse); - m_inputSystem->getDefaultKeyboard(&keyboard); + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); - auto updatePresentationTimestamp = [&]() + auto updatePresentationTimestamp = [&]() { m_currentImageAcquire = m_surface->acquireNextImage(); @@ -1181,17 +1180,17 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B return timestamp; }; - const auto nextPresentationTimestamp = updatePresentationTimestamp(); + const auto nextPresentationTimestamp = updatePresentationTimestamp(); - struct - { - std::vector mouse{}; - std::vector keyboard{}; - } capturedEvents; + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } capturedEvents; - m_camera.beginInputProcessing(nextPresentationTimestamp); - { - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + m_camera.beginInputProcessing(nextPresentationTimestamp); + { + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { m_camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl @@ -1208,7 +1207,7 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B } }, m_logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { m_camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl @@ -1221,85 +1220,85 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B capturedEvents.keyboard.emplace_back(e); } }, m_logger.get()); - } - 
m_camera.endInputProcessing(nextPresentationTimestamp); + } + m_camera.endInputProcessing(nextPresentationTimestamp); - const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); - const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); - const auto cursorPosition = m_window->getCursorControl()->getPosition(); - const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); + const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); - const ext::imgui::UI::SUpdateParameters params = - { - .mousePosition = mousePosition, - .displaySize = { m_window->getWidth(), m_window->getHeight() }, - .mouseEvents = mouseEvents, - .keyboardEvents = keyboardEvents - }; - - m_ui.manager->update(params); - } + const ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), m_window->getHeight() }, + .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; -private: - smart_refctd_ptr m_window; - smart_refctd_ptr> m_surface; + m_ui.manager->update(params); + } - // gpu resources - smart_refctd_ptr m_cmdPool; - std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTPipelines; - smart_refctd_ptr m_presentPipeline; - uint64_t m_realFrameIx = 0; - std::array, MaxFramesInFlight> m_cmdBufs; - ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; - smart_refctd_ptr m_descriptorSet0, m_descriptorSet2, m_presentDescriptorSet; + 
private: + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; - core::smart_refctd_ptr m_guiDescriptorSetPool; + // gpu resources + smart_refctd_ptr m_cmdPool; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTPipelines; + smart_refctd_ptr m_presentPipeline; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + smart_refctd_ptr m_descriptorSet0, m_descriptorSet2, m_presentDescriptorSet; - // system resources - core::smart_refctd_ptr m_inputSystem; - InputSystem::ChannelReader mouse; - InputSystem::ChannelReader keyboard; + core::smart_refctd_ptr m_guiDescriptorSetPool; - // pathtracer resources - smart_refctd_ptr m_envMapView, m_scrambleView; - smart_refctd_ptr m_sequenceBufferView; - smart_refctd_ptr m_outImgView; + // system resources + core::smart_refctd_ptr m_inputSystem; + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; - // sync - smart_refctd_ptr m_semaphore; + // pathtracer resources + smart_refctd_ptr m_envMapView, m_scrambleView; + smart_refctd_ptr m_sequenceBufferView; + smart_refctd_ptr m_outImgView; - // image upload resources - smart_refctd_ptr m_scratchSemaphore; - SIntendedSubmitInfo m_intendedSubmit; + // sync + smart_refctd_ptr m_semaphore; - struct C_UI - { - nbl::core::smart_refctd_ptr manager; + // image upload resources + smart_refctd_ptr m_scratchSemaphore; + SIntendedSubmitInfo m_intendedSubmit; - struct + struct C_UI { - core::smart_refctd_ptr gui, scene; - } samplers; + nbl::core::smart_refctd_ptr manager; + + struct + { + core::smart_refctd_ptr gui, scene; + } samplers; - core::smart_refctd_ptr descriptorSet; - } m_ui; + core::smart_refctd_ptr descriptorSet; + } m_ui; - Camera m_camera; + Camera m_camera; - video::CDumbPresentationOracle m_oracle; + video::CDumbPresentationOracle m_oracle; - uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this 
example is upgraded to support multiple objects this needs to be changed + uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed - float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; - float viewWidth = 10.f; - float camYAngle = 165.f / 180.f * 3.14159f; - float camXAngle = 32.f / 180.f * 3.14159f; - int PTPipline = E_LIGHT_GEOMETRY::ELG_SPHERE; - int spp = 32; - int depth = 3; + float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + int PTPipline = E_LIGHT_GEOMETRY::ELG_SPHERE; + int spp = 32; + int depth = 3; - bool m_firstFrame = true; - IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; + bool m_firstFrame = true; + IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; }; -NBL_MAIN_FUNC(ComputeShaderPathtracer) +NBL_MAIN_FUNC(ComputeShaderPathtracer) \ No newline at end of file diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index 405ac150b..ff8f7a90f 100644 --- a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -334,7 +334,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso if (base.m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = base.m_device->getPipelineExecutableReport(m_pipeline.get(), true); + auto report = system::to_string(base.m_device->getPipelineExecutableProperties(m_pipeline.get(), true)); base.m_logger->log("EF64Submitter Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } } @@ -1003,8 +1003,8 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso if 
(base.m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = base.m_device->getPipelineExecutableReport(m_pipeline.get(), true); - base.m_logger->log("EF64Benchmark Pipeline Executable Report:\n%s", ILogger::ELL_INFO, report.c_str()); + auto report = system::to_string(base.m_device->getPipelineExecutableProperties(m_pipeline.get(), true)); + base.m_logger->log("EF64Benchmark Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } } diff --git a/67_RayQueryGeometry/main.cpp b/67_RayQueryGeometry/main.cpp index f1fbd73cd..464583352 100644 --- a/67_RayQueryGeometry/main.cpp +++ b/67_RayQueryGeometry/main.cpp @@ -32,7 +32,6 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built { auto retval = device_base_t::getPreferredDeviceFeatures(); retval.accelerationStructureHostCommands = true; - retval.pipelineExecutableInfo = true; return retval; } @@ -176,19 +175,8 @@ class RayQueryGeometryApp final : public SimpleWindowedApplication, public Built params.layout = pipelineLayout.get(); params.shader.shader = shader.get(); params.shader.entryPoint = "main"; - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; - params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; - } if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &renderPipeline)) return logFail("Failed to create compute pipeline"); - - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - auto report = m_device->getPipelineExecutableReport(renderPipeline.get(), true); - m_logger->log("Ray Query Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); - } } // write descriptors diff --git a/70_FLIPFluids/main.cpp b/70_FLIPFluids/main.cpp index 32f7a64a9..8cc74693e 100644 --- a/70_FLIPFluids/main.cpp +++ b/70_FLIPFluids/main.cpp @@ -390,7 +390,7 @@ class FLIPFluidsApp final : public 
SimpleWindowedApplication, public BuiltinReso if (m_device->getEnabledFeatures().pipelineExecutableInfo && pipeline) { - auto report = m_device->getPipelineExecutableReport(pipeline.get(), true); + auto report = system::to_string(m_device->getPipelineExecutableProperties(pipeline.get(), true)); m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, ShaderKey.value, report.c_str()); } }; @@ -654,7 +654,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso if (m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = m_device->getPipelineExecutableReport(m_iterateDiffusionPipeline.get(), true); + auto report = system::to_string(m_device->getPipelineExecutableProperties(m_iterateDiffusionPipeline.get(), true)); m_logger->log("iterateDiffusion Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } } @@ -673,7 +673,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso if (m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = m_device->getPipelineExecutableReport(m_diffusionPipeline.get(), true); + auto report = system::to_string(m_device->getPipelineExecutableProperties(m_diffusionPipeline.get(), true)); m_logger->log("applyDiffusion Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } } diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index c38b83470..5f4b4aa47 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -337,7 +337,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui if (m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = m_device->getPipelineExecutableReport(m_rayTracingPipeline.get(), true); + auto report = system::to_string(m_device->getPipelineExecutableProperties(m_rayTracingPipeline.get(), true)); m_logger->log("Ray Tracing Pipeline Executable Report:\n%s", 
ILogger::ELL_PERFORMANCE, report.c_str()); } const auto* gpuDsLayout = m_rayTracingPipeline->getLayout()->getDescriptorSetLayouts()[0]; diff --git a/72_CooperativeBinarySearch/main.cpp b/72_CooperativeBinarySearch/main.cpp index 0a00c52f2..1719b9562 100644 --- a/72_CooperativeBinarySearch/main.cpp +++ b/72_CooperativeBinarySearch/main.cpp @@ -111,7 +111,7 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp if (m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = m_device->getPipelineExecutableReport(m_pipeline.get(), true); + auto report = system::to_string(m_device->getPipelineExecutableProperties(m_pipeline.get(), true)); m_logger->log("Cooperative Binary Search Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } } From 0e20b7d86ca3ebad9d49ceefafdc58665a34133b Mon Sep 17 00:00:00 2001 From: Karim Mohamed Date: Wed, 25 Feb 2026 06:09:40 +0300 Subject: [PATCH 3/3] update examples to use `pipeline->getExecutableInfo()` instead of bad API --- 28_FFTBloom/main.cpp | 4 ++-- 29_Arithmetic2Bench/main.cpp | 2 +- 30_ComputeShaderPathTracer/main.cpp | 2 +- 64_EmulatedFloatTest/main.cpp | 4 ++-- 70_FLIPFluids/main.cpp | 6 +++--- 71_RayTracingPipeline/main.cpp | 2 +- 72_CooperativeBinarySearch/main.cpp | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/28_FFTBloom/main.cpp b/28_FFTBloom/main.cpp index f608755b1..e8ec015e3 100644 --- a/28_FFTBloom/main.cpp +++ b/28_FFTBloom/main.cpp @@ -753,7 +753,7 @@ class FFTBloomApp final : public SimpleWindowedApplication, public BuiltinResour const char* kernelNames[] = {"Kernel First Axis FFT", "Kernel Second Axis FFT", "Kernel Spectrum Normalize"}; for (auto i = 0u; i < 3; i++) { - auto report = system::to_string(m_device->getPipelineExecutableProperties(pipelines[i].get(), true)); + auto report = system::to_string(pipelines[i]->getExecutableInfo()); m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, 
kernelNames[i], report.c_str()); } } @@ -971,7 +971,7 @@ class FFTBloomApp final : public SimpleWindowedApplication, public BuiltinResour const char* imageNames[] = {"Image First Axis FFT", "FFT Convolve IFFT", "Image First Axis IFFT"}; for (auto i = 0u; i < 3; i++) { - auto report = system::to_string(m_device->getPipelineExecutableProperties(pipelines[i].get(), true)); + auto report = system::to_string(pipelines[i]->getExecutableInfo()); m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, imageNames[i], report.c_str()); } } diff --git a/29_Arithmetic2Bench/main.cpp b/29_Arithmetic2Bench/main.cpp index ec2cf3a1b..889401d3d 100644 --- a/29_Arithmetic2Bench/main.cpp +++ b/29_Arithmetic2Bench/main.cpp @@ -526,7 +526,7 @@ class ArithmeticBenchApp final : public examples::SimpleWindowedApplication, pub if (m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = system::to_string(m_device->getPipelineExecutableProperties(pipeline.get(), true)); + auto report = system::to_string(pipeline->getExecutableInfo()); m_logger->log("Arithmetic Bench Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } return pipeline; diff --git a/30_ComputeShaderPathTracer/main.cpp b/30_ComputeShaderPathTracer/main.cpp index a2cc4f655..1ba8c53ef 100644 --- a/30_ComputeShaderPathTracer/main.cpp +++ b/30_ComputeShaderPathTracer/main.cpp @@ -380,7 +380,7 @@ class ComputeShaderPathtracer final : public SimpleWindowedApplication, public B if (m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = system::to_string(m_device->getPipelineExecutableProperties(m_PTPipelines[index].get(), true)); + auto report = system::to_string(m_PTPipelines[index]->getExecutableInfo()); m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, PTShaderPaths[index].c_str(), report.c_str()); } } diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index ff8f7a90f..7919f68c5 100644 --- 
a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -334,7 +334,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso if (base.m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = system::to_string(base.m_device->getPipelineExecutableProperties(m_pipeline.get(), true)); + auto report = system::to_string(m_pipeline->getExecutableInfo()); base.m_logger->log("EF64Submitter Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } } @@ -1003,7 +1003,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public BuiltinReso if (base.m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = system::to_string(base.m_device->getPipelineExecutableProperties(m_pipeline.get(), true)); + auto report = system::to_string(m_pipeline->getExecutableInfo()); base.m_logger->log("EF64Benchmark Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } } diff --git a/70_FLIPFluids/main.cpp b/70_FLIPFluids/main.cpp index 8cc74693e..c702d512d 100644 --- a/70_FLIPFluids/main.cpp +++ b/70_FLIPFluids/main.cpp @@ -390,7 +390,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso if (m_device->getEnabledFeatures().pipelineExecutableInfo && pipeline) { - auto report = system::to_string(m_device->getPipelineExecutableProperties(pipeline.get(), true)); + auto report = system::to_string(pipeline->getExecutableInfo()); m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, ShaderKey.value, report.c_str()); } }; @@ -654,7 +654,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso if (m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = system::to_string(m_device->getPipelineExecutableProperties(m_iterateDiffusionPipeline.get(), true)); + auto report = system::to_string(m_iterateDiffusionPipeline->getExecutableInfo()); m_logger->log("iterateDiffusion Pipeline 
Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } } @@ -673,7 +673,7 @@ class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinReso if (m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = system::to_string(m_device->getPipelineExecutableProperties(m_diffusionPipeline.get(), true)); + auto report = system::to_string(m_diffusionPipeline->getExecutableInfo()); m_logger->log("applyDiffusion Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } } diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp index 5f4b4aa47..d56a953b8 100644 --- a/71_RayTracingPipeline/main.cpp +++ b/71_RayTracingPipeline/main.cpp @@ -337,7 +337,7 @@ class RaytracingPipelineApp final : public SimpleWindowedApplication, public Bui if (m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = system::to_string(m_device->getPipelineExecutableProperties(m_rayTracingPipeline.get(), true)); + auto report = system::to_string(m_rayTracingPipeline->getExecutableInfo()); m_logger->log("Ray Tracing Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } const auto* gpuDsLayout = m_rayTracingPipeline->getLayout()->getDescriptorSetLayouts()[0]; diff --git a/72_CooperativeBinarySearch/main.cpp b/72_CooperativeBinarySearch/main.cpp index 1719b9562..aef50f68c 100644 --- a/72_CooperativeBinarySearch/main.cpp +++ b/72_CooperativeBinarySearch/main.cpp @@ -111,7 +111,7 @@ class CooperativeBinarySearch final : public application_templates::MonoDeviceAp if (m_device->getEnabledFeatures().pipelineExecutableInfo) { - auto report = system::to_string(m_device->getPipelineExecutableProperties(m_pipeline.get(), true)); + auto report = system::to_string(m_pipeline->getExecutableInfo()); m_logger->log("Cooperative Binary Search Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); } }