From 7b92a245bd4e0be8e2f0673205f4ff268e54baee Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 12 Jan 2026 20:32:41 +0700 Subject: [PATCH 1/9] Initial Envmap importance sampling demo --- 74_EnvmapImportanceSampling/CMakeLists.txt | 8 ++ 74_EnvmapImportanceSampling/main.cpp | 102 +++++++++++++++++++++ CMakeLists.txt | 1 + 3 files changed, 111 insertions(+) create mode 100644 74_EnvmapImportanceSampling/CMakeLists.txt create mode 100644 74_EnvmapImportanceSampling/main.cpp diff --git a/74_EnvmapImportanceSampling/CMakeLists.txt b/74_EnvmapImportanceSampling/CMakeLists.txt new file mode 100644 index 000000000..a3b1ea1f7 --- /dev/null +++ b/74_EnvmapImportanceSampling/CMakeLists.txt @@ -0,0 +1,8 @@ +set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" +) +set(NBL_LIBRARIES) + +nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") + +target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::EnvmapImportanceSampling) diff --git a/74_EnvmapImportanceSampling/main.cpp b/74_EnvmapImportanceSampling/main.cpp new file mode 100644 index 000000000..5c0cbaff2 --- /dev/null +++ b/74_EnvmapImportanceSampling/main.cpp @@ -0,0 +1,102 @@ +// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/application_templates/MonoSystemMonoLoggerApplication.hpp" + +using namespace nbl; +using namespace core; +using namespace asset; +using namespace system; + +// instead of defining our own `int main()` we derive from `nbl::system::IApplicationFramework` to play "nice" wil all platofmrs +class EnvmapImportanceSamplingApp final : public nbl::application_templates::MonoSystemMonoLoggerApplication +{ + using base_t = application_templates::MonoSystemMonoLoggerApplication; +public: + using base_t::base_t; + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!base_t::onAppInitialized(std::move(system))) + return false; + + constexpr std::string_view defaultImagePath = "../../media/envmap/envmap_0.exr"; + + const auto targetFilePath = [&]() -> std::string_view + { + const auto argc = argv.size(); + const bool isDefaultImageRequested = argc == 1; + + if (isDefaultImageRequested) + { + m_logger->log("No image specified, loading default \"%s\" OpenEXR image from media directory!", ILogger::ELL_INFO, defaultImagePath.data()); + return defaultImagePath; + } + else if (argc == 2) + { + const std::string_view target(argv[1]); + m_logger->log("Requested \"%s\"", ILogger::ELL_INFO, target.data()); + return { target }; + } + else + { + m_logger->log("To many arguments! 
Pass a single filename to an OpenEXR image w.r.t CWD.", ILogger::ELL_ERROR); + return {}; + } + }(); + + if (targetFilePath.empty()) + return false; + + auto assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + + nbl::asset::IAssetLoader::SAssetLoadParams lp; + const asset::COpenEXRMetadata* meta; + + auto image_bundle = assetManager->getAsset(targetFilePath.data(), lp); + auto contents = image_bundle.getContents(); + { + if (contents.empty()) + { + m_logger->log("Could not load \"%s\"", ILogger::ELL_ERROR, targetFilePath.data()); + return false; + } + + meta = image_bundle.getMetadata()->selfCast(); + + if (!meta) + { + m_logger->log("Could not selfCast \"%s\" asset's metadata to COpenEXRMetadata, the tool expects valid OpenEXR input image, terminating!", ILogger::ELL_ERROR, targetFilePath.data()); + return false; + } + } + + uint32_t i = 0u; + for (auto asset : contents) + { + auto image = IAsset::castDown(asset); + const auto* metadata = static_cast(meta->getAssetSpecificMetadata(image.get())); + + ICPUImageView::SCreationParams imgViewParams; + imgViewParams.flags = static_cast(0u); + imgViewParams.image = std::move(image); + imgViewParams.format = imgViewParams.image->getCreationParameters().format; + imgViewParams.viewType = ICPUImageView::ET_2D; + imgViewParams.subresourceRange = { static_cast(0u),0u,1u,0u,1u }; + + auto imageView = ICPUImageView::create(std::move(imgViewParams)); + auto channelsName = metadata->m_name; + + } + + return true; + } + + void workLoopBody() override {} + + bool keepRunning() override { return false; } + +}; + +NBL_MAIN_FUNC(EnvmapImportanceSamplingApp) diff --git a/CMakeLists.txt b/CMakeLists.txt index a8581a472..35e1da598 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,6 +98,7 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) add_subdirectory(72_CooperativeBinarySearch) + add_subdirectory(74_EnvmapImportanceSampling) # add new examples *before* 
NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From e51759ca86ec2df6cd13f9991e5321bff5e8d052 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 14 Jan 2026 15:09:26 +0700 Subject: [PATCH 2/9] Dummy --- 74_EnvmapImportanceSampling/main.cpp | 176 +++++++++++++++++++++++---- 1 file changed, 155 insertions(+), 21 deletions(-) diff --git a/74_EnvmapImportanceSampling/main.cpp b/74_EnvmapImportanceSampling/main.cpp index 5c0cbaff2..7c723d96f 100644 --- a/74_EnvmapImportanceSampling/main.cpp +++ b/74_EnvmapImportanceSampling/main.cpp @@ -2,25 +2,50 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "nbl/application_templates/MonoSystemMonoLoggerApplication.hpp" +#include "nbl/examples/examples.hpp" +#include "nbl/video/utilities/CAssetConverter.h" using namespace nbl; using namespace core; -using namespace asset; +using namespace hlsl; using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace scene; +using namespace nbl::examples; // instead of defining our own `int main()` we derive from `nbl::system::IApplicationFramework` to play "nice" wil all platofmrs -class EnvmapImportanceSamplingApp final : public nbl::application_templates::MonoSystemMonoLoggerApplication +class EnvmapImportanceSamplingApp final : public MonoWindowApplication, public BuiltinResourcesApplication { - using base_t = application_templates::MonoSystemMonoLoggerApplication; + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; public: - using base_t::base_t; + + inline EnvmapImportanceSamplingApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({1280,720}, EF_UNKNOWN, 
_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} bool onAppInitialized(smart_refctd_ptr&& system) override { - if (!base_t::onAppInitialized(std::move(system))) + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for (auto i = 0u; icreateCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_cmdBufs.data()+i,1})) + return logFail("Couldn't create Command Buffer!"); + } + constexpr std::string_view defaultImagePath = "../../media/envmap/envmap_0.exr"; const auto targetFilePath = [&]() -> std::string_view @@ -72,31 +97,140 @@ class EnvmapImportanceSamplingApp final : public nbl::application_templates::Mon } } - uint32_t i = 0u; - for (auto asset : contents) - { - auto image = IAsset::castDown(asset); - const auto* metadata = static_cast(meta->getAssetSpecificMetadata(image.get())); + auto asset = contents[0]; + auto image = IAsset::castDown(asset); + const auto* metadata = static_cast(meta->getAssetSpecificMetadata(image.get())); - ICPUImageView::SCreationParams imgViewParams; - imgViewParams.flags = static_cast(0u); - imgViewParams.image = std::move(image); - imgViewParams.format = imgViewParams.image->getCreationParameters().format; - imgViewParams.viewType = ICPUImageView::ET_2D; - imgViewParams.subresourceRange = { static_cast(0u),0u,1u,0u,1u }; + ICPUImageView::SCreationParams imgViewParams; + imgViewParams.flags = static_cast(0u); + imgViewParams.image = std::move(image); + imgViewParams.format = imgViewParams.image->getCreationParameters().format; + imgViewParams.viewType = ICPUImageView::ET_2D; + imgViewParams.subresourceRange = { static_cast(0u),0u,1u,0u,1u }; - 
auto imageView = ICPUImageView::create(std::move(imgViewParams)); - auto channelsName = metadata->m_name; + auto imageView = ICPUImageView::create(std::move(imgViewParams)); + auto channelsName = metadata->m_name; + + auto converter = CAssetConverter::create( { .device=m_device.get() }); + + { + // Test the provision of a custom patch this time + CAssetConverter::patch_t patch(imageView.get(),IImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT); + + // We don't want to generate mip-maps for these images (YET), to ensure that we must override the default callbacks. + struct SInputs final : CAssetConverter::SInputs + { + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; + } + } inputs = {}; + std::get>(inputs.assets) = { &imageView.get(),1 }; + std::get>(inputs.patches) = { &patch,1 }; + inputs.logger = m_logger.get(); + + // + auto reservation = converter->reserve(inputs); + + // get the created image view + auto gpuView = reservation.getGPUObjects().front().value; + if (!gpuView) + return false; + gpuView->getCreationParameters().image->setObjectDebugName("envmap"); } return true; } - void workLoopBody() override {} + protected: + const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override + { + // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // don't want any writes to be available, we'll clear, only thing to worry about is the layout transition + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + .srcStageMask = 
PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway + .srcAccessMask = ACCESS_FLAGS::NONE, + // layout transition needs to finish before the color write + .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // want layout transition to begin after all color output is done + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + return dependencies; + } + + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override + { - bool keepRunning() override { return false; } + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + auto* const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cb->end(); + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + 
.waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + + m_window->setCaption("[Nabla Engine] UI App Test Demo"); + return retval; + } + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array,MaxFramesInFlight> m_cmdBufs; }; NBL_MAIN_FUNC(EnvmapImportanceSamplingApp) From 53b9190bf28f08dcde1d01726730fbd4c91ce13a Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 21 Feb 2026 16:00:09 +0700 Subject: [PATCH 3/9] Implement example 74 for unit testing environment map sampling --- 74_EnvmapImportanceSampling/CMakeLists.txt | 74 ++- .../app_resources/common.hlsl | 33 + .../app_resources/present.frag.hlsl | 24 + .../app_resources/test.comp.hlsl | 108 ++++ .../config.json.template | 28 + .../imagesTestList.txt | 7 + 74_EnvmapImportanceSampling/main.cpp | 580 ++++++++++++------ 7 files changed, 661 insertions(+), 193 deletions(-) create mode 100644 74_EnvmapImportanceSampling/app_resources/common.hlsl create mode 100644 74_EnvmapImportanceSampling/app_resources/present.frag.hlsl create mode 100644 74_EnvmapImportanceSampling/app_resources/test.comp.hlsl create mode 100644 74_EnvmapImportanceSampling/config.json.template create mode 100644 74_EnvmapImportanceSampling/imagesTestList.txt diff --git a/74_EnvmapImportanceSampling/CMakeLists.txt b/74_EnvmapImportanceSampling/CMakeLists.txt index a3b1ea1f7..997d42fc7 100644 --- a/74_EnvmapImportanceSampling/CMakeLists.txt +++ b/74_EnvmapImportanceSampling/CMakeLists.txt @@ -1,8 +1,72 @@ -set(NBL_INCLUDE_SERACH_DIRECTORIES - "${CMAKE_CURRENT_SOURCE_DIR}/include" +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + +if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) +endif() + +add_dependencies(${EXECUTABLE_NAME} argparse) +target_include_directories(${EXECUTABLE_NAME} PUBLIC $) + +enable_testing() + +add_test(NAME NBL_IMAGE_HASH_RUN_TESTS + COMMAND "$" --test hash + WORKING_DIRECTORY "$" + COMMAND_EXPAND_LISTS ) -set(NBL_LIBRARIES) -nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") + +set(SM 6_8) +set(JSON [=[ +[ + { + "INPUT": "app_resources/test.comp.hlsl", + "KEY": "test", + }] +]=]) +string(CONFIGURE "${JSON}" JSON) + +set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}" + -T lib_${SM} +) -target_link_libraries(${EXECUTABLE_NAME} PRIVATE Nabla::ext::EnvmapImportanceSampling) +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE 
NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) diff --git a/74_EnvmapImportanceSampling/app_resources/common.hlsl b/74_EnvmapImportanceSampling/app_resources/common.hlsl new file mode 100644 index 000000000..b21da2372 --- /dev/null +++ b/74_EnvmapImportanceSampling/app_resources/common.hlsl @@ -0,0 +1,33 @@ +#ifndef _ENVMAP_IMPORTANCE_SAMPLING_SEARCH_H_INCLUDED_ +#define _ENVMAP_IMPORTANCE_SAMPLING_SEARCH_H_INCLUDED_ + +#include +#include +#include + +using namespace nbl; +using namespace nbl::hlsl; + +NBL_CONSTEXPR uint32_t WorkgroupSize = 128; + +struct STestPushConstants +{ + float32_t eps; + uint64_t outputAddress; + uint32_t2 warpResolution; + float32_t avgLuma; +}; + +struct TestSample +{ + float32_t2 xi; + float32_t2 uv; + float32_t3 L; + float32_t jacobian; + float32_t pdf; + float32_t deferredPdf; +}; + +using test_sample_t = TestSample; + +#endif // _COOPERATIVE_BINARY_SEARCH_H_INCLUDED_ diff --git a/74_EnvmapImportanceSampling/app_resources/present.frag.hlsl b/74_EnvmapImportanceSampling/app_resources/present.frag.hlsl new file mode 100644 index 000000000..0ce9eac3d --- /dev/null +++ b/74_EnvmapImportanceSampling/app_resources/present.frag.hlsl @@ -0,0 +1,24 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#pragma wave shader_stage(fragment) + +// vertex shader is provided by the fullScreenTriangle extension +#include +using namespace nbl::hlsl::ext::FullScreenTriangle; + +[[vk::binding(0, 3)]] Texture2D warpMap; +[[vk::combinedImageSampler]][[vk::binding(1, 3)]] Texture2D envMap; +[[vk::combinedImageSampler]][[vk::binding(1, 3)]] SamplerState envMapSampler; + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 +{ + uint width; + uint height; + warpMap.GetDimensions(width, height); + float32_t2 uv = warpMap.Load(uint32_t3(width * vxAttr.uv.x, height * vxAttr.uv.y, 0)); + float32_t4 color = envMap.Sample(envMapSampler, uv); + + return float32_t4(color.xyz, 1.0); +} diff --git a/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl b/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl new file mode 100644 index 000000000..9224e3c29 --- /dev/null +++ b/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl @@ -0,0 +1,108 @@ +#include "common.hlsl" + +#include +#include +#include +#include +#include +#include + +[[vk::push_constant]] STestPushConstants pc; + +[[vk::combinedImageSampler]][[vk::binding(0, 0)]] Texture2D lumaMap; +[[vk::combinedImageSampler]][[vk::binding(0, 0)]] SamplerState lumaSampler; + +[[vk::combinedImageSampler]][[vk::binding(1, 0)]] Texture2D warpMap; +[[vk::combinedImageSampler]][[vk::binding(1, 0)]] SamplerState warpSampler; + +using namespace nbl::hlsl::sampling::hierarchical_image; + + +struct LuminanceAccessor +{ + template && + concepts::same_as + ) + void get(IndexT index, NBL_REF_ARG(ValT) val) + { + val = lumaMap.SampleLevel(lumaSampler, index, 0); + } + +}; + +struct WarpAccessor +{ + matrix sampleUvs(uint32_t2 sampleCoord) NBL_CONST_MEMBER_FUNC + { + const float32_t2 dir0 = warpMap.Load(int32_t3(sampleCoord + uint32_t2(0, 1), 0)); + const float32_t2 dir1 = warpMap.Load(int32_t3(sampleCoord + uint32_t2(1, 1), 0)); + const 
float32_t2 dir2 = warpMap.Load(int32_t3(sampleCoord + uint32_t2(1, 0), 0)); + const float32_t2 dir3 = warpMap.Load(int32_t3(sampleCoord, 0)); + return matrix( + dir0, + dir1, + dir2, + dir3 + ); + } +}; + +using hierarchical_image_type = sampling::HierarchicalImage >; +static const LuminanceAccessor luminanceAccessor; +static const WarpAccessor warpAccessor; +static const hierarchical_image_type hImage = hierarchical_image_type::create(luminanceAccessor, warpAccessor, pc.warpResolution, pc.avgLuma); + +float32_t2 convertToFloat01(uint32_t2 xi_uint) +{ + return float32_t2(xi_uint) / promote(float32_t(numeric_limits::max)); +} + +[numthreads(WorkgroupSize, 1, 1)] +[shader("compute")] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + float32_t eps = pc.eps; + + random::PCG32 pcg = random::PCG32::construct(threadID.x); + random::DimAdaptorRecursive rng = random::DimAdaptorRecursive::construct(pcg); + uint32_t2 xi_uint = rng(); + + float32_t2 xi = convertToFloat01(xi_uint); + + // uint32_t2 xi_uint = (threadID.x / 1000, threadID.x % 1000); + // + // float32_t2 xi = float32_t2(xi_uint) / float32_t2(1000, 1000); + + + xi.x = hlsl::clamp(xi.x, eps, 1.f - eps); + xi.y = hlsl::clamp(xi.y, eps, 1.f - eps); + + float pdf; + float32_t2 uv; + + const float3 L = hImage.generate_and_pdf(pdf, uv, xi); + + float eps_x = eps; + float eps_y = eps; + + float32_t2 d_uv; + float32_t d_pdf; + const float3 L_plus_du = hImage.generate_and_pdf(d_pdf, d_uv, xi + float32_t2(0.5f * eps_x, 0)); + const float3 L_plus_dv = hImage.generate_and_pdf(d_pdf, d_uv, xi + float32_t2(0, 0.5f * eps_y)); + + const float3 L_minus_du = hImage.generate_and_pdf(d_pdf, d_uv, xi - float32_t2(0.5f * eps_x, 0)); + const float3 L_minus_dv = hImage.generate_and_pdf(d_pdf, d_uv, xi - float32_t2(0, 0.5f * eps_y)); + + float jacobian = length(cross(L_plus_du - L_minus_du, L_plus_dv - L_minus_dv)) / (eps_x * eps_y); + + test_sample_t testSample; + testSample.xi = xi; + testSample.uv = uv; + testSample.L = L; + 
testSample.jacobian = jacobian; + testSample.pdf = pdf; + testSample.deferredPdf = hImage.deferredPdf(L); + vk::RawBufferStore(pc.outputAddress + threadID.x * sizeof(test_sample_t), testSample); +} diff --git a/74_EnvmapImportanceSampling/config.json.template b/74_EnvmapImportanceSampling/config.json.template new file mode 100644 index 000000000..24adf54fb --- /dev/null +++ b/74_EnvmapImportanceSampling/config.json.template @@ -0,0 +1,28 @@ +{ + "enableParallelBuild": true, + "threadsPerBuildProcess" : 2, + "isExecuted": false, + "scriptPath": "", + "cmake": { + "configurations": [ "Release", "Debug", "RelWithDebInfo" ], + "buildModes": [], + "requiredOptions": [] + }, + "profiles": [ + { + "backend": "vulkan", + "platform": "windows", + "buildModes": [], + "runConfiguration": "Release", + "gpuArchitectures": [] + } + ], + "dependencies": [], + "data": [ + { + "dependencies": [], + "command": [""], + "outputs": [] + } + ] +} diff --git a/74_EnvmapImportanceSampling/imagesTestList.txt b/74_EnvmapImportanceSampling/imagesTestList.txt new file mode 100644 index 000000000..34a40079c --- /dev/null +++ b/74_EnvmapImportanceSampling/imagesTestList.txt @@ -0,0 +1,7 @@ +; This is the testing suite for various Nabla loaders/writers (JPG/PNG/TGA/BMP/DDS/KTX). +; BMP is currently unsupported for now. +; 16-bit PNG & 8-bit RLE (compressed) TGA is not supported. +; For licensing attribution, see LICENSE. + +; JPG, colored & 8-bit grayscale +../../media/envmap/envmap_1.exr diff --git a/74_EnvmapImportanceSampling/main.cpp b/74_EnvmapImportanceSampling/main.cpp index 7c723d96f..b5ae4a59a 100644 --- a/74_EnvmapImportanceSampling/main.cpp +++ b/74_EnvmapImportanceSampling/main.cpp @@ -1,9 +1,18 @@ -// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h - +#include "nbl/this_example/builtin/build/spirv/keys.hpp" #include "nbl/examples/examples.hpp" -#include "nbl/video/utilities/CAssetConverter.h" + +#include "nbl/core/sampling/EnvmapSampler.h" +#include "nbl/core/hash/blake.h" + +#include "nlohmann/json.hpp" +#include "argparse/argparse.hpp" + +#include "app_resources/common.hlsl" + +using json = nlohmann::json; using namespace nbl; using namespace core; @@ -12,225 +21,420 @@ using namespace system; using namespace asset; using namespace ui; using namespace video; -using namespace scene; using namespace nbl::examples; -// instead of defining our own `int main()` we derive from `nbl::system::IApplicationFramework` to play "nice" wil all platofmrs -class EnvmapImportanceSamplingApp final : public MonoWindowApplication, public BuiltinResourcesApplication +namespace { - using device_base_t = MonoWindowApplication; - using asset_base_t = BuiltinResourcesApplication; -public: - - inline EnvmapImportanceSamplingApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), - device_base_t({1280,720}, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} - - bool onAppInitialized(smart_refctd_ptr&& system) override - { - if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; - - m_semaphore = m_device->createSemaphore(m_realFrameIx); - if (!m_semaphore) - return logFail("Failed to Create a Semaphore!"); - - auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - for (auto i = 0u; icreateCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_cmdBufs.data()+i,1})) - return 
logFail("Couldn't create Command Buffer!"); - } + template + smart_refctd_ptr loadPrecompiledShader(ILogicalDevice* device, IAssetManager* assetManager, ILogger* logger) + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = logger; + lp.workingDirectory = "app_resources"; - constexpr std::string_view defaultImagePath = "../../media/envmap/envmap_0.exr"; + auto key = nbl::this_example::builtin::build::get_spirv_key(device); + auto assetBundle = assetManager->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return nullptr; - const auto targetFilePath = [&]() -> std::string_view - { - const auto argc = argv.size(); - const bool isDefaultImageRequested = argc == 1; + auto shader = IAsset::castDown(assets[0]); + return shader; + }; +} - if (isDefaultImageRequested) - { - m_logger->log("No image specified, loading default \"%s\" OpenEXR image from media directory!", ILogger::ELL_INFO, defaultImagePath.data()); - return defaultImagePath; - } - else if (argc == 2) - { - const std::string_view target(argv[1]); - m_logger->log("Requested \"%s\"", ILogger::ELL_INFO, target.data()); - return { target }; - } - else - { - m_logger->log("To many arguments! 
Pass a single filename to an OpenEXR image w.r.t CWD.", ILogger::ELL_ERROR); - return {}; - } - }(); +class EnvmapImportanceSampleApp final : public application_templates::BasicMultiQueueApplication, public BuiltinResourcesApplication +{ + using device_base_t = application_templates::BasicMultiQueueApplication; + using asset_base_t = BuiltinResourcesApplication; + using clock_t = std::chrono::steady_clock; + using perf_clock_resolution_t = std::chrono::milliseconds; - if (targetFilePath.empty()) - return false; - - auto assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + constexpr static inline clock_t::duration DisplayImageDuration = std::chrono::milliseconds(900); + constexpr static inline std::string_view DefaultImagePathsFile = "../imagesTestList.txt"; - nbl::asset::IAssetLoader::SAssetLoadParams lp; - const asset::COpenEXRMetadata* meta; + public: + // Yay thanks to multiple inheritance we cannot forward ctors anymore + inline EnvmapImportanceSampleApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD) {} + + virtual bool isComputeOnly() const {return false;} - auto image_bundle = assetManager->getAsset(targetFilePath.data(), lp); - auto contents = image_bundle.getContents(); + inline bool onAppInitialized(smart_refctd_ptr&& system) override { - if (contents.empty()) - { - m_logger->log("Could not load \"%s\"", ILogger::ELL_ERROR, targetFilePath.data()); + core::blake3_hasher hasher(); + argparse::ArgumentParser program("Envmap Importance Sampling Test"); + + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) return false; - } - meta = image_bundle.getMetadata()->selfCast(); + // get custom input list of files to execute the program with + system::path m_loadCWD = DefaultImagePathsFile; - if (!meta) - { - 
m_logger->log("Could not selfCast \"%s\" asset's metadata to COpenEXRMetadata, the tool expects valid OpenEXR input image, terminating!", ILogger::ELL_ERROR, targetFilePath.data()); - return false; - } - } + if (!m_testPathsFile.is_open()) + m_testPathsFile = std::ifstream(m_loadCWD); - auto asset = contents[0]; - auto image = IAsset::castDown(asset); - const auto* metadata = static_cast(meta->getAssetSpecificMetadata(image.get())); + if (!m_testPathsFile.is_open()) + return logFail("Could not open the test paths file"); - ICPUImageView::SCreationParams imgViewParams; - imgViewParams.flags = static_cast(0u); - imgViewParams.image = std::move(image); - imgViewParams.format = imgViewParams.image->getCreationParameters().format; - imgViewParams.viewType = ICPUImageView::ET_2D; - imgViewParams.subresourceRange = { static_cast(0u),0u,1u,0u,1u }; + m_logger->log("Connected \"%s\" input test list!", ILogger::ELL_INFO, m_loadCWD.string().c_str()); + m_loadCWD = m_loadCWD.parent_path(); - auto imageView = ICPUImageView::create(std::move(imgViewParams)); - auto channelsName = metadata->m_name; + + const auto* queue = getGraphicsQueue(); + { + smart_refctd_ptr cmdpool = m_device->createCommandPool(queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&m_cmdbuf,1})) + { + m_logger->log("Failed to create command buffer", ILogger::ELL_ERROR); + return false; + } + } - auto converter = CAssetConverter::create( { .device=m_device.get() }); - - { - // Test the provision of a custom patch this time - CAssetConverter::patch_t patch(imageView.get(),IImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT); + smart_refctd_ptr dsLayout; + { + auto defaultSampler = m_device->createSampler({ + .TextureWrapU = ETC_CLAMP_TO_EDGE, + .TextureWrapV = ETC_CLAMP_TO_EDGE, + .TextureWrapW = ETC_CLAMP_TO_EDGE, + .MinFilter = ISampler::ETF_NEAREST, + .MaxFilter = ISampler::ETF_NEAREST, + .AnisotropicFilter = 0 + 
}); - // We don't want to generate mip-maps for these images (YET), to ensure that we must override the default callbacks. - struct SInputs final : CAssetConverter::SInputs - { - inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override - { - return image->getCreationParameters().mipLevels; - } - inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override - { - return 0b0u; - } - } inputs = {}; - std::get>(inputs.assets) = { &imageView.get(),1 }; - std::get>(inputs.patches) = { &patch,1 }; - inputs.logger = m_logger.get(); - - // - auto reservation = converter->reserve(inputs); - - // get the created image view - auto gpuView = reservation.getGPUObjects().front().value; - if (!gpuView) - return false; - gpuView->getCreationParameters().image->setObjectDebugName("envmap"); + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0, + .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1, + .immutableSamplers = &defaultSampler + }, + { + .binding = 1, + .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1, + .immutableSamplers = &defaultSampler + }, + }; + dsLayout = m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + { + m_logger->log("Failed to Create Descriptor Layout", ILogger::ELL_ERROR); + return false; + } + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(STestPushConstants) + }; + const auto pipelineLayout = m_device->createPipelineLayout({ &pcRange, 1 }, dsLayout); + + const auto shader = 
loadPrecompiledShader<"test">(m_device.get(), m_assetMgr.get(), m_logger.get()); + + video::IGPUComputePipeline::SCreationParams pipelineParams = { + .layout = pipelineLayout.get(), + .shader = { + .shader = shader.get(), + .entryPoint = "main", + } + }; + + if (!m_device->createComputePipelines(nullptr, { &pipelineParams, 1 }, &m_pipeline)) + { + m_logger->log("Fail to create test pipeline", ILogger::ELL_ERROR); + return false; + } + + const auto dsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, pipelineLayout->getDescriptorSetLayouts()); + + m_descriptorSet = dsPool->createDescriptorSet(core::smart_refctd_ptr(pipelineLayout->getDescriptorSetLayouts()[0])); + + auto downStreamingBuffer = m_utils->getDefaultDownStreamingBuffer(); + std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); + uint32_t outputSize = sizeof(test_sample_t) * m_sampleCount; + m_outputOffset = downStreamingBuffer->invalid_value; + const auto& deviceLimits = m_device->getPhysicalDevice()->getLimits(); + const uint32_t alignment = core::max(deviceLimits.nonCoherentAtomSize,alignof(float)); + downStreamingBuffer->multi_allocate(waitTill, 1, &m_outputOffset, &outputSize, &alignment); + + m_scratchSemaphore = m_device->createSemaphore(0); + if (!m_scratchSemaphore) + { + logFail("Could not create Scratch Semaphore"); + return false; + } + m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); + + m_semaphore = m_device->createSemaphore(0); + if (!m_semaphore) + { + logFail("Could not create Scratch Semaphore"); + return false; + } + m_semaphore->setObjectDebugName("Semaphore"); + m_timelineValue = 0; + + // now convert + m_intendedSubmit.queue = getGraphicsQueue(); + // wait for nothing before upload + m_intendedSubmit.waitSemaphores = {}; + m_intendedSubmit.prevCommandBuffers = {}; + // fill later + m_intendedSubmit.scratchCommandBuffers = {}; + m_intendedSubmit.scratchSemaphore = { + .semaphore = m_scratchSemaphore.get(), + 
.value = 0, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + + std::string nextPath; + while (std::getline(m_testPathsFile,nextPath)) + { + if (nextPath!="" && nextPath[0]!=';') + { + m_cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // load the image view + system::path filename, extension; + const core::smart_refctd_ptr imgView = getImageView(nextPath, filename, extension, m_cmdbuf.get()); + + { + EnvmapSampler::SCreationParameters params; + params.utilities = m_utils; + params.assetManager = m_assetMgr; + params.envMap = imgView; + m_envmapImportanceSampling = EnvmapSampler::create(std::move(params)); + m_envmapImportanceSampling->computeWarpMap(getGraphicsQueue()); + } + + const auto lumaMap = m_envmapImportanceSampling->getLumaMapView(); + const auto warpMap = m_envmapImportanceSampling->getWarpMapView(); + + auto downStreamingBuffer = m_utils->getDefaultDownStreamingBuffer(); + + + IGPUDescriptorSet::SDescriptorInfo lumaMapDescriptorInfo = {}; + lumaMapDescriptorInfo.desc = lumaMap; + lumaMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo = {}; + warpMapDescriptorInfo.desc = warpMap; + warpMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + const IGPUDescriptorSet::SWriteDescriptorSet writes[] = { + { + .dstSet = m_descriptorSet.get(), .binding = 0, .count = 1, .info = &lumaMapDescriptorInfo + }, + { + .dstSet = m_descriptorSet.get(), .binding = 1, .count = 1, .info = &warpMapDescriptorInfo + }, + }; + + m_utils->getLogicalDevice()->updateDescriptorSets(writes, {}); + + const auto warpExtent = warpMap->getCreationParameters().image->getCreationParameters().extent; + const STestPushConstants pc = { + .eps = 1e-4, + .outputAddress = downStreamingBuffer->getBuffer()->getDeviceAddress() + m_outputOffset, + .warpResolution = uint32_t2(warpExtent.width, warpExtent.height), + .avgLuma = 
m_envmapImportanceSampling->getAvgLuma(), + }; + + m_cmdbuf->bindComputePipeline(m_pipeline.get()); + m_cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_pipeline->getLayout(), 0, 1, &m_descriptorSet.get()); + m_cmdbuf->pushConstants(m_pipeline->getLayout(), ESS_COMPUTE, 0, sizeof(STestPushConstants), &pc); + m_cmdbuf->dispatch(m_sampleCount / WorkgroupSize, 1, 1); + + m_cmdbuf->end(); + + const IQueue::SSubmitInfo::SSemaphoreInfo signal[1] = {{.semaphore = m_semaphore.get(),.value=++m_timelineValue}}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[1] = {{.cmdbuf=m_cmdbuf.get()}}; + const IQueue::SSubmitInfo submits[1] = {{.commandBuffers=cmdbufs,.signalSemaphores=signal}}; + getGraphicsQueue()->submit(submits); + const ISemaphore::SWaitInfo wait[1] = {{.semaphore=m_semaphore.get(),.value=m_timelineValue}}; + m_device->blockForSemaphores(wait); + + auto* gpuDownstreamingBuffer = downStreamingBuffer->getBuffer(); + if (downStreamingBuffer->needsManualFlushOrInvalidate()) + { + const auto nonCoherentAtomSize = m_device->getPhysicalDevice()->getLimits().nonCoherentAtomSize; + auto flushRange = ILogicalDevice::MappedMemoryRange(gpuDownstreamingBuffer->getBoundMemory().memory,m_outputOffset,m_sampleCount * sizeof(test_sample_t),ILogicalDevice::MappedMemoryRange::align_non_coherent_tag); + m_device->invalidateMappedMemoryRanges(1u,&flushRange); + } + + // Call the function + const uint8_t* bufSrc = reinterpret_cast(downStreamingBuffer->getBufferPointer()) + m_outputOffset; + const auto* testOutputs = reinterpret_cast(bufSrc); + + for (uint32_t sample_i = 0; sample_i < m_sampleCount; sample_i++) + { + const auto& testOutput = testOutputs[sample_i]; + if (testOutput.jacobian < 1e-3) continue; + if (const auto diff = abs(1.0f - (testOutput.jacobian * testOutput.pdf)); diff > 1e-2) + { + m_logger->log("Failed similarity test of jacobian and pdf for image %s for sample number %d. 
xi = (%f, %f), uv = (%f, %f), Jacobian = %f, pdf = %f, difference = %f", ILogger::ELL_ERROR, "dummy", sample_i, testOutput.xi.x, testOutput.xi.y, testOutput.uv.x, testOutput.uv.y, testOutput.jacobian, testOutput.pdf, diff); + m_totalFailCount++; + continue; + } + + if (const auto diff = abs(1.0f - (testOutput.jacobian * testOutput.deferredPdf)); diff > 1e-2) + { + m_logger->log("Failed similarity test of jacobian and pdf for image %s for sample number %d. xi = (%f, %f), uv = (%f, %f), Jacobian = %f, deferredPdf = %f, difference = %f", ILogger::ELL_ERROR, "dummy", sample_i, testOutput.xi.x, testOutput.xi.y, testOutput.uv.x, testOutput.uv.y, testOutput.jacobian, testOutput.deferredPdf, diff); + m_totalFailCount++; + } + } + } + } + + return true; + } } - return true; - } + inline void workLoopBody() override {} - protected: - const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override + inline bool keepRunning() override { return false; } + + inline bool onAppTerminated() override { - // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present - const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { - // don't want any writes to be available, we'll clear, only thing to worry about is the layout transition - { - .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway - .srcAccessMask = ACCESS_FLAGS::NONE, - // layout transition needs to finish before the color write - .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - // leave view offsets and flags default - }, - // want layout transition to begin after all color output is done - { - .srcSubpass = 0, - .dstSubpass = 
IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = { - // last place where the color can get modified, depth is implicitly earlier - .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // only write ops, reads can't be made available - .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - // spec says nothing is needed when presentation is the destination - } - // leave view offsets and flags default - }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; - return dependencies; + m_logger->log("==========Result==========", ILogger::ELL_INFO); + m_logger->log("Fail Count: %u / %u", ILogger::ELL_INFO, m_totalFailCount, m_sampleCount); + return true; } - inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override + protected: + + private: + smart_refctd_ptr m_envmapImportanceSampling; + + smart_refctd_ptr getImageView(std::string inAssetPath, system::path& outFilename, system::path& outExtension, IGPUCommandBuffer* cmdbuf) { + smart_refctd_ptr cpuView; - const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - auto* const cb = m_cmdBufs.data()[resourceIx].get(); - cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - cb->end(); - IQueue::SSubmitInfo::SSemaphoreInfo retval = - { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS - }; - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + m_logger->log("Loading image from path %s", ILogger::ELL_DEBUG, inAssetPath.c_str()); + + constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); + const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger.get(), m_loadCWD); + + auto bundle = m_assetMgr->getAsset(inAssetPath, loadParams); + 
+ auto contents = bundle.getContents(); + if (contents.empty()) { - {.cmdbuf = cb } - }; - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { - { - .semaphore = device_base_t::getCurrentAcquire().semaphore, - .value = device_base_t::getCurrentAcquire().acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - const IQueue::SSubmitInfo infos[] = + logFail("Failed to load image with path %s, skipping!", (m_loadCWD / inAssetPath).c_str()); + return nullptr; + } + + core::splitFilename(inAssetPath.c_str(), nullptr, &outFilename, &outExtension); + + const auto& asset = contents[0]; + switch (asset->getAssetType()) { + case IAsset::ET_IMAGE: { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = {&retval,1} - } - }; - - if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) - { - retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal - m_realFrameIx--; + auto image = smart_refctd_ptr_static_cast(asset); + const auto format = image->getCreationParameters().format; + + ICPUImageView::SCreationParams viewParams = + { + .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, + .image = std::move(image), + .viewType = IImageView::E_TYPE::ET_2D, + .format = format, + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + } + }; + + cpuView = ICPUImageView::create(std::move(viewParams)); + } break; + + case IAsset::ET_IMAGE_VIEW: + cpuView = smart_refctd_ptr_static_cast(asset); + break; + default: + logFail("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!"); + return nullptr; } + auto converter = CAssetConverter::create({ .device = m_device.get() }); + + // Test the provision of a custom patch this time + CAssetConverter::patch_t patch(cpuView.get(), 
IImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT); + + // We don't want to generate mip-maps for these images (YET), to ensure that we must override the default callbacks. + struct SInputs final : CAssetConverter::SInputs + { + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; + } + } inputs = {}; + std::get>(inputs.assets) = { &cpuView.get(),1 }; + std::get>(inputs.patches) = { &patch,1 }; + inputs.logger = m_logger.get(); + + // + auto reservation = converter->reserve(inputs); + + // get the created image view + auto gpuView = reservation.getGPUObjects().front().value; + + if (!gpuView) + return nullptr; + + gpuView->getCreationParameters().image->setObjectDebugName(inAssetPath.c_str()); + + // we should multi-buffer to not stall before renderpass recording but oh well + IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; + + m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo,1 }; + CAssetConverter::SConvertParams params = {}; + params.transfer = &m_intendedSubmit; + params.utilities = m_utils.get(); + auto result = reservation.convert(params); + + if (result.copy() != IQueue::RESULT::SUCCESS) + return nullptr; + + return gpuView; - m_window->setCaption("[Nabla Engine] UI App Test Demo"); - return retval; } - smart_refctd_ptr m_semaphore; - uint64_t m_realFrameIx = 0; - std::array,MaxFramesInFlight> m_cmdBufs; + std::ifstream m_testPathsFile; + system::path m_loadCWD; + + smart_refctd_ptr m_scratchSemaphore; + smart_refctd_ptr m_semaphore; + uint64_t m_timelineValue; + + smart_refctd_ptr m_cmdPool; + SIntendedSubmitInfo m_intendedSubmit; + + smart_refctd_ptr m_cmdbuf; + core::smart_refctd_ptr m_pipeline; + core::smart_refctd_ptr m_descriptorSet; + 
core::smart_refctd_ptr m_outputBuffer; + + + uint32_t m_sampleCount = 10000; + uint32_t m_outputOffset; + uint32_t m_totalFailCount = 0; + }; -NBL_MAIN_FUNC(EnvmapImportanceSamplingApp) +NBL_MAIN_FUNC(EnvmapImportanceSampleApp) From 2b1dd37b42d2567865c575d85733582e9607b85c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 22 Feb 2026 11:26:01 +0700 Subject: [PATCH 4/9] Remove unused comment --- 74_EnvmapImportanceSampling/app_resources/test.comp.hlsl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl b/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl index 9224e3c29..b4c842c16 100644 --- a/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl +++ b/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl @@ -71,11 +71,6 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) float32_t2 xi = convertToFloat01(xi_uint); - // uint32_t2 xi_uint = (threadID.x / 1000, threadID.x % 1000); - // - // float32_t2 xi = float32_t2(xi_uint) / float32_t2(1000, 1000); - - xi.x = hlsl::clamp(xi.x, eps, 1.f - eps); xi.y = hlsl::clamp(xi.y, eps, 1.f - eps); From b712d1e49cfc43a0ab3e82d4b6ef689f0e0f0edc Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 22 Feb 2026 11:26:21 +0700 Subject: [PATCH 5/9] Initial integration of envmap importance sampling to example 31 --- .../app_resources/hlsl/example_common.hlsl | 21 ++- .../hlsl/next_event_estimator.hlsl | 76 +++++++++ .../app_resources/hlsl/pathtracer.hlsl | 14 +- .../app_resources/hlsl/render.comp.hlsl | 86 ++++++++++- .../app_resources/hlsl/render_common.hlsl | 1 + .../app_resources/hlsl/scene.hlsl | 55 +++++++ 31_HLSLPathTracer/main.cpp | 146 ++++++++++++------ 7 files changed, 337 insertions(+), 62 deletions(-) diff --git a/31_HLSLPathTracer/app_resources/hlsl/example_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/example_common.hlsl index 9055468f5..14a8b63bd 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/example_common.hlsl +++
b/31_HLSLPathTracer/app_resources/hlsl/example_common.hlsl @@ -24,7 +24,8 @@ enum IntersectMode : uint32_t { IM_RAY_QUERY, IM_RAY_TRACING, - IM_PROCEDURAL + IM_PROCEDURAL, + IM_ENVMAP, }; template // TODO make type T Spectrum @@ -107,6 +108,24 @@ struct Light ObjectID objectID; }; +template +struct EnvmapLight +{ + using spectral_type = float32_t3; + using this_type = EnvmapLight; + + static this_type create(NBL_CONST_REF_ARG(EnvMapT) envMap, NBL_CONST_REF_ARG(HierarchicalImageT) hierarchicalImage) + { + this_type retval; + retval.envMap = envMap; + retval.hierarchicalImage = hierarchicalImage; + return retval; + } + + EnvMapT envMap; + HierarchicalImageT hierarchicalImage; +}; + template) struct SBxDFCreationParams { diff --git a/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl index 5c34eed3a..d77018509 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl @@ -379,4 +379,80 @@ struct NextEventEstimator +struct NextEventEstimator +{ + using scalar_type = typename Ray::scalar_type; + using vector2_type = vector; + using vector3_type = vector; + using ray_type = Ray; + using scene_type = Scene; + using light_type = Light; + using spectral_type = typename light_type::spectral_type; + using interaction_type = Aniso; + using quotient_pdf_type = sampling::quotient_and_pdf; + using sample_type = LightSample; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + + // affected by https://github.com/microsoft/DirectXShaderCompiler/issues/7007 + // NBL_CONSTEXPR_STATIC_INLINE PTPolygonMethod PolygonMethod = PPM; + enum : uint16_t { PolygonMethod = PPM }; + + spectral_type deferredEvalAndPdf(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(ray_type) ray) + { + const light_type light = lights[lightID]; + + vector2_type envmapUv = 
light.hierarchicalImage.inverseWarp_and_deferredPdf(pdf, ray.direction); + pdf *= (1.0 / scalar_type(lightCount)); + + spectral_type radiance; + light.envMap.get(envmapUv, radiance); + + return radiance; + } + + sample_type generate_and_quotient_and_pdf(NBL_REF_ARG(quotient_pdf_type) quotient_pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(interaction_type) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi, uint32_t depth) + { + newRayMaxT = numeric_limits::max; + + const light_type light = lights[lightID]; + + scalar_type pdf; + vector2_type envmapUv; + const vector3_type sampleL = light.hierarchicalImage.generate_and_pdf(pdf, envmapUv, xi.xy); + + ray_dir_info_type rayL; + if (hlsl::isinf(pdf)) + { + quotient_pdf = quotient_pdf_type::create(hlsl::promote(0.0), 0.0); + return sample_type::createInvalid(); + } + + const vector3_type N = interaction.getN(); + const scalar_type NdotL = nbl::hlsl::dot(N, sampleL); + + rayL.setDirection(sampleL); + sample_type L = sample_type::create(rayL, interaction.getT(), interaction.getB(), NdotL); + + newRayMaxT *= path_tracing::Tolerance::getEnd(depth); + + // Ray ray; + // ray.origin = origin; + // ray.direction = sampleL; + // spectral_type radiance = deferredEvalAndPdf(pdf, scene, 0, ray); + + pdf *= 1.0 / scalar_type(lightCount); + spectral_type radiance; + light.envMap.get(envmapUv, radiance); + + spectral_type quo = radiance / pdf; + + quotient_pdf = quotient_pdf_type::create(quo, pdf); + + return L; + } + + light_type lights[scene_type::SCENE_LIGHT_COUNT]; + uint32_t lightCount; +}; #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index 6f67cd79e..413eb9660 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -205,16 +205,18 @@ struct Unidirectional 
return false; } - void missProgram(NBL_REF_ARG(ray_type) ray) + void missProgram(NBL_REF_ARG(ray_type) ray, NBL_CONST_REF_ARG(scene_type) scene) { vector3_type finalContribution = ray.payload.throughput; - // #ifdef USE_ENVMAP - // vec2 uv = SampleSphericalMap(_immutable.direction); - // finalContribution *= textureLod(envMap, uv, 0.0).rgb; - // #else +#ifdef ENVMAP_LIGHT + float _pdf; + ray.payload.accumulation += nee.deferredEvalAndPdf(_pdf, scene, + 0, ray) * ray.payload.throughput / (1.0 + _pdf * _pdf * ray.payload.otherTechniqueHeuristic); +#else const vector3_type kConstantEnvLightRadiance = vector3_type(0.15, 0.21, 0.3); // TODO: match spectral_type finalContribution *= kConstantEnvLightRadiance; ray.payload.accumulation += finalContribution; +#endif // #endif } @@ -238,7 +240,7 @@ struct Unidirectional rayAlive = closestHitProgram(1, sampleIndex, ray, scene); } if (!hit) - missProgram(ray); + missProgram(ray, scene); const uint32_t sampleCount = sampleIndex + 1; accumulator.addSample(sampleCount, ray.payload.accumulation); diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index 46660bac3..bdafd6ca3 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -2,6 +2,8 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/random/pcg.hlsl" #include "nbl/builtin/hlsl/random/xoroshiro.hlsl" +#include "nbl/builtin/hlsl/sampling/warps/spherical.hlsl" +#include "nbl/builtin/hlsl/sampling/hierarchical_image.hlsl" #ifdef PERSISTENT_WORKGROUPS #include "nbl/builtin/hlsl/math/morton.hlsl" #endif @@ -34,6 +36,10 @@ [[vk::combinedImageSampler]] [[vk::binding(2, 2)]] Texture2D scramblebuf; [[vk::combinedImageSampler]] [[vk::binding(2, 2)]] SamplerState scrambleSampler; +[[vk::combinedImageSampler]] [[vk::binding(3, 2)]] Texture2D lumaMap; +[[vk::combinedImageSampler]] [[vk::binding(3, 2)]] SamplerState 
lumaSampler; +[[vk::binding(4, 2)]] Texture2D warpMap; + [[vk::image_format("rgba16f")]] [[vk::binding(0)]] RWTexture2DArray outImage; [[vk::image_format("rgba16f")]] [[vk::binding(1)]] RWTexture2DArray cascade; @@ -59,6 +65,9 @@ NBL_CONSTEXPR ProceduralShapeType LIGHT_TYPE = PST_TRIANGLE; #ifdef RECTANGLE_LIGHT NBL_CONSTEXPR ProceduralShapeType LIGHT_TYPE = PST_RECTANGLE; #endif +#ifdef ENVMAP_LIGHT +NBL_CONSTEXPR ProceduralShapeType LIGHT_TYPE = PST_NONE; +#endif NBL_CONSTEXPR path_tracing::PTPolygonMethod POLYGON_METHOD = path_tracing::PPM_SOLID_ANGLE; @@ -96,14 +105,70 @@ using iri_conductor_bxdf_type = bxdf::reflection::SIridescent; using ray_type = Ray; + +#ifdef ENVMAP_LIGHT +struct EnvmapAccessor +{ + template && + concepts::same_as + ) + void get(IndexT index, NBL_REF_ARG(ValT) val) + { + val = envMap.SampleLevel(envSampler, index, 0); + } +}; + +struct LuminanceAccessor +{ + template && + concepts::same_as + ) + void get(IndexT index, NBL_REF_ARG(ValT) val) + { + val = lumaMap.SampleLevel(lumaSampler, index, 0); + } + +}; + +struct WarpAccessor +{ + matrix sampleUvs(uint32_t2 sampleCoord) NBL_CONST_MEMBER_FUNC + { + const float32_t2 dir0 = warpMap.Load(int32_t3(sampleCoord + uint32_t2(0, 1), 0)); + const float32_t2 dir1 = warpMap.Load(int32_t3(sampleCoord + uint32_t2(1, 1), 0)); + const float32_t2 dir2 = warpMap.Load(int32_t3(sampleCoord + uint32_t2(1, 0), 0)); + const float32_t2 dir3 = warpMap.Load(int32_t3(sampleCoord, 0)); + return matrix( + dir0, + dir1, + dir2, + dir3 + ); + } +}; + +using hierarchical_image_type = sampling::HierarchicalImage; +using light_type = EnvmapLight; +#else using light_type = Light; +#endif + using bxdfnode_type = BxDFNode; using scene_type = Scene; using randgen_type = RandGen::Uniform3D; using raygen_type = RayGen::Basic; using intersector_type = Intersector; using material_system_type = MaterialSystem; + +#ifdef ENVMAP_LIGHT +using nee_type = NextEventEstimator; +#else using nee_type = NextEventEstimator; +#endif #ifdef 
RWMC_ENABLED using accumulator_type = rwmc::CascadeAccumulator; @@ -131,15 +196,22 @@ static const Shape rectangles[scene_type::SCENE_LIGHT_COUN }; #endif -static const light_type lights[scene_type::SCENE_LIGHT_COUNT] = { - light_type::create(LightEminence, +#ifdef ENVMAP_LIGHT +static const EnvmapAccessor envmapAccessor; +static const LuminanceAccessor luminanceAccessor; +static const WarpAccessor warpAccessor; +static const hierarchical_image_type hierarchicalImage = hierarchical_image_type::create(luminanceAccessor, warpAccessor, uint32_t2(2048, 1024), pc.avgLuma); +static const light_type light = light_type::create(envmapAccessor, hierarchicalImage); +#else +static const light_type light = +light_type::create(LightEminence, #ifdef SPHERE_LIGHT - scene_type::SCENE_SPHERE_COUNT, + scene_type::SCENE_SPHERE_COUNT, #else - 0u, + 0u, +#endif + IM_PROCEDURAL, LIGHT_TYPE); #endif - IM_PROCEDURAL, LIGHT_TYPE) -}; static const bxdfnode_type bxdfs[scene_type::SCENE_BXDF_COUNT] = { bxdfnode_type::create(MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.8,0.8,0.8)), @@ -233,7 +305,7 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) scene.updateLight(renderPushConstants.generalPurposeLightMatrix); pathtracer.rayGen = raygen_type::create(pixOffsetParam, camPos, NDC, renderPushConstants.invMVP); - pathtracer.nee.lights = lights; + pathtracer.nee.lights[0] = light; pathtracer.nee.lightCount = scene_type::SCENE_LIGHT_COUNT; pathtracer.materialSystem.bxdfs = bxdfs; pathtracer.materialSystem.bxdfCount = scene_type::SCENE_BXDF_COUNT; diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl index 76e8abfe3..52b20302c 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -18,6 +18,7 @@ struct RenderPushConstants int sampleCount; int depth; uint64_t pSampleSequence; + float avgLuma; }; NBL_CONSTEXPR nbl::hlsl::float32_t3 
LightEminence = nbl::hlsl::float32_t3(30.0f, 25.0f, 15.0f); diff --git a/31_HLSLPathTracer/app_resources/hlsl/scene.hlsl b/31_HLSLPathTracer/app_resources/hlsl/scene.hlsl index 3d004664e..633857c6b 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/scene.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/scene.hlsl @@ -36,6 +36,61 @@ const Shape SceneBase::scene_spheres[SCENE_SPHERE_COUNT] = { template struct Scene; +template<> +struct Scene : SceneBase +{ + using scalar_type = float; + using vector3_type = vector; + using this_t = Scene; + using base_t = SceneBase; + using id_type = ObjectID; + + NBL_CONSTEXPR_STATIC_INLINE uint32_t SphereCount = base_t::SCENE_SPHERE_COUNT + base_t::SCENE_LIGHT_COUNT; + NBL_CONSTEXPR_STATIC_INLINE uint32_t TriangleCount = 0u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t RectangleCount = 0u; + + Shape light_spheres[1]; + Shape light_triangles[1]; + Shape light_rectangles[1]; + + Shape getSphere(uint32_t idx) + { + assert(idx < SphereCount); + if (idx < base_t::SCENE_SPHERE_COUNT) + return base_t::scene_spheres[idx]; + else + return light_spheres[idx-base_t::SCENE_SPHERE_COUNT]; + } + + Shape getTriangle(uint32_t idx) + { + assert(false); + return light_triangles[0]; + } + + Shape getRectangle(uint32_t idx) + { + assert(false); + return light_rectangles[0]; + } + + void updateLight(NBL_CONST_REF_ARG(float32_t3x4) generalPurposeLightMatrix) + { + } + + uint32_t getBsdfLightIDs(NBL_CONST_REF_ARG(id_type) objectID) + { + assert(false); + return getSphere(objectID.id).bsdfLightIDs; + } + + vector3_type getNormal(NBL_CONST_REF_ARG(id_type) objectID, NBL_CONST_REF_ARG(vector3_type) intersection) + { + assert(objectID.shapeType == PST_SPHERE); + return getSphere(objectID.id).getNormal(intersection); + } +}; + template<> struct Scene : SceneBase { diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index d4fcdc427..c3fac9e83 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -5,6 +5,7 @@ #include 
"nbl/examples/examples.hpp" #include "nbl/this_example/transform.hpp" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "nbl/ext/EnvmapImportanceSampling/CEnvmapImportanceSampling.h" #include "nbl/builtin/hlsl/surface_transform.h" #include "nbl/this_example/common.hpp" #include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" @@ -35,8 +36,9 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui enum E_LIGHT_GEOMETRY : uint8_t { ELG_SPHERE, - ELG_TRIANGLE, - ELG_RECTANGLE, + // ELG_TRIANGLE, + // ELG_RECTANGLE, + ELG_ENVMAP, ELG_COUNT }; @@ -52,26 +54,19 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui constexpr static inline uint32_t MaxFramesInFlight = 5; constexpr static inline uint32_t MaxDescriptorCount = 256u; constexpr static inline uint8_t MaxUITextureCount = 1u; - static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; + static inline std::string DefaultImagePathsFile = "envmap/envmap_2.exr"; static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; - static inline std::array PTGLSLShaderPaths = { - "app_resources/glsl/litBySphere.comp", - "app_resources/glsl/litByTriangle.comp", - "app_resources/glsl/litByRectangle.comp" - }; static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; static inline std::array PTHLSLShaderVariants = { "SPHERE_LIGHT", - "TRIANGLE_LIGHT", - "RECTANGLE_LIGHT" + "ENVMAP_LIGHT", }; static inline std::string ResolveShaderPath = "app_resources/hlsl/resolve.comp.hlsl"; static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { "ELG_SPHERE", - "ELG_TRIANGLE", - "ELG_RECTANGLE" + "ELG_ENVMAP", }; const char* shaderTypes[E_RENDER_MODE::ERM_COUNT] = { @@ -243,7 +238,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui }; std::array descriptorSet0Bindings = {}; - std::array 
descriptorSet3Bindings = {}; + std::array descriptorSet2Bindings = {}; std::array presentDescriptorSetBindings; descriptorSet0Bindings[0] = { @@ -264,7 +259,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui .immutableSamplers = nullptr }; - descriptorSet3Bindings[0] = { + descriptorSet2Bindings[0] = { .binding = 0u, .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, @@ -272,7 +267,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui .count = 1u, .immutableSamplers = nullptr }; - descriptorSet3Bindings[1] = { + descriptorSet2Bindings[1] = { .binding = 2u, .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, @@ -280,6 +275,22 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui .count = 1u, .immutableSamplers = nullptr }; + descriptorSet2Bindings[2] = { + .binding = 3u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet2Bindings[3] = { + .binding = 4u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; presentDescriptorSetBindings[0] = { .binding = 0u, @@ -291,7 +302,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui }; auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); - auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); + auto cpuDescriptorSetLayout2 = 
make_smart_refctd_ptr(descriptorSet2Bindings); auto gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); @@ -469,29 +480,29 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelines.data() + index)) return logFail("Failed to create HLSL compute pipeline!\n"); } - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true); - auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelines.data() + index)) - return logFail("Failed to create HLSL PersistentWG compute pipeline!\n"); - } - - // rwmc pipelines - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], false, true); - auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelinesRWMC.data() + index)) - return logFail("Failed to create HLSL RWMC compute pipeline!\n"); - } - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true, true); - auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelinesRWMC.data() + index)) - return logFail("Failed to create HLSL RWMC PersistentWG compute pipeline!\n"); - } + // { + // auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true); + // auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + // + // if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelines.data() + index)) + // return logFail("Failed to create 
HLSL PersistentWG compute pipeline!\n"); + // } + // + // // rwmc pipelines + // { + // auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], false, true); + // auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + // + // if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelinesRWMC.data() + index)) + // return logFail("Failed to create HLSL RWMC compute pipeline!\n"); + // } + // { + // auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true, true); + // auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + // + // if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelinesRWMC.data() + index)) + // return logFail("Failed to create HLSL RWMC PersistentWG compute pipeline!\n"); + // } } } @@ -772,6 +783,16 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui m_cascadeView = createHDRIImageView(cascade, CascadeCount, IGPUImageView::ET_2D_ARRAY); m_cascadeView->setObjectDebugName("Cascade View"); + // Create resources related to envmap importance sampling + { + ext::envmap_importance_sampling::EnvmapSampler::SCreationParameters params = {}; + params.assetManager = m_assetMgr; + params.utilities = m_utils; + params.envMap = m_envMapView; + m_envmapImportanceSampling = nbl::ext::envmap_importance_sampling::EnvmapSampler::create(std::move(params)); + m_envmapImportanceSampling->computeWarpMap(getGraphicsQueue()); + } + // TODO: change cascade layout to general } @@ -897,23 +918,34 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui }; auto sampler1 = m_device->createSampler(samplerParams1); - std::array writeDSInfos = {}; + std::array writeDSInfos = {}; writeDSInfos[0].desc = m_outImgView; writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[1].desc = m_cascadeView; 
writeDSInfos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[2].desc = m_envMapView; // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; writeDSInfos[2].info.combinedImageSampler.sampler = sampler0; writeDSInfos[2].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[3].desc = m_scrambleView; // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[4].desc = m_outImgView; - writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - std::array writeDescriptorSets = {}; + writeDSInfos[4].desc = m_envmapImportanceSampling->getLumaMapView(); + writeDSInfos[4].info.combinedImageSampler.sampler = sampler0; + writeDSInfos[4].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + + writeDSInfos[5].desc = m_envmapImportanceSampling->getWarpMapView(); + writeDSInfos[5].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + + writeDSInfos[6].desc = m_outImgView; + writeDSInfos[6].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + std::array writeDescriptorSets = {}; writeDescriptorSets[0] = { .dstSet = m_descriptorSet0.get(), .binding = 0, @@ -943,11 +975,25 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui .info = &writeDSInfos[3] }; writeDescriptorSets[4] = { + .dstSet = m_descriptorSet2.get(), + .binding = 3, + .arrayElement = 
0u, + .count = 1u, + .info = &writeDSInfos[4] + }; + writeDescriptorSets[5] = { + .dstSet = m_descriptorSet2.get(), + .binding = 4, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[5] + }; + writeDescriptorSets[6] = { .dstSet = m_presentDescriptorSet.get(), .binding = 0, .arrayElement = 0u, .count = 1u, - .info = &writeDSInfos[4] + .info = &writeDSInfos[6] }; m_device->updateDescriptorSets(writeDescriptorSets, {}); @@ -1089,7 +1135,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui ); } - if (E_LIGHT_GEOMETRY::ELG_SPHERE == PTPipeline) + if (E_LIGHT_GEOMETRY::ELG_ENVMAP == PTPipeline) { m_transformParams.allowedOp = ImGuizmo::OPERATION::TRANSLATE | ImGuizmo::OPERATION::SCALEU; m_transformParams.isSphere = true; @@ -1101,7 +1147,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui } EditTransform(&imguizmoM16InOut.view[0][0], &imguizmoM16InOut.projection[0][0], &m_lightModelMatrix[0][0], m_transformParams); - if (E_LIGHT_GEOMETRY::ELG_SPHERE == PTPipeline) + if (E_LIGHT_GEOMETRY::ELG_ENVMAP == PTPipeline) { // keep uniform scale for sphere float32_t uniformScale = (m_lightModelMatrix[0][0] + m_lightModelMatrix[1][1] + m_lightModelMatrix[2][2]) / 3.0f; @@ -1230,7 +1276,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui }, .oldLayout = IImage::LAYOUT::UNDEFINED, .newLayout = IImage::LAYOUT::GENERAL - } + }, }; cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); } @@ -1571,6 +1617,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui rwmcPushConstants.renderPushConstants.depth = depth; rwmcPushConstants.renderPushConstants.sampleCount = resolvePushConstants.sampleCount = spp; rwmcPushConstants.renderPushConstants.pSampleSequence = m_sequenceBuffer->getDeviceAddress(); + rwmcPushConstants.renderPushConstants.avgLuma = m_envmapImportanceSampling->getAvgLuma(); float32_t2 packParams = 
float32_t2(rwmcBase, rwmcStart); rwmcPushConstants.packedSplattingParams = hlsl::packHalf2x16(packParams); } @@ -1581,6 +1628,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui pc.sampleCount = spp; pc.depth = depth; pc.pSampleSequence = m_sequenceBuffer->getDeviceAddress(); + pc.avgLuma = m_envmapImportanceSampling->getAvgLuma(); } } @@ -1612,6 +1660,8 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui smart_refctd_ptr m_window; smart_refctd_ptr> m_surface; + smart_refctd_ptr m_envmapImportanceSampling; + // gpu resources smart_refctd_ptr m_cmdPool; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPipelines; @@ -1669,7 +1719,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui float viewWidth = 10.f; float camYAngle = 165.f / 180.f * 3.14159f; float camXAngle = 32.f / 180.f * 3.14159f; - int PTPipeline = E_LIGHT_GEOMETRY::ELG_SPHERE; + int PTPipeline = E_LIGHT_GEOMETRY::ELG_ENVMAP; int renderMode = E_RENDER_MODE::ERM_HLSL; int spp = 32; int depth = 3; From 368da9c5f98262a8486c662356d1c3ceff6d6e43 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 22 Feb 2026 16:05:55 +0700 Subject: [PATCH 6/9] Add test for direct output and cached output --- .../app_resources/common.hlsl | 13 ++- .../app_resources/test.comp.hlsl | 89 +++++++++++++------ 74_EnvmapImportanceSampling/main.cpp | 36 +++++--- 3 files changed, 93 insertions(+), 45 deletions(-) diff --git a/74_EnvmapImportanceSampling/app_resources/common.hlsl b/74_EnvmapImportanceSampling/app_resources/common.hlsl index b21da2372..3a06547fa 100644 --- a/74_EnvmapImportanceSampling/app_resources/common.hlsl +++ b/74_EnvmapImportanceSampling/app_resources/common.hlsl @@ -2,7 +2,6 @@ #define _ENVMAP_IMPORTANCE_SAMPLING_SEARCH_H_INCLUDED_ #include -#include #include using namespace nbl; @@ -18,16 +17,22 @@ struct STestPushConstants float32_t avgLuma; }; -struct TestSample +struct TestOutput { - float32_t2 xi; - float32_t2 uv; 
float32_t3 L; + float32_t2 uv; float32_t jacobian; float32_t pdf; float32_t deferredPdf; }; +struct TestSample +{ + TestOutput directOutput; + TestOutput cachedOutput; + float32_t2 xi; +}; + using test_sample_t = TestSample; #endif // _COOPERATIVE_BINARY_SEARCH_H_INCLUDED_ diff --git a/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl b/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl index b4c842c16..1ef6a0564 100644 --- a/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl +++ b/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl @@ -30,6 +30,20 @@ struct LuminanceAccessor val = lumaMap.SampleLevel(lumaSampler, index, 0); } + float32_t texelFetch(uint32_t2 coord, uint32_t level) + { + return lumaMap.Load(uint32_t3(coord, level)); + } + + float32_t4 texelGather(uint32_t2 coord, uint32_t level) + { + return float32_t4( + lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 1)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 1)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(1, 0)), + lumaMap.Load(uint32_t3(coord, level), uint32_t2(0, 0)) + ); + } }; struct WarpAccessor @@ -49,10 +63,36 @@ struct WarpAccessor } }; -using hierarchical_image_type = sampling::HierarchicalImage >; -static const LuminanceAccessor luminanceAccessor; -static const WarpAccessor warpAccessor; -static const hierarchical_image_type hImage = hierarchical_image_type::create(luminanceAccessor, warpAccessor, pc.warpResolution, pc.avgLuma); + +template +TestOutput GenerateTestOutput(NBL_CONST_REF_ARG(HierarchicalImageT) hImage, float32_t2 xi) +{ + float pdf; + float32_t2 uv; + + const float3 L = hImage.generate_and_pdf(pdf, uv, xi); + + float eps_x = pc.eps; + float eps_y = pc.eps; + + float32_t2 d_uv; + float32_t d_pdf; + const float3 L_plus_du = hImage.generate_and_pdf(d_pdf, d_uv, xi + float32_t2(0.5f * eps_x, 0)); + const float3 L_plus_dv = hImage.generate_and_pdf(d_pdf, d_uv, xi + float32_t2(0, 0.5f * eps_y)); + + const float3 L_minus_du = 
hImage.generate_and_pdf(d_pdf, d_uv, xi - float32_t2(0.5f * eps_x, 0)); + const float3 L_minus_dv = hImage.generate_and_pdf(d_pdf, d_uv, xi - float32_t2(0, 0.5f * eps_y)); + + float jacobian = length(cross(L_plus_du - L_minus_du, L_plus_dv - L_minus_dv)) / (eps_x * eps_y); + + TestOutput testOutput; + testOutput.uv = uv; + testOutput.L = L; + testOutput.jacobian = jacobian; + testOutput.pdf = pdf; + testOutput.deferredPdf = hImage.deferredPdf(L); + return testOutput; +} float32_t2 convertToFloat01(uint32_t2 xi_uint) { @@ -63,41 +103,32 @@ float32_t2 convertToFloat01(uint32_t2 xi_uint) [shader("compute")] void main(uint32_t3 threadID : SV_DispatchThreadID) { + const LuminanceAccessor luminanceAccessor; + const WarpAccessor warpAccessor; + using luminance_sampler_type = nbl::hlsl::sampling::LuminanceMapSampler; + + using direct_hierarchical_image_type = sampling::HierarchicalImage >; + + const luminance_sampler_type luminanceSampler = luminance_sampler_type::create(luminanceAccessor, pc.warpResolution, true, pc.warpResolution); + float32_t eps = pc.eps; random::PCG32 pcg = random::PCG32::construct(threadID.x); - random::DimAdaptorRecursive rng = random::DimAdaptorRecursive::construct(pcg); - uint32_t2 xi_uint = rng(); + uint32_t2 xi_uint = random::DimAdaptorRecursive::__call(pcg); float32_t2 xi = convertToFloat01(xi_uint); xi.x = hlsl::clamp(xi.x, eps, 1.f - eps); xi.y = hlsl::clamp(xi.y, eps, 1.f - eps); - float pdf; - float32_t2 uv; - - const float3 L = hImage.generate_and_pdf(pdf, uv, xi); - - float eps_x = eps; - float eps_y = eps; - - float32_t2 d_uv; - float32_t d_pdf; - const float3 L_plus_du = hImage.generate_and_pdf(d_pdf, d_uv, xi + float32_t2(0.5f * eps_x, 0)); - const float3 L_plus_dv = hImage.generate_and_pdf(d_pdf, d_uv, xi + float32_t2(0, 0.5f * eps_y)); - - const float3 L_minus_du = hImage.generate_and_pdf(d_pdf, d_uv, xi - float32_t2(0.5f * eps_x, 0)); - const float3 L_minus_dv = hImage.generate_and_pdf(d_pdf, d_uv, xi - float32_t2(0, 0.5f * eps_y)); 
- - float jacobian = length(cross(L_plus_du - L_minus_du, L_plus_dv - L_minus_dv)) / (eps_x * eps_y); - test_sample_t testSample; testSample.xi = xi; - testSample.uv = uv; - testSample.L = L; - testSample.jacobian = jacobian; - testSample.pdf = pdf; - testSample.deferredPdf = hImage.deferredPdf(L); + + const direct_hierarchical_image_type directHImage = direct_hierarchical_image_type::create(luminanceAccessor, luminanceSampler, pc.warpResolution, pc.avgLuma); + testSample.directOutput = GenerateTestOutput(directHImage, xi); + + using cached_hierarchical_image_type = sampling::HierarchicalImage >; + const cached_hierarchical_image_type cachedHImage = cached_hierarchical_image_type::create(luminanceAccessor, warpAccessor, pc.warpResolution, pc.avgLuma); + testSample.cachedOutput = GenerateTestOutput(cachedHImage, xi); vk::RawBufferStore(pc.outputAddress + threadID.x * sizeof(test_sample_t), testSample); } diff --git a/74_EnvmapImportanceSampling/main.cpp b/74_EnvmapImportanceSampling/main.cpp index b5ae4a59a..57bf3d62d 100644 --- a/74_EnvmapImportanceSampling/main.cpp +++ b/74_EnvmapImportanceSampling/main.cpp @@ -41,6 +41,14 @@ namespace auto shader = IAsset::castDown(assets[0]); return shader; }; + + template + bool checkEq(T a, T b, float32_t eps = 1e-4) + { + T _a = hlsl::max(hlsl::abs(a), hlsl::promote(1e-5)); + T _b = hlsl::max(hlsl::abs(b), hlsl::promote(1e-5)); + return nbl::hlsl::all::Dimension> >(nbl::hlsl::max(_a / _b, _b / _a) <= hlsl::promote(1 + eps)); + } } class EnvmapImportanceSampleApp final : public application_templates::BasicMultiQueueApplication, public BuiltinResourcesApplication @@ -271,23 +279,30 @@ class EnvmapImportanceSampleApp final : public application_templates::BasicMulti // Call the function const uint8_t* bufSrc = reinterpret_cast(downStreamingBuffer->getBufferPointer()) + m_outputOffset; - const auto* testOutputs = reinterpret_cast(bufSrc); + const auto* testSamples = reinterpret_cast(bufSrc); for (uint32_t sample_i = 0; sample_i < 
m_sampleCount; sample_i++) { - const auto& testOutput = testOutputs[sample_i]; + const auto& testSample = testSamples[sample_i]; + const auto& directOutput = testSample.directOutput; + const auto& cachedOutput = testSample.cachedOutput; + + if (!checkEq(cachedOutput.L, directOutput.L) || !checkEq(cachedOutput.uv, directOutput.uv) || !checkEq(cachedOutput.pdf, directOutput.pdf) || !checkEq(cachedOutput.deferredPdf, directOutput.deferredPdf)) + { + logFail("Failed similarity test between direct sampling and cached sampling. Direct Sampling = {uv = (%f, %f), L = (%f, %f %f), pdf = %f, deferredPdf = %f}, Cached Sampling = {uv = (%f, %f), L = (%f, %f %f), pdf = %f, deferredPdf = %f}", directOutput.uv.x, directOutput.uv.y, directOutput.L.x, directOutput.L.y, directOutput.L.z, directOutput.pdf, directOutput.deferredPdf, cachedOutput.uv.x, cachedOutput.uv.y, cachedOutput.L.x, cachedOutput.L.y, cachedOutput.L.z, cachedOutput.pdf, cachedOutput.pdf); + } + + const auto& testOutput = directOutput; if (testOutput.jacobian < 1e-3) continue; - if (const auto diff = abs(1.0f - (testOutput.jacobian * testOutput.pdf)); diff > 1e-2) + if (const auto diff = abs(1.0f - (testOutput.jacobian * testOutput.pdf)); diff > 0.05) { - m_logger->log("Failed similarity test of jacobian and pdf for image %s for sample number %d. xi = (%f, %f), uv = (%f, %f), Jacobian = %f, pdf = %f, difference = %f", ILogger::ELL_ERROR, "dummy", sample_i, testOutput.xi.x, testOutput.xi.y, testOutput.uv.x, testOutput.uv.y, testOutput.jacobian, testOutput.pdf, diff); - m_totalFailCount++; + m_logger->log("Failed similarity test of jacobian and pdf for image %s for sample number %d. 
xi = (%f, %f), uv = (%f, %f), Jacobian = %f, pdf = %f, difference = %f", ILogger::ELL_ERROR, "dummy", sample_i, testSample.xi.x, testSample.xi.y, testOutput.uv.x, testOutput.uv.y, testOutput.jacobian, testOutput.pdf, diff); continue; } - - if (const auto diff = abs(1.0f - (testOutput.jacobian * testOutput.deferredPdf)); diff > 1e-2) + + if (const auto diff = abs(1.0f - (testOutput.jacobian * testOutput.deferredPdf)); diff > 0.05) { - m_logger->log("Failed similarity test of jacobian and pdf for image %s for sample number %d. xi = (%f, %f), uv = (%f, %f), Jacobian = %f, deferredPdf = %f, difference = %f", ILogger::ELL_ERROR, "dummy", sample_i, testOutput.xi.x, testOutput.xi.y, testOutput.uv.x, testOutput.uv.y, testOutput.jacobian, testOutput.deferredPdf, diff); - m_totalFailCount++; + m_logger->log("Failed similarity test of jacobian and pdf for image %s for sample number %d. xi = (%f, %f), uv = (%f, %f), Jacobian = %f, deferredPdf = %f, difference = %f", ILogger::ELL_ERROR, "dummy", sample_i, testSample.xi.x, testSample.xi.y, testOutput.uv.x, testOutput.uv.y, testOutput.jacobian, testOutput.deferredPdf, diff); } } } @@ -303,8 +318,6 @@ class EnvmapImportanceSampleApp final : public application_templates::BasicMulti inline bool onAppTerminated() override { - m_logger->log("==========Result==========", ILogger::ELL_INFO); - m_logger->log("Fail Count: %u / %u", ILogger::ELL_INFO, m_totalFailCount, m_sampleCount); return true; } @@ -433,7 +446,6 @@ class EnvmapImportanceSampleApp final : public application_templates::BasicMulti uint32_t m_sampleCount = 10000; uint32_t m_outputOffset; - uint32_t m_totalFailCount = 0; }; From 44621a42a3e176c1e7f0625578a19eb3c2fd21df Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 23 Feb 2026 17:22:40 +0700 Subject: [PATCH 7/9] Fix compile error on example 74 --- 74_EnvmapImportanceSampling/main.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/74_EnvmapImportanceSampling/main.cpp 
b/74_EnvmapImportanceSampling/main.cpp index 57bf3d62d..8a86c8d17 100644 --- a/74_EnvmapImportanceSampling/main.cpp +++ b/74_EnvmapImportanceSampling/main.cpp @@ -45,9 +45,16 @@ namespace template bool checkEq(T a, T b, float32_t eps = 1e-4) { + if constexpr (!is_vector_v) + { + return abs(a - b) <= eps; + } + else + { T _a = hlsl::max(hlsl::abs(a), hlsl::promote(1e-5)); T _b = hlsl::max(hlsl::abs(b), hlsl::promote(1e-5)); return nbl::hlsl::all::Dimension> >(nbl::hlsl::max(_a / _b, _b / _a) <= hlsl::promote(1 + eps)); + } } } @@ -249,7 +256,7 @@ class EnvmapImportanceSampleApp final : public application_templates::BasicMulti const auto warpExtent = warpMap->getCreationParameters().image->getCreationParameters().extent; const STestPushConstants pc = { - .eps = 1e-4, + .eps = 5 * 1e-5, .outputAddress = downStreamingBuffer->getBuffer()->getDeviceAddress() + m_outputOffset, .warpResolution = uint32_t2(warpExtent.width, warpExtent.height), .avgLuma = m_envmapImportanceSampling->getAvgLuma(), @@ -293,7 +300,7 @@ class EnvmapImportanceSampleApp final : public application_templates::BasicMulti } const auto& testOutput = directOutput; - if (testOutput.jacobian < 1e-3) continue; + if (testOutput.jacobian < 0.05) continue; if (const auto diff = abs(1.0f - (testOutput.jacobian * testOutput.pdf)); diff > 0.05) { m_logger->log("Failed similarity test of jacobian and pdf for image %s for sample number %d. 
xi = (%f, %f), uv = (%f, %f), Jacobian = %f, pdf = %f, difference = %f", ILogger::ELL_ERROR, "dummy", sample_i, testSample.xi.x, testSample.xi.y, testOutput.uv.x, testOutput.uv.y, testOutput.jacobian, testOutput.pdf, diff); From e551553664ad8aea571fdf63272be53517b59648 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 23 Feb 2026 17:26:41 +0700 Subject: [PATCH 8/9] Rename example 74 to 75 --- .../CMakeLists.txt | 0 .../app_resources/common.hlsl | 0 .../app_resources/present.frag.hlsl | 0 .../app_resources/test.comp.hlsl | 0 .../config.json.template | 0 .../imagesTestList.txt | 0 .../main.cpp | 6 +++--- CMakeLists.txt | 2 +- 8 files changed, 4 insertions(+), 4 deletions(-) rename {74_EnvmapImportanceSampling => 75_EnvmapImportanceSamplingTest}/CMakeLists.txt (100%) rename {74_EnvmapImportanceSampling => 75_EnvmapImportanceSamplingTest}/app_resources/common.hlsl (100%) rename {74_EnvmapImportanceSampling => 75_EnvmapImportanceSamplingTest}/app_resources/present.frag.hlsl (100%) rename {74_EnvmapImportanceSampling => 75_EnvmapImportanceSamplingTest}/app_resources/test.comp.hlsl (100%) rename {74_EnvmapImportanceSampling => 75_EnvmapImportanceSamplingTest}/config.json.template (100%) rename {74_EnvmapImportanceSampling => 75_EnvmapImportanceSamplingTest}/imagesTestList.txt (100%) rename {74_EnvmapImportanceSampling => 75_EnvmapImportanceSamplingTest}/main.cpp (98%) diff --git a/74_EnvmapImportanceSampling/CMakeLists.txt b/75_EnvmapImportanceSamplingTest/CMakeLists.txt similarity index 100% rename from 74_EnvmapImportanceSampling/CMakeLists.txt rename to 75_EnvmapImportanceSamplingTest/CMakeLists.txt diff --git a/74_EnvmapImportanceSampling/app_resources/common.hlsl b/75_EnvmapImportanceSamplingTest/app_resources/common.hlsl similarity index 100% rename from 74_EnvmapImportanceSampling/app_resources/common.hlsl rename to 75_EnvmapImportanceSamplingTest/app_resources/common.hlsl diff --git a/74_EnvmapImportanceSampling/app_resources/present.frag.hlsl 
b/75_EnvmapImportanceSamplingTest/app_resources/present.frag.hlsl similarity index 100% rename from 74_EnvmapImportanceSampling/app_resources/present.frag.hlsl rename to 75_EnvmapImportanceSamplingTest/app_resources/present.frag.hlsl diff --git a/74_EnvmapImportanceSampling/app_resources/test.comp.hlsl b/75_EnvmapImportanceSamplingTest/app_resources/test.comp.hlsl similarity index 100% rename from 74_EnvmapImportanceSampling/app_resources/test.comp.hlsl rename to 75_EnvmapImportanceSamplingTest/app_resources/test.comp.hlsl diff --git a/74_EnvmapImportanceSampling/config.json.template b/75_EnvmapImportanceSamplingTest/config.json.template similarity index 100% rename from 74_EnvmapImportanceSampling/config.json.template rename to 75_EnvmapImportanceSamplingTest/config.json.template diff --git a/74_EnvmapImportanceSampling/imagesTestList.txt b/75_EnvmapImportanceSamplingTest/imagesTestList.txt similarity index 100% rename from 74_EnvmapImportanceSampling/imagesTestList.txt rename to 75_EnvmapImportanceSamplingTest/imagesTestList.txt diff --git a/74_EnvmapImportanceSampling/main.cpp b/75_EnvmapImportanceSamplingTest/main.cpp similarity index 98% rename from 74_EnvmapImportanceSampling/main.cpp rename to 75_EnvmapImportanceSamplingTest/main.cpp index 8a86c8d17..1ce7ab609 100644 --- a/74_EnvmapImportanceSampling/main.cpp +++ b/75_EnvmapImportanceSamplingTest/main.cpp @@ -58,7 +58,7 @@ namespace } } -class EnvmapImportanceSampleApp final : public application_templates::BasicMultiQueueApplication, public BuiltinResourcesApplication +class EnvmapImportanceSamplingTest final : public application_templates::BasicMultiQueueApplication, public BuiltinResourcesApplication { using device_base_t = application_templates::BasicMultiQueueApplication; using asset_base_t = BuiltinResourcesApplication; @@ -70,7 +70,7 @@ class EnvmapImportanceSampleApp final : public application_templates::BasicMulti public: // Yay thanks to multiple inheritance we cannot forward ctors anymore - inline 
EnvmapImportanceSampleApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + inline EnvmapImportanceSamplingTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD) {} virtual bool isComputeOnly() const {return false;} @@ -456,4 +456,4 @@ class EnvmapImportanceSampleApp final : public application_templates::BasicMulti }; -NBL_MAIN_FUNC(EnvmapImportanceSampleApp) +NBL_MAIN_FUNC(EnvmapImportanceSamplingTest) diff --git a/CMakeLists.txt b/CMakeLists.txt index 85910a8c1..1824bbe6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,13 +106,13 @@ if(NBL_BUILD_EXAMPLES) add_subdirectory(70_FLIPFluids) add_subdirectory(71_RayTracingPipeline) add_subdirectory(72_CooperativeBinarySearch) - add_subdirectory(74_EnvmapImportanceSampling) if (NBL_BUILD_MITSUBA_LOADER) add_subdirectory(73_GeometryInspector) endif() add_subdirectory(74_QuantizedSequenceTests) + add_subdirectory(75_EnvmapImportanceSamplingTest) # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory NBL_GET_ALL_TARGETS(TARGETS) From 4b6a00361b922a85bdcfce3a06f7154006c9ebd5 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 23 Feb 2026 19:34:52 +0700 Subject: [PATCH 9/9] Fix indentation --- 75_EnvmapImportanceSamplingTest/main.cpp | 614 ++++++++++++----------- 1 file changed, 320 insertions(+), 294 deletions(-) diff --git a/75_EnvmapImportanceSamplingTest/main.cpp b/75_EnvmapImportanceSamplingTest/main.cpp index 1ce7ab609..b5a9a1f16 100644 --- a/75_EnvmapImportanceSamplingTest/main.cpp +++ b/75_EnvmapImportanceSamplingTest/main.cpp @@ -5,15 +5,12 @@ #include "nbl/examples/examples.hpp" #include "nbl/core/sampling/EnvmapSampler.h" -#include "nbl/core/hash/blake.h" #include "nlohmann/json.hpp" #include 
"argparse/argparse.hpp" #include "app_resources/common.hlsl" -using json = nlohmann::json; - using namespace nbl; using namespace core; using namespace hlsl; @@ -26,36 +23,36 @@ using namespace nbl::examples; namespace { template - smart_refctd_ptr loadPrecompiledShader(ILogicalDevice* device, IAssetManager* assetManager, ILogger* logger) - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = logger; - lp.workingDirectory = "app_resources"; - - auto key = nbl::this_example::builtin::build::get_spirv_key(device); - auto assetBundle = assetManager->getAsset(key.data(), lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return nullptr; - - auto shader = IAsset::castDown(assets[0]); - return shader; - }; - - template - bool checkEq(T a, T b, float32_t eps = 1e-4) - { - if constexpr (!is_vector_v) - { - return abs(a - b) <= eps; - } - else - { - T _a = hlsl::max(hlsl::abs(a), hlsl::promote(1e-5)); - T _b = hlsl::max(hlsl::abs(b), hlsl::promote(1e-5)); - return nbl::hlsl::all::Dimension> >(nbl::hlsl::max(_a / _b, _b / _a) <= hlsl::promote(1 + eps)); - } - } + smart_refctd_ptr loadPrecompiledShader(ILogicalDevice* device, IAssetManager* assetManager, ILogger* logger) + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = logger; + lp.workingDirectory = "app_resources"; + + auto key = nbl::this_example::builtin::build::get_spirv_key(device); + auto assetBundle = assetManager->getAsset(key.data(), lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return nullptr; + + auto shader = IAsset::castDown(assets[0]); + return shader; + }; + + template + bool checkEq(T a, T b, float32_t eps = 1e-4) + { + if constexpr (!is_vector_v) + { + return abs(a - b) <= eps; + } + else + { + T _a = hlsl::max(hlsl::abs(a), hlsl::promote(1e-5)); + T _b = hlsl::max(hlsl::abs(b), hlsl::promote(1e-5)); + return nbl::hlsl::all::Dimension> >(nbl::hlsl::max(_a / _b, _b / _a) <= hlsl::promote(1 + eps)); + } + } } class 
EnvmapImportanceSamplingTest final : public application_templates::BasicMultiQueueApplication, public BuiltinResourcesApplication @@ -77,16 +74,45 @@ class EnvmapImportanceSamplingTest final : public application_templates::BasicMu inline bool onAppInitialized(smart_refctd_ptr&& system) override { - core::blake3_hasher hasher(); argparse::ArgumentParser program("Envmap Importance Sampling Test"); + program.add_argument("--input-list") + .help("File path to override input list with image file paths to execute this program with."); + program.add_argument("--sample-count") + .default_value(static_cast(1000)) + .help("Sample count for each input (Default : 1000)"); + try + { + program.parse_args({ argv.data(), argv.data() + argv.size() }); + } + catch (const std::exception& err) + { + std::cerr << err.what() << std::endl << program; + return 1; + } - if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) - return false; + m_sampleCount = program.get("--sample-count"); + + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; if (!asset_base_t::onAppInitialized(std::move(system))) return false; // get custom input list of files to execute the program with system::path m_loadCWD = DefaultImagePathsFile; + { + const auto hook = program.present("--input-list"); + + if (hook) + { + const auto inputList = *hook; + + m_testPathsFile = std::ifstream(inputList); + if (m_testPathsFile.is_open()) + m_loadCWD = inputList; + else + m_logger->log("Couldn't open test file given by argument --input-list \"%s\", falling back to default list!", ILogger::ELL_ERROR, inputList.c_str()); + } + } if (!m_testPathsFile.is_open()) m_testPathsFile = std::ifstream(m_loadCWD); @@ -100,223 +126,223 @@ class EnvmapImportanceSamplingTest final : public application_templates::BasicMu const auto* queue = getGraphicsQueue(); - { - smart_refctd_ptr cmdpool = m_device->createCommandPool(queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if 
(!cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&m_cmdbuf,1})) - { - m_logger->log("Failed to create command buffer", ILogger::ELL_ERROR); - return false; - } - } - - smart_refctd_ptr dsLayout; - { - auto defaultSampler = m_device->createSampler({ - .TextureWrapU = ETC_CLAMP_TO_EDGE, - .TextureWrapV = ETC_CLAMP_TO_EDGE, - .TextureWrapW = ETC_CLAMP_TO_EDGE, - .MinFilter = ISampler::ETF_NEAREST, - .MaxFilter = ISampler::ETF_NEAREST, - .AnisotropicFilter = 0 - }); - - const IGPUDescriptorSetLayout::SBinding bindings[] = { - { - .binding = 0, - .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1, - .immutableSamplers = &defaultSampler - }, - { - .binding = 1, - .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1, - .immutableSamplers = &defaultSampler - }, - }; - dsLayout = m_device->createDescriptorSetLayout(bindings); - if (!dsLayout) - { - m_logger->log("Failed to Create Descriptor Layout", ILogger::ELL_ERROR); - return false; - } - asset::SPushConstantRange pcRange = { - .stageFlags = hlsl::ESS_COMPUTE, - .offset = 0, - .size = sizeof(STestPushConstants) - }; - const auto pipelineLayout = m_device->createPipelineLayout({ &pcRange, 1 }, dsLayout); - - const auto shader = loadPrecompiledShader<"test">(m_device.get(), m_assetMgr.get(), m_logger.get()); - - video::IGPUComputePipeline::SCreationParams pipelineParams = { - .layout = pipelineLayout.get(), - .shader = { - .shader = shader.get(), - .entryPoint = "main", - } - }; - - if (!m_device->createComputePipelines(nullptr, { &pipelineParams, 1 }, &m_pipeline)) - { - m_logger->log("Fail to create test pipeline", ILogger::ELL_ERROR); - return false; - } - - const auto dsPool = 
m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, pipelineLayout->getDescriptorSetLayouts()); - - m_descriptorSet = dsPool->createDescriptorSet(core::smart_refctd_ptr(pipelineLayout->getDescriptorSetLayouts()[0])); - - auto downStreamingBuffer = m_utils->getDefaultDownStreamingBuffer(); - std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); - uint32_t outputSize = sizeof(test_sample_t) * m_sampleCount; - m_outputOffset = downStreamingBuffer->invalid_value; - const auto& deviceLimits = m_device->getPhysicalDevice()->getLimits(); - const uint32_t alignment = core::max(deviceLimits.nonCoherentAtomSize,alignof(float)); - downStreamingBuffer->multi_allocate(waitTill, 1, &m_outputOffset, &outputSize, &alignment); - - m_scratchSemaphore = m_device->createSemaphore(0); - if (!m_scratchSemaphore) - { - logFail("Could not create Scratch Semaphore"); - return false; - } - m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); - - m_semaphore = m_device->createSemaphore(0); - if (!m_semaphore) - { - logFail("Could not create Scratch Semaphore"); - return false; - } - m_semaphore->setObjectDebugName("Semaphore"); - m_timelineValue = 0; - - // now convert - m_intendedSubmit.queue = getGraphicsQueue(); - // wait for nothing before upload - m_intendedSubmit.waitSemaphores = {}; - m_intendedSubmit.prevCommandBuffers = {}; - // fill later - m_intendedSubmit.scratchCommandBuffers = {}; - m_intendedSubmit.scratchSemaphore = { - .semaphore = m_scratchSemaphore.get(), - .value = 0, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS - }; - - std::string nextPath; - while (std::getline(m_testPathsFile,nextPath)) - { - if (nextPath!="" && nextPath[0]!=';') - { - m_cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - - // load the image view - system::path filename, extension; - const core::smart_refctd_ptr imgView = getImageView(nextPath, filename, extension, m_cmdbuf.get()); - - { - 
EnvmapSampler::SCreationParameters params; - params.utilities = m_utils; - params.assetManager = m_assetMgr; - params.envMap = imgView; - m_envmapImportanceSampling = EnvmapSampler::create(std::move(params)); - m_envmapImportanceSampling->computeWarpMap(getGraphicsQueue()); - } - - const auto lumaMap = m_envmapImportanceSampling->getLumaMapView(); - const auto warpMap = m_envmapImportanceSampling->getWarpMapView(); - - auto downStreamingBuffer = m_utils->getDefaultDownStreamingBuffer(); - - - IGPUDescriptorSet::SDescriptorInfo lumaMapDescriptorInfo = {}; - lumaMapDescriptorInfo.desc = lumaMap; - lumaMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo = {}; - warpMapDescriptorInfo.desc = warpMap; - warpMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - - const IGPUDescriptorSet::SWriteDescriptorSet writes[] = { - { - .dstSet = m_descriptorSet.get(), .binding = 0, .count = 1, .info = &lumaMapDescriptorInfo - }, - { - .dstSet = m_descriptorSet.get(), .binding = 1, .count = 1, .info = &warpMapDescriptorInfo - }, - }; - - m_utils->getLogicalDevice()->updateDescriptorSets(writes, {}); - - const auto warpExtent = warpMap->getCreationParameters().image->getCreationParameters().extent; - const STestPushConstants pc = { - .eps = 5 * 1e-5, - .outputAddress = downStreamingBuffer->getBuffer()->getDeviceAddress() + m_outputOffset, - .warpResolution = uint32_t2(warpExtent.width, warpExtent.height), - .avgLuma = m_envmapImportanceSampling->getAvgLuma(), - }; - - m_cmdbuf->bindComputePipeline(m_pipeline.get()); - m_cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_pipeline->getLayout(), 0, 1, &m_descriptorSet.get()); - m_cmdbuf->pushConstants(m_pipeline->getLayout(), ESS_COMPUTE, 0, sizeof(STestPushConstants), &pc); - m_cmdbuf->dispatch(m_sampleCount / WorkgroupSize, 1, 1); - - m_cmdbuf->end(); - - const IQueue::SSubmitInfo::SSemaphoreInfo signal[1] = {{.semaphore = 
m_semaphore.get(),.value=++m_timelineValue}}; - const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[1] = {{.cmdbuf=m_cmdbuf.get()}}; - const IQueue::SSubmitInfo submits[1] = {{.commandBuffers=cmdbufs,.signalSemaphores=signal}}; - getGraphicsQueue()->submit(submits); - const ISemaphore::SWaitInfo wait[1] = {{.semaphore=m_semaphore.get(),.value=m_timelineValue}}; - m_device->blockForSemaphores(wait); - - auto* gpuDownstreamingBuffer = downStreamingBuffer->getBuffer(); - if (downStreamingBuffer->needsManualFlushOrInvalidate()) - { - const auto nonCoherentAtomSize = m_device->getPhysicalDevice()->getLimits().nonCoherentAtomSize; - auto flushRange = ILogicalDevice::MappedMemoryRange(gpuDownstreamingBuffer->getBoundMemory().memory,m_outputOffset,m_sampleCount * sizeof(test_sample_t),ILogicalDevice::MappedMemoryRange::align_non_coherent_tag); - m_device->invalidateMappedMemoryRanges(1u,&flushRange); - } - - // Call the function - const uint8_t* bufSrc = reinterpret_cast(downStreamingBuffer->getBufferPointer()) + m_outputOffset; - const auto* testSamples = reinterpret_cast(bufSrc); - - for (uint32_t sample_i = 0; sample_i < m_sampleCount; sample_i++) - { - const auto& testSample = testSamples[sample_i]; - const auto& directOutput = testSample.directOutput; - const auto& cachedOutput = testSample.cachedOutput; - - if (!checkEq(cachedOutput.L, directOutput.L) || !checkEq(cachedOutput.uv, directOutput.uv) || !checkEq(cachedOutput.pdf, directOutput.pdf) || !checkEq(cachedOutput.deferredPdf, directOutput.deferredPdf)) - { - logFail("Failed similarity test between direct sampling and cached sampling. 
Direct Sampling = {uv = (%f, %f), L = (%f, %f %f), pdf = %f, deferredPdf = %f}, Cached Sampling = {uv = (%f, %f), L = (%f, %f %f), pdf = %f, deferredPdf = %f}", directOutput.uv.x, directOutput.uv.y, directOutput.L.x, directOutput.L.y, directOutput.L.z, directOutput.pdf, directOutput.deferredPdf, cachedOutput.uv.x, cachedOutput.uv.y, cachedOutput.L.x, cachedOutput.L.y, cachedOutput.L.z, cachedOutput.pdf, cachedOutput.pdf); - } - - const auto& testOutput = directOutput; - if (testOutput.jacobian < 0.05) continue; - if (const auto diff = abs(1.0f - (testOutput.jacobian * testOutput.pdf)); diff > 0.05) - { - m_logger->log("Failed similarity test of jacobian and pdf for image %s for sample number %d. xi = (%f, %f), uv = (%f, %f), Jacobian = %f, pdf = %f, difference = %f", ILogger::ELL_ERROR, "dummy", sample_i, testSample.xi.x, testSample.xi.y, testOutput.uv.x, testOutput.uv.y, testOutput.jacobian, testOutput.pdf, diff); - continue; - } - - if (const auto diff = abs(1.0f - (testOutput.jacobian * testOutput.deferredPdf)); diff > 0.05) - { - m_logger->log("Failed similarity test of jacobian and pdf for image %s for sample number %d. 
xi = (%f, %f), uv = (%f, %f), Jacobian = %f, deferredPdf = %f, difference = %f", ILogger::ELL_ERROR, "dummy", sample_i, testSample.xi.x, testSample.xi.y, testOutput.uv.x, testOutput.uv.y, testOutput.jacobian, testOutput.deferredPdf, diff); - } - } - } - } - - return true; - } + { + smart_refctd_ptr cmdpool = m_device->createCommandPool(queue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&m_cmdbuf,1})) + { + m_logger->log("Failed to create command buffer", ILogger::ELL_ERROR); + return false; + } + } + + smart_refctd_ptr dsLayout; + { + auto defaultSampler = m_device->createSampler({ + .TextureWrapU = ETC_CLAMP_TO_EDGE, + .TextureWrapV = ETC_CLAMP_TO_EDGE, + .TextureWrapW = ETC_CLAMP_TO_EDGE, + .MinFilter = ISampler::ETF_NEAREST, + .MaxFilter = ISampler::ETF_NEAREST, + .AnisotropicFilter = 0 + }); + + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0, + .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1, + .immutableSamplers = &defaultSampler + }, + { + .binding = 1, + .type = IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1, + .immutableSamplers = &defaultSampler + }, + }; + dsLayout = m_device->createDescriptorSetLayout(bindings); + if (!dsLayout) + { + m_logger->log("Failed to Create Descriptor Layout", ILogger::ELL_ERROR); + return false; + } + asset::SPushConstantRange pcRange = { + .stageFlags = hlsl::ESS_COMPUTE, + .offset = 0, + .size = sizeof(STestPushConstants) + }; + const auto pipelineLayout = m_device->createPipelineLayout({ &pcRange, 1 }, dsLayout); + + const auto shader = loadPrecompiledShader<"test">(m_device.get(), m_assetMgr.get(), 
m_logger.get()); + + video::IGPUComputePipeline::SCreationParams pipelineParams = { + .layout = pipelineLayout.get(), + .shader = { + .shader = shader.get(), + .entryPoint = "main", + } + }; + + if (!m_device->createComputePipelines(nullptr, { &pipelineParams, 1 }, &m_pipeline)) + { + m_logger->log("Fail to create test pipeline", ILogger::ELL_ERROR); + return false; + } + + const auto dsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, pipelineLayout->getDescriptorSetLayouts()); + + m_descriptorSet = dsPool->createDescriptorSet(core::smart_refctd_ptr(pipelineLayout->getDescriptorSetLayouts()[0])); + + auto downStreamingBuffer = m_utils->getDefaultDownStreamingBuffer(); + std::chrono::steady_clock::time_point waitTill(std::chrono::years(45)); + uint32_t outputSize = sizeof(test_sample_t) * m_sampleCount; + m_outputOffset = downStreamingBuffer->invalid_value; + const auto& deviceLimits = m_device->getPhysicalDevice()->getLimits(); + const uint32_t alignment = core::max(deviceLimits.nonCoherentAtomSize,alignof(float)); + downStreamingBuffer->multi_allocate(waitTill, 1, &m_outputOffset, &outputSize, &alignment); + + m_scratchSemaphore = m_device->createSemaphore(0); + if (!m_scratchSemaphore) + { + logFail("Could not create Scratch Semaphore"); + return false; + } + m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); + + m_semaphore = m_device->createSemaphore(0); + if (!m_semaphore) + { + logFail("Could not create Scratch Semaphore"); + return false; + } + m_semaphore->setObjectDebugName("Semaphore"); + m_timelineValue = 0; + + // now convert + m_intendedSubmit.queue = getGraphicsQueue(); + // wait for nothing before upload + m_intendedSubmit.waitSemaphores = {}; + m_intendedSubmit.prevCommandBuffers = {}; + // fill later + m_intendedSubmit.scratchCommandBuffers = {}; + m_intendedSubmit.scratchSemaphore = { + .semaphore = m_scratchSemaphore.get(), + .value = 0, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS 
+ }; + + std::string nextPath; + while (std::getline(m_testPathsFile,nextPath)) + { + if (nextPath!="" && nextPath[0]!=';') + { + m_cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // load the image view + system::path filename, extension; + const core::smart_refctd_ptr imgView = getImageView(nextPath, filename, extension, m_cmdbuf.get()); + + { + EnvmapSampler::SCreationParameters params; + params.utilities = m_utils; + params.assetManager = m_assetMgr; + params.envMap = imgView; + m_envmapImportanceSampling = EnvmapSampler::create(std::move(params)); + m_envmapImportanceSampling->computeWarpMap(getGraphicsQueue()); + } + + const auto lumaMap = m_envmapImportanceSampling->getLumaMapView(); + const auto warpMap = m_envmapImportanceSampling->getWarpMapView(); + + auto downStreamingBuffer = m_utils->getDefaultDownStreamingBuffer(); + + + IGPUDescriptorSet::SDescriptorInfo lumaMapDescriptorInfo = {}; + lumaMapDescriptorInfo.desc = lumaMap; + lumaMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo = {}; + warpMapDescriptorInfo.desc = warpMap; + warpMapDescriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + const IGPUDescriptorSet::SWriteDescriptorSet writes[] = { + { + .dstSet = m_descriptorSet.get(), .binding = 0, .count = 1, .info = &lumaMapDescriptorInfo + }, + { + .dstSet = m_descriptorSet.get(), .binding = 1, .count = 1, .info = &warpMapDescriptorInfo + }, + }; + + m_utils->getLogicalDevice()->updateDescriptorSets(writes, {}); + + const auto warpExtent = warpMap->getCreationParameters().image->getCreationParameters().extent; + const STestPushConstants pc = { + .eps = 5 * 1e-5, + .outputAddress = downStreamingBuffer->getBuffer()->getDeviceAddress() + m_outputOffset, + .warpResolution = uint32_t2(warpExtent.width, warpExtent.height), + .avgLuma = m_envmapImportanceSampling->getAvgLuma(), + }; + + 
m_cmdbuf->bindComputePipeline(m_pipeline.get()); + m_cmdbuf->bindDescriptorSets(EPBP_COMPUTE, m_pipeline->getLayout(), 0, 1, &m_descriptorSet.get()); + m_cmdbuf->pushConstants(m_pipeline->getLayout(), ESS_COMPUTE, 0, sizeof(STestPushConstants), &pc); + m_cmdbuf->dispatch(m_sampleCount / WorkgroupSize, 1, 1); + + m_cmdbuf->end(); + + const IQueue::SSubmitInfo::SSemaphoreInfo signal[1] = {{.semaphore = m_semaphore.get(),.value=++m_timelineValue}}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[1] = {{.cmdbuf=m_cmdbuf.get()}}; + const IQueue::SSubmitInfo submits[1] = {{.commandBuffers=cmdbufs,.signalSemaphores=signal}}; + getGraphicsQueue()->submit(submits); + const ISemaphore::SWaitInfo wait[1] = {{.semaphore=m_semaphore.get(),.value=m_timelineValue}}; + m_device->blockForSemaphores(wait); + + auto* gpuDownstreamingBuffer = downStreamingBuffer->getBuffer(); + if (downStreamingBuffer->needsManualFlushOrInvalidate()) + { + const auto nonCoherentAtomSize = m_device->getPhysicalDevice()->getLimits().nonCoherentAtomSize; + auto flushRange = ILogicalDevice::MappedMemoryRange(gpuDownstreamingBuffer->getBoundMemory().memory,m_outputOffset,m_sampleCount * sizeof(test_sample_t),ILogicalDevice::MappedMemoryRange::align_non_coherent_tag); + m_device->invalidateMappedMemoryRanges(1u,&flushRange); + } + + // Call the function + const uint8_t* bufSrc = reinterpret_cast(downStreamingBuffer->getBufferPointer()) + m_outputOffset; + const auto* testSamples = reinterpret_cast(bufSrc); + + for (uint32_t sample_i = 0; sample_i < m_sampleCount; sample_i++) + { + const auto& testSample = testSamples[sample_i]; + const auto& directOutput = testSample.directOutput; + const auto& cachedOutput = testSample.cachedOutput; + + if (!checkEq(cachedOutput.L, directOutput.L) || !checkEq(cachedOutput.uv, directOutput.uv) || !checkEq(cachedOutput.pdf, directOutput.pdf) || !checkEq(cachedOutput.deferredPdf, directOutput.deferredPdf)) + { + logFail("Failed similarity test between direct sampling 
and cached sampling. Direct Sampling = {uv = (%f, %f), L = (%f, %f %f), pdf = %f, deferredPdf = %f}, Cached Sampling = {uv = (%f, %f), L = (%f, %f %f), pdf = %f, deferredPdf = %f}", directOutput.uv.x, directOutput.uv.y, directOutput.L.x, directOutput.L.y, directOutput.L.z, directOutput.pdf, directOutput.deferredPdf, cachedOutput.uv.x, cachedOutput.uv.y, cachedOutput.L.x, cachedOutput.L.y, cachedOutput.L.z, cachedOutput.pdf, cachedOutput.pdf); + } + + const auto& testOutput = directOutput; + if (testOutput.jacobian < 0.05) continue; + if (const auto diff = abs(1.0f - (testOutput.jacobian * testOutput.pdf)); diff > 0.05) + { + m_logger->log("Failed similarity test of jacobian and pdf for image %s for sample number %d. xi = (%f, %f), uv = (%f, %f), Jacobian = %f, pdf = %f, difference = %f", ILogger::ELL_ERROR, "dummy", sample_i, testSample.xi.x, testSample.xi.y, testOutput.uv.x, testOutput.uv.y, testOutput.jacobian, testOutput.pdf, diff); + continue; + } + + if (const auto diff = abs(1.0f - (testOutput.jacobian * testOutput.deferredPdf)); diff > 0.05) + { + m_logger->log("Failed similarity test of jacobian and pdf for image %s for sample number %d. 
xi = (%f, %f), uv = (%f, %f), Jacobian = %f, deferredPdf = %f, difference = %f", ILogger::ELL_ERROR, "dummy", sample_i, testSample.xi.x, testSample.xi.y, testOutput.uv.x, testOutput.uv.y, testOutput.jacobian, testOutput.deferredPdf, diff); + } + } + } + } + + return true; + } } inline void workLoopBody() override {} @@ -325,7 +351,7 @@ class EnvmapImportanceSamplingTest final : public application_templates::BasicMu inline bool onAppTerminated() override { - return true; + return true; } protected: @@ -387,49 +413,49 @@ class EnvmapImportanceSamplingTest final : public application_templates::BasicMu return nullptr; } - auto converter = CAssetConverter::create({ .device = m_device.get() }); + auto converter = CAssetConverter::create({ .device = m_device.get() }); - // Test the provision of a custom patch this time - CAssetConverter::patch_t patch(cpuView.get(), IImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT); + // Test the provision of a custom patch this time + CAssetConverter::patch_t patch(cpuView.get(), IImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT); - // We don't want to generate mip-maps for these images (YET), to ensure that we must override the default callbacks. - struct SInputs final : CAssetConverter::SInputs - { - inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override - { - return image->getCreationParameters().mipLevels; - } - inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override - { - return 0b0u; - } - } inputs = {}; - std::get>(inputs.assets) = { &cpuView.get(),1 }; - std::get>(inputs.patches) = { &patch,1 }; - inputs.logger = m_logger.get(); + // We don't want to generate mip-maps for these images (YET), to ensure that we must override the default callbacks. 
+ struct SInputs final : CAssetConverter::SInputs + { + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; + } + } inputs = {}; + std::get>(inputs.assets) = { &cpuView.get(),1 }; + std::get>(inputs.patches) = { &patch,1 }; + inputs.logger = m_logger.get(); - // - auto reservation = converter->reserve(inputs); + // + auto reservation = converter->reserve(inputs); - // get the created image view - auto gpuView = reservation.getGPUObjects().front().value; + // get the created image view + auto gpuView = reservation.getGPUObjects().front().value; - if (!gpuView) - return nullptr; + if (!gpuView) + return nullptr; - gpuView->getCreationParameters().image->setObjectDebugName(inAssetPath.c_str()); + gpuView->getCreationParameters().image->setObjectDebugName(inAssetPath.c_str()); - // we should multi-buffer to not stall before renderpass recording but oh well - IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; + // we should multi-buffer to not stall before renderpass recording but oh well + IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; - m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo,1 }; - CAssetConverter::SConvertParams params = {}; - params.transfer = &m_intendedSubmit; - params.utilities = m_utils.get(); - auto result = reservation.convert(params); + m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo,1 }; + CAssetConverter::SConvertParams params = {}; + params.transfer = &m_intendedSubmit; + params.utilities = m_utils.get(); + auto result = reservation.convert(params); - if (result.copy() != IQueue::RESULT::SUCCESS) - return nullptr; + if (result.copy() != IQueue::RESULT::SUCCESS) + return nullptr; return gpuView; @@ 
-440,19 +466,19 @@ class EnvmapImportanceSamplingTest final : public application_templates::BasicMu smart_refctd_ptr m_scratchSemaphore; smart_refctd_ptr m_semaphore; - uint64_t m_timelineValue; + uint64_t m_timelineValue; smart_refctd_ptr m_cmdPool; SIntendedSubmitInfo m_intendedSubmit; smart_refctd_ptr m_cmdbuf; - core::smart_refctd_ptr m_pipeline; - core::smart_refctd_ptr m_descriptorSet; - core::smart_refctd_ptr m_outputBuffer; + core::smart_refctd_ptr m_pipeline; + core::smart_refctd_ptr m_descriptorSet; + core::smart_refctd_ptr m_outputBuffer; - uint32_t m_sampleCount = 10000; - uint32_t m_outputOffset; + uint32_t m_sampleCount = 10000; + uint32_t m_outputOffset; };